src/spell-check-handler.js
import {CompositeDisposable, Disposable, Observable, Scheduler, SerialDisposable, Subject} from 'rx';
import {getInstalledKeyboardLanguages} from 'keyboard-layout';
import {spawn} from 'spawn-rx';
import './custom-operators';
import DictionarySync from './dictionary-sync';
import {normalizeLanguageCode} from './utility';
import FakeLocalStorage from './fake-local-storage';
import {Spellchecker} from './node-spellchecker';
let d = require('debug-electron')('electron-spellchecker:spell-check-handler');
let cld = null;
let fallbackLocaleTable = null;
let webFrame = (process.type === 'renderer' ?
require('electron').webFrame :
null);
// NB: Linux and Windows uses underscore in languages (i.e. 'en_US'), whereas
// we're trying really hard to match the Chromium way of `en-US`
const validLangCodeWindowsLinux = /[a-z]{2}[_][A-Z]{2}/;
// NB: This is to work around electron/electron#1005, where contractions
// are incorrectly marked as spelling errors. This lets people get away with
// incorrectly spelled contracted words, but it's the best we can do for now.
const contractions = [
"ain't", "aren't", "can't", "could've", "couldn't", "couldn't've", "didn't", "doesn't", "don't", "hadn't",
"hadn't've", "hasn't", "haven't", "he'd", "he'd've", "he'll", "he's", "how'd", "how'll", "how's", "I'd",
"I'd've", "I'll", "I'm", "I've", "isn't", "it'd", "it'd've", "it'll", "it's", "let's", "ma'am", "mightn't",
"mightn't've", "might've", "mustn't", "must've", "needn't", "not've", "o'clock", "shan't", "she'd", "she'd've",
"she'll", "she's", "should've", "shouldn't", "shouldn't've", "that'll", "that's", "there'd", "there'd've",
"there're", "there's", "they'd", "they'd've", "they'll", "they're", "they've", "wasn't", "we'd", "we'd've",
"we'll", "we're", "we've", "weren't", "what'll", "what're", "what's", "what've", "when's", "where'd",
"where's", "where've", "who'd", "who'll", "who're", "who's", "who've", "why'll", "why're", "why's", "won't",
"would've", "wouldn't", "wouldn't've", "y'all", "y'all'd've", "you'd", "you'd've", "you'll", "you're", "you've"
];
const contractionMap = contractions.reduce((acc, word) => {
acc[word.replace(/'.*/, '')] = true;
return acc;
}, {});
/**
* This method mimics Observable.fromEvent, but with capture semantics.
*/
function fromEventCapture(element, name) {
return Observable.create((subj) => {
const handler = function(...args) {
if (args.length > 1) {
subj.onNext(args);
} else {
subj.onNext(args[0] || true);
}
};
element.addEventListener(name, handler, true);
return Disposable.create(() => element.removeEventListener(name, handler, true));
});
}
/**
* SpellCheckHandler is the main class of this library, and handles all of the
* different pieces of spell checking except for the context menu information.
*
* Instantiate the class, then call {{attachToInput}} to wire it up. The spell
* checker will attempt to automatically check the language that the user is
* typing in and switch on-the fly. However, giving it an explicit hint by
* calling {{switchLanguage}}, or providing it a block of sample text via
* {{provideHintText}} will result in much better results.
*
* Sample text should be text that is reasonably likely to be in the same language
* as the user typing - for example, in an Email reply box, the original Email text
* would be a great sample, or in the case of Slack, the existing channel messages
* are used as the sample text.
*/
export default class SpellCheckHandler {
/**
* Constructs a SpellCheckHandler
*
* @param {DictionarySync} dictionarySync An instance of {{DictionarySync}},
* create a custom one if you want
* to override the dictionary cache
* location.
* @param {LocalStorage} localStorage An implementation of localStorage
* used for testing.
* @param {Scheduler} scheduler The Rx scheduler to use, for
* testing.
*/
constructor(dictionarySync=null, localStorage=null, scheduler=null) {
this.dictionarySync = dictionarySync || new DictionarySync();
this.switchToLanguage = new Subject();
this.currentSpellchecker = null;
this.currentSpellcheckerLanguage = null;
this.currentSpellcheckerChanged = new Subject();
this.spellCheckInvoked = new Subject();
this.spellingErrorOccurred = new Subject();
this.scheduler = scheduler || Scheduler.default;
this.shouldAutoCorrect = true;
// NB: A Cool thing is when window.localStorage is rigged to blow up
// if you touch it from a data: URI in Chromium.
try {
this.localStorage = localStorage || window.localStorage || new FakeLocalStorage();
} catch (ugh) {
this.localStorage = new FakeLocalStorage();
}
this.disp = new SerialDisposable();
if (process.platform === 'darwin') {
// NB: OS X does automatic language detection, we're gonna trust it
this.currentSpellchecker = new Spellchecker();
this.currentSpellcheckerLanguage = 'en-US';
if (webFrame) {
webFrame.setSpellCheckProvider(
this.currentSpellcheckerLanguage,
this.shouldAutoCorrect,
{ spellCheck: this.handleElectronSpellCheck.bind(this) });
}
return;
}
}
/**
* Disconnect the events that we connected in {{attachToInput}} or other places
* in the class.
*/
dispose() {
this.disp.dispose();
}
/**
* Override the default logger for this class. You probably want to use
* {{setGlobalLogger}} instead
*
* @param {Function} fn The function which will operate like console.log
*/
static setLogger(fn) {
d = fn;
}
/**
* Attach to document.body and register ourselves for Electron spell checking.
* This method will start to watch text entered by the user and automatically
* switch languages as well as enable Electron spell checking (i.e. the red
* squigglies).
*
* @param {Observable<String>} inputText Simulate the user typing text,
* for testing.
*
* @return {Disposable} A Disposable which will unregister all of the
* things that this method registered.
*/
attachToInput(inputText=null) {
// OS X has no need for any of this
if (process.platform === 'darwin' && !inputText) {
return Disposable.empty;
}
let possiblySwitchedCharacterSets = new Subject();
let wordsTyped = 0;
let input = inputText || (fromEventCapture(document.body, 'input')
.flatMap((e) => {
if (!e.target || !e.target.value) return Observable.empty();
if (e.target.value.match(/\S\s$/)) {
wordsTyped++;
}
if (wordsTyped > 2) {
d(`${wordsTyped} words typed without spell checking invoked, redetecting language`);
possiblySwitchedCharacterSets.onNext(true);
}
return Observable.just(e.target.value);
}));
let disp = new CompositeDisposable();
// NB: When users switch character sets (i.e. we're checking in English and
// the user suddenly starts typing in Russian), the spellchecker will no
// longer invoke us, so we don't have a chance to re-detect the language.
//
// If we see too many words typed without a spelling detection, we know we
// should start rechecking the input box for a language change.
disp.add(Observable.merge(this.spellCheckInvoked, this.currentSpellcheckerChanged)
.subscribe(() => wordsTyped = 0));
let lastInputText = '';
disp.add(input.subscribe((x) => lastInputText = x));
let initialInputText = input
.guaranteedThrottle(250, this.scheduler)
.takeUntil(this.currentSpellcheckerChanged);
if (this.currentSpellcheckerLanguage) {
initialInputText = Observable.empty();
}
let contentToCheck = Observable.merge(
this.spellingErrorOccurred,
initialInputText,
possiblySwitchedCharacterSets)
.observeOn(this.scheduler)
.flatMap(() => {
if (lastInputText.length < 8) return Observable.empty();
return Observable.just(lastInputText);
});
let languageDetectionMatches = contentToCheck
.flatMap((text) => {
d(`Attempting detection of ${text}`);
return Observable.fromPromise(this.detectLanguageForText(text))
.catch(() => Observable.empty());
});
disp.add(languageDetectionMatches
.flatMap(async (langWithoutLocale) => {
d(`Auto-detected language as ${langWithoutLocale}`);
let lang = await this.getLikelyLocaleForLanguage(langWithoutLocale);
if (lang !== this.currentSpellcheckerLanguage) await this.switchLanguage(lang);
return lang;
})
.catch((e) => {
d(`Failed to load dictionary: ${e.message}`);
return Observable.empty();
})
.subscribe(async (lang) => {
d(`New Language is ${lang}`);
}));
if (webFrame) {
disp.add(this.currentSpellcheckerChanged
.startWith(true)
.observeOn(this.scheduler)
.where(() => this.currentSpellchecker)
.subscribe(() => {
d('Actually installing spell check provider to Electron');
webFrame.setSpellCheckProvider(
this.currentSpellcheckerLanguage,
this.shouldAutoCorrect,
{ spellCheck: this.handleElectronSpellCheck.bind(this) });
}));
}
this.disp.setDisposable(disp);
return disp;
}
/**
* autoUnloadDictionariesOnBlur attempts to save memory by unloading
* dictionaries when the window loses focus.
*
* @return {Disposable} A {{Disposable}} that will unhook the events listened
* to by this method.
*/
autoUnloadDictionariesOnBlur() {
let ret = new CompositeDisposable();
let hasUnloaded = false;
if (process.platform === 'darwin') return Disposable.empty;
ret.add(Observable.fromEvent(window, 'blur').subscribe(() => {
d(`Unloading spellchecker`);
this.currentSpellchecker = null;
hasUnloaded = true;
}));
ret.add(Observable.fromEvent(window, 'focus').flatMap(() => {
if (!hasUnloaded) return Observable.empty();
if (!this.currentSpellcheckerLanguage) return Observable.empty();
d(`Restoring spellchecker`);
return Observable.fromPromise(this.switchLanguage(this.currentSpellcheckerLanguage))
.catch((e) => {
d(`Failed to restore spellchecker: ${e.message}`);
return Observable.empty();
});
}).subscribe());
return ret;
}
/**
* Switch the dictionary language to the language of the sample text provided.
* As described in the class documentation, call this method with text most
* likely in the same language as the user is typing. The locale (i.e. *US* vs
* *UK* vs *AU*) will be inferred heuristically based on the user's computer.
*
* @param {String} inputText A language code (i.e. 'en-US')
*
* @return {Promise} Completion
*/
async provideHintText(inputText) {
let langWithoutLocale = null;
try {
langWithoutLocale = await this.detectLanguageForText(inputText);
} catch (e) {
d(`Couldn't detect language for text '${inputText}': ${e.message}, ignoring sample`);
return;
}
let lang = await this.getLikelyLocaleForLanguage(langWithoutLocale);
await this.switchLanguage(lang);
}
/**
* Explicitly switch the language to a specific language. This method will
* automatically download the dictionary for the specific language and locale
* and on failure, will attempt to switch to dictionaries that are the same
* language but a default locale.
*
* @param {String} langCode A language code (i.e. 'en-US')
*
* @return {Promise} Completion
*/
async switchLanguage(langCode) {
let actualLang;
let dict = null;
try {
let {dictionary, language} = await this.loadDictionaryForLanguageWithAlternatives(langCode);
actualLang = language; dict = dictionary;
} catch (e) {
d(`Failed to load dictionary ${langCode}: ${e.message}`);
throw e;
}
d(`Setting current spellchecker to ${actualLang}, requested language was ${langCode}`);
if (this.currentSpellcheckerLanguage !== actualLang || !this.currentSpellchecker) {
d(`Creating node-spellchecker instance`);
this.currentSpellchecker = new Spellchecker();
this.currentSpellchecker.setDictionary(actualLang, dict);
this.currentSpellcheckerLanguage = actualLang;
this.currentSpellcheckerChanged.onNext(true);
}
}
/**
* Loads a dictionary and attempts to use fallbacks if it fails.
* @private
*/
async loadDictionaryForLanguageWithAlternatives(langCode, cacheOnly=false) {
const localStorageKey = 'electronSpellchecker_alternatesTable';
this.fallbackLocaleTable = this.fallbackLocaleTable || require('./fallback-locales');
let lang = langCode.substring(0, 2);
let alternatives = [langCode, await this.getLikelyLocaleForLanguage(lang), this.fallbackLocaleTable[lang]];
let alternatesTable = JSON.parse(this.localStorage.getItem(localStorageKey) || '{}');
if (langCode in alternatesTable) {
try {
return {
language: alternatesTable[langCode],
dictionary: await this.dictionarySync.loadDictionaryForLanguage(alternatesTable[langCode])
};
} catch (e) {
// If we fail to load a saved alternate, this is an indicator that our
// data is garbage and we should throw it out entirely.
this.localStorage.setItem(localStorageKey, '{}');
}
}
d(`Requesting to load ${langCode}, alternatives are ${JSON.stringify(alternatives)}`);
return await Observable.of(...alternatives)
.concatMap((l) => {
return Observable.defer(() =>
Observable.fromPromise(this.dictionarySync.loadDictionaryForLanguage(l, cacheOnly)))
.map((d) => ({language: l, dictionary: d}))
.do(({language}) => {
alternatesTable[langCode] = language;
this.localStorage.setItem(localStorageKey, JSON.stringify(alternatesTable));
})
.catch(() => Observable.just(null));
})
.filter((x) => x !== null)
.take(1)
.toPromise();
}
/**
* The actual callout called by Electron to handle spellchecking
* @private
*/
handleElectronSpellCheck(text) {
if (!this.currentSpellchecker) return true;
this.spellCheckInvoked.onNext(true);
if (contractionMap[text.toLocaleLowerCase()]) return true;
// NB: I'm not smart enough to fix this bug in Chromium's version of
// Hunspell so I'm going to fix it here instead. Chromium Hunspell for
// whatever reason marks the first word in a sentence as mispelled if it is
// capitalized.
let result = this.currentSpellchecker.checkSpelling(text);
if (result.length < 1) return true;
if (result[0].start !== 0) {
this.spellingErrorOccurred.onNext(text);
return false;
}
let ret = this.currentSpellchecker.isMisspelled(text.toLocaleLowerCase());
if (ret) {
this.spellingErrorOccurred.onNext(text);
}
return !ret;
}
/**
* Calls out to cld2 to detect the language of the given text
* @private
*/
detectLanguageForText(text) {
// NB: Unfortuantely cld marshals errors incorrectly, so we can't use pify
cld = cld || require('cld');
return new Promise((res,rej) => {
cld.detect(text, (err, result) => {
if (err) { rej(new Error(err.message)); return; }
if (!result.reliable || result.languages[0].percent < 85) {
rej(new Error('Not enough reliable text'));
return;
}
res(result.languages[0].code);
});
});
}
/**
* Returns the locale for a language code based on the user's machine (i.e.
* 'en' => 'en-GB')
*/
async getLikelyLocaleForLanguage(language) {
let lang = language.toLowerCase();
if (!this.likelyLocaleTable) this.likelyLocaleTable = await this.buildLikelyLocaleTable();
if (this.likelyLocaleTable[lang]) return this.likelyLocaleTable[lang];
this.fallbackLocaleTable = this.fallbackLocaleTable || require('./fallback-locales');
return this.fallbackLocaleTable[lang];
}
/**
* A proxy for the current spellchecker's method of the same name
* @private
*/
async getCorrectionsForMisspelling(text) {
// NB: This is async even though we don't use await, to make it easy for
// ContextMenuBuilder to use this method even when it's hosted in another
// renderer process via electron-remote.
if (!this.currentSpellchecker) {
return null;
}
return this.currentSpellchecker.getCorrectionsForMisspelling(text);
}
/**
* A proxy for the current spellchecker's method of the same name
* @private
*/
async addToDictionary(text) {
// NB: Same deal as getCorrectionsForMisspelling.
if (process.platform !== 'darwin') return;
if (!this.currentSpellchecker) return;
this.currentSpellchecker.add(text);
}
/**
* Call out to the OS to figure out what locales the user is probably
* interested in then save it off as a table.
* @private
*/
async buildLikelyLocaleTable() {
let localeList = [];
if (process.platform === 'linux') {
let locales = await spawn('locale', ['-a'])
.catch(() => Observable.just(null))
.reduce((acc,x) => { acc.push(...x.split('\n')); return acc; }, [])
.toPromise();
d(`Raw Locale list: ${JSON.stringify(locales)}`);
localeList = locales.reduce((acc, x) => {
let m = x.match(validLangCodeWindowsLinux);
if (!m) return acc;
acc.push(m[0]);
return acc;
}, []);
}
if (process.platform === 'win32') {
localeList = getInstalledKeyboardLanguages();
}
if (process.platform === 'darwin') {
fallbackLocaleTable = fallbackLocaleTable || require('./fallback-locales');
// NB: OS X will return lists that are half just a language, half
// language + locale, like ['en', 'pt_BR', 'ko']
localeList = this.currentSpellchecker.getAvailableDictionaries()
.map((x => {
if (x.length === 2) return fallbackLocaleTable[x];
return normalizeLanguageCode(x);
}));
}
d(`Filtered Locale list: ${JSON.stringify(localeList)}`);
// Some distros like Ubuntu make locale -a useless by dumping
// every possible locale for the language into the list :-/
let counts = localeList.reduce((acc,x) => {
let k = x.substring(0,2);
acc[k] = acc[k] || [];
acc[k].push(x);
return acc;
}, {});
d(`Counts: ${JSON.stringify(counts)}`);
let ret = Object.keys(counts).reduce((acc, x) => {
if (counts[x].length > 1) return acc;
d(`Setting ${x}`);
acc[x] = normalizeLanguageCode(counts[x][0]);
return acc;
}, {});
// NB: LANG has a Special Place In Our Hearts
if (process.platform === 'linux' && process.env.LANG) {
let m = process.env.LANG.match(validLangCodeWindowsLinux);
if (!m) return ret;
ret[m[0].substring(0, 2)] = normalizeLanguageCode(m[0]);
}
d(`Result: ${JSON.stringify(ret)}`);
return ret;
}
}