From 72b1c9394105b6fbc0d8c6ff00f3574ee37a9aaa Mon Sep 17 00:00:00 2001
From: Jean Chalard <jchalard@google.com>
Date: Fri, 31 Aug 2012 15:24:39 +0900
Subject: [PATCH] Reinstate the shortcut-only attribute

Also add the blacklist attribute

Bug: 7005742
Bug: 2704000
Change-Id: Icbe60bdf25bfb098d9e3f20870be30d6aef07c9d
---
 .../latin/ExpandableBinaryDictionary.java     |  4 +-
 .../latin/makedict/BinaryDictInputOutput.java | 26 +++++--
 .../latin/makedict/FusionDictionary.java      | 76 ++++++++++++++-----
 .../inputmethod/latin/makedict/Word.java      | 15 +++-
 native/jni/src/binary_format.h                |  4 +
 native/jni/src/terminal_attributes.h          |  4 +
 native/jni/src/unigram_dictionary.cpp         | 10 ++-
 .../inputmethod/latin/BinaryDictIOTests.java  |  2 +-
 .../latin/FusionDictionaryTests.java          |  8 +-
 .../latin/dicttool/XmlDictInputOutput.java    |  8 +-
 .../makedict/BinaryDictInputOutputTest.java   | 10 +--
 11 files changed, 125 insertions(+), 42 deletions(-)

diff --git a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java
index cdf5247de..8a509be48 100644
--- a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java
@@ -172,12 +172,12 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
     // considering performance regression.
     protected void addWord(final String word, final String shortcutTarget, final int frequency) {
         if (shortcutTarget == null) {
-            mFusionDictionary.add(word, frequency, null);
+            mFusionDictionary.add(word, frequency, null, false /* isNotAWord */);
         } else {
             // TODO: Do this in the subclass, with this class taking an arraylist.
             final ArrayList<WeightedString> shortcutTargets = CollectionUtils.newArrayList();
             shortcutTargets.add(new WeightedString(shortcutTarget, frequency));
-            mFusionDictionary.add(word, frequency, shortcutTargets);
+            mFusionDictionary.add(word, frequency, shortcutTargets, false /* isNotAWord */);
         }
     }
 
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
index d4f7cab5c..0c6b9c319 100644
--- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
+++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
@@ -55,6 +55,8 @@ public class BinaryDictInputOutput {
      * s | has a terminal ?            1 bit, 1 = yes, 0 = no   : FLAG_IS_TERMINAL
      *   | has shortcut targets ?      1 bit, 1 = yes, 0 = no   : FLAG_HAS_SHORTCUT_TARGETS
      *   | has bigrams ?               1 bit, 1 = yes, 0 = no   : FLAG_HAS_BIGRAMS
+     *   | is not a word ?             1 bit, 1 = yes, 0 = no   : FLAG_IS_NOT_A_WORD
+     *   | is blacklisted ?            1 bit, 1 = yes, 0 = no   : FLAG_IS_BLACKLISTED
      *
      * c | IF FLAG_HAS_MULTIPLE_CHARS
      * h |   char, char, char, char    n * (1 or 3 bytes) : use CharGroupInfo for i/o helpers
@@ -153,6 +155,8 @@ public class BinaryDictInputOutput {
     private static final int FLAG_IS_TERMINAL = 0x10;
     private static final int FLAG_HAS_SHORTCUT_TARGETS = 0x08;
     private static final int FLAG_HAS_BIGRAMS = 0x04;
+    private static final int FLAG_IS_NOT_A_WORD = 0x02;
+    private static final int FLAG_IS_BLACKLISTED = 0x01;
 
     private static final int FLAG_ATTRIBUTE_HAS_NEXT = 0x80;
     private static final int FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40;
@@ -778,6 +782,12 @@ public class BinaryDictInputOutput {
             }
             flags |= FLAG_HAS_BIGRAMS;
         }
+        if (group.mIsNotAWord) {
+            flags |= FLAG_IS_NOT_A_WORD;
+        }
+        if (group.mIsBlacklistEntry) {
+            flags |= FLAG_IS_BLACKLISTED;
+        }
         return flags;
     }
 
@@ -1352,12 +1362,14 @@ public class BinaryDictInputOutput {
                     buffer.position(currentPosition);
                 }
                 nodeContents.add(
-                        new CharGroup(info.mCharacters, shortcutTargets,
-                                bigrams, info.mFrequency, children));
+                        new CharGroup(info.mCharacters, shortcutTargets, bigrams, info.mFrequency,
+                                0 != (info.mFlags & FLAG_IS_NOT_A_WORD),
+                                0 != (info.mFlags & FLAG_IS_BLACKLISTED), children));
             } else {
                 nodeContents.add(
-                        new CharGroup(info.mCharacters, shortcutTargets,
-                                bigrams, info.mFrequency));
+                        new CharGroup(info.mCharacters, shortcutTargets, bigrams, info.mFrequency,
+                                0 != (info.mFlags & FLAG_IS_NOT_A_WORD),
+                                0 != (info.mFlags & FLAG_IS_BLACKLISTED)));
             }
             groupOffset = info.mEndAddress;
         }
@@ -1478,7 +1490,11 @@ public class BinaryDictInputOutput {
                         0 != (optionsFlags & FRENCH_LIGATURE_PROCESSING_FLAG)));
         if (null != dict) {
             for (final Word w : dict) {
-                newDict.add(w.mWord, w.mFrequency, w.mShortcutTargets);
+                if (w.mIsBlacklistEntry) {
+                    newDict.addBlacklistEntry(w.mWord, w.mShortcutTargets, w.mIsNotAWord);
+                } else {
+                    newDict.add(w.mWord, w.mFrequency, w.mShortcutTargets, w.mIsNotAWord);
+                }
             }
             for (final Word w : dict) {
                 // By construction a binary dictionary may not have bigrams pointing to
diff --git a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
index 7c15ba54d..f1abea9ec 100644
--- a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
+++ b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java
@@ -101,26 +101,34 @@ public class FusionDictionary implements Iterable<Word> {
         ArrayList<WeightedString> mBigrams;
         int mFrequency; // NOT_A_TERMINAL == mFrequency indicates this is not a terminal.
         Node mChildren;
+        boolean mIsNotAWord; // Only a shortcut
+        boolean mIsBlacklistEntry;
         // The two following members to help with binary generation
         int mCachedSize;
         int mCachedAddress;
 
         public CharGroup(final int[] chars, final ArrayList<WeightedString> shortcutTargets,
-                final ArrayList<WeightedString> bigrams, final int frequency) {
+                final ArrayList<WeightedString> bigrams, final int frequency,
+                final boolean isNotAWord, final boolean isBlacklistEntry) {
             mChars = chars;
             mFrequency = frequency;
             mShortcutTargets = shortcutTargets;
             mBigrams = bigrams;
             mChildren = null;
+            mIsNotAWord = isNotAWord;
+            mIsBlacklistEntry = isBlacklistEntry;
         }
 
         public CharGroup(final int[] chars, final ArrayList<WeightedString> shortcutTargets,
-                final ArrayList<WeightedString> bigrams, final int frequency, final Node children) {
+                final ArrayList<WeightedString> bigrams, final int frequency,
+                final boolean isNotAWord, final boolean isBlacklistEntry, final Node children) {
             mChars = chars;
             mFrequency = frequency;
             mShortcutTargets = shortcutTargets;
             mBigrams = bigrams;
             mChildren = children;
+            mIsNotAWord = isNotAWord;
+            mIsBlacklistEntry = isBlacklistEntry;
         }
 
         public void addChild(CharGroup n) {
@@ -197,8 +205,9 @@ public class FusionDictionary implements Iterable<Word> {
          * the existing ones if any. Note: unigram, bigram, and shortcut frequencies are only
          * updated if they are higher than the existing ones.
          */
-        public void update(int frequency, ArrayList<WeightedString> shortcutTargets,
-                ArrayList<WeightedString> bigrams) {
+        public void update(final int frequency, final ArrayList<WeightedString> shortcutTargets,
+                final ArrayList<WeightedString> bigrams,
+                final boolean isNotAWord, final boolean isBlacklistEntry) {
             if (frequency > mFrequency) {
                 mFrequency = frequency;
             }
@@ -234,6 +243,8 @@ public class FusionDictionary implements Iterable<Word> {
                     }
                 }
             }
+            mIsNotAWord = isNotAWord;
+            mIsBlacklistEntry = isBlacklistEntry;
         }
     }
 
@@ -296,10 +307,24 @@ public class FusionDictionary implements Iterable<Word> {
      * @param word the word to add.
      * @param frequency the frequency of the word, in the range [0..255].
      * @param shortcutTargets a list of shortcut targets for this word, or null.
+     * @param isNotAWord true if this should not be considered a word (e.g. shortcut only)
      */
     public void add(final String word, final int frequency,
-            final ArrayList<WeightedString> shortcutTargets) {
-        add(getCodePoints(word), frequency, shortcutTargets);
+            final ArrayList<WeightedString> shortcutTargets, final boolean isNotAWord) {
+        add(getCodePoints(word), frequency, shortcutTargets, isNotAWord,
+                false /* isBlacklistEntry */);
+    }
+
+    /**
+     * Helper method to add a blacklist entry as a string.
+     *
+     * @param word the word to add as a blacklist entry.
+     * @param shortcutTargets a list of shortcut targets for this word, or null.
+     * @param isNotAWord true if this is not a word for spellchecking purposes (e.g. shortcut only)
+     */
+    public void addBlacklistEntry(final String word,
+            final ArrayList<WeightedString> shortcutTargets, final boolean isNotAWord) {
+        add(getCodePoints(word), 0, shortcutTargets, isNotAWord, true /* isBlacklistEntry */);
     }
 
     /**
@@ -332,7 +357,8 @@ public class FusionDictionary implements Iterable<Word> {
         if (charGroup != null) {
             final CharGroup charGroup2 = findWordInTree(mRoot, word2);
             if (charGroup2 == null) {
-                add(getCodePoints(word2), 0, null);
+                add(getCodePoints(word2), 0, null, false /* isNotAWord */,
+                        false /* isBlacklistEntry */);
             }
             charGroup.addBigram(word2, frequency);
         } else {
@@ -349,9 +375,12 @@ public class FusionDictionary implements Iterable<Word> {
      * @param word the word, as an int array.
      * @param frequency the frequency of the word, in the range [0..255].
      * @param shortcutTargets an optional list of shortcut targets for this word (null if none).
+     * @param isNotAWord true if this is not a word for spellchecking purposes (e.g. shortcut only)
+     * @param isBlacklistEntry true if this is a blacklisted word, false otherwise
      */
     private void add(final int[] word, final int frequency,
-            final ArrayList<WeightedString> shortcutTargets) {
+            final ArrayList<WeightedString> shortcutTargets,
+            final boolean isNotAWord, final boolean isBlacklistEntry) {
         assert(frequency >= 0 && frequency <= 255);
         Node currentNode = mRoot;
         int charIndex = 0;
@@ -376,7 +405,7 @@ public class FusionDictionary implements Iterable<Word> {
             final int insertionIndex = findInsertionIndex(currentNode, word[charIndex]);
             final CharGroup newGroup = new CharGroup(
                     Arrays.copyOfRange(word, charIndex, word.length),
-                    shortcutTargets, null /* bigrams */, frequency);
+                    shortcutTargets, null /* bigrams */, frequency, isNotAWord, isBlacklistEntry);
             currentNode.mData.add(insertionIndex, newGroup);
             if (DBG) checkStack(currentNode);
         } else {
@@ -386,13 +415,15 @@ public class FusionDictionary implements Iterable<Word> {
                     // The new word is a prefix of an existing word, but the node on which it
                     // should end already exists as is. Since the old CharNode was not a terminal, 
                     // make it one by filling in its frequency and other attributes
-                    currentGroup.update(frequency, shortcutTargets, null);
+                    currentGroup.update(frequency, shortcutTargets, null, isNotAWord,
+                            isBlacklistEntry);
                 } else {
                     // The new word matches the full old word and extends past it.
                     // We only have to create a new node and add it to the end of this.
                     final CharGroup newNode = new CharGroup(
                             Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length),
-                                    shortcutTargets, null /* bigrams */, frequency);
+                                    shortcutTargets, null /* bigrams */, frequency, isNotAWord,
+                                    isBlacklistEntry);
                     currentGroup.mChildren = new Node();
                     currentGroup.mChildren.mData.add(newNode);
                 }
@@ -400,7 +431,9 @@ public class FusionDictionary implements Iterable<Word> {
                 if (0 == differentCharIndex) {
                     // Exact same word. Update the frequency if higher. This will also add the
                     // new shortcuts to the existing shortcut list if it already exists.
-                    currentGroup.update(frequency, shortcutTargets, null);
+                    currentGroup.update(frequency, shortcutTargets, null,
+                            currentGroup.mIsNotAWord && isNotAWord,
+                            currentGroup.mIsBlacklistEntry || isBlacklistEntry);
                 } else {
                     // Partial prefix match only. We have to replace the current node with a node
                     // containing the current prefix and create two new ones for the tails.
@@ -408,21 +441,26 @@ public class FusionDictionary implements Iterable<Word> {
                     final CharGroup newOldWord = new CharGroup(
                             Arrays.copyOfRange(currentGroup.mChars, differentCharIndex,
                                     currentGroup.mChars.length), currentGroup.mShortcutTargets,
-                            currentGroup.mBigrams, currentGroup.mFrequency, currentGroup.mChildren);
+                            currentGroup.mBigrams, currentGroup.mFrequency,
+                            currentGroup.mIsNotAWord, currentGroup.mIsBlacklistEntry,
+                            currentGroup.mChildren);
                     newChildren.mData.add(newOldWord);
 
                     final CharGroup newParent;
                     if (charIndex + differentCharIndex >= word.length) {
                         newParent = new CharGroup(
                                 Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex),
-                                shortcutTargets, null /* bigrams */, frequency, newChildren);
+                                shortcutTargets, null /* bigrams */, frequency,
+                                isNotAWord, isBlacklistEntry, newChildren);
                     } else {
                         newParent = new CharGroup(
                                 Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex),
-                                null /* shortcutTargets */, null /* bigrams */, -1, newChildren);
+                                null /* shortcutTargets */, null /* bigrams */, -1, 
+                                false /* isNotAWord */, false /* isBlacklistEntry */, newChildren);
                         final CharGroup newWord = new CharGroup(Arrays.copyOfRange(word,
                                 charIndex + differentCharIndex, word.length),
-                                shortcutTargets, null /* bigrams */, frequency);
+                                shortcutTargets, null /* bigrams */, frequency,
+                                isNotAWord, isBlacklistEntry);
                         final int addIndex = word[charIndex + differentCharIndex]
                                 > currentGroup.mChars[differentCharIndex] ? 1 : 0;
                         newChildren.mData.add(addIndex, newWord);
@@ -483,7 +521,8 @@ public class FusionDictionary implements Iterable<Word> {
     private static int findInsertionIndex(final Node node, int character) {
         final ArrayList<CharGroup> data = node.mData;
         final CharGroup reference = new CharGroup(new int[] { character },
-                null /* shortcutTargets */, null /* bigrams */, 0);
+                null /* shortcutTargets */, null /* bigrams */, 0, false /* isNotAWord */,
+                false /* isBlacklistEntry */);
         int result = Collections.binarySearch(data, reference, CHARGROUP_COMPARATOR);
         return result >= 0 ? result : -result - 1;
     }
@@ -748,7 +787,8 @@ public class FusionDictionary implements Iterable<Word> {
                     }
                     if (currentGroup.mFrequency >= 0)
                         return new Word(mCurrentString.toString(), currentGroup.mFrequency,
-                                currentGroup.mShortcutTargets, currentGroup.mBigrams);
+                                currentGroup.mShortcutTargets, currentGroup.mBigrams,
+                                currentGroup.mIsNotAWord, currentGroup.mIsBlacklistEntry);
                 } else {
                     mPositions.removeLast();
                     currentPos = mPositions.getLast();
diff --git a/java/src/com/android/inputmethod/latin/makedict/Word.java b/java/src/com/android/inputmethod/latin/makedict/Word.java
index 65fc72c40..4683ef154 100644
--- a/java/src/com/android/inputmethod/latin/makedict/Word.java
+++ b/java/src/com/android/inputmethod/latin/makedict/Word.java
@@ -31,16 +31,21 @@ public class Word implements Comparable<Word> {
     public final int mFrequency;
     public final ArrayList<WeightedString> mShortcutTargets;
     public final ArrayList<WeightedString> mBigrams;
+    public final boolean mIsNotAWord;
+    public final boolean mIsBlacklistEntry;
 
     private int mHashCode = 0;
 
     public Word(final String word, final int frequency,
             final ArrayList<WeightedString> shortcutTargets,
-            final ArrayList<WeightedString> bigrams) {
+            final ArrayList<WeightedString> bigrams,
+            final boolean isNotAWord, final boolean isBlacklistEntry) {
         mWord = word;
         mFrequency = frequency;
         mShortcutTargets = shortcutTargets;
         mBigrams = bigrams;
+        mIsNotAWord = isNotAWord;
+        mIsBlacklistEntry = isBlacklistEntry;
     }
 
     private static int computeHashCode(Word word) {
@@ -48,7 +53,9 @@ public class Word implements Comparable<Word> {
                 word.mWord,
                 word.mFrequency,
                 word.mShortcutTargets.hashCode(),
-                word.mBigrams.hashCode()
+                word.mBigrams.hashCode(),
+                word.mIsNotAWord,
+                word.mIsBlacklistEntry
         });
     }
 
@@ -78,7 +85,9 @@ public class Word implements Comparable<Word> {
         Word w = (Word)o;
         return mFrequency == w.mFrequency && mWord.equals(w.mWord)
                 && mShortcutTargets.equals(w.mShortcutTargets)
-                && mBigrams.equals(w.mBigrams);
+                && mBigrams.equals(w.mBigrams)
+                && mIsNotAWord == w.mIsNotAWord
+                && mIsBlacklistEntry == w.mIsBlacklistEntry;
     }
 
     @Override
diff --git a/native/jni/src/binary_format.h b/native/jni/src/binary_format.h
index d8f3e83dd..25d504bfb 100644
--- a/native/jni/src/binary_format.h
+++ b/native/jni/src/binary_format.h
@@ -43,6 +43,10 @@ class BinaryFormat {
     static const int FLAG_HAS_SHORTCUT_TARGETS = 0x08;
     // Flag for bigram presence
     static const int FLAG_HAS_BIGRAMS = 0x04;
+    // Flag for non-words (typically, shortcut only entries)
+    static const int FLAG_IS_NOT_A_WORD = 0x02;
+    // Flag for blacklist
+    static const int FLAG_IS_BLACKLISTED = 0x01;
 
     // Attribute (bigram/shortcut) related flags:
     // Flag for presence of more attributes
diff --git a/native/jni/src/terminal_attributes.h b/native/jni/src/terminal_attributes.h
index 34ab8f0ef..9ff2772b1 100644
--- a/native/jni/src/terminal_attributes.h
+++ b/native/jni/src/terminal_attributes.h
@@ -72,6 +72,10 @@ class TerminalAttributes {
         return ShortcutIterator(mDict, mStartPos + BinaryFormat::SHORTCUT_LIST_SIZE_SIZE, mFlags);
     }
 
+    bool isBlacklistedOrNotAWord() const {
+        return mFlags & (BinaryFormat::FLAG_IS_BLACKLISTED | BinaryFormat::FLAG_IS_NOT_A_WORD);
+    }
+
  private:
     DISALLOW_IMPLICIT_CONSTRUCTORS(TerminalAttributes);
     const uint8_t *const mDict;
diff --git a/native/jni/src/unigram_dictionary.cpp b/native/jni/src/unigram_dictionary.cpp
index ba3c2db6b..d4c51df63 100644
--- a/native/jni/src/unigram_dictionary.cpp
+++ b/native/jni/src/unigram_dictionary.cpp
@@ -391,9 +391,11 @@ inline void UnigramDictionary::onTerminal(const int probability,
         const int finalProbability =
                 correction->getFinalProbability(probability, &wordPointer, &wordLength);
 
-        if (0 != finalProbability) {
+        if (0 != finalProbability && !terminalAttributes.isBlacklistedOrNotAWord()) {
             // If the probability is 0, we don't want to add this word. However we still
             // want to add its shortcuts (including a possible whitelist entry) if any.
+            // Furthermore, if this is not a word (shortcut only for example) or a blacklisted
+            // entry then we never want to suggest this.
             addWord(wordPointer, wordLength, finalProbability, masterQueue,
                     Dictionary::KIND_CORRECTION);
         }
@@ -841,6 +843,12 @@ int UnigramDictionary::getFrequency(const int32_t *const inWord, const int lengt
         return NOT_A_PROBABILITY;
     }
     const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
+    if (flags & (BinaryFormat::FLAG_IS_BLACKLISTED | BinaryFormat::FLAG_IS_NOT_A_WORD)) {
+        // If this is not a word, or if it's a blacklisted entry, it should behave as
+        // having no frequency outside of the suggestion process (where it should be used
+        // for shortcuts).
+        return NOT_A_PROBABILITY;
+    }
     const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags));
     if (hasMultipleChars) {
         pos = BinaryFormat::skipOtherCharacters(root, pos);
diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictIOTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictIOTests.java
index 0094db8a7..6c8e1ca4d 100644
--- a/tests/src/com/android/inputmethod/latin/BinaryDictIOTests.java
+++ b/tests/src/com/android/inputmethod/latin/BinaryDictIOTests.java
@@ -80,7 +80,7 @@ public class BinaryDictIOTests extends AndroidTestCase {
             final List<String> words) {
         for (int i = 0; i < number; ++i) {
             final String word = words.get(i);
-            dict.add(word, UNIGRAM_FREQ, null);
+            dict.add(word, UNIGRAM_FREQ, null, false /* isNotAWord */);
         }
     }
 
diff --git a/tests/src/com/android/inputmethod/latin/FusionDictionaryTests.java b/tests/src/com/android/inputmethod/latin/FusionDictionaryTests.java
index 8ecdcc366..123959b4d 100644
--- a/tests/src/com/android/inputmethod/latin/FusionDictionaryTests.java
+++ b/tests/src/com/android/inputmethod/latin/FusionDictionaryTests.java
@@ -31,16 +31,16 @@ public class FusionDictionaryTests extends AndroidTestCase {
         FusionDictionary dict = new FusionDictionary(new Node(),
                 new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), false, false));
 
-        dict.add("abc", 10, null);
+        dict.add("abc", 10, null, false /* isNotAWord */);
         assertNull(FusionDictionary.findWordInTree(dict.mRoot, "aaa"));
         assertNotNull(FusionDictionary.findWordInTree(dict.mRoot, "abc"));
 
-        dict.add("aa", 10, null);
+        dict.add("aa", 10, null, false /* isNotAWord */);
         assertNull(FusionDictionary.findWordInTree(dict.mRoot, "aaa"));
         assertNotNull(FusionDictionary.findWordInTree(dict.mRoot, "aa"));
 
-        dict.add("babcd", 10, null);
-        dict.add("bacde", 10, null);
+        dict.add("babcd", 10, null, false /* isNotAWord */);
+        dict.add("bacde", 10, null, false /* isNotAWord */);
         assertNull(FusionDictionary.findWordInTree(dict.mRoot, "ba"));
         assertNotNull(FusionDictionary.findWordInTree(dict.mRoot, "babcd"));
         assertNotNull(FusionDictionary.findWordInTree(dict.mRoot, "bacde"));
diff --git a/tools/dicttool/src/android/inputmethod/latin/dicttool/XmlDictInputOutput.java b/tools/dicttool/src/android/inputmethod/latin/dicttool/XmlDictInputOutput.java
index 9ce8c4934..c31cd724a 100644
--- a/tools/dicttool/src/android/inputmethod/latin/dicttool/XmlDictInputOutput.java
+++ b/tools/dicttool/src/android/inputmethod/latin/dicttool/XmlDictInputOutput.java
@@ -50,6 +50,7 @@ public class XmlDictInputOutput {
     private static final String SHORTCUT_TAG = "shortcut";
     private static final String FREQUENCY_ATTR = "f";
     private static final String WORD_ATTR = "word";
+    private static final String NOT_A_WORD_ATTR = "not_a_word";
 
     private static final int SHORTCUT_ONLY_DEFAULT_FREQ = 1;
 
@@ -92,7 +93,7 @@ public class XmlDictInputOutput {
             final FusionDictionary dict = mDictionary;
             for (final String shortcutOnly : mShortcutsMap.keySet()) {
                 if (dict.hasWord(shortcutOnly)) continue;
-                dict.add(shortcutOnly, 0, mShortcutsMap.get(shortcutOnly));
+                dict.add(shortcutOnly, 0, mShortcutsMap.get(shortcutOnly), true /* isNotAWord */);
             }
             mDictionary = null;
             mShortcutsMap.clear();
@@ -144,7 +145,7 @@ public class XmlDictInputOutput {
         @Override
         public void endElement(String uri, String localName, String qName) {
             if (WORD == mState) {
-                mDictionary.add(mWord, mFreq, mShortcutsMap.get(mWord));
+                mDictionary.add(mWord, mFreq, mShortcutsMap.get(mWord), false /* isNotAWord */);
                 mState = START;
             }
         }
@@ -345,7 +346,8 @@ public class XmlDictInputOutput {
         destination.write("<!-- Warning: there is no code to read this format yet. -->\n");
         for (Word word : set) {
             destination.write("  <" + WORD_TAG + " " + WORD_ATTR + "=\"" + word.mWord + "\" "
-                    + FREQUENCY_ATTR + "=\"" + word.mFrequency + "\">");
+                    + FREQUENCY_ATTR + "=\"" + word.mFrequency
+                    + (word.mIsNotAWord ? "\" " + NOT_A_WORD_ATTR + "=\"true" : "") + "\">");
             if (null != word.mShortcutTargets) {
                 destination.write("\n");
                 for (WeightedString target : word.mShortcutTargets) {
diff --git a/tools/dicttool/tests/com/android/inputmethod/latin/makedict/BinaryDictInputOutputTest.java b/tools/dicttool/tests/com/android/inputmethod/latin/makedict/BinaryDictInputOutputTest.java
index 24042f120..88589b815 100644
--- a/tools/dicttool/tests/com/android/inputmethod/latin/makedict/BinaryDictInputOutputTest.java
+++ b/tools/dicttool/tests/com/android/inputmethod/latin/makedict/BinaryDictInputOutputTest.java
@@ -43,11 +43,11 @@ public class BinaryDictInputOutputTest extends TestCase {
         final FusionDictionary dict = new FusionDictionary(new Node(),
                 new DictionaryOptions(new HashMap<String, String>(),
                         false /* germanUmlautProcessing */, false /* frenchLigatureProcessing */));
-        dict.add("foo", 1, null);
-        dict.add("fta", 1, null);
-        dict.add("ftb", 1, null);
-        dict.add("bar", 1, null);
-        dict.add("fool", 1, null);
+        dict.add("foo", 1, null, false /* isNotAWord */);
+        dict.add("fta", 1, null, false /* isNotAWord */);
+        dict.add("ftb", 1, null, false /* isNotAWord */);
+        dict.add("bar", 1, null, false /* isNotAWord */);
+        dict.add("fool", 1, null, false /* isNotAWord */);
         final ArrayList<Node> result = BinaryDictInputOutput.flattenTree(dict.mRoot);
         assertEquals(4, result.size());
         while (!result.isEmpty()) {