From 3225b6fe66a84ed7f499daf84d085141a66bb346 Mon Sep 17 00:00:00 2001
From: Keisuke Kuroyanagi <ksk@google.com>
Date: Mon, 28 Jul 2014 21:48:00 +0900
Subject: [PATCH] Add boundary check for ver2 bigram reading.

Bug: 16330528
Change-Id: I6aca6c7a735e2a652eb325572d44dff660789cff
---
 .../dictionary_bigrams_structure_policy.h     |  2 +-
 .../v402/bigram/ver4_bigram_list_policy.h     |  3 ++-
 .../bigram/bigram_list_read_write_utils.cpp   | 21 +++++++++++++------
 .../bigram/bigram_list_read_write_utils.h     |  7 ++++---
 .../structure/v2/bigram/bigram_list_policy.h  | 17 ++++++++++-----
 .../structure/v2/patricia_trie_policy.cpp     | 17 +++++++++++++--
 .../structure/v2/patricia_trie_policy.h       |  2 +-
 .../v4/bigram/ver4_bigram_list_policy.h       |  3 ++-
 8 files changed, 52 insertions(+), 20 deletions(-)

diff --git a/native/jni/src/suggest/core/policy/dictionary_bigrams_structure_policy.h b/native/jni/src/suggest/core/policy/dictionary_bigrams_structure_policy.h
index 661ef1b1a..aa0d068aa 100644
--- a/native/jni/src/suggest/core/policy/dictionary_bigrams_structure_policy.h
+++ b/native/jni/src/suggest/core/policy/dictionary_bigrams_structure_policy.h
@@ -30,7 +30,7 @@ class DictionaryBigramsStructurePolicy {
 
     virtual void getNextBigram(int *const outBigramPos, int *const outProbability,
             bool *const outHasNext, int *const pos) const = 0;
-    virtual void skipAllBigrams(int *const pos) const = 0;
+    virtual bool skipAllBigrams(int *const pos) const = 0;
 
  protected:
     DictionaryBigramsStructurePolicy() {}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h
index 61623468e..50a4c9743 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h
@@ -58,8 +58,9 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
     void getNextBigram(int *const outBigramPos, int *const outProbability,
             bool *const outHasNext, int *const bigramEntryPos) const;
 
-    void skipAllBigrams(int *const pos) const {
+    bool skipAllBigrams(int *const pos) const {
         // Do nothing because we don't need to skip bigram lists in ver4 dictionaries.
+        return true;
     }
 
     bool addNewEntry(const int terminalId, const int newTargetTerminalId,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.cpp
index 08b4e0b5e..f7fd5c071 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.cpp
@@ -38,9 +38,14 @@ const BigramListReadWriteUtils::BigramFlags BigramListReadWriteUtils::FLAG_ATTRI
 const BigramListReadWriteUtils::BigramFlags
         BigramListReadWriteUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F;
 
-/* static */ void BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
-        const uint8_t *const bigramsBuf, BigramFlags *const outBigramFlags,
+/* static */ bool BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
+        const uint8_t *const bigramsBuf, const int bufSize, BigramFlags *const outBigramFlags,
         int *const outTargetPtNodePos, int *const bigramEntryPos) {
+    if (bufSize <= *bigramEntryPos) {
+        AKLOGE("Read invalid pos in getBigramEntryPropertiesAndAdvancePosition(). bufSize: %d, "
+                "bigramEntryPos: %d.", bufSize, *bigramEntryPos);
+        return false;
+    }
     const BigramFlags bigramFlags = ByteArrayUtils::readUint8AndAdvancePosition(bigramsBuf,
             bigramEntryPos);
     if (outBigramFlags) {
@@ -51,15 +56,19 @@ const BigramListReadWriteUtils::BigramFlags
     if (outTargetPtNodePos) {
         *outTargetPtNodePos = targetPos;
     }
+    return true;
 }
 
-/* static */ void BigramListReadWriteUtils::skipExistingBigrams(const uint8_t *const bigramsBuf,
-        int *const bigramListPos) {
+/* static */ bool BigramListReadWriteUtils::skipExistingBigrams(const uint8_t *const bigramsBuf,
+        const int bufSize, int *const bigramListPos) {
     BigramFlags flags;
     do {
-        getBigramEntryPropertiesAndAdvancePosition(bigramsBuf, &flags, 0 /* outTargetPtNodePos */,
-                bigramListPos);
+        if (!getBigramEntryPropertiesAndAdvancePosition(bigramsBuf, bufSize, &flags,
+                0 /* outTargetPtNodePos */, bigramListPos)) {
+            return false;
+        }
     } while(hasNext(flags));
+    return true;
 }
 
 /* static */ int BigramListReadWriteUtils::getBigramAddressAndAdvancePosition(
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h
index 15f924a6a..10f93fb7a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h
@@ -30,8 +30,8 @@ class BigramListReadWriteUtils {
 public:
    typedef uint8_t BigramFlags;
 
-   static void getBigramEntryPropertiesAndAdvancePosition(const uint8_t *const bigramsBuf,
-           BigramFlags *const outBigramFlags, int *const outTargetPtNodePos,
+   static bool getBigramEntryPropertiesAndAdvancePosition(const uint8_t *const bigramsBuf,
+           const int bufSize, BigramFlags *const outBigramFlags, int *const outTargetPtNodePos,
            int *const bigramEntryPos);
 
    static AK_FORCE_INLINE int getProbabilityFromFlags(const BigramFlags flags) {
@@ -43,7 +43,8 @@ public:
    }
 
    // Bigrams reading methods
-   static void skipExistingBigrams(const uint8_t *const bigramsBuf, int *const bigramListPos);
+   static bool skipExistingBigrams(const uint8_t *const bigramsBuf, const int bufSize,
+           int *const bigramListPos);
 
 private:
    DISALLOW_IMPLICIT_CONSTRUCTORS(BigramListReadWriteUtils);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/bigram/bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/bigram/bigram_list_policy.h
index 00bb502dc..73e291ec2 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/bigram/bigram_list_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/bigram/bigram_list_policy.h
@@ -27,27 +27,34 @@ namespace latinime {
 
 class BigramListPolicy : public DictionaryBigramsStructurePolicy {
  public:
-    explicit BigramListPolicy(const uint8_t *const bigramsBuf) : mBigramsBuf(bigramsBuf) {}
+    BigramListPolicy(const uint8_t *const bigramsBuf, const int bufSize)
+            : mBigramsBuf(bigramsBuf), mBufSize(bufSize) {}
 
     ~BigramListPolicy() {}
 
     void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext,
             int *const pos) const {
         BigramListReadWriteUtils::BigramFlags flags;
-        BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(mBigramsBuf, &flags,
-                outBigramPos, pos);
+        if (!BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(mBigramsBuf,
+                mBufSize, &flags, outBigramPos, pos)) {
+            AKLOGE("Cannot read bigram entry. mBufSize: %d, pos: %d. ", mBufSize, *pos);
+            *outProbability = NOT_A_PROBABILITY;
+            *outHasNext = false;
+            return;
+        }
         *outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(flags);
         *outHasNext = BigramListReadWriteUtils::hasNext(flags);
     }
 
-    void skipAllBigrams(int *const pos) const {
-        BigramListReadWriteUtils::skipExistingBigrams(mBigramsBuf, pos);
+    bool skipAllBigrams(int *const pos) const {
+        return BigramListReadWriteUtils::skipExistingBigrams(mBigramsBuf, mBufSize, pos);
     }
 
  private:
     DISALLOW_IMPLICIT_CONSTRUCTORS(BigramListPolicy);
 
     const uint8_t *const mBigramsBuf;
+    const int mBufSize;
 };
 } // namespace latinime
 #endif // LATINIME_BIGRAM_LIST_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
index 91d76040f..531cbb710 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
@@ -223,7 +223,14 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
                         mShortcutListPolicy.skipAllShortcuts(&pos);
                     }
                     if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
-                        mBigramListPolicy.skipAllBigrams(&pos);
+                        if (!mBigramListPolicy.skipAllBigrams(&pos)) {
+                            AKLOGE("Cannot skip bigrams. BufSize: %d, pos: %d.", mDictBufferSize,
+                                    pos);
+                            mIsCorrupted = true;
+                            ASSERT(false);
+                            *outUnigramProbability = NOT_A_PROBABILITY;
+                            return 0;
+                        }
                     }
                 }
             } else {
@@ -240,7 +247,13 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
                     mShortcutListPolicy.skipAllShortcuts(&pos);
                 }
                 if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
-                    mBigramListPolicy.skipAllBigrams(&pos);
+                    if (!mBigramListPolicy.skipAllBigrams(&pos)) {
+                        AKLOGE("Cannot skip bigrams. BufSize: %d, pos: %d.", mDictBufferSize, pos);
+                        mIsCorrupted = true;
+                        ASSERT(false);
+                        *outUnigramProbability = NOT_A_PROBABILITY;
+                        return 0;
+                    }
                 }
             }
 
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
index 7c0b9d3c5..d40b9af8d 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
@@ -42,7 +42,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
               mHeaderPolicy(mMmappedBuffer->getBuffer(), FormatUtils::VERSION_2),
               mDictRoot(mMmappedBuffer->getBuffer() + mHeaderPolicy.getSize()),
               mDictBufferSize(mMmappedBuffer->getBufferSize() - mHeaderPolicy.getSize()),
-              mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot),
+              mBigramListPolicy(mDictRoot, mDictBufferSize), mShortcutListPolicy(mDictRoot),
               mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy),
               mPtNodeArrayReader(mDictRoot, mDictBufferSize),
               mTerminalPtNodePositionsForIteratingWords(), mIsCorrupted(false) {}
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h
index 55ba613a5..4b3bb3725 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h
@@ -40,8 +40,9 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
     void getNextBigram(int *const outBigramPos, int *const outProbability,
             bool *const outHasNext, int *const bigramEntryPos) const;
 
-    void skipAllBigrams(int *const pos) const {
+    bool skipAllBigrams(int *const pos) const {
         // Do nothing because we don't need to skip bigram lists in ver4 dictionaries.
+        return true;
     }
 
     bool addNewEntry(const int terminalId, const int newTargetTerminalId,