mirror of
https://gitlab.futo.org/keyboard/latinime.git
synced 2024-09-28 14:54:30 +01:00
am 700ce8df
: Merge "[Rlog56] Buffer words before pushing out LogUnit"
* commit '700ce8df07eb242ce93f4f5e3e0ceb78473938ab': [Rlog56] Buffer words before pushing out LogUnit
This commit is contained in:
commit
e3c0015b0d
@ -81,7 +81,7 @@ public class FixedLogBuffer extends LogBuffer {
|
|||||||
return logUnit;
|
return logUnit;
|
||||||
}
|
}
|
||||||
|
|
||||||
private void shiftOutThroughFirstWord() {
|
public void shiftOutThroughFirstWord() {
|
||||||
final LinkedList<LogUnit> logUnits = getLogUnits();
|
final LinkedList<LogUnit> logUnits = getLogUnits();
|
||||||
while (!logUnits.isEmpty()) {
|
while (!logUnits.isEmpty()) {
|
||||||
final LogUnit logUnit = logUnits.removeFirst();
|
final LogUnit logUnit = logUnits.removeFirst();
|
||||||
|
@ -26,18 +26,42 @@ import java.util.LinkedList;
|
|||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Provide a log buffer of fixed length that enforces privacy restrictions.
|
* MainLogBuffer is a FixedLogBuffer that tracks the state of LogUnits to make privacy guarantees.
|
||||||
*
|
*
|
||||||
* The privacy restrictions include making sure that no numbers are logged, that all logged words
|
* There are three forms of privacy protection: 1) only words in the main dictionary are allowed to
|
||||||
* are in the dictionary, and that words are recorded infrequently enough that the user's meaning
|
* be logged in enough detail to determine their contents, 2) only a subset of words are logged
|
||||||
* cannot be easily determined.
|
* in detail, such as 10%, and 3) no numbers are logged.
|
||||||
|
*
|
||||||
|
* This class maintains a list of LogUnits, each corresponding to a word. As the user completes
|
||||||
|
* words, they are added here. But if the user backs up over their current word to edit a word
|
||||||
|
* entered earlier, then it is pulled out of this LogBuffer, changes are then added to the end of
|
||||||
|
* the LogUnit, and it is pushed back in here when the user is done. Because words may be pulled
|
||||||
|
* back out even after they are pushed in, we must not publish the contents of this LogBuffer too
|
||||||
|
* quickly. However, we cannot let the contents pile up either, or it will limit the editing that
|
||||||
|
* a user can perform.
|
||||||
|
*
|
||||||
|
* To balance these requirements (keep history so user can edit, flush history so it does not pile
|
||||||
|
* up), the LogBuffer is considered "complete" when the user has entered enough words to form an
|
||||||
|
* n-gram, followed by enough additional non-detailed words (that are in the 90%, as per above).
|
||||||
|
* Once complete, the n-gram may be published to flash storage (via the ResearchLog class).
|
||||||
|
* However, the additional non-detailed words are retained, in case the user backspaces to edit
|
||||||
|
* them. The MainLogBuffer then continues to add words, publishing individual non-detailed words
|
||||||
|
* as new words arrive. After enough non-detailed words have been pushed out to account for the
|
||||||
|
* 90% between words, the words at the front of the LogBuffer can be published as an n-gram again.
|
||||||
|
*
|
||||||
|
* If the words that would form the valid n-gram are not in the dictionary, then words are pushed
|
||||||
|
* through the LogBuffer one at a time until an n-gram is found that is entirely composed of
|
||||||
|
* dictionary words.
|
||||||
|
*
|
||||||
|
* If the user closes a session, then the entire LogBuffer is flushed, publishing any embedded
|
||||||
|
* n-gram containing dictionary words.
|
||||||
*/
|
*/
|
||||||
public class MainLogBuffer extends FixedLogBuffer {
|
public class MainLogBuffer extends FixedLogBuffer {
|
||||||
private static final String TAG = MainLogBuffer.class.getSimpleName();
|
private static final String TAG = MainLogBuffer.class.getSimpleName();
|
||||||
private static final boolean DEBUG = false && ProductionFlag.IS_EXPERIMENTAL_DEBUG;
|
private static final boolean DEBUG = false && ProductionFlag.IS_EXPERIMENTAL_DEBUG;
|
||||||
|
|
||||||
// The size of the n-grams logged. E.g. N_GRAM_SIZE = 2 means to sample bigrams.
|
// The size of the n-grams logged. E.g. N_GRAM_SIZE = 2 means to sample bigrams.
|
||||||
private static final int N_GRAM_SIZE = 2;
|
public static final int N_GRAM_SIZE = 2;
|
||||||
// The number of words between n-grams to omit from the log. If debugging, record 50% of all
|
// The number of words between n-grams to omit from the log. If debugging, record 50% of all
|
||||||
// words. Otherwise, only record 10%.
|
// words. Otherwise, only record 10%.
|
||||||
private static final int DEFAULT_NUMBER_OF_WORDS_BETWEEN_SAMPLES =
|
private static final int DEFAULT_NUMBER_OF_WORDS_BETWEEN_SAMPLES =
|
||||||
@ -46,49 +70,31 @@ public class MainLogBuffer extends FixedLogBuffer {
|
|||||||
private final ResearchLog mResearchLog;
|
private final ResearchLog mResearchLog;
|
||||||
private Suggest mSuggest;
|
private Suggest mSuggest;
|
||||||
|
|
||||||
// The minimum periodicity with which n-grams can be sampled. E.g. mWinWordPeriod is 10 if
|
/* package for test */ int mNumWordsBetweenNGrams;
|
||||||
// every 10th bigram is sampled, i.e., words 1-8 are not, but the bigram at words 9 and 10, etc.
|
|
||||||
// for 11-18, and the bigram at words 19 and 20. If an n-gram is not safe (e.g. it contains a
|
|
||||||
// number in the middle or an out-of-vocabulary word), then sampling is delayed until a safe
|
|
||||||
// n-gram does appear.
|
|
||||||
/* package for test */ int mMinWordPeriod;
|
|
||||||
|
|
||||||
// Counter for words left to suppress before an n-gram can be sampled. Reset to mMinWordPeriod
|
// Counter for words left to suppress before an n-gram can be sampled.  Reset to
// mNumWordsBetweenNGrams after a sample is taken (see resetWordCounter()).
|
||||||
// after a sample is taken.
|
// after a sample is taken.
|
||||||
/* package for test */ int mWordsUntilSafeToSample;
|
/* package for test */ int mNumWordsUntilSafeToSample;
|
||||||
|
|
||||||
public MainLogBuffer(final ResearchLog researchLog) {
|
public MainLogBuffer(final ResearchLog researchLog) {
|
||||||
super(N_GRAM_SIZE);
|
super(N_GRAM_SIZE + DEFAULT_NUMBER_OF_WORDS_BETWEEN_SAMPLES);
|
||||||
mResearchLog = researchLog;
|
mResearchLog = researchLog;
|
||||||
mMinWordPeriod = DEFAULT_NUMBER_OF_WORDS_BETWEEN_SAMPLES + N_GRAM_SIZE;
|
mNumWordsBetweenNGrams = DEFAULT_NUMBER_OF_WORDS_BETWEEN_SAMPLES;
|
||||||
final Random random = new Random();
|
final Random random = new Random();
|
||||||
mWordsUntilSafeToSample = random.nextInt(mMinWordPeriod);
|
mNumWordsUntilSafeToSample = DEBUG ? 0 : random.nextInt(mNumWordsBetweenNGrams + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setSuggest(final Suggest suggest) {
|
public void setSuggest(final Suggest suggest) {
|
||||||
mSuggest = suggest;
|
mSuggest = suggest;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public void shiftIn(final LogUnit newLogUnit) {
|
|
||||||
super.shiftIn(newLogUnit);
|
|
||||||
if (newLogUnit.hasWord()) {
|
|
||||||
if (mWordsUntilSafeToSample > 0) {
|
|
||||||
mWordsUntilSafeToSample--;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (DEBUG) {
|
|
||||||
Log.d(TAG, "shiftedIn " + (newLogUnit.hasWord() ? newLogUnit.getWord() : ""));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void resetWordCounter() {
|
public void resetWordCounter() {
|
||||||
mWordsUntilSafeToSample = mMinWordPeriod;
|
mNumWordsUntilSafeToSample = mNumWordsBetweenNGrams;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Determines whether the content of the MainLogBuffer can be safely uploaded in its complete
|
* Determines whether uploading the n words at the front of the MainLogBuffer will not violate
|
||||||
* form and still protect the user's privacy.
|
* user privacy.
|
||||||
*
|
*
|
||||||
* The size of the MainLogBuffer is just enough to hold one n-gram, its corrections, and any
|
* The size of the MainLogBuffer is just enough to hold one n-gram, its corrections, and any
|
||||||
* non-character data that is typed between words. The decision about privacy is made based on
|
* non-character data that is typed between words. The decision about privacy is made based on
|
||||||
@ -97,10 +103,10 @@ public class MainLogBuffer extends FixedLogBuffer {
|
|||||||
* the screen orientation and other characteristics about the device can be uploaded without
|
* the screen orientation and other characteristics about the device can be uploaded without
|
||||||
* revealing much about the user.
|
* revealing much about the user.
|
||||||
*/
|
*/
|
||||||
public boolean isSafeToLog() {
|
public boolean isNGramSafe() {
|
||||||
// Check that we are not sampling too frequently. Having sampled recently might disclose
|
// Check that we are not sampling too frequently. Having sampled recently might disclose
|
||||||
// too much of the user's intended meaning.
|
// too much of the user's intended meaning.
|
||||||
if (mWordsUntilSafeToSample > 0) {
|
if (mNumWordsUntilSafeToSample > 0) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (mSuggest == null || !mSuggest.hasMainDictionary()) {
|
if (mSuggest == null || !mSuggest.hasMainDictionary()) {
|
||||||
@ -119,8 +125,8 @@ public class MainLogBuffer extends FixedLogBuffer {
|
|||||||
// complete buffer contents in detail.
|
// complete buffer contents in detail.
|
||||||
final LinkedList<LogUnit> logUnits = getLogUnits();
|
final LinkedList<LogUnit> logUnits = getLogUnits();
|
||||||
final int length = logUnits.size();
|
final int length = logUnits.size();
|
||||||
int wordsFound = 0;
|
int wordsNeeded = N_GRAM_SIZE;
|
||||||
for (int i = 0; i < length; i++) {
|
for (int i = 0; i < length && wordsNeeded > 0; i++) {
|
||||||
final LogUnit logUnit = logUnits.get(i);
|
final LogUnit logUnit = logUnits.get(i);
|
||||||
final String word = logUnit.getWord();
|
final String word = logUnit.getWord();
|
||||||
if (word == null) {
|
if (word == null) {
|
||||||
@ -136,26 +142,41 @@ public class MainLogBuffer extends FixedLogBuffer {
|
|||||||
+ ", isValid: " + (dictionary.isValidWord(word)));
|
+ ", isValid: " + (dictionary.isValidWord(word)));
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
} else {
|
|
||||||
wordsFound++;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (wordsFound < N_GRAM_SIZE) {
|
|
||||||
// Not enough words. Not unsafe, but reject anyway.
|
|
||||||
if (DEBUG) {
|
|
||||||
Log.d(TAG, "not enough words");
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
// All checks have passed; this buffer's content can be safely uploaded.
|
// All checks have passed; this buffer's content can be safely uploaded.
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean isNGramComplete() {
|
||||||
|
final LinkedList<LogUnit> logUnits = getLogUnits();
|
||||||
|
final int length = logUnits.size();
|
||||||
|
int wordsNeeded = N_GRAM_SIZE;
|
||||||
|
for (int i = 0; i < length && wordsNeeded > 0; i++) {
|
||||||
|
final LogUnit logUnit = logUnits.get(i);
|
||||||
|
final String word = logUnit.getWord();
|
||||||
|
if (word != null) {
|
||||||
|
wordsNeeded--;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return wordsNeeded == 0;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void onShiftOut(final LogUnit logUnit) {
|
protected void onShiftOut(final LogUnit logUnit) {
|
||||||
if (mResearchLog != null) {
|
if (mResearchLog != null) {
|
||||||
mResearchLog.publish(logUnit, false /* isIncludingPrivateData */);
|
mResearchLog.publish(logUnit,
|
||||||
|
ResearchLogger.IS_LOGGING_EVERYTHING /* isIncludingPrivateData */);
|
||||||
|
}
|
||||||
|
if (logUnit.hasWord()) {
|
||||||
|
if (mNumWordsUntilSafeToSample > 0) {
|
||||||
|
mNumWordsUntilSafeToSample--;
|
||||||
|
Log.d(TAG, "wordsUntilSafeToSample now at " + mNumWordsUntilSafeToSample);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (DEBUG) {
|
||||||
|
Log.d(TAG, "shiftedOut " + (logUnit.hasWord() ? logUnit.getWord() : ""));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -85,7 +85,7 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
|
|||||||
private static final String TAG = ResearchLogger.class.getSimpleName();
|
private static final String TAG = ResearchLogger.class.getSimpleName();
|
||||||
private static final boolean DEBUG = false && ProductionFlag.IS_EXPERIMENTAL_DEBUG;
|
private static final boolean DEBUG = false && ProductionFlag.IS_EXPERIMENTAL_DEBUG;
|
||||||
// Whether all n-grams should be logged. true will disclose private info.
|
// Whether all n-grams should be logged. true will disclose private info.
|
||||||
private static final boolean IS_LOGGING_EVERYTHING = false
|
public static final boolean IS_LOGGING_EVERYTHING = false
|
||||||
&& ProductionFlag.IS_EXPERIMENTAL_DEBUG;
|
&& ProductionFlag.IS_EXPERIMENTAL_DEBUG;
|
||||||
// Whether the TextView contents are logged at the end of the session. true will disclose
|
// Whether the TextView contents are logged at the end of the session. true will disclose
|
||||||
// private info.
|
// private info.
|
||||||
@ -394,8 +394,16 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
|
|||||||
commitCurrentLogUnit();
|
commitCurrentLogUnit();
|
||||||
|
|
||||||
if (mMainLogBuffer != null) {
|
if (mMainLogBuffer != null) {
|
||||||
publishLogBuffer(mMainLogBuffer, mMainResearchLog,
|
while (!mMainLogBuffer.isEmpty()) {
|
||||||
IS_LOGGING_EVERYTHING /* isIncludingPrivateData */);
|
if ((mMainLogBuffer.isNGramSafe() || IS_LOGGING_EVERYTHING) &&
|
||||||
|
mMainResearchLog != null) {
|
||||||
|
publishLogBuffer(mMainLogBuffer, mMainResearchLog,
|
||||||
|
true /* isIncludingPrivateData */);
|
||||||
|
mMainLogBuffer.resetWordCounter();
|
||||||
|
} else {
|
||||||
|
mMainLogBuffer.shiftOutThroughFirstWord();
|
||||||
|
}
|
||||||
|
}
|
||||||
mMainResearchLog.close(null /* callback */);
|
mMainResearchLog.close(null /* callback */);
|
||||||
mMainLogBuffer = null;
|
mMainLogBuffer = null;
|
||||||
}
|
}
|
||||||
@ -702,8 +710,9 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
|
|||||||
}
|
}
|
||||||
if (!mCurrentLogUnit.isEmpty()) {
|
if (!mCurrentLogUnit.isEmpty()) {
|
||||||
if (mMainLogBuffer != null) {
|
if (mMainLogBuffer != null) {
|
||||||
if ((mMainLogBuffer.isSafeToLog() || IS_LOGGING_EVERYTHING)
|
if ((mMainLogBuffer.isNGramSafe() || IS_LOGGING_EVERYTHING) &&
|
||||||
&& mMainResearchLog != null) {
|
mMainLogBuffer.isNGramComplete() &&
|
||||||
|
mMainResearchLog != null) {
|
||||||
publishLogBuffer(mMainLogBuffer, mMainResearchLog,
|
publishLogBuffer(mMainLogBuffer, mMainResearchLog,
|
||||||
true /* isIncludingPrivateData */);
|
true /* isIncludingPrivateData */);
|
||||||
mMainLogBuffer.resetWordCounter();
|
mMainLogBuffer.resetWordCounter();
|
||||||
@ -714,6 +723,10 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
|
|||||||
mFeedbackLogBuffer.shiftIn(mCurrentLogUnit);
|
mFeedbackLogBuffer.shiftIn(mCurrentLogUnit);
|
||||||
}
|
}
|
||||||
mCurrentLogUnit = new LogUnit();
|
mCurrentLogUnit = new LogUnit();
|
||||||
|
} else {
|
||||||
|
if (DEBUG) {
|
||||||
|
Log.d(TAG, "Warning: tried to commit empty log unit.");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -756,8 +769,8 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
|
|||||||
mFeedbackLogBuffer.unshiftIn();
|
mFeedbackLogBuffer.unshiftIn();
|
||||||
}
|
}
|
||||||
if (DEBUG) {
|
if (DEBUG) {
|
||||||
Log.d(TAG, "uncommitCurrentLogUnit back to " + (mCurrentLogUnit.hasWord()
|
Log.d(TAG, "uncommitCurrentLogUnit (dump=" + dumpCurrentLogUnit + ") back to "
|
||||||
? ": '" + mCurrentLogUnit.getWord() + "'" : ""));
|
+ (mCurrentLogUnit.hasWord() ? ": '" + mCurrentLogUnit.getWord() + "'" : ""));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -773,12 +786,16 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
|
|||||||
isIncludingPrivateData);
|
isIncludingPrivateData);
|
||||||
researchLog.publish(openingLogUnit, true /* isIncludingPrivateData */);
|
researchLog.publish(openingLogUnit, true /* isIncludingPrivateData */);
|
||||||
LogUnit logUnit;
|
LogUnit logUnit;
|
||||||
while ((logUnit = logBuffer.shiftOut()) != null) {
|
int numWordsToPublish = MainLogBuffer.N_GRAM_SIZE;
|
||||||
|
while ((logUnit = logBuffer.shiftOut()) != null && numWordsToPublish > 0) {
|
||||||
if (DEBUG) {
|
if (DEBUG) {
|
||||||
Log.d(TAG, "publishLogBuffer: " + (logUnit.hasWord() ? logUnit.getWord()
|
Log.d(TAG, "publishLogBuffer: " + (logUnit.hasWord() ? logUnit.getWord()
|
||||||
: "<wordless>"));
|
: "<wordless>"));
|
||||||
}
|
}
|
||||||
researchLog.publish(logUnit, isIncludingPrivateData);
|
researchLog.publish(logUnit, isIncludingPrivateData);
|
||||||
|
if (logUnit.getWord() != null) {
|
||||||
|
numWordsToPublish--;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
final LogUnit closingLogUnit = new LogUnit();
|
final LogUnit closingLogUnit = new LogUnit();
|
||||||
closingLogUnit.addLogStatement(LOGSTATEMENT_LOG_SEGMENT_CLOSING,
|
closingLogUnit.addLogStatement(LOGSTATEMENT_LOG_SEGMENT_CLOSING,
|
||||||
@ -1254,9 +1271,12 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
|
|||||||
public static void latinIME_revertCommit(final String committedWord,
|
public static void latinIME_revertCommit(final String committedWord,
|
||||||
final String originallyTypedWord, final boolean isBatchMode) {
|
final String originallyTypedWord, final boolean isBatchMode) {
|
||||||
final ResearchLogger researchLogger = getInstance();
|
final ResearchLogger researchLogger = getInstance();
|
||||||
final LogUnit logUnit = researchLogger.mMainLogBuffer.peekLastLogUnit();
|
// Assume that mCurrentLogUnit has been restored to contain the reverted word.
|
||||||
|
final LogUnit logUnit = researchLogger.mCurrentLogUnit;
|
||||||
if (originallyTypedWord.length() > 0 && hasLetters(originallyTypedWord)) {
|
if (originallyTypedWord.length() > 0 && hasLetters(originallyTypedWord)) {
|
||||||
if (logUnit != null) {
|
if (logUnit != null) {
|
||||||
|
// Probably not necessary, but setting as a precaution in case the word isn't
|
||||||
|
// committed later.
|
||||||
logUnit.setWord(originallyTypedWord);
|
logUnit.setWord(originallyTypedWord);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user