mirror of
https://gitlab.futo.org/keyboard/latinime.git
synced 2024-09-28 14:54:30 +01:00
Make keyboard less sluggish by initializing model in background thread
This commit is contained in:
parent
875e9862ec
commit
166edae77b
@ -57,20 +57,27 @@ public class GGMLDictionary extends Dictionary {
|
||||
return outputFile.getAbsolutePath();
|
||||
}
|
||||
|
||||
Thread initThread = null;
|
||||
public GGMLDictionary(Context context, String dictType, Locale locale) {
|
||||
super(dictType, locale);
|
||||
|
||||
String modelPath = getPathToModelResource(context, R.raw.pythia_160m_q4_0, false);
|
||||
mNativeState = openNative(modelPath, 0, 0, false);
|
||||
initThread = new Thread() {
|
||||
@Override public void run() {
|
||||
String modelPath = getPathToModelResource(context, R.raw.pythia_160m_q4_0, false);
|
||||
mNativeState = openNative(modelPath, 0, 0, false);
|
||||
|
||||
if(mNativeState == 0){
|
||||
modelPath = getPathToModelResource(context, R.raw.pythia_160m_q4_0, true);
|
||||
mNativeState = openNative(modelPath, 0, 0, false);
|
||||
}
|
||||
if(mNativeState == 0){
|
||||
modelPath = getPathToModelResource(context, R.raw.pythia_160m_q4_0, true);
|
||||
mNativeState = openNative(modelPath, 0, 0, false);
|
||||
}
|
||||
|
||||
if(mNativeState == 0){
|
||||
throw new RuntimeException("Failed to load pythia_160m model");
|
||||
}
|
||||
if(mNativeState == 0){
|
||||
throw new RuntimeException("Failed to load pythia_160m model");
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
initThread.start();
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -83,9 +90,8 @@ public class GGMLDictionary extends Dictionary {
|
||||
float weightForLocale,
|
||||
float[] inOutWeightOfLangModelVsSpatialModel
|
||||
) {
|
||||
if (mNativeState == 0) {
|
||||
return null;
|
||||
}
|
||||
if (mNativeState == 0) return null;
|
||||
if (initThread != null && initThread.isAlive()) return null;
|
||||
|
||||
final InputPointers inputPointers = composedData.mInputPointers;
|
||||
final boolean isGesture = composedData.mIsBatchMode;
|
||||
@ -143,6 +149,12 @@ public class GGMLDictionary extends Dictionary {
|
||||
|
||||
|
||||
private synchronized void closeInternalLocked() {
|
||||
try {
|
||||
if (initThread != null) initThread.join();
|
||||
} catch (InterruptedException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
if (mNativeState != 0) {
|
||||
closeNative(mNativeState);
|
||||
mNativeState = 0;
|
||||
|
@ -1,6 +1,7 @@
|
||||
#include "ggml/ggml.h"
|
||||
#include "gpt_neox.h"
|
||||
#include "common.h"
|
||||
#include "defines.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <cmath>
|
||||
@ -387,17 +388,9 @@ bool gpt_neox_eval(
|
||||
|
||||
// TODO: All of this allocates over 800 megabytes of memory, way more than the size of the model!
|
||||
|
||||
static size_t buf_size = 256u*1024*1024;
|
||||
static size_t buf_size = 128u*1024*1024;
|
||||
static void * buf = malloc(buf_size);
|
||||
|
||||
// use 2 scratch buffers
|
||||
// TODO: very hacky solution - reimplement in a more elegant way
|
||||
static size_t scr0_size = 256u*1024*1024;
|
||||
static void * scr0 = malloc(scr0_size);
|
||||
|
||||
static size_t scr1_size = 256u*1024*1024;
|
||||
static void * scr1 = malloc(scr1_size);
|
||||
|
||||
if (mem_per_token > 0 && mem_per_token*N > buf_size) {
|
||||
const size_t buf_size_new = 1.1*(mem_per_token*N); // add 10% to account for ggml object overhead
|
||||
//printf("\n%s: reallocating buffer from %zu to %zu bytes\n", __func__, buf_size, buf_size_new);
|
||||
@ -409,6 +402,7 @@ bool gpt_neox_eval(
|
||||
fprintf(stderr, "%s: failed to allocate %zu bytes\n", __func__, buf_size);
|
||||
return false;
|
||||
}
|
||||
AKLOGI("Allocated %.2fMB", (float)buf_size_new / 1024.0f / 1024.0f);
|
||||
}
|
||||
|
||||
struct ggml_init_params params = {
|
||||
@ -429,8 +423,6 @@ bool gpt_neox_eval(
|
||||
for (int il = 0; il < n_layer; ++il) {
|
||||
struct ggml_tensor * cur;
|
||||
|
||||
ggml_set_scratch(ctx0, { 0, scr0_size, scr0, });
|
||||
|
||||
// self-attention
|
||||
{
|
||||
{
|
||||
@ -534,8 +526,6 @@ bool gpt_neox_eval(
|
||||
}
|
||||
}
|
||||
|
||||
ggml_set_scratch(ctx0, { 0, scr1_size, scr1, });
|
||||
|
||||
if (hparams.par_res == 0) {
|
||||
struct ggml_tensor * inpFF = ggml_add(ctx0, cur, inpL);
|
||||
|
||||
@ -558,8 +548,6 @@ bool gpt_neox_eval(
|
||||
}
|
||||
}
|
||||
|
||||
ggml_set_scratch(ctx0, { 0, scr0_size, scr0, });
|
||||
|
||||
// norm
|
||||
{
|
||||
inpL = ggml_norm(ctx0, inpL);
|
||||
|
Loading…
Reference in New Issue
Block a user