Fix modified UTF-8 errors when returning strings

This commit is contained in:
Aleksandras Kostarevas 2024-03-21 16:49:45 -05:00
parent 925eda3be7
commit 434a751d63
5 changed files with 30 additions and 11 deletions

View File

@ -1060,7 +1060,7 @@ namespace latinime {
// Output
size_t size = env->GetArrayLength(outPredictions);
jstring result_str = env->NewStringUTF(result_probability_mode);
jstring result_str = string2jstring(env, result_probability_mode);
env->SetObjectArrayElement(outPredictions, size - 1, result_str);
env->DeleteLocalRef(result_str);
@ -1068,7 +1068,7 @@ namespace latinime {
// Output predictions for next word
for (int i = 0; i < results.size(); i++) {
jstring jstr = env->NewStringUTF(results[i].second.c_str());
jstring jstr = string2jstring(env, results[i].second.c_str());
env->SetObjectArrayElement(outPredictions, i, jstr);
probsArray[i] = results[i].first;
env->DeleteLocalRef(jstr);

View File

@ -23,10 +23,10 @@ namespace latinime {
jmethodID constructor = env->GetMethodID(modelInfoClass, "<init>", "(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/util/List;Ljava/util/List;Ljava/lang/String;ILjava/lang/String;)V");
// Create example data
jstring name = env->NewStringUTF(metadata.name.c_str());
jstring description = env->NewStringUTF(metadata.description.c_str());
jstring author = env->NewStringUTF(metadata.author.c_str());
jstring license = env->NewStringUTF(metadata.license.c_str());
jstring name = string2jstring(env, metadata.name.c_str());
jstring description = string2jstring(env, metadata.description.c_str());
jstring author = string2jstring(env, metadata.author.c_str());
jstring license = string2jstring(env, metadata.license.c_str());
const char *tokenizer_type_value;
switch(metadata.ext_tokenizer_type) {
@ -41,7 +41,7 @@ namespace latinime {
break;
}
jstring tokenizer_type = env->NewStringUTF(tokenizer_type_value);
jstring tokenizer_type = string2jstring(env, tokenizer_type_value);
jint finetune_count = metadata.finetuning_count;
// Create example features and languages lists
@ -53,13 +53,13 @@ namespace latinime {
jobject languages = env->NewObject(listClass, listConstructor);
for (const auto& feature : metadata.features) {
jstring jFeature = env->NewStringUTF(feature.c_str());
jstring jFeature = string2jstring(env, feature.c_str());
env->CallBooleanMethod(features, listAdd, jFeature);
env->DeleteLocalRef(jFeature);
}
for (const auto& language : metadata.languages) {
jstring jLanguage = env->NewStringUTF(language.c_str());
jstring jLanguage = string2jstring(env, language.c_str());
env->CallBooleanMethod(languages, listAdd, jLanguage);
env->DeleteLocalRef(jLanguage);
}

View File

@ -90,7 +90,7 @@ static jstring WhisperGGML_infer(JNIEnv *env, jobject instance, jlong handle, jf
wparams.max_tokens = 256;
wparams.n_threads = (int)num_procs;
wparams.audio_ctx = std::max(160, std::min(1500, (int)ceil((double)num_samples / (double)(320.0)) + 16));
wparams.audio_ctx = std::max(160, std::min(1500, (int)ceil((double)num_samples / (double)(320.0)) + 32));
wparams.temperature_inc = 0.0f;
// Replicates old tflite behavior
@ -148,7 +148,7 @@ static jstring WhisperGGML_infer(JNIEnv *env, jobject instance, jlong handle, jf
auto *wstate = reinterpret_cast<WhisperModelState *>(user_data);
jstring pjstr = wstate->env->NewStringUTF(partial.c_str());
jstring pjstr = string2jstring(wstate->env, partial.c_str());
wstate->env->CallVoidMethod(wstate->partial_result_instance, wstate->partial_result_method, pjstr);
wstate->env->DeleteLocalRef(pjstr);
};

View File

@ -17,4 +17,22 @@ std::string jstring2string(JNIEnv *env, jstring jStr) {
stringChars[stringUtf8Length] = '\0';
return {stringChars};
}
// Decodes the NUL-terminated UTF-8 bytes at `str` into a java.lang.String via
// java.nio.charset.Charset.decode(). Every local reference created here is
// intentionally left for the caller to release (string2jstring runs this inside
// its own local frame). Returns nullptr as soon as any JNI step fails; in that
// case a Java exception may be pending.
static jstring decodeUtf8CString(JNIEnv *env, const char *str) {
    // Wrap the native bytes without copying. The buffer is only read during
    // decode() below and is never retained past this function.
    jobject bb = env->NewDirectByteBuffer(const_cast<char *>(str), strlen(str));
    if (bb == nullptr) return nullptr;

    jclass cls_Charset = env->FindClass("java/nio/charset/Charset");
    if (cls_Charset == nullptr) return nullptr;

    jmethodID mid_Charset_forName = env->GetStaticMethodID(
            cls_Charset, "forName", "(Ljava/lang/String;)Ljava/nio/charset/Charset;");
    if (mid_Charset_forName == nullptr) return nullptr;

    // The charset name is plain ASCII, so NewStringUTF is safe for it.
    jstring charsetName = env->NewStringUTF("UTF-8");
    if (charsetName == nullptr) return nullptr;

    jobject charset = env->CallStaticObjectMethod(cls_Charset, mid_Charset_forName, charsetName);
    if (env->ExceptionCheck() || charset == nullptr) return nullptr;

    jmethodID mid_Charset_decode = env->GetMethodID(
            cls_Charset, "decode", "(Ljava/nio/ByteBuffer;)Ljava/nio/CharBuffer;");
    if (mid_Charset_decode == nullptr) return nullptr;

    jobject cb = env->CallObjectMethod(charset, mid_Charset_decode, bb);
    if (env->ExceptionCheck() || cb == nullptr) return nullptr;

    jclass cls_CharBuffer = env->FindClass("java/nio/CharBuffer");
    if (cls_CharBuffer == nullptr) return nullptr;

    jmethodID mid_CharBuffer_toString = env->GetMethodID(
            cls_CharBuffer, "toString", "()Ljava/lang/String;");
    if (mid_CharBuffer_toString == nullptr) return nullptr;

    return static_cast<jstring>(env->CallObjectMethod(cb, mid_CharBuffer_toString));
}

// Converts a NUL-terminated, standard UTF-8 C string into a Java String.
//
// JNI's NewStringUTF expects *modified* UTF-8, so the bytes are decoded on the
// Java side with Charset.forName("UTF-8").decode(...) instead.
//
// Parameters:
//   env - JNI environment of the calling thread.
//   str - NUL-terminated UTF-8 bytes; may be nullptr.
//
// Returns a new local reference to the decoded string, or nullptr when `str`
// is nullptr or when a JNI call fails (a Java exception may then be pending).
// The caller owns the returned local reference.
jstring string2jstring(JNIEnv *env, const char *str) {
    if (str == nullptr) {
        return nullptr;
    }

    // The conversion creates several temporary local references (classes, the
    // charset, the byte/char buffers). This function is called in loops and
    // from callbacks inside long-running native methods, so scope all of them
    // to a dedicated local frame rather than letting them pile up in the
    // caller's local reference table. PushLocalFrame returns 0 on success.
    if (env->PushLocalFrame(8) != 0) {
        return nullptr;
    }

    jstring decoded = decodeUtf8CString(env, str);

    // PopLocalFrame releases every temporary created above and hands back a
    // fresh local reference to `decoded` (or nullptr) valid in the caller's frame.
    return static_cast<jstring>(env->PopLocalFrame(decoded));
}

View File

@ -9,5 +9,6 @@
#include "../jni_common.h"
// Converts the Java string `jStr` into a std::string.
// NOTE(review): the byte encoding of the result is not visible from this header — confirm in the implementation.
std::string jstring2string(JNIEnv *env, jstring jStr);
// Builds a Java String from the NUL-terminated C string `str` by decoding its bytes as UTF-8
// through java.nio.charset.Charset (rather than JNI's NewStringUTF).
jstring string2jstring(JNIEnv *env, const char *str);
#endif //LATINIME_JNI_UTILS_H