Update training hyperparameters

This commit is contained in:
Aleksandras Kostarevas 2023-11-21 17:07:43 +02:00
parent e57c1072d8
commit cb2edca601
2 changed files with 4 additions and 2 deletions

View File

@ -130,6 +130,8 @@ class TrainingWorker(context: Context, parameters: WorkerParameters) : Coroutine
builder.setLossFlow(TrainingWorkerStatus.loss) builder.setLossFlow(TrainingWorkerStatus.loss)
builder.setProgressFlow(TrainingWorkerStatus.progress) builder.setProgressFlow(TrainingWorkerStatus.progress)
builder.setWeight(0.75f)
val data = getTrainingData() val data = getTrainingData()
builder.addExamples(data.lines()) builder.addExamples(data.lines())

View File

@ -68,13 +68,13 @@ namespace latinime {
params.common.n_threads = 6; params.common.n_threads = 6;
params.common.n_gradient_accumulation = 2; params.common.n_gradient_accumulation = 2;
params.common.n_batch = 2; params.common.n_batch = 2;
params.common.n_ctx = 32; params.common.n_ctx = 64;
params.common.sample_random_offsets = true; params.common.sample_random_offsets = true;
params.common.warmup = 10; params.common.warmup = 10;
params.common.n_epochs = 1; params.common.n_epochs = 1;
params.common.adam_alpha = 1e-3; params.common.adam_alpha = 1e-3;
params.common.adam_n_iter = 64; params.common.adam_n_iter = 128;
// Increasing/decreasing this doesn't appear to significantly affect training time // Increasing/decreasing this doesn't appear to significantly affect training time
params.lora_r = 16; params.lora_r = 16;