---
# Configuration for the `pretrain-language` stage (see `global.stage`).
# NOTE(review): the per-key remarks below are inferred from key names and
# values only — confirm against the code that consumes this file.

global:
  name: pretrain-language-model
  phase: train
  stage: pretrain-language
  workdir: workdir
  seed: null  # explicitly unset

dataset:
  train:
    roots: ['data/WikiText-103.csv']
    batch_size: 4096
  test:
    roots: ['data/WikiText-103_eval_d1.csv']
    batch_size: 4096

training:
  epochs: 80
  # Iteration intervals — presumably for logging, evaluation and
  # checkpointing respectively; TODO confirm.
  show_iters: 50
  eval_iters: 6000
  save_iters: 3000

optimizer:
  type: Adam
  true_wd: false
  wd: 0.0
  bn_wd: false
  clip_grad: 20
  lr: 0.0001
  args:
    # `!!python/tuple` is a Python-specific tag: safe loaders reject it, so
    # this file has to be read with a loader that supports Python tags.
    betas: !!python/tuple [0.9, 0.999]
  scheduler:
    # 70 + 10 equals `training.epochs`; presumably epochs per LR phase.
    periods: [70, 10]
    gamma: 0.1

model:
  name: 'modules.model_language.BCNLanguage'
  language:
    num_layers: 4
    loss_weight: 1.0
    use_self_attn: false