{
  "tensorboard": true,
  "seed": 1234,
  "tgt_vocab": "id.eole.vocab",
  "overwrite": true,
  "tensorboard_log_dir": "tensorboard",
  "n_sample": 0,
  "vocab_size_multiple": 8,
  "report_every": 100,
  "transforms": [
    "sentencepiece",
    "filtertoolong"
  ],
  "src_vocab_size": 20000,
  "valid_metrics": [
    "BLEU"
  ],
  "share_vocab": false,
  "tgt_vocab_size": 20000,
  "save_data": "data",
  "tensorboard_log_dir_dated": "tensorboard/May-28_20-39-34",
  "src_vocab": "en.eole.vocab",
  "training": {
    "normalization": "tokens",
    "attention_dropout": [
      0.1
    ],
    "gpu_ranks": [
      0
    ],
    "max_grad_norm": 0.0,
    "decay_method": "noam",
    "accum_steps": [
      0
    ],
    "keep_checkpoint": 4,
    "batch_size_multiple": 8,
    "prefetch_factor": 32,
    "valid_batch_size": 4096,
    "learning_rate": 2.0,
    "batch_size": 8000,
    "warmup_steps": 4000,
    "compute_dtype": "torch.float16",
    "valid_steps": 5000,
    "bucket_size": 128000,
    "average_decay": 0.0001,
    "world_size": 1,
    "dropout": [
      0.1
    ],
    "save_checkpoint_steps": 5000,
    "dropout_steps": [
      0
    ],
    "train_steps": 100000,
    "optim": "adamw",
    "adam_beta2": 0.998,
    "num_workers": 0,
    "model_path": "quickmt-en-id-eole-model",
    "batch_type": "tokens",
    "accum_count": [
      10
    ],
    "param_init_method": "xavier_uniform",
    "label_smoothing": 0.1
  },
  "transforms_configs": {
    "sentencepiece": {
      "src_subword_model": "${MODEL_PATH}/en.spm.model",
      "tgt_subword_model": "${MODEL_PATH}/id.spm.model"
    },
    "filtertoolong": {
      "tgt_seq_length": 256,
      "src_seq_length": 256
    }
  },
  "data": {
    "corpus_1": {
      "transforms": [
        "sentencepiece",
        "filtertoolong"
      ],
      "path_src": "train.en",
      "path_tgt": "train.id",
      "path_align": null
    },
    "valid": {
      "transforms": [
        "sentencepiece",
        "filtertoolong"
      ],
      "path_src": "dev.en",
      "path_tgt": "dev.id",
      "path_align": null
    }
  },
  "model": {
    "share_embeddings": false,
    "transformer_ff": 4096,
    "position_encoding_type": "SinusoidalInterleaved",
    "share_decoder_embeddings": false,
    "hidden_size": 1024,
    "heads": 8,
    "architecture": "transformer",
    "embeddings": {
      "tgt_word_vec_size": 1024,
      "src_word_vec_size": 1024,
      "position_encoding_type": "SinusoidalInterleaved",
      "word_vec_size": 1024
    },
    "decoder": {
      "n_positions": null,
      "transformer_ff": 4096,
      "position_encoding_type": "SinusoidalInterleaved",
      "tgt_word_vec_size": 1024,
      "decoder_type": "transformer",
      "hidden_size": 1024,
      "heads": 8,
      "layers": 2
    },
    "encoder": {
      "src_word_vec_size": 1024,
      "n_positions": null,
      "transformer_ff": 4096,
      "position_encoding_type": "SinusoidalInterleaved",
      "encoder_type": "transformer",
      "hidden_size": 1024,
      "heads": 8,
      "layers": 8
    }
  }
}