Ray121381 committed on
Commit e3e3f87 · 1 Parent(s): 3f4bd2a
Files changed (49)
  1. .gitattributes +0 -0
  2. lora_checkpoints/checkpoints/all_results.json +19 -0
  3. lora_checkpoints/checkpoints/checkpoint-3000/config.json +33 -0
  4. lora_checkpoints/checkpoints/checkpoint-3000/configuration_chatglm.py +103 -0
  5. lora_checkpoints/checkpoints/checkpoint-3000/generation_config.json +7 -0
  6. lora_checkpoints/checkpoints/checkpoint-3000/ice_text.model +3 -0
  7. lora_checkpoints/checkpoints/checkpoint-3000/modeling_chatglm.py +1677 -0
  8. lora_checkpoints/checkpoints/checkpoint-3000/optimizer.pt +3 -0
  9. lora_checkpoints/checkpoints/checkpoint-3000/pytorch_model.bin +3 -0
  10. lora_checkpoints/checkpoints/checkpoint-3000/quantization.py +249 -0
  11. lora_checkpoints/checkpoints/checkpoint-3000/rng_state.pth +3 -0
  12. lora_checkpoints/checkpoints/checkpoint-3000/scheduler.pt +3 -0
  13. lora_checkpoints/checkpoints/checkpoint-3000/special_tokens_map.json +7 -0
  14. lora_checkpoints/checkpoints/checkpoint-3000/tokenization_chatglm.py +443 -0
  15. lora_checkpoints/checkpoints/checkpoint-3000/tokenizer_config.json +22 -0
  16. lora_checkpoints/checkpoints/checkpoint-3000/trainer_state.json +1816 -0
  17. lora_checkpoints/checkpoints/checkpoint-3000/training_args.bin +3 -0
  18. lora_checkpoints/checkpoints/generated_predictions.txt +200 -0
  19. lora_checkpoints/checkpoints/predict_results.json +13 -0
  20. lora_checkpoints/checkpoints/train_results.json +8 -0
  21. lora_checkpoints/data/QiaoBan/.idea/.gitignore +8 -0
  22. lora_checkpoints/data/QiaoBan/.idea/Qiaoban.iml +8 -0
  23. lora_checkpoints/data/QiaoBan/.idea/inspectionProfiles/Project_Default.xml +27 -0
  24. lora_checkpoints/data/QiaoBan/.idea/inspectionProfiles/profiles_settings.xml +6 -0
  25. lora_checkpoints/data/QiaoBan/.idea/misc.xml +4 -0
  26. lora_checkpoints/data/QiaoBan/.idea/modules.xml +8 -0
  27. lora_checkpoints/data/QiaoBan/README.md +150 -0
  28. lora_checkpoints/data/QiaoBan/README_EN.md +147 -0
  29. lora_checkpoints/data/QiaoBan/collect.py +77 -0
  30. lora_checkpoints/data/QiaoBan/data/child_chat_100.json +0 -0
  31. lora_checkpoints/data/QiaoBan/data/child_chat_data.json +0 -0
  32. lora_checkpoints/data/QiaoBan/example.png +0 -0
  33. lora_checkpoints/data/QiaoBan/finetune.py +202 -0
  34. lora_checkpoints/data/QiaoBan/generate_train_data.py +32 -0
  35. lora_checkpoints/data/QiaoBan/logo.png +0 -0
  36. lora_checkpoints/data/QiaoBan/run_config/config.json +17 -0
  37. lora_checkpoints/data/QiaoBan/run_config/deepspeed_config.json +39 -0
  38. lora_checkpoints/data/QiaoBan/split_data.py +23 -0
  39. lora_checkpoints/data/QiaoBan/test_data.json +0 -0
  40. lora_checkpoints/data/QiaoBan/topic.txt +98 -0
  41. lora_checkpoints/data/baselines/chatglm/result.json +0 -0
  42. lora_checkpoints/data/baselines/chatgpt/result.json +0 -0
  43. lora_checkpoints/data/baselines/evaluate_model.py +60 -0
  44. lora_checkpoints/data/baselines/inference_by_chatgpt.py +61 -0
  45. lora_checkpoints/data/baselines/inference_model.py +29 -0
  46. lora_checkpoints/data/mandarin-cantonese-mt/chat_train_data.json +0 -0
  47. lora_checkpoints/data/mandarin-cantonese-mt/cleaned_parallel_sentences.txt +0 -0
  48. lora_checkpoints/data/mandarin-cantonese-mt/generate_train_data.py +24 -0
  49. lora_checkpoints/test_inference.py +44 -0
.gitattributes CHANGED
File without changes
lora_checkpoints/checkpoints/all_results.json ADDED
@@ -0,0 +1,19 @@
+ {
+   "epoch": 1.6,
+   "predict_bleu-1": 24.954275499999998,
+   "predict_bleu-2": 14.941613499999999,
+   "predict_bleu-3": 9.877983500000001,
+   "predict_bleu-4": 7.0543495,
+   "predict_rouge-1": 29.6945075,
+   "predict_rouge-2": 8.0306785,
+   "predict_rouge-l": 25.090445499999998,
+   "predict_runtime": 104.8315,
+   "predict_samples": 200,
+   "predict_samples_per_second": 1.908,
+   "predict_steps_per_second": 0.238,
+   "train_loss": 2.8635472005208333,
+   "train_runtime": 23647.0353,
+   "train_samples": 29942,
+   "train_samples_per_second": 2.03,
+   "train_steps_per_second": 0.127
+ }
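For quick inspection, here is a minimal sketch (not part of the commit) of how the aggregated metrics in `all_results.json` could be read back. The relative path is an assumption based on this repository layout, and the keys simply mirror the JSON above.

```python
import json

# Assumed location of the metrics file within this repository layout.
RESULTS_PATH = "lora_checkpoints/checkpoints/all_results.json"

with open(RESULTS_PATH, "r", encoding="utf-8") as f:
    results = json.load(f)

# Report generation quality (BLEU/ROUGE) separately from training statistics.
for key in sorted(results):
    if key.startswith("predict_"):
        print(f"{key:32s}{results[key]}")

print(f"train_loss = {results['train_loss']:.4f} "
      f"({results['train_samples']} samples, {results['train_runtime']:.0f}s)")
```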
lora_checkpoints/checkpoints/checkpoint-3000/config.json ADDED
@@ -0,0 +1,33 @@
+ {
+   "_name_or_path": "../base-model/chatglm-6b",
+   "architectures": [
+     "ChatGLMForConditionalGeneration"
+   ],
+   "auto_map": {
+     "AutoConfig": "configuration_chatglm.ChatGLMConfig",
+     "AutoModel": "modeling_chatglm.ChatGLMForConditionalGeneration",
+     "AutoModelForSeq2SeqLM": "modeling_chatglm.ChatGLMForConditionalGeneration"
+   },
+   "bos_token_id": 130004,
+   "eos_token_id": 130005,
+   "gmask_token_id": 130001,
+   "hidden_size": 4096,
+   "inner_hidden_size": 16384,
+   "layernorm_epsilon": 1e-05,
+   "mask_token_id": 130000,
+   "max_sequence_length": 2048,
+   "model_type": "chatglm",
+   "num_attention_heads": 32,
+   "num_layers": 28,
+   "pad_token_id": 3,
+   "position_encoding_2d": true,
+   "pre_seq_len": 128,
+   "prefix_cur_expert": 1,
+   "prefix_n_experts": 1,
+   "prefix_projection": false,
+   "quantization_bit": 4,
+   "torch_dtype": "float16",
+   "transformers_version": "4.27.1",
+   "use_cache": true,
+   "vocab_size": 130528
+ }
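As a sketch (not part of the commit), a checkpoint with this config could be loaded through the `auto_map` entries above, which point at the bundled `configuration_chatglm.py` / `modeling_chatglm.py`. The directory path below is an assumption, and `trust_remote_code=True` is required so transformers imports those custom classes.

```python
from transformers import AutoConfig, AutoModel

# Assumed local path of the checkpoint directory shown in this commit.
CKPT_DIR = "lora_checkpoints/checkpoints/checkpoint-3000"

# trust_remote_code=True lets transformers import the ChatGLM classes bundled
# with the checkpoint (see the auto_map section of config.json above).
config = AutoConfig.from_pretrained(CKPT_DIR, trust_remote_code=True)
print(config.pre_seq_len, config.quantization_bit)  # 128 and 4 in this checkpoint

model = AutoModel.from_pretrained(CKPT_DIR, config=config, trust_remote_code=True)
model = model.half().cuda().eval()  # fp16 GPU inference, as ChatGLM-6B is usually run
```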
lora_checkpoints/checkpoints/checkpoint-3000/configuration_chatglm.py ADDED
@@ -0,0 +1,103 @@
+ """ ChatGLM model configuration """
+
+ from transformers.configuration_utils import PretrainedConfig
+ from transformers.utils import logging
+
+ logger = logging.get_logger(__name__)
+
+
+ class ChatGLMConfig(PretrainedConfig):
+     r"""
+     This is the configuration class to store the configuration of a [`~ChatGLMModel`].
+     It is used to instantiate a ChatGLM model according to the specified arguments, defining the model
+     architecture. Instantiating a configuration with the defaults will yield a similar configuration to that of
+     the ChatGLM-6B [THUDM/ChatGLM-6B](https://huggingface.co/THUDM/chatglm-6b) architecture.
+
+     Configuration objects inherit from [`PretrainedConfig`] and can be used
+     to control the model outputs. Read the documentation from [`PretrainedConfig`]
+     for more information.
+
+
+     Args:
+         vocab_size (`int`, *optional*, defaults to 150528):
+             Vocabulary size of the ChatGLM-6B model. Defines the number of different tokens that can be represented by the
+             `inputs_ids` passed when calling [`~ChatGLMModel`] or
+             [`~TFChatGLMModel`].
+         hidden_size (`int`, *optional*, defaults to 4096):
+             Dimension of the encoder layers and the pooler layer.
+         num_hidden_layers (`int`, *optional*, defaults to 28):
+             Number of hidden layers in the Transformer encoder.
+         num_attention_heads (`int`, *optional*, defaults to 32):
+             Number of attention heads for each attention layer in the Transformer encoder.
+         inner_hidden_size (`int`, *optional*, defaults to 16384):
+             Dimension of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder.
+         max_sequence_length (`int`, *optional*, defaults to 512):
+             The maximum sequence length that this model might ever be used with.
+             Typically set this to something large just in case (e.g., 512 or 1024 or 2048).
+         layernorm_epsilon (`float`, *optional*, defaults to 1e-5):
+             The epsilon used by the layer normalization layers.
+         use_cache (`bool`, *optional*, defaults to `True`):
+             Whether the model should return the last key/values attentions (not used by all models).
+     Example:
+
+     ```python
+     >>> from configuration_chatglm import ChatGLMConfig
+     >>> from modeling_chatglm import ChatGLMModel
+
+     >>> # Initializing a ChatGLM-6B THUDM/ChatGLM-6B style configuration
+     >>> configuration = ChatGLMConfig()
+
+     >>> # Initializing a model from the THUDM/ChatGLM-6B style configuration
+     >>> model = ChatGLMModel(configuration)
+
+     >>> # Accessing the model configuration
+     >>> configuration = model.config
+     ```
+     """
+     model_type = "chatglm"
+
+     def __init__(
+         self,
+         vocab_size=150528,
+         hidden_size=4096,
+         num_layers=28,
+         num_attention_heads=32,
+         layernorm_epsilon=1e-5,
+         use_cache=False,
+         bos_token_id=150004,
+         eos_token_id=150005,
+         mask_token_id=150000,
+         gmask_token_id=150001,
+         pad_token_id=0,
+         max_sequence_length=2048,
+         inner_hidden_size=16384,
+         position_encoding_2d=True,
+         quantization_bit=0,
+         pre_seq_len=None,
+         prefix_projection=False,
+         **kwargs
+     ):
+         self.num_layers = num_layers
+         self.vocab_size = vocab_size
+         self.hidden_size = hidden_size
+         self.num_attention_heads = num_attention_heads
+         self.max_sequence_length = max_sequence_length
+         self.layernorm_epsilon = layernorm_epsilon
+         self.inner_hidden_size = inner_hidden_size
+         self.use_cache = use_cache
+         self.bos_token_id = bos_token_id
+         self.eos_token_id = eos_token_id
+         self.pad_token_id = pad_token_id
+         self.mask_token_id = mask_token_id
+         self.gmask_token_id = gmask_token_id
+         self.position_encoding_2d = position_encoding_2d
+         self.quantization_bit = quantization_bit
+         self.pre_seq_len = pre_seq_len
+         self.prefix_projection = prefix_projection
+
+         super().__init__(
+             pad_token_id=pad_token_id,
+             bos_token_id=bos_token_id,
+             eos_token_id=eos_token_id,
+             **kwargs
+         )
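A small usage sketch (not part of the file): the prefix-tuning fields seen in checkpoint-3000/config.json (`pre_seq_len`, `prefix_n_experts`, `prefix_cur_expert`) are not named parameters of `__init__`. Anything extra travels through `**kwargs`, is stored as a plain attribute by `PretrainedConfig`, and is later read by `PrefixExperts` in `modeling_chatglm.py`.

```python
from configuration_chatglm import ChatGLMConfig

# Hypothetical construction mirroring checkpoint-3000/config.json.
# prefix_n_experts / prefix_cur_expert are passed via **kwargs and become
# plain attributes on the config object (PretrainedConfig keeps leftover kwargs).
config = ChatGLMConfig(
    pre_seq_len=128,          # length of the trainable prefix (p-tuning v2 style)
    prefix_projection=False,  # no MLP re-parameterization of the prefix
    prefix_n_experts=1,
    prefix_cur_expert=1,
    quantization_bit=4,
)
print(config.pre_seq_len, config.prefix_n_experts, config.prefix_cur_expert)
```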
lora_checkpoints/checkpoints/checkpoint-3000/generation_config.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 130004,
+   "eos_token_id": 130005,
+   "pad_token_id": 3,
+   "transformers_version": "4.27.1"
+ }
lora_checkpoints/checkpoints/checkpoint-3000/ice_text.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5e974d9a69c242ce014c88c2b26089270f6198f3c0b700a887666cd3e816f17e
+ size 2706249
lora_checkpoints/checkpoints/checkpoint-3000/modeling_chatglm.py ADDED
@@ -0,0 +1,1677 @@
1
+ """ PyTorch ChatGLM model. """
2
+
3
+ import math
4
+ import copy
5
+ import os
6
+ import warnings
7
+ import re
8
+ import sys
9
+
10
+ import torch
11
+ import torch.utils.checkpoint
12
+ import torch.nn.functional as F
13
+ from torch import nn
14
+ from torch.nn import CrossEntropyLoss, LayerNorm
15
+ from torch.nn.utils import skip_init
16
+ from typing import Optional, Tuple, Union, List, Callable, Dict, Any
17
+
18
+ from transformers.utils import (
19
+ add_code_sample_docstrings,
20
+ add_start_docstrings,
21
+ add_start_docstrings_to_model_forward,
22
+ )
23
+ from transformers.modeling_outputs import (
24
+ BaseModelOutputWithPast,
25
+ CausalLMOutputWithPast,
26
+ BaseModelOutputWithPastAndCrossAttentions,
27
+ )
28
+ from transformers.modeling_utils import PreTrainedModel
29
+ from transformers.utils import logging
30
+ from transformers.generation.logits_process import LogitsProcessor
31
+ from transformers.generation.utils import (
32
+ LogitsProcessorList,
33
+ StoppingCriteriaList,
34
+ GenerationConfig,
35
+ ModelOutput,
36
+ )
37
+
38
+ from .configuration_chatglm import ChatGLMConfig
39
+
40
+ # flags required to enable jit fusion kernels
41
+
42
+ if sys.platform != "darwin":
43
+ torch._C._jit_set_profiling_mode(False)
44
+ torch._C._jit_set_profiling_executor(False)
45
+ torch._C._jit_override_can_fuse_on_cpu(True)
46
+ torch._C._jit_override_can_fuse_on_gpu(True)
47
+
48
+ logger = logging.get_logger(__name__)
49
+
50
+ _CHECKPOINT_FOR_DOC = "THUDM/ChatGLM-6B"
51
+ _CONFIG_FOR_DOC = "ChatGLM6BConfig"
52
+
53
+ CHATGLM_6B_PRETRAINED_MODEL_ARCHIVE_LIST = [
54
+ "THUDM/chatglm-6b",
55
+ # See all ChatGLM-6B models at https://huggingface.co/models?filter=chatglm
56
+ ]
57
+
58
+
59
+ class InvalidScoreLogitsProcessor(LogitsProcessor):
60
+ def __call__(
61
+ self, input_ids: torch.LongTensor, scores: torch.FloatTensor
62
+ ) -> torch.FloatTensor:
63
+ if torch.isnan(scores).any() or torch.isinf(scores).any():
64
+ scores.zero_()
65
+ scores[..., 5] = 5e4
66
+ return scores
67
+
68
+
69
+ def load_tf_weights_in_chatglm_6b(model, config, tf_checkpoint_path):
70
+ """Load tf checkpoints in a pytorch model."""
71
+ try:
72
+ import re
73
+
74
+ import numpy as np
75
+ import tensorflow as tf
76
+ except ImportError:
77
+ logger.error(
78
+ "Loading a TensorFlow model in PyTorch, requires TensorFlow to be installed. Please see "
79
+ "https://www.tensorflow.org/install/ for installation instructions."
80
+ )
81
+ raise
82
+ tf_path = os.path.abspath(tf_checkpoint_path)
83
+ logger.info(f"Converting TensorFlow checkpoint from {tf_path}")
84
+ # Load weights from TF model
85
+ init_vars = tf.train.list_variables(tf_path)
86
+ names = []
87
+ arrays = []
88
+ for name, shape in init_vars:
89
+ logger.info(f"Loading TF weight {name} with shape {shape}")
90
+ array = tf.train.load_variable(tf_path, name)
91
+ names.append(name)
92
+ arrays.append(array)
93
+
94
+ for name, array in zip(names, arrays):
95
+ name = name.split("/")
96
+ # adam_v and adam_m are variables used in AdamWeightDecayOptimizer to calculate m and v
97
+ # which are not required for using pretrained model
98
+ if any(
99
+ n
100
+ in [
101
+ "adam_v",
102
+ "adam_m",
103
+ "AdamWeightDecayOptimizer",
104
+ "AdamWeightDecayOptimizer_1",
105
+ "global_step",
106
+ ]
107
+ for n in name
108
+ ):
109
+ logger.info(f"Skipping {'/'.join(name)}")
110
+ continue
111
+ pointer = model
112
+ for m_name in name:
113
+ if re.fullmatch(r"[A-Za-z]+_\d+", m_name):
114
+ scope_names = re.split(r"_(\d+)", m_name)
115
+ else:
116
+ scope_names = [m_name]
117
+ if scope_names[0] == "kernel" or scope_names[0] == "gamma":
118
+ pointer = getattr(pointer, "weight")
119
+ elif scope_names[0] == "output_bias" or scope_names[0] == "beta":
120
+ pointer = getattr(pointer, "bias")
121
+ elif scope_names[0] == "output_weights":
122
+ pointer = getattr(pointer, "weight")
123
+ elif scope_names[0] == "squad":
124
+ pointer = getattr(pointer, "classifier")
125
+ else:
126
+ try:
127
+ pointer = getattr(pointer, scope_names[0])
128
+ except AttributeError:
129
+ logger.info(f"Skipping {'/'.join(name)}")
130
+ continue
131
+ if len(scope_names) >= 2:
132
+ num = int(scope_names[1])
133
+ pointer = pointer[num]
134
+ if m_name[-11:] == "_embeddings":
135
+ pointer = getattr(pointer, "weight")
136
+ elif m_name == "kernel":
137
+ array = np.transpose(array)
138
+ try:
139
+ assert (
140
+ pointer.shape == array.shape
141
+ ), f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched"
142
+ except AssertionError as e:
143
+ e.args += (pointer.shape, array.shape)
144
+ raise
145
+ logger.info(f"Initialize PyTorch weight {name}")
146
+ pointer.data = torch.from_numpy(array)
147
+ return model
148
+
149
+
150
+ class PrefixExperts(torch.nn.Module):
151
+ def __init__(self, config):
152
+ super().__init__()
153
+ self.pre_seq_len = config.pre_seq_len
154
+ self.n_experts = config.prefix_n_experts
155
+ self.cur_expert = config.prefix_cur_expert
156
+ self.pre_seq_h_dim = config.num_layers * config.hidden_size * 2
157
+ self.expert_weights = (
158
+ config.expert_weights if self.cur_expert <= 0 else [1.0, 1.0]
159
+ )
160
+
161
+ # self.gate = torch.nn.Sequential(
162
+ # torch.nn.Linear(self.pre_seq_h_dim, self.n_experts),
163
+ # torch.nn.Softmax(dim=-1),
164
+ # )
165
+ self.experts = torch.nn.Embedding(
166
+ config.pre_seq_len, self.pre_seq_h_dim * self.n_experts,
167
+ )
168
+
169
+ def forward(self, prefix: torch.Tensor):
170
+ exp_values = self.experts(prefix).view(
171
+ -1, self.pre_seq_len, self.pre_seq_h_dim, self.n_experts
172
+ )
173
+ cur_expert_values = torch.zeros_like(exp_values[:, :, :, 0])
174
+ if self.cur_expert > 0:
175
+ cur_expert_values += exp_values[:, :, :, self.cur_expert - 1]
176
+ else:
177
+ for i, w in enumerate(self.expert_weights):
178
+ cur_expert_values += w * exp_values[:, :, :, i]
179
+ past_key_values = cur_expert_values
180
+ # g = self.gate(past_key_values)
181
+ # if self.cur_expert > 0:
182
+ # mask = torch.zeros_like(g)
183
+ # mask[:, :, self.cur_expert - 1] = 1.0
184
+ # g = g * mask
185
+ # g = torch.unsqueeze(g, dim=-1)
186
+ # g[:, :, 0] = 0.5
187
+ # g[:, :, 1] = 0.5
188
+ # past_key_values = torch.squeeze(torch.matmul(exp_key_values, g), dim=-1)
189
+ return past_key_values
190
+
191
+
192
+ class PrefixEncoder(torch.nn.Module):
193
+ """
194
+ The torch.nn model to encode the prefix
195
+ Input shape: (batch-size, prefix-length)
196
+ Output shape: (batch-size, prefix-length, 2*layers*hidden)
197
+ """
198
+
199
+ def __init__(self, config):
200
+ super().__init__()
201
+ self.prefix_projection = config.prefix_projection
202
+ self.prefix_experts = PrefixExperts(config)
203
+ # if self.prefix_projection:
204
+ # # Use a two-layer MLP to encode the prefix
205
+ # self.embedding = torch.nn.Embedding(config.pre_seq_len, config.hidden_size)
206
+ # self.trans = torch.nn.Sequential(
207
+ # torch.nn.Linear(config.hidden_size, config.hidden_size),
208
+ # torch.nn.Tanh(),
209
+ # torch.nn.Linear(
210
+ # config.hidden_size, config.num_layers * config.hidden_size * 2
211
+ # ),
212
+ # )
213
+ # else:
214
+ # self.embedding = torch.nn.Embedding(
215
+ # config.pre_seq_len, config.num_layers * config.hidden_size * 2
216
+ # )
217
+
218
+ def forward(self, prefix: torch.Tensor):
219
+ # if self.prefix_projection:
220
+ # prefix_tokens = self.embedding(prefix)
221
+ # past_key_values = self.trans(prefix_tokens)
222
+ # else:
223
+ # past_key_values = self.embedding(prefix)
224
+ past_key_values = self.prefix_experts(prefix)
225
+ return past_key_values
226
+
227
+
228
+ @torch.jit.script
229
+ def gelu_impl(x):
230
+ """OpenAI's gelu implementation."""
231
+ return (
232
+ 0.5 * x * (1.0 + torch.tanh(0.7978845608028654 * x * (1.0 + 0.044715 * x * x)))
233
+ )
234
+
235
+
236
+ def gelu(x):
237
+ return gelu_impl(x)
238
+
239
+
240
+ class RotaryEmbedding(torch.nn.Module):
241
+ def __init__(self, dim, base=10000, precision=torch.half, learnable=False):
242
+ super().__init__()
243
+ inv_freq = 1.0 / (base ** (torch.arange(0, dim, 2).float() / dim))
244
+ inv_freq = inv_freq.half()
245
+ self.learnable = learnable
246
+ if learnable:
247
+ self.inv_freq = torch.nn.Parameter(inv_freq)
248
+ self.max_seq_len_cached = None
249
+ else:
250
+ self.register_buffer("inv_freq", inv_freq)
251
+ self.max_seq_len_cached = None
252
+ self.cos_cached = None
253
+ self.sin_cached = None
254
+ self.precision = precision
255
+
256
+ def _load_from_state_dict(
257
+ self,
258
+ state_dict,
259
+ prefix,
260
+ local_metadata,
261
+ strict,
262
+ missing_keys,
263
+ unexpected_keys,
264
+ error_msgs,
265
+ ):
266
+ pass
267
+
268
+ def forward(self, x, seq_dim=1, seq_len=None):
269
+ if seq_len is None:
270
+ seq_len = x.shape[seq_dim]
271
+ if self.max_seq_len_cached is None or (seq_len > self.max_seq_len_cached):
272
+ self.max_seq_len_cached = None if self.learnable else seq_len
273
+ t = torch.arange(seq_len, device=x.device, dtype=self.inv_freq.dtype)
274
+ freqs = torch.einsum("i,j->ij", t, self.inv_freq)
275
+ # Different from paper, but it uses a different permutation in order to obtain the same calculation
276
+ emb = torch.cat((freqs, freqs), dim=-1).to(x.device)
277
+ if self.precision == torch.bfloat16:
278
+ emb = emb.float()
279
+
280
+ # [sx, 1 (b * np), hn]
281
+ cos_cached = emb.cos()[:, None, :]
282
+ sin_cached = emb.sin()[:, None, :]
283
+ if self.precision == torch.bfloat16:
284
+ cos_cached = cos_cached.bfloat16()
285
+ sin_cached = sin_cached.bfloat16()
286
+ if self.learnable:
287
+ return cos_cached, sin_cached
288
+ self.cos_cached, self.sin_cached = cos_cached, sin_cached
289
+ return self.cos_cached[:seq_len, ...], self.sin_cached[:seq_len, ...]
290
+
291
+ def _apply(self, fn):
292
+ if self.cos_cached is not None:
293
+ self.cos_cached = fn(self.cos_cached)
294
+ if self.sin_cached is not None:
295
+ self.sin_cached = fn(self.sin_cached)
296
+ return super()._apply(fn)
297
+
298
+
299
+ def rotate_half(x):
300
+ x1, x2 = x[..., : x.shape[-1] // 2], x[..., x.shape[-1] // 2 :]
301
+ return torch.cat(
302
+ (-x2, x1), dim=x1.ndim - 1
303
+ ) # dim=-1 triggers a bug in earlier torch versions
304
+
305
+
306
+ @torch.jit.script
307
+ def apply_rotary_pos_emb_index(q, k, cos, sin, position_id):
308
+ # position_id: [sq, b], q, k: [sq, b, np, hn], cos: [sq, 1, hn] -> [sq, b, 1, hn]
309
+ cos, sin = (
310
+ F.embedding(position_id, cos.squeeze(1)).unsqueeze(2),
311
+ F.embedding(position_id, sin.squeeze(1)).unsqueeze(2),
312
+ )
313
+ q, k = (q * cos) + (rotate_half(q) * sin), (k * cos) + (rotate_half(k) * sin)
314
+ return q, k
315
+
316
+
317
+ def attention_fn(
318
+ self,
319
+ query_layer,
320
+ key_layer,
321
+ value_layer,
322
+ attention_mask,
323
+ hidden_size_per_partition,
324
+ layer_id,
325
+ layer_past=None,
326
+ scaling_attention_score=True,
327
+ use_cache=False,
328
+ ):
329
+ if layer_past is not None:
330
+ past_key, past_value = layer_past[0], layer_past[1]
331
+ key_layer = torch.cat((past_key, key_layer), dim=0)
332
+ value_layer = torch.cat((past_value, value_layer), dim=0)
333
+
334
+ # seqlen, batch, num_attention_heads, hidden_size_per_attention_head
335
+ seq_len, b, nh, hidden_size = key_layer.shape
336
+
337
+ if use_cache:
338
+ present = (key_layer, value_layer)
339
+ else:
340
+ present = None
341
+
342
+ query_key_layer_scaling_coeff = float(layer_id + 1)
343
+ if scaling_attention_score:
344
+ query_layer = query_layer / (
345
+ math.sqrt(hidden_size) * query_key_layer_scaling_coeff
346
+ )
347
+
348
+ # ===================================
349
+ # Raw attention scores. [b, np, s, s]
350
+ # ===================================
351
+
352
+ # [b, np, sq, sk]
353
+ output_size = (
354
+ query_layer.size(1),
355
+ query_layer.size(2),
356
+ query_layer.size(0),
357
+ key_layer.size(0),
358
+ )
359
+
360
+ # [sq, b, np, hn] -> [sq, b * np, hn]
361
+ query_layer = query_layer.view(output_size[2], output_size[0] * output_size[1], -1)
362
+ # [sk, b, np, hn] -> [sk, b * np, hn]
363
+ key_layer = key_layer.view(output_size[3], output_size[0] * output_size[1], -1)
364
+
365
+ matmul_result = torch.zeros(
366
+ 1, 1, 1, dtype=query_layer.dtype, device=query_layer.device,
367
+ )
368
+
369
+ matmul_result = torch.baddbmm(
370
+ matmul_result,
371
+ query_layer.transpose(0, 1), # [b * np, sq, hn]
372
+ key_layer.transpose(0, 1).transpose(1, 2), # [b * np, hn, sk]
373
+ beta=0.0,
374
+ alpha=1.0,
375
+ )
376
+
377
+ # change view to [b, np, sq, sk]
378
+ attention_scores = matmul_result.view(*output_size)
379
+
380
+ if self.scale_mask_softmax:
381
+ self.scale_mask_softmax.scale = query_key_layer_scaling_coeff
382
+ attention_probs = self.scale_mask_softmax(
383
+ attention_scores, attention_mask.contiguous()
384
+ )
385
+ else:
386
+ if not (attention_mask == 0).all():
387
+ # if auto-regressive, skip
388
+ attention_scores.masked_fill_(attention_mask, -10000.0)
389
+ dtype = attention_scores.dtype
390
+ attention_scores = attention_scores.float()
391
+ attention_scores = attention_scores * query_key_layer_scaling_coeff
392
+
393
+ attention_probs = F.softmax(attention_scores, dim=-1)
394
+
395
+ attention_probs = attention_probs.type(dtype)
396
+
397
+ # =========================
398
+ # Context layer. [sq, b, hp]
399
+ # =========================
400
+
401
+ # value_layer -> context layer.
402
+ # [sk, b, np, hn] --> [b, np, sq, hn]
403
+
404
+ # context layer shape: [b, np, sq, hn]
405
+ output_size = (
406
+ value_layer.size(1),
407
+ value_layer.size(2),
408
+ query_layer.size(0),
409
+ value_layer.size(3),
410
+ )
411
+
412
+ # change view [sk, b * np, hn]
413
+ value_layer = value_layer.view(
414
+ value_layer.size(0), output_size[0] * output_size[1], -1
415
+ )
416
+
417
+ # change view [b * np, sq, sk]
418
+ attention_probs = attention_probs.view(
419
+ output_size[0] * output_size[1], output_size[2], -1
420
+ )
421
+
422
+ # matmul: [b * np, sq, hn]
423
+ context_layer = torch.bmm(attention_probs, value_layer.transpose(0, 1))
424
+
425
+ # change view [b, np, sq, hn]
426
+ context_layer = context_layer.view(*output_size)
427
+
428
+ # [b, np, sq, hn] --> [sq, b, np, hn]
429
+ context_layer = context_layer.permute(2, 0, 1, 3).contiguous()
430
+
431
+ # [sq, b, np, hn] --> [sq, b, hp]
432
+ new_context_layer_shape = context_layer.size()[:-2] + (hidden_size_per_partition,)
433
+ context_layer = context_layer.view(*new_context_layer_shape)
434
+
435
+ outputs = (context_layer, present, attention_probs)
436
+
437
+ return outputs
438
+
439
+
440
+ def default_init(cls, *args, **kwargs):
441
+ return cls(*args, **kwargs)
442
+
443
+
444
+ class SelfAttention(torch.nn.Module):
445
+ def __init__(
446
+ self,
447
+ hidden_size,
448
+ num_attention_heads,
449
+ layer_id,
450
+ hidden_size_per_attention_head=None,
451
+ bias=True,
452
+ params_dtype=torch.float,
453
+ position_encoding_2d=True,
454
+ empty_init=True,
455
+ ):
456
+ if empty_init:
457
+ init_method = skip_init
458
+ else:
459
+ init_method = default_init
460
+ super(SelfAttention, self).__init__()
461
+
462
+ self.layer_id = layer_id
463
+ self.hidden_size = hidden_size
464
+ self.hidden_size_per_partition = hidden_size
465
+ self.num_attention_heads = num_attention_heads
466
+ self.num_attention_heads_per_partition = num_attention_heads
467
+ self.position_encoding_2d = position_encoding_2d
468
+ self.rotary_emb = RotaryEmbedding(
469
+ self.hidden_size // (self.num_attention_heads * 2)
470
+ if position_encoding_2d
471
+ else self.hidden_size // self.num_attention_heads,
472
+ base=10000,
473
+ precision=torch.half,
474
+ learnable=False,
475
+ )
476
+
477
+ self.scale_mask_softmax = None
478
+
479
+ if hidden_size_per_attention_head is None:
480
+ self.hidden_size_per_attention_head = hidden_size // num_attention_heads
481
+ else:
482
+ self.hidden_size_per_attention_head = hidden_size_per_attention_head
483
+
484
+ self.inner_hidden_size = (
485
+ num_attention_heads * self.hidden_size_per_attention_head
486
+ )
487
+
488
+ # Strided linear layer.
489
+ self.query_key_value = init_method(
490
+ torch.nn.Linear,
491
+ hidden_size,
492
+ 3 * self.inner_hidden_size,
493
+ bias=bias,
494
+ dtype=params_dtype,
495
+ )
496
+
497
+ self.dense = init_method(
498
+ torch.nn.Linear,
499
+ self.inner_hidden_size,
500
+ hidden_size,
501
+ bias=bias,
502
+ dtype=params_dtype,
503
+ )
504
+
505
+ @staticmethod
506
+ def attention_mask_func(attention_scores, attention_mask):
507
+ attention_scores.masked_fill_(attention_mask, -10000.0)
508
+ return attention_scores
509
+
510
+ def split_tensor_along_last_dim(
511
+ self, tensor, num_partitions, contiguous_split_chunks=False
512
+ ):
513
+ """Split a tensor along its last dimension.
514
+ Arguments:
515
+ tensor: input tensor.
516
+ num_partitions: number of partitions to split the tensor
517
+ contiguous_split_chunks: If True, make each chunk contiguous
518
+ in memory.
519
+ """
520
+ # Get the size and dimension.
521
+ last_dim = tensor.dim() - 1
522
+ last_dim_size = tensor.size()[last_dim] // num_partitions
523
+ # Split.
524
+ tensor_list = torch.split(tensor, last_dim_size, dim=last_dim)
525
+ # Note: torch.split does not create contiguous tensors by default.
526
+ if contiguous_split_chunks:
527
+ return tuple(chunk.contiguous() for chunk in tensor_list)
528
+
529
+ return tensor_list
530
+
531
+ def forward(
532
+ self,
533
+ hidden_states: torch.Tensor,
534
+ position_ids,
535
+ attention_mask: torch.Tensor,
536
+ layer_id,
537
+ layer_past: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
538
+ use_cache: bool = False,
539
+ output_attentions: bool = False,
540
+ ):
541
+ """
542
+ hidden_states: [seq_len, batch, hidden_size]
543
+ attention_mask: [(1, 1), seq_len, seq_len]
544
+ """
545
+
546
+ # [seq_len, batch, 3 * hidden_size]
547
+ mixed_raw_layer = self.query_key_value(hidden_states)
548
+
549
+ # [seq_len, batch, 3 * hidden_size] --> [seq_len, batch, num_attention_heads, 3 * hidden_size_per_attention_head]
550
+ new_tensor_shape = mixed_raw_layer.size()[:-1] + (
551
+ self.num_attention_heads_per_partition,
552
+ 3 * self.hidden_size_per_attention_head,
553
+ )
554
+ mixed_raw_layer = mixed_raw_layer.view(*new_tensor_shape)
555
+
556
+ # [seq_len, batch, num_attention_heads, hidden_size_per_attention_head]
557
+ (query_layer, key_layer, value_layer) = self.split_tensor_along_last_dim(
558
+ mixed_raw_layer, 3
559
+ )
560
+
561
+ if self.position_encoding_2d:
562
+ q1, q2 = query_layer.chunk(2, dim=(query_layer.ndim - 1))
563
+ k1, k2 = key_layer.chunk(2, dim=(key_layer.ndim - 1))
564
+ cos, sin = self.rotary_emb(q1, seq_len=position_ids.max() + 1)
565
+ position_ids, block_position_ids = (
566
+ position_ids[:, 0, :].transpose(0, 1).contiguous(),
567
+ position_ids[:, 1, :].transpose(0, 1).contiguous(),
568
+ )
569
+ q1, k1 = apply_rotary_pos_emb_index(q1, k1, cos, sin, position_ids)
570
+ q2, k2 = apply_rotary_pos_emb_index(q2, k2, cos, sin, block_position_ids)
571
+ query_layer = torch.concat([q1, q2], dim=(q1.ndim - 1))
572
+ key_layer = torch.concat([k1, k2], dim=(k1.ndim - 1))
573
+ else:
574
+ position_ids = position_ids.transpose(0, 1)
575
+ cos, sin = self.rotary_emb(value_layer, seq_len=position_ids.max() + 1)
576
+ # [seq_len, batch, num_attention_heads, hidden_size_per_attention_head]
577
+ query_layer, key_layer = apply_rotary_pos_emb_index(
578
+ query_layer, key_layer, cos, sin, position_ids
579
+ )
580
+
581
+ # [seq_len, batch, hidden_size]
582
+ context_layer, present, attention_probs = attention_fn(
583
+ self=self,
584
+ query_layer=query_layer,
585
+ key_layer=key_layer,
586
+ value_layer=value_layer,
587
+ attention_mask=attention_mask,
588
+ hidden_size_per_partition=self.hidden_size_per_partition,
589
+ layer_id=layer_id,
590
+ layer_past=layer_past,
591
+ use_cache=use_cache,
592
+ )
593
+
594
+ output = self.dense(context_layer)
595
+
596
+ outputs = (output, present)
597
+
598
+ if output_attentions:
599
+ outputs += (attention_probs,)
600
+
601
+ return outputs # output, present, attention_probs
602
+
603
+
604
+ class GEGLU(torch.nn.Module):
605
+ def __init__(self):
606
+ super().__init__()
607
+ self.activation_fn = F.gelu
608
+
609
+ def forward(self, x):
610
+ # dim=-1 breaks in jit for pt<1.10
611
+ x1, x2 = x.chunk(2, dim=(x.ndim - 1))
612
+ return x1 * self.activation_fn(x2)
613
+
614
+
615
+ class GLU(torch.nn.Module):
616
+ def __init__(
617
+ self,
618
+ hidden_size,
619
+ inner_hidden_size=None,
620
+ layer_id=None,
621
+ bias=True,
622
+ activation_func=gelu,
623
+ params_dtype=torch.float,
624
+ empty_init=True,
625
+ ):
626
+ super(GLU, self).__init__()
627
+ if empty_init:
628
+ init_method = skip_init
629
+ else:
630
+ init_method = default_init
631
+ self.layer_id = layer_id
632
+ self.activation_func = activation_func
633
+
634
+ # Project to 4h.
635
+ self.hidden_size = hidden_size
636
+ if inner_hidden_size is None:
637
+ inner_hidden_size = 4 * hidden_size
638
+ self.inner_hidden_size = inner_hidden_size
639
+ self.dense_h_to_4h = init_method(
640
+ torch.nn.Linear,
641
+ self.hidden_size,
642
+ self.inner_hidden_size,
643
+ bias=bias,
644
+ dtype=params_dtype,
645
+ )
646
+ # Project back to h.
647
+ self.dense_4h_to_h = init_method(
648
+ torch.nn.Linear,
649
+ self.inner_hidden_size,
650
+ self.hidden_size,
651
+ bias=bias,
652
+ dtype=params_dtype,
653
+ )
654
+
655
+ def forward(self, hidden_states):
656
+ """
657
+ hidden_states: [seq_len, batch, hidden_size]
658
+ """
659
+
660
+ # [seq_len, batch, inner_hidden_size]
661
+ intermediate_parallel = self.dense_h_to_4h(hidden_states)
662
+
663
+ intermediate_parallel = self.activation_func(intermediate_parallel)
664
+
665
+ output = self.dense_4h_to_h(intermediate_parallel)
666
+
667
+ return output
668
+
669
+
670
+ class GLMBlock(torch.nn.Module):
671
+ def __init__(
672
+ self,
673
+ hidden_size,
674
+ num_attention_heads,
675
+ layernorm_epsilon,
676
+ layer_id,
677
+ inner_hidden_size=None,
678
+ hidden_size_per_attention_head=None,
679
+ layernorm=LayerNorm,
680
+ use_bias=True,
681
+ params_dtype=torch.float,
682
+ num_layers=28,
683
+ position_encoding_2d=True,
684
+ empty_init=True,
685
+ ):
686
+ super(GLMBlock, self).__init__()
687
+ # Set output layer initialization if not provided.
688
+
689
+ self.layer_id = layer_id
690
+
691
+ # Layernorm on the input data.
692
+ self.input_layernorm = layernorm(hidden_size, eps=layernorm_epsilon)
693
+
694
+ self.position_encoding_2d = position_encoding_2d
695
+
696
+ # Self attention.
697
+ self.attention = SelfAttention(
698
+ hidden_size,
699
+ num_attention_heads,
700
+ layer_id,
701
+ hidden_size_per_attention_head=hidden_size_per_attention_head,
702
+ bias=use_bias,
703
+ params_dtype=params_dtype,
704
+ position_encoding_2d=self.position_encoding_2d,
705
+ empty_init=empty_init,
706
+ )
707
+
708
+ # Layernorm on the input data.
709
+ self.post_attention_layernorm = layernorm(hidden_size, eps=layernorm_epsilon)
710
+
711
+ self.num_layers = num_layers
712
+
713
+ # GLU
714
+ self.mlp = GLU(
715
+ hidden_size,
716
+ inner_hidden_size=inner_hidden_size,
717
+ bias=use_bias,
718
+ layer_id=layer_id,
719
+ params_dtype=params_dtype,
720
+ empty_init=empty_init,
721
+ )
722
+
723
+ def forward(
724
+ self,
725
+ hidden_states: torch.Tensor,
726
+ position_ids,
727
+ attention_mask: torch.Tensor,
728
+ layer_id,
729
+ layer_past: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
730
+ use_cache: bool = False,
731
+ output_attentions: bool = False,
732
+ ):
733
+ """
734
+ hidden_states: [seq_len, batch, hidden_size]
735
+ attention_mask: [(1, 1), seq_len, seq_len]
736
+ """
737
+
738
+ # Layer norm at the beginning of the transformer layer.
739
+ # [seq_len, batch, hidden_size]
740
+ attention_input = self.input_layernorm(hidden_states)
741
+
742
+ # Self attention.
743
+ attention_outputs = self.attention(
744
+ attention_input,
745
+ position_ids,
746
+ attention_mask=attention_mask,
747
+ layer_id=layer_id,
748
+ layer_past=layer_past,
749
+ use_cache=use_cache,
750
+ output_attentions=output_attentions,
751
+ )
752
+
753
+ attention_output = attention_outputs[0]
754
+
755
+ outputs = attention_outputs[1:]
756
+
757
+ # Residual connection.
758
+ alpha = (2 * self.num_layers) ** 0.5
759
+ hidden_states = attention_input * alpha + attention_output
760
+
761
+ mlp_input = self.post_attention_layernorm(hidden_states)
762
+
763
+ # MLP.
764
+ mlp_output = self.mlp(mlp_input)
765
+
766
+ # Second residual connection.
767
+ output = mlp_input * alpha + mlp_output
768
+
769
+ if use_cache:
770
+ outputs = (output,) + outputs
771
+ else:
772
+ outputs = (output,) + outputs[1:]
773
+
774
+ return outputs # hidden_states, present, attentions
775
+
776
+
777
+ class ChatGLMPreTrainedModel(PreTrainedModel):
778
+ """
779
+ An abstract class to handle weights initialization and
780
+ a simple interface for downloading and loading pretrained models.
781
+ """
782
+
783
+ is_parallelizable = False
784
+ supports_gradient_checkpointing = True
785
+ config_class = ChatGLMConfig
786
+ base_model_prefix = "transformer"
787
+ _no_split_modules = ["GLMBlock"]
788
+
789
+ def __init__(self, *inputs, **kwargs):
790
+ super().__init__(*inputs, **kwargs)
791
+
792
+ def _init_weights(self, module: nn.Module):
793
+ """Initialize the weights."""
794
+ return
795
+
796
+ def get_masks(self, input_ids, device):
797
+ batch_size, seq_length = input_ids.shape
798
+ context_lengths = [
799
+ seq.tolist().index(self.config.bos_token_id) for seq in input_ids
800
+ ]
801
+ attention_mask = torch.ones((batch_size, seq_length, seq_length), device=device)
802
+ attention_mask.tril_()
803
+ for i, context_length in enumerate(context_lengths):
804
+ attention_mask[i, :, :context_length] = 1
805
+ attention_mask.unsqueeze_(1)
806
+ attention_mask = (attention_mask < 0.5).bool()
807
+
808
+ return attention_mask
809
+
810
+ def get_position_ids(self, input_ids, mask_positions, device, use_gmasks=None):
811
+ batch_size, seq_length = input_ids.shape
812
+ if use_gmasks is None:
813
+ use_gmasks = [False] * batch_size
814
+ context_lengths = [
815
+ seq.tolist().index(self.config.bos_token_id) for seq in input_ids
816
+ ]
817
+ if self.position_encoding_2d:
818
+ position_ids = (
819
+ torch.arange(seq_length, dtype=torch.long, device=device)
820
+ .unsqueeze(0)
821
+ .repeat(batch_size, 1)
822
+ )
823
+ for i, context_length in enumerate(context_lengths):
824
+ position_ids[i, context_length:] = mask_positions[i]
825
+ block_position_ids = [
826
+ torch.cat(
827
+ (
828
+ torch.zeros(context_length, dtype=torch.long, device=device),
829
+ torch.arange(
830
+ seq_length - context_length, dtype=torch.long, device=device
831
+ )
832
+ + 1,
833
+ )
834
+ )
835
+ for context_length in context_lengths
836
+ ]
837
+ block_position_ids = torch.stack(block_position_ids, dim=0)
838
+ position_ids = torch.stack((position_ids, block_position_ids), dim=1)
839
+ else:
840
+ position_ids = (
841
+ torch.arange(seq_length, dtype=torch.long, device=device)
842
+ .unsqueeze(0)
843
+ .repeat(batch_size, 1)
844
+ )
845
+ for i, context_length in enumerate(context_lengths):
846
+ if not use_gmasks[i]:
847
+ position_ids[i, context_length:] = mask_positions[i]
848
+
849
+ return position_ids
850
+
851
+ def _set_gradient_checkpointing(self, module, value=False):
852
+ if isinstance(module, ChatGLMModel):
853
+ module.gradient_checkpointing = value
854
+
855
+
856
+ CHATGLM_6B_START_DOCSTRING = r"""
857
+ This model is a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) sub-class.
858
+ Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general
859
+ usage and behavior.
860
+
861
+ Parameters:
862
+ config ([`~ChatGLM6BConfig`]): Model configuration class with all the parameters of the model.
863
+ Initializing with a config file does not load the weights associated with the model, only the configuration.
864
+ Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
865
+ """
866
+
867
+ CHATGLM_6B_INPUTS_DOCSTRING = r"""
868
+ Args:
869
+ input_ids (`torch.LongTensor` of shape `({0})`):
870
+ Indices of input sequence tokens in the vocabulary.
871
+
872
+ Indices can be obtained using [`ChatGLM6BTokenizer`].
873
+ See [`PreTrainedTokenizer.encode`] and
874
+ [`PreTrainedTokenizer.__call__`] for details.
875
+
876
+ [What are input IDs?](../glossary#input-ids)
877
+ attention_mask (`torch.FloatTensor` of shape `({0})`, *optional*):
878
+ Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:
879
+
880
+ - 1 for tokens that are **not masked**,
881
+ - 0 for tokens that are **masked**.
882
+
883
+ [What are attention masks?](../glossary#attention-mask)
884
+ token_type_ids (`torch.LongTensor` of shape `({0})`, *optional*):
885
+ Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0, 1]`:
886
+
887
+ - 0 corresponds to a *sentence A* token,
888
+ - 1 corresponds to a *sentence B* token.
889
+
890
+ [What are token type IDs?](../glossary#token-type-ids)
891
+ position_ids (`torch.LongTensor` of shape `({0})`, *optional*):
892
+ Indices of positions of each input sequence tokens in the position embeddings.
893
+ Selected in the range `[0, config.max_position_embeddings - 1]`.
894
+
895
+ [What are position IDs?](../glossary#position-ids)
896
+ head_mask (`torch.FloatTensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
897
+ Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:
898
+
899
+ - 1 indicates the head is **not masked**,
900
+ - 0 indicates the head is **masked**.
901
+
902
+ inputs_embeds (`torch.FloatTensor` of shape `({0}, hidden_size)`, *optional*):
903
+ Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation.
904
+ This is useful if you want more control over how to convert *input_ids* indices into associated vectors
905
+ than the model's internal embedding lookup matrix.
906
+ output_attentions (`bool`, *optional*):
907
+ Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
908
+ tensors for more detail.
909
+ output_hidden_states (`bool`, *optional*):
910
+ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
911
+ more detail.
912
+ return_dict (`bool`, *optional*):
913
+ Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
914
+ """
915
+
916
+
917
+ @add_start_docstrings(
918
+ "The bare ChatGLM-6B Model transformer outputting raw hidden-states without any specific head on top.",
919
+ CHATGLM_6B_START_DOCSTRING,
920
+ )
921
+ class ChatGLMModel(ChatGLMPreTrainedModel):
922
+ """
923
+
924
+ The model can behave as an encoder (with only self-attention) as well
925
+ as a decoder, in which case a layer of cross-attention is added between
926
+ the self-attention layers, following the architecture described in [Attention is
927
+ all you need](https://arxiv.org/abs/1706.03762) by Ashish Vaswani,
928
+ Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, Lukasz Kaiser and Illia Polosukhin.
929
+
930
+ To behave as a decoder the model needs to be initialized with the
931
+ `is_decoder` argument of the configuration set to `True`.
932
+ To be used in a Seq2Seq model, the model needs to be initialized with both `is_decoder`
933
+ argument and `add_cross_attention` set to `True`; an
934
+ `encoder_hidden_states` is then expected as an input to the forward pass.
935
+ """
936
+
937
+ def __init__(self, config: ChatGLMConfig, empty_init=True):
938
+ super().__init__(config)
939
+ if empty_init:
940
+ init_method = skip_init
941
+ else:
942
+ init_method = default_init
943
+ # recording parameters
944
+ self.max_sequence_length = config.max_sequence_length
945
+ self.hidden_size = config.hidden_size
946
+ self.params_dtype = torch.half
947
+ self.num_attention_heads = config.num_attention_heads
948
+ self.vocab_size = config.vocab_size
949
+ self.num_layers = config.num_layers
950
+ self.layernorm_epsilon = config.layernorm_epsilon
951
+ self.inner_hidden_size = config.inner_hidden_size
952
+ self.hidden_size_per_attention_head = (
953
+ self.hidden_size // self.num_attention_heads
954
+ )
955
+ self.position_encoding_2d = config.position_encoding_2d
956
+ self.pre_seq_len = config.pre_seq_len
957
+ self.prefix_projection = config.prefix_projection
958
+
959
+ self.word_embeddings = init_method(
960
+ torch.nn.Embedding,
961
+ num_embeddings=self.vocab_size,
962
+ embedding_dim=self.hidden_size,
963
+ dtype=self.params_dtype,
964
+ )
965
+ self.gradient_checkpointing = False
966
+
967
+ def get_layer(layer_id):
968
+ return GLMBlock(
969
+ self.hidden_size,
970
+ self.num_attention_heads,
971
+ self.layernorm_epsilon,
972
+ layer_id,
973
+ inner_hidden_size=self.inner_hidden_size,
974
+ hidden_size_per_attention_head=self.hidden_size_per_attention_head,
975
+ layernorm=LayerNorm,
976
+ use_bias=True,
977
+ params_dtype=self.params_dtype,
978
+ position_encoding_2d=self.position_encoding_2d,
979
+ empty_init=empty_init,
980
+ )
981
+
982
+ self.layers = torch.nn.ModuleList(
983
+ [get_layer(layer_id) for layer_id in range(self.num_layers)]
984
+ )
985
+
986
+ # Final layer norm before output.
987
+ self.final_layernorm = LayerNorm(self.hidden_size, eps=self.layernorm_epsilon)
988
+
989
+ if self.pre_seq_len is not None:
990
+ for param in self.parameters():
991
+ param.requires_grad = False
992
+ self.prefix_tokens = torch.arange(self.pre_seq_len).long()
993
+ self.prefix_encoder = PrefixEncoder(config)
994
+ self.dropout = torch.nn.Dropout(0.1)
995
+
996
+ # total_params = sum(p.numel() for p in self.parameters())
997
+ # trainable_params = sum(p.numel() for p in self.parameters() if p.requires_grad)
998
+ # print("Using p-tuning v2: # trainable_params = {} / {}".format(trainable_params, total_params))
999
+
1000
+ def get_input_embeddings(self):
1001
+ return self.word_embeddings
1002
+
1003
+ def set_input_embeddings(self, new_embeddings: torch.Tensor):
1004
+ self.word_embeddings = new_embeddings
1005
+
1006
+ def get_prompt(self, batch_size, device, dtype=torch.half):
1007
+ prefix_tokens = (
1008
+ self.prefix_tokens.unsqueeze(0).expand(batch_size, -1).to(device)
1009
+ )
1010
+ past_key_values = self.prefix_encoder(prefix_tokens).type(dtype)
1011
+ past_key_values = past_key_values.view(
1012
+ batch_size,
1013
+ self.pre_seq_len,
1014
+ self.num_layers * 2,
1015
+ self.num_attention_heads,
1016
+ self.hidden_size // self.num_attention_heads,
1017
+ )
1018
+ # seq_len, b, nh, hidden_size
1019
+ past_key_values = self.dropout(past_key_values)
1020
+ past_key_values = past_key_values.permute([2, 1, 0, 3, 4]).split(2)
1021
+ # past_key_values = [(v[0], v[1]) for v in past_key_values]
1022
+ return past_key_values
1023
+
1024
+ @add_start_docstrings_to_model_forward(
1025
+ CHATGLM_6B_INPUTS_DOCSTRING.format("batch_size, sequence_length")
1026
+ )
1027
+ @add_code_sample_docstrings(
1028
+ checkpoint=_CHECKPOINT_FOR_DOC,
1029
+ output_type=BaseModelOutputWithPastAndCrossAttentions,
1030
+ config_class=_CONFIG_FOR_DOC,
1031
+ )
1032
+ def forward(
1033
+ self,
1034
+ input_ids: Optional[torch.LongTensor] = None,
1035
+ position_ids: Optional[torch.LongTensor] = None,
1036
+ attention_mask: Optional[torch.Tensor] = None,
1037
+ past_key_values: Optional[Tuple[Tuple[torch.Tensor, torch.Tensor], ...]] = None,
1038
+ inputs_embeds: Optional[torch.LongTensor] = None,
1039
+ use_cache: Optional[bool] = None,
1040
+ output_attentions: Optional[bool] = None,
1041
+ output_hidden_states: Optional[bool] = None,
1042
+ return_dict: Optional[bool] = None,
1043
+ ) -> Union[Tuple[torch.Tensor, ...], BaseModelOutputWithPast]:
1044
+
1045
+ output_attentions = (
1046
+ output_attentions
1047
+ if output_attentions is not None
1048
+ else self.config.output_attentions
1049
+ )
1050
+ output_hidden_states = (
1051
+ output_hidden_states
1052
+ if output_hidden_states is not None
1053
+ else self.config.output_hidden_states
1054
+ )
1055
+ use_cache = use_cache if use_cache is not None else self.config.use_cache
1056
+ return_dict = (
1057
+ return_dict if return_dict is not None else self.config.use_return_dict
1058
+ )
1059
+
1060
+ if self.gradient_checkpointing and self.training:
1061
+ if use_cache:
1062
+ logger.warning_once(
1063
+ "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
1064
+ )
1065
+ use_cache = False
1066
+
1067
+ if input_ids is not None and inputs_embeds is not None:
1068
+ raise ValueError(
1069
+ "You cannot specify both input_ids and inputs_embeds at the same time"
1070
+ )
1071
+ elif input_ids is not None:
1072
+ batch_size, seq_length = input_ids.shape[:2]
1073
+ elif inputs_embeds is not None:
1074
+ batch_size, seq_length = inputs_embeds.shape[:2]
1075
+ else:
1076
+ raise ValueError("You have to specify either input_ids or inputs_embeds")
1077
+
1078
+ if inputs_embeds is None:
1079
+ inputs_embeds = self.word_embeddings(input_ids)
1080
+
1081
+ if past_key_values is None:
1082
+ if self.pre_seq_len is not None:
1083
+ past_key_values = self.get_prompt(
1084
+ batch_size=input_ids.shape[0],
1085
+ device=input_ids.device,
1086
+ dtype=inputs_embeds.dtype,
1087
+ )
1088
+ else:
1089
+ past_key_values = tuple([None] * len(self.layers))
1090
+
1091
+ if attention_mask is None:
1092
+ attention_mask = self.get_masks(input_ids, device=input_ids.device)
1093
+
1094
+ if position_ids is None:
1095
+ MASK, gMASK = self.config.mask_token_id, self.config.gmask_token_id
1096
+ seqs = input_ids.tolist()
1097
+
1098
+ mask_positions, use_gmasks = [], []
1099
+ for seq in seqs:
1100
+ mask_token = gMASK if gMASK in seq else MASK
1101
+ use_gmask = mask_token == gMASK
1102
+ mask_positions.append(seq.index(mask_token))
1103
+ use_gmasks.append(use_gmask)
1104
+
1105
+ position_ids = self.get_position_ids(
1106
+ input_ids,
1107
+ mask_positions=mask_positions,
1108
+ device=input_ids.device,
1109
+ use_gmasks=use_gmasks,
1110
+ )
1111
+
1112
+ if self.pre_seq_len is not None and attention_mask is not None:
1113
+ prefix_attention_mask = torch.ones(
1114
+ batch_size, 1, input_ids.size(-1), self.pre_seq_len
1115
+ ).to(attention_mask.device)
1116
+ prefix_attention_mask = (prefix_attention_mask < 0.5).bool()
1117
+ attention_mask = torch.cat((prefix_attention_mask, attention_mask), dim=3)
1118
+
1119
+ # [seq_len, batch, hidden_size]
1120
+ hidden_states = inputs_embeds.transpose(0, 1)
1121
+
1122
+ presents = () if use_cache else None
1123
+ all_self_attentions = () if output_attentions else None
1124
+ all_hidden_states = () if output_hidden_states else None
1125
+
1126
+ if attention_mask is None:
1127
+ attention_mask = torch.zeros(1, 1, device=input_ids.device).bool()
1128
+ else:
1129
+ attention_mask = attention_mask.to(hidden_states.device)
1130
+
1131
+ for i, layer in enumerate(self.layers):
1132
+
1133
+ if output_hidden_states:
1134
+ all_hidden_states = all_hidden_states + (hidden_states,)
1135
+ layer_past = past_key_values[i]
1136
+
1137
+ if self.gradient_checkpointing and self.training:
1138
+ layer_ret = torch.utils.checkpoint.checkpoint(
1139
+ layer,
1140
+ hidden_states,
1141
+ position_ids,
1142
+ attention_mask,
1143
+ torch.tensor(i),
1144
+ layer_past,
1145
+ use_cache,
1146
+ output_attentions,
1147
+ )
1148
+ else:
1149
+ layer_ret = layer(
1150
+ hidden_states,
1151
+ position_ids=position_ids,
1152
+ attention_mask=attention_mask,
1153
+ layer_id=torch.tensor(i),
1154
+ layer_past=layer_past,
1155
+ use_cache=use_cache,
1156
+ output_attentions=output_attentions,
1157
+ )
1158
+
1159
+ hidden_states = layer_ret[0]
1160
+
1161
+ if use_cache:
1162
+ presents = presents + (layer_ret[1],)
1163
+
1164
+ if output_attentions:
1165
+ all_self_attentions = all_self_attentions + (
1166
+ layer_ret[2 if use_cache else 1],
1167
+ )
1168
+
1169
+ # Final layer norm.
1170
+ hidden_states = self.final_layernorm(hidden_states)
1171
+
1172
+ if output_hidden_states:
1173
+ all_hidden_states = all_hidden_states + (hidden_states,)
1174
+
1175
+ if not return_dict:
1176
+ return tuple(
1177
+ v
1178
+ for v in [
1179
+ hidden_states,
1180
+ presents,
1181
+ all_hidden_states,
1182
+ all_self_attentions,
1183
+ ]
1184
+ if v is not None
1185
+ )
1186
+
1187
+ return BaseModelOutputWithPast(
1188
+ last_hidden_state=hidden_states,
1189
+ past_key_values=presents,
1190
+ hidden_states=all_hidden_states,
1191
+ attentions=all_self_attentions,
1192
+ )
1193
+
1194
+
1195
+ class ChatGLMForConditionalGeneration(ChatGLMPreTrainedModel):
1196
+ def __init__(self, config: ChatGLMConfig, empty_init=True):
1197
+ super().__init__(config)
1198
+ if empty_init:
1199
+ init_method = skip_init
1200
+ else:
1201
+ init_method = default_init
1202
+
1203
+ # self.hidden_size = config.hidden_size
1204
+ # self.params_dtype = torch.half
1205
+ # self.vocab_size = config.vocab_size
1206
+ self.max_sequence_length = config.max_sequence_length
1207
+
1208
+ self.position_encoding_2d = config.position_encoding_2d
1209
+
1210
+ self.transformer = ChatGLMModel(config, empty_init=empty_init)
1211
+
1212
+ self.lm_head = init_method(
1213
+ nn.Linear,
1214
+ config.hidden_size,
1215
+ config.vocab_size,
1216
+ bias=False,
1217
+ dtype=torch.half,
1218
+ )
1219
+
1220
+ self.config = config
1221
+
1222
+ self.quantized = False
1223
+
1224
+ if self.config.quantization_bit:
1225
+ self.quantize(self.config.quantization_bit, empty_init=True)
1226
+
1227
+ def get_output_embeddings(self):
1228
+ return self.lm_head
1229
+
1230
+ def set_output_embeddings(self, new_embeddings):
1231
+ self.lm_head = new_embeddings
1232
+
1233
+ def _update_model_kwargs_for_generation(
1234
+ self,
1235
+ outputs: ModelOutput,
1236
+ model_kwargs: Dict[str, Any],
1237
+ is_encoder_decoder: bool = False,
1238
+ standardize_cache_format: bool = False,
1239
+ ) -> Dict[str, Any]:
1240
+ # update past_key_values
1241
+ model_kwargs["past_key_values"] = self._extract_past_from_model_output(
1242
+ outputs, standardize_cache_format=standardize_cache_format
1243
+ )
1244
+
1245
+ # update attention mask
1246
+ if "attention_mask" in model_kwargs:
1247
+ attention_mask = model_kwargs["attention_mask"]
1248
+ if attention_mask is not None and attention_mask.dtype == torch.bool:
1249
+ attention_mask = torch.cat(
1250
+ [
1251
+ attention_mask,
1252
+ attention_mask.new_ones((*attention_mask.shape[:3], 1)),
1253
+ ],
1254
+ dim=3,
1255
+ )
1256
+ new_attention_mask = attention_mask[:, :, -1:].clone()
1257
+ new_attention_mask[..., -1] = False
1258
+ model_kwargs["attention_mask"] = torch.cat(
1259
+ [attention_mask, new_attention_mask], dim=2
1260
+ )
1261
+
1262
+ # update position ids
1263
+ if "position_ids" in model_kwargs:
1264
+ position_ids = model_kwargs["position_ids"]
1265
+ new_position_id = position_ids[..., -1:].clone()
1266
+ new_position_id[:, 1, :] += 1
1267
+ model_kwargs["position_ids"] = torch.cat(
1268
+ [position_ids, new_position_id], dim=-1
1269
+ )
1270
+
1271
+ return model_kwargs
1272
+
1273
+ def prepare_inputs_for_generation(
1274
+ self,
1275
+ input_ids: torch.LongTensor,
1276
+ past: Optional[torch.Tensor] = None,
1277
+ past_key_values: Optional[torch.Tensor] = None,
1278
+ attention_mask: Optional[torch.Tensor] = None,
1279
+ position_ids: Optional[torch.Tensor] = None,
1280
+ **kwargs,
1281
+ ) -> dict:
1282
+ batch_size, seq_length = input_ids.shape
1283
+ MASK, gMASK = self.config.mask_token_id, self.config.gmask_token_id
1284
+ seqs = input_ids.tolist()
1285
+ mask_positions, use_gmasks = [], []
1286
+ for seq in seqs:
1287
+ mask_token = gMASK if gMASK in seq else MASK
1288
+ use_gmask = mask_token == gMASK
1289
+ mask_positions.append(seq.index(mask_token))
1290
+ use_gmasks.append(use_gmask)
1291
+
1292
+ # only last token for input_ids if past is not None
1293
+ if past is not None or past_key_values is not None:
1294
+ last_token = input_ids[:, -1].unsqueeze(-1)
1295
+ if attention_mask is not None and attention_mask.dtype == torch.bool:
1296
+ attention_mask = attention_mask[:, :, -1:]
1297
+ else:
1298
+ attention_mask = None
1299
+ if position_ids is not None:
1300
+ position_ids = position_ids[..., -1:]
1301
+ else:
1302
+ context_lengths = [seq.index(self.config.bos_token_id) for seq in seqs]
1303
+ if self.position_encoding_2d:
1304
+ position_ids = torch.tensor(
1305
+ [
1306
+ [mask_position, seq_length - context_length]
1307
+ for mask_position, context_length in zip(
1308
+ mask_positions, context_lengths
1309
+ )
1310
+ ],
1311
+ dtype=torch.long,
1312
+ device=input_ids.device,
1313
+ ).unsqueeze(-1)
1314
+ else:
1315
+ position_ids = torch.tensor(
1316
+ [mask_position for mask_position in mask_positions],
1317
+ dtype=torch.long,
1318
+ device=input_ids.device,
1319
+ ).unsqueeze(-1)
1320
+
1321
+ if past is None:
1322
+ past = past_key_values
1323
+ return {
1324
+ "input_ids": last_token,
1325
+ "past_key_values": past,
1326
+ "position_ids": position_ids,
1327
+ "attention_mask": attention_mask,
1328
+ }
1329
+ else:
1330
+ if attention_mask is not None and attention_mask.dtype != torch.bool:
1331
+ logger.warning_once(
1332
+ f"The dtype of attention mask ({attention_mask.dtype}) is not bool"
1333
+ )
1334
+ attention_mask = None
1335
+ if attention_mask is None:
1336
+ attention_mask = self.get_masks(input_ids, device=input_ids.device)
1337
+ if position_ids is None:
1338
+ position_ids = self.get_position_ids(
1339
+ input_ids,
1340
+ device=input_ids.device,
1341
+ mask_positions=mask_positions,
1342
+ use_gmasks=use_gmasks,
1343
+ )
1344
+
1345
+ return {
1346
+ "input_ids": input_ids,
1347
+ "past_key_values": past,
1348
+ "position_ids": position_ids,
1349
+ "attention_mask": attention_mask,
1350
+ }
1351
+
1352
+ def forward(
1353
+ self,
1354
+ input_ids: Optional[torch.Tensor] = None,
1355
+ position_ids: Optional[torch.Tensor] = None,
1356
+ attention_mask: Optional[torch.Tensor] = None,
1357
+ past_key_values: Optional[Tuple[torch.FloatTensor]] = None,
1358
+ inputs_embeds: Optional[torch.Tensor] = None,
1359
+ labels: Optional[torch.Tensor] = None,
1360
+ use_cache: Optional[bool] = None,
1361
+ output_attentions: Optional[bool] = None,
1362
+ output_hidden_states: Optional[bool] = None,
1363
+ return_dict: Optional[bool] = None,
1364
+ ):
1365
+ use_cache = use_cache if use_cache is not None else self.config.use_cache
1366
+ return_dict = (
1367
+ return_dict if return_dict is not None else self.config.use_return_dict
1368
+ )
1369
+
1370
+ transformer_outputs = self.transformer(
1371
+ input_ids=input_ids,
1372
+ position_ids=position_ids,
1373
+ attention_mask=attention_mask,
1374
+ past_key_values=past_key_values,
1375
+ inputs_embeds=inputs_embeds,
1376
+ use_cache=use_cache,
1377
+ output_attentions=output_attentions,
1378
+ output_hidden_states=output_hidden_states,
1379
+ return_dict=return_dict,
1380
+ )
1381
+
1382
+ hidden_states = transformer_outputs[0]
1383
+
1384
+ lm_logits = self.lm_head(hidden_states).permute(1, 0, 2).contiguous()
1385
+
1386
+ loss = None
1387
+ if labels is not None:
1388
+ lm_logits = lm_logits.to(torch.float32)
1389
+
1390
+ # Shift so that tokens < n predict n
1391
+ shift_logits = lm_logits[..., :-1, :].contiguous()
1392
+ shift_labels = labels[..., 1:].contiguous()
1393
+ # Flatten the tokens
1394
+ loss_fct = CrossEntropyLoss(ignore_index=-100)
1395
+ loss = loss_fct(
1396
+ shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1)
1397
+ )
1398
+
1399
+ lm_logits = lm_logits.to(hidden_states.dtype)
1400
+ loss = loss.to(hidden_states.dtype)
1401
+
1402
+ if not return_dict:
1403
+ output = (lm_logits,) + transformer_outputs[1:]
1404
+ return ((loss,) + output) if loss is not None else output
1405
+
1406
+ return CausalLMOutputWithPast(
1407
+ loss=loss,
1408
+ logits=lm_logits,
1409
+ past_key_values=transformer_outputs.past_key_values,
1410
+ hidden_states=transformer_outputs.hidden_states,
1411
+ attentions=transformer_outputs.attentions,
1412
+ )
1413
+
1414
+ @staticmethod
1415
+ def _reorder_cache(
1416
+ past: Tuple[Tuple[torch.Tensor, torch.Tensor], ...], beam_idx: torch.LongTensor
1417
+ ) -> Tuple[Tuple[torch.Tensor, torch.Tensor], ...]:
1418
+ """
1419
+ This function is used to re-order the `past_key_values` cache if [`~PreTrainedModel.beam_search`] or
1420
+ [`~PreTrainedModel.beam_sample`] is called. This is required to match `past_key_values` with the correct
1421
+ beam_idx at every generation step.
1422
+
1423
+ Output shares the same memory storage as `past`.
1424
+ """
1425
+ return tuple(
1426
+ (
1427
+ layer_past[0].index_select(1, beam_idx.to(layer_past[0].device)),
1428
+ layer_past[1].index_select(1, beam_idx.to(layer_past[1].device)),
1429
+ )
1430
+ for layer_past in past
1431
+ )
1432
+
1433
+ def process_response(self, response):
1434
+ response = response.strip()
1435
+ response = response.replace("[[训练时间]]", "2023年")
1436
+ punkts = [
1437
+ [",", ","],
1438
+ ["!", "!"],
1439
+ [":", ":"],
1440
+ [";", ";"],
1441
+ [r"\?", "?"],
1442
+ ]
1443
+ for item in punkts:
1444
+ response = re.sub(
1445
+ r"([\u4e00-\u9fff])%s" % item[0], r"\1%s" % item[1], response
1446
+ )
1447
+ response = re.sub(
1448
+ r"%s([\u4e00-\u9fff])" % item[0], r"%s\1" % item[1], response
1449
+ )
1450
+ return response
1451
+
1452
+ @torch.no_grad()
1453
+ def chat(
1454
+ self,
1455
+ tokenizer,
1456
+ query: str,
1457
+ history: List[Tuple[str, str]] = None,
1458
+ max_length: int = 2048,
1459
+ num_beams=1,
1460
+ do_sample=True,
1461
+ top_p=0.7,
1462
+ temperature=0.95,
1463
+ logits_processor=None,
1464
+ **kwargs,
1465
+ ):
1466
+ if history is None:
1467
+ history = []
1468
+ if logits_processor is None:
1469
+ logits_processor = LogitsProcessorList()
1470
+ logits_processor.append(InvalidScoreLogitsProcessor())
1471
+ gen_kwargs = {
1472
+ "max_length": max_length,
1473
+ "num_beams": num_beams,
1474
+ "do_sample": do_sample,
1475
+ "top_p": top_p,
1476
+ "temperature": temperature,
1477
+ "logits_processor": logits_processor,
1478
+ **kwargs,
1479
+ }
1480
+ if not history:
1481
+ prompt = query
1482
+ else:
1483
+ prompt = ""
1484
+ for i, (old_query, response) in enumerate(history):
1485
+ prompt += "[Round {}]\n问:{}\n答:{}\n".format(i, old_query, response)
1486
+ prompt += "[Round {}]\n问:{}\n答:".format(len(history), query)
1487
+ inputs = tokenizer([prompt], return_tensors="pt")
1488
+ inputs = inputs.to(self.device)
1489
+ outputs = self.generate(**inputs, **gen_kwargs)
1490
+ outputs = outputs.tolist()[0][len(inputs["input_ids"][0]) :]
1491
+ response = tokenizer.decode(outputs)
1492
+ response = self.process_response(response)
1493
+ history = history + [(query, response)]
1494
+ return response, history
1495
+
1496
+ @torch.no_grad()
1497
+ def stream_chat(
1498
+ self,
1499
+ tokenizer,
1500
+ query: str,
1501
+ history: List[Tuple[str, str]] = None,
1502
+ max_length: int = 2048,
1503
+ do_sample=True,
1504
+ top_p=0.7,
1505
+ temperature=0.95,
1506
+ logits_processor=None,
1507
+ **kwargs,
1508
+ ):
1509
+ if history is None:
1510
+ history = []
1511
+ if logits_processor is None:
1512
+ logits_processor = LogitsProcessorList()
1513
+ logits_processor.append(InvalidScoreLogitsProcessor())
1514
+ gen_kwargs = {
1515
+ "max_length": max_length,
1516
+ "do_sample": do_sample,
1517
+ "top_p": top_p,
1518
+ "temperature": temperature,
1519
+ "logits_processor": logits_processor,
1520
+ **kwargs,
1521
+ }
1522
+ if not history:
1523
+ prompt = query
1524
+ else:
1525
+ prompt = ""
1526
+ for i, (old_query, response) in enumerate(history):
1527
+ prompt += "[Round {}]\n问:{}\n答:{}\n".format(i, old_query, response)
1528
+ prompt += "[Round {}]\n问:{}\n答:".format(len(history), query)
1529
+ inputs = tokenizer([prompt], return_tensors="pt")
1530
+ inputs = inputs.to(self.device)
1531
+ for outputs in self.stream_generate(**inputs, **gen_kwargs):
1532
+ outputs = outputs.tolist()[0][len(inputs["input_ids"][0]) :]
1533
+ response = tokenizer.decode(outputs)
1534
+ response = self.process_response(response)
1535
+ new_history = history + [(query, response)]
1536
+ yield response, new_history
1537
+
1538
+ @torch.no_grad()
1539
+ def stream_generate(
1540
+ self,
1541
+ input_ids,
1542
+ generation_config: Optional[GenerationConfig] = None,
1543
+ logits_processor: Optional[LogitsProcessorList] = None,
1544
+ stopping_criteria: Optional[StoppingCriteriaList] = None,
1545
+ prefix_allowed_tokens_fn: Optional[
1546
+ Callable[[int, torch.Tensor], List[int]]
1547
+ ] = None,
1548
+ **kwargs,
1549
+ ):
1550
+ batch_size, input_ids_seq_length = input_ids.shape[0], input_ids.shape[-1]
1551
+
1552
+ if generation_config is None:
1553
+ generation_config = self.generation_config
1554
+ generation_config = copy.deepcopy(generation_config)
1555
+ model_kwargs = generation_config.update(**kwargs)
1556
+ bos_token_id, eos_token_id = (
1557
+ generation_config.bos_token_id,
1558
+ generation_config.eos_token_id,
1559
+ )
1560
+
1561
+ if isinstance(eos_token_id, int):
1562
+ eos_token_id = [eos_token_id]
1563
+
1564
+ has_default_max_length = (
1565
+ kwargs.get("max_length") is None
1566
+ and generation_config.max_length is not None
1567
+ )
1568
+ if has_default_max_length and generation_config.max_new_tokens is None:
1569
+ warnings.warn(
1570
+ f"Using `max_length`'s default ({generation_config.max_length}) to control the generation length. "
1571
+ "This behaviour is deprecated and will be removed from the config in v5 of Transformers -- we"
1572
+ " recommend using `max_new_tokens` to control the maximum length of the generation.",
1573
+ UserWarning,
1574
+ )
1575
+ elif generation_config.max_new_tokens is not None:
1576
+ generation_config.max_length = (
1577
+ generation_config.max_new_tokens + input_ids_seq_length
1578
+ )
1579
+ if not has_default_max_length:
1580
+ logger.warning(
1581
+ f"Both `max_new_tokens` (={generation_config.max_new_tokens}) and `max_length`(="
1582
+ f"{generation_config.max_length}) seem to have been set. `max_new_tokens` will take precedence. "
1583
+ "Please refer to the documentation for more information. "
1584
+ "(https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)",
1585
1586
+ )
1587
+
1588
+ if input_ids_seq_length >= generation_config.max_length:
1589
+ input_ids_string = (
1590
+ "decoder_input_ids" if self.config.is_encoder_decoder else "input_ids"
1591
+ )
1592
+ logger.warning(
1593
+ f"Input length of {input_ids_string} is {input_ids_seq_length}, but `max_length` is set to"
1594
+ f" {generation_config.max_length}. This can lead to unexpected behavior. You should consider"
1595
+ " increasing `max_new_tokens`."
1596
+ )
1597
+
1598
+ # 2. Set generation parameters if not already defined
1599
+ logits_processor = (
1600
+ logits_processor if logits_processor is not None else LogitsProcessorList()
1601
+ )
1602
+ stopping_criteria = (
1603
+ stopping_criteria
1604
+ if stopping_criteria is not None
1605
+ else StoppingCriteriaList()
1606
+ )
1607
+
1608
+ logits_processor = self._get_logits_processor(
1609
+ generation_config=generation_config,
1610
+ input_ids_seq_length=input_ids_seq_length,
1611
+ encoder_input_ids=input_ids,
1612
+ prefix_allowed_tokens_fn=prefix_allowed_tokens_fn,
1613
+ logits_processor=logits_processor,
1614
+ )
1615
+
1616
+ stopping_criteria = self._get_stopping_criteria(
1617
+ generation_config=generation_config, stopping_criteria=stopping_criteria
1618
+ )
1619
+ logits_warper = self._get_logits_warper(generation_config)
1620
+
1621
+ unfinished_sequences = input_ids.new(input_ids.shape[0]).fill_(1)
1622
+ scores = None
1623
+ while True:
1624
+ model_inputs = self.prepare_inputs_for_generation(input_ids, **model_kwargs)
1625
+ # forward pass to get next token
1626
+ outputs = self(
1627
+ **model_inputs,
1628
+ return_dict=True,
1629
+ output_attentions=False,
1630
+ output_hidden_states=False,
1631
+ )
1632
+
1633
+ next_token_logits = outputs.logits[:, -1, :]
1634
+
1635
+ # pre-process distribution
1636
+ next_token_scores = logits_processor(input_ids, next_token_logits)
1637
+ next_token_scores = logits_warper(input_ids, next_token_scores)
1638
+
1639
+ # sample
1640
+ probs = nn.functional.softmax(next_token_scores, dim=-1)
1641
+ if generation_config.do_sample:
1642
+ next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1)
1643
+ else:
1644
+ next_tokens = torch.argmax(probs, dim=-1)
1645
+
1646
+ # update generated ids, model inputs, and length for next step
1647
+ input_ids = torch.cat([input_ids, next_tokens[:, None]], dim=-1)
1648
+ model_kwargs = self._update_model_kwargs_for_generation(
1649
+ outputs, model_kwargs, is_encoder_decoder=self.config.is_encoder_decoder
1650
+ )
1651
+ unfinished_sequences = unfinished_sequences.mul(
1652
+ (sum(next_tokens != i for i in eos_token_id)).long()
1653
+ )
1654
+
1655
+ # stop when each sentence is finished, or if we exceed the maximum length
1656
+ if unfinished_sequences.max() == 0 or stopping_criteria(input_ids, scores):
1657
+ break
1658
+ yield input_ids
1659
+
1660
+ def quantize(self, bits: int, empty_init=False, **kwargs):
1661
+ if bits == 0:
1662
+ return
1663
+
1664
+ from .quantization import quantize
1665
+
1666
+ if self.quantized:
1667
+ logger.info("Already quantized.")
1668
+ return self
1669
+
1670
+ self.quantized = True
1671
+
1672
+ self.config.quantization_bit = bits
1673
+
1674
+ self.transformer = quantize(
1675
+ self.transformer, bits, empty_init=empty_init, **kwargs
1676
+ )
1677
+ return self
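Usage note (not part of the commit): the class above is what `trust_remote_code` loading resolves to, so a minimal sketch of driving its chat()/stream_chat() helpers looks like the following. The checkpoint path is assumed from this repo's layout, and a CUDA device plus the `transformers` auto classes are assumed.

# Sketch: driving the chat()/stream_chat() helpers defined in modeling_chatglm.py.
from transformers import AutoModel, AutoTokenizer

model_dir = "lora_checkpoints/checkpoints/checkpoint-3000"  # assumed local path
tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
model = AutoModel.from_pretrained(model_dir, trust_remote_code=True).half().cuda().eval()

# chat() builds the "[Round i]\n问:...\n答:..." prompt internally and returns
# the post-processed response plus the updated history.
response, history = model.chat(tokenizer, "你好", history=[])
print(response)

# stream_chat() yields progressively longer responses as tokens are sampled.
for partial, history in model.stream_chat(tokenizer, "介绍一下你自己", history=history):
    response = partial
print(response)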
lora_checkpoints/checkpoints/checkpoint-3000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a822f43d34197caff7c133ba5f3b85aab1c5a7218124d2671b8b1a0e0a106e0
3
+ size 234882351
lora_checkpoints/checkpoints/checkpoint-3000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:931e5be9beae84b50654908f9ed1add6077c0babb9996dc48c53976884e6c784
3
+ size 117441341
lora_checkpoints/checkpoints/checkpoint-3000/quantization.py ADDED
@@ -0,0 +1,249 @@
1
+ from torch.nn import Linear
2
+ from torch.nn.parameter import Parameter
3
+
4
+ import bz2
5
+ import torch
6
+ import base64
7
+ import ctypes
8
+ from transformers.utils import logging
9
+
10
+ from typing import List
11
+ from functools import partial
12
+
13
+ logger = logging.get_logger(__name__)
14
+
15
+ try:
16
+ from cpm_kernels.kernels.base import LazyKernelCModule, KernelFunction, round_up
17
+
18
+ class Kernel:
19
+ def __init__(self, code: bytes, function_names: List[str]):
20
+ self.code = code
21
+ self._function_names = function_names
22
+ self._cmodule = LazyKernelCModule(self.code)
23
+
24
+ for name in self._function_names:
25
+ setattr(self, name, KernelFunction(self._cmodule, name))
26
+
27
+ quantization_code = "$QlpoOTFBWSZTWU9yuJUAQHN//////////f/n/8/n///n//bt4dTidcVx8X3V9FV/92/v4B7/AD5FBQFAAAChSgKpFCFAFVSigUAAAEKhSgUUqgFBKigqVREQAABQBQIANDTTIGI00BkZBkNGE0A0BkBkGQGRkaNAaAGQNBoGgDIAAYIGTI0DQAQAaGmmQMRpoDIyDIaMJoBoDIDIMgMjI0aA0AMgaDQNAGQAAwQMmRoGgAgA0NNMgYjTQGRkGQ0YTQDQGQGQZAZGRo0BoAZA0GgaAMgABggZMjQNABABoaaZAxGmgMjIMhowmgGgMgMgyAyMjRoDQAyBoNA0AZAADBAyZGgaAAmqU1NEgJqnptU/Sn4jRR6J6epk2pqb1Q/SgAPUGgyNNGjQ2SBpoAZAAGg0NB6mgDIAAAAA2oaApSREBNAARhGiYEaEwU8pvImlP0k2aam1GaGqbFNM1MHpTwmkepmyU9R6nqPKekHqNNPUxNGhp6n6p6QaZ6o9TG1GMqcoV9ly6nRanHlq6zPNbnGZNi6HSug+2nPiZ13XcnFYZW+45W11CumhzYhchOJ2GLLV1OBjBjGf4TptOddTSOcVxhqYZMYwZXZZY00zI1paX5X9J+b+f4e+x43RXSxXPOdquiGpduatGyXneN696M9t4HU2eR5XX/kPhP261NTx3JO1Ow7LyuDmeo9a7d351T1ZxnvnrvYnrXv/hXxPCeuYx2XsNmO003eg9J3Z6U7b23meJ4ri01OdzTk9BNO96brz+qT5nuvvH3ds/G+m/JcG/F2XYuhXlvO+jP7U3XgrzPN/lr8Sf1n6j4j7jZs+s/T0tNaNNYzTs12rxjwztHlnire3Nzc3N1wuBwOBwXBvZfoHpD7rFmR99V5vj3aXza3xdBbXMalubTg/jIv5dfAi54Pdc75j4z412n3Npj3Ld/ENm7a3b/Cod6h/ret1/5vn/C+l+gdslMvgPSLJ8d8q+U66fevYn/tW1chleEtNTGlcHCbLRlq0tHzF5tsbbZZfHjjLgZu42XCuC3NrdjTasZGNzgxPIrGqp7r3p7L2p5XjnpPSmTd5XtzqnB6U87zzg1Ol0zd0zsLszxR6lkxp35u6/teL0L0W922cR7Lu1lpL9CsHirzuM2T+BgsyViT6LHcm0/Vr6U/7LGGyJeqTEjt0PHWhF5mCT7R9mtlDwriYv0Tyr/OxYt6qp5r0mPVT0608TqnqMZaarU2nFwrTzzlrs1ed7z1ux60wyr4ydCaTi3enW8x68x0zU7tXSlcmPSW1mGpWJMg4zmPC2lK96tp0OE80y4MfEvnZj8zGluR6b22ki1Ou9V2nCd9xovcPvcYMZYy0lvN60ScZ45vN6yeCeeXFb1lVjnnCar5fwXwE2bzJ4HI1XVPXfXZMm44GUsMpYsmLB65TuVdm0cl0b+i/wGNN66XjeV7zuPpHcnK/juhhjdfId5jMdE5nN0dGmmm2zZs2cexD5n9p/dY352XsvXHaZNWWsmmS1atjR452nYudzvqv2HMRyvNNnlMcDl3R2+yx2uVrBubTW9icHDVtbNXlZm7jma1rM4VurZZd2y6nUau7ZXZ7bVU+mnoOVxZGMrVmvX60605JwmzGZhhhjTWtaaaMaaGTGmNMZasY0iX8VMUl8eepaIrzGSpemWOQyZORk2bNpjUybMmxqYmknCGCFynutfksaZpjTNMaaatM0xsxcGR0sociNqxNSmhhR1ZJPbsn8qyF0t2qH6iYBclclalbtTTcHTDsPaX6rlnElph2Jyumumtynv2Kk8GI7rsvXbIcJgHJOSaSXnnGaI3m87RtVXJOZ/YtgdTE6Wpha6ZlE8ayXkef1fh602r2WwvfMXtMdLlkfnLFdYYwYso+bWqm7yJqHXZGw2nrS5ZanSYnWlxBxMF1V940K2wdrI7R6OYf7DGGamMmTSbRhlS45xmVOumF1EyPCmHrrN8wwZOOrdNtLeMtzFzDlWnfTBxMk2NaXIZHBYxYLD4w8yju0ao65Vz1OIXoS9dLanwCe1PWrYuWMqf1if1z2k2yYfKJ741PDgno1ZQ8DRqvUny3mNoWTzGO6m1DkrJI8JiR5cSd+vZdGOO8nrMoc5+NDUFsMSXaZJeNlMmGLtJsovOsUp7I9S5VojKxF6bTVEelXqlfJobQr3LozSh2Jk7VcrVMfhXqszGWMzNqGhqZY0OadxkyyMssKugZR0KNFXBHlqwmJgTE/BNVMk6ItJXZMR0H47GpXv/DMOvNkmVuaV1PRfEdxuqc7Hcd+ZV/zTLaRxWk0nl9CdCeM6mn5rstHIBcpiuwmUZXeq81DacHI2rmrZ5SuE5mOZd6LQrZg9mx32TprA8BMo5jKN6yLTCi3WzQaZSuhzTtM1fUTGVpG8Tw+KXI0tjEpiWxtLYynOlktSbVlaI5kxP8TDH8kx50xoxi5KcA4pcja8KWLRlO/Ks6q06ergnvm1ca3Tq8Uw7LTUsmWyctXPWmpitl/uvGcWTGXGuAXDfhqazGmjkxcJW5hMMMMpYsXl2TZYtVOddG3XCarUt6Ptq9CZXSNzyuRzqRZOjsxdBbFVz6OA5HI43r1jityVlVpVkxmOsyaYWE1NTGq1sOVh36mHMcxtSvcy70edG0ZGR3I1Go1GRlV7mWWo1G0ZGRqlvH40l7o4m5xMWLLLYyNjnqc8556mdPqLJ31n/1nWOncxzG1tizrHs/Z+d2vP/B/l8wdJ6rHUn2nbbDq4p6htFtYzMMMTaZis1K5GKzGNmxhmUx2DDlZ/qNnIx41xnaMfCZWYaZWtNLTNW8ND4Fw1MyZOCdM428suKG1ehW8TesOydg7J+YYcD4cYR+8dFK6M4E3HM9ZfRNNL+Sn6rsl4DsrDl2HpPCnfxjGXtbZtYys1ttlyJ4T+BvexjGWRjMszK4Jpc77D3GyuVD7q0+G8m9G+2+rGm7cOR2y7FdtY2XUYx/oNlfRYxhMYyYZkyyg55enna9Kt/FFi6GMMwYwdwxWgxGMLKYmUyGExTKMZkMFhkymKuh0NOBNnBu+23LdwDoZYYzGGMxtORaTU1pjTGWTTGGtMrNWUsyyTTLLG1qy2ZjbK2DBllWqxMtBMaYZQmcE7zvvRcTkclUwdkxTaSdyySt/7fpL+T1v516Ji97fwr5JbLu305zMn5+GMTTZ9F+y7ExwmGVfG44yxn3dLv6l5i+Wth1jCrDq21nW9LqvvDzz3Vf3LLH/O/32TJ/erx3bXftO4eF+G956D952K/An4NfvOpjFjExjevP/UmE0fIoZXx6/w6lX/no3D0bLt+ixjieBM6ksRd0yB4Lt2SwYNE+gd1detlZWUnpiZfGfFaK+4PyCa/v18V8X75pe9fLXzp7l3VjF76vWZmHwGz1IZNWT7b8yddJ4q5kyrVdfru6atWc7bVYztL9Jf4GXvT+Y8m9/YsXP6H018a8D4XVOqvfzqeR+6yZOD8dPv0+U7/q5Pl+2dNb0MjzGVH5p6MNQ7cOWvw62U9aHE8DprDek+McLyvDz+t
e+9Zhq5+YTruufMcWMabqysTmZVWjKPfnK0wyVcrsuhjZRdLkHNvD72b9abriOSGIxiLixMOoalNPXzy+wT/tf+U6HHONfsz+xe8ufHBdQWWGWLA9if0rsnmrxK5LvRZQeWsTCsrmOYy8VteVfuRfcVTtDLItLIsMYxZLdU/DbtSemxF6Z6Zo5WBXE4tFdCyVMMXMTEMZXVlS6Xec2T4e0tHsRcEuWshcJ2YsNF5rUx1E8ifCq6Z+ZP7qdCeu/aTwFd53l16/o0NOw6O3dLavP4Hbi4RdmuDk6DoYaninC0+o4uZjbJ7Rxeu0/FbuFg+q7DVS6fQe0rZ6NDGUNNU6DEqOaLTicKnYZMnBWruljQxoaS3dZhocDge0bSTyOvdAbG5hxe2xji7E/L55xX13wWNDi6HCekcFxfCPGxY0MXC+s7afWaMdDyjyr+o8Rudm/NabOZvdl274zH4f5XK9z6On1Pe/K5TdPAslg77BjuO6Y3eO7GqvOPG/stknp1leyvLL0Z7bl9I4noMvLkzytLhWYzrOZzLXCORe028rORzOg4N/L0HlMOQ3Pgmnbb6KczlabORpu980q37TBqRu0/p3PO6234Bl03Ynuz+9W7gnsEcmvYaYY3aMYY0wx3pYd+ujsXauWdaY5Xkbtl23fPzFHiDB/QMo0yFjBllYxTQYYyxkrwn7JufwJ/PfgJ+C83X69ni6zvXcnyXabv0ncbLwsceS+RNlyN2mnneJtX0ngYO0+e+0+UnA+Wch3ji8hj5an4h+i6XBySU4n+R0roVcbw5yvHrmr4Yw8Y7x6c+9POPYHI5HI5HI5HI5HGXGww4nE4nrVyOR8XeqPEO7PLOiukYa3Novk5hV4cdtYZLI93e+uxff2jRo0aNGjRo0aNG1bVtW1dy3m83m8+tQ5ZzHw3nObwOu8La9Rc1dtkdS8A3eTk823tnktXWlxN6Oixe06zrN70Isd9jiOgZFq9yfkPqP/SLhN2Myl8jDM43bl1nbcb4cO57jlh8Jow6pzXZdL4dyODTuuhu77FyO27DdwdRxmvO+O+3N2+BdqyTwLHVczDVY4UPE4O66/ZO2cx1LFzVdSXtF7G4HMbrauOHRw6c8FdZ5m9fHZHYZXfTlZquyynSyTTKke6vcffSD9pzPA/G7n7jxPmuhc1DHMynPMrGL6AdewYmwu5ko+UUyTwrMv27rPH1v1nGqd87+p6N6LU8k3NEng53xXyHS97+44OSg/sy/hn+Se6yfYNjW0/uTgP+PvWYzLMmjhcLB/gGpri6H83/84eUXWT6T9Hsv7785z/7z4icpW+zfXypuR7rx/gMdZb1/wC678pcs8/2a3mDitGHxl9mfPlll5MafWWqxk/eYuTDgcNMzDGWLWvsuglNxs53GtN6uWpktlW1tZZYcuinMMWmnNnJydze3b2Y1McBxrBkXw799izLMZZYyy0TkbsGM4p03S2uVu5s/XXUdSdec6smVxZYYGpVmT8A+8ajuEyV5FatkvVru2x6uxGXXbH4A+jvgP4GMYy3iPLXzq/6z65+E005ey+cwMZD3fZcqc6xpjTFjQ0P3U+e++cPYmTIwj0nrK5NPTfl3WvpfLtXDcb2HQMudYOxFXQBor4L4T6vrOauFctYXJQ++NUWmJe5bmx1jDiZS1dTqWxo4GR8jm3fttpmPHppk9PEyv4/y8/sO07XacOmcqc0x2Vi9BvNJvN5oW8x4mOsydpidRxMYJPx06m1bqPzq9KtK8sxXNXFodD/+MYYaJTLwOhc9brCsV18oOR1i4tXChyTkq4lf4y1Ke+9axjDHqs1mfBbMXuP4Hzi+X7t8vzv7bHerrUPgPCxhjre4fXdfLNtNM+Jd+Zdh8xd8wP87uNPoPgv4W7/5P2BuxfsMabNnMnza+54Pdi5U671GPZY8CehX8Voeoo7FHpkeEc6715FwHZrIrUrHaviPUbPZHND+IhczrP6FcYvhOZ0Di/ETt0OI+YwNWR9r7tpf6WDeZKZDB1+z2IthOl1mPyb5FluvEx9h9d0NnM0Y1XPFkWIsk1WotJ0PBMmkvjvQTd0e71tfeV+8r8lQ/tpzpsmxJ+InrI/dj2UajUajVTUajatRqNRtGo1Go1Go4wjeMpZFMVV9CHbofPraLsJ3JpWV2XOoanCuFky4y3PPNxucK2uKC1Lbdb1eo+m5XomN6HfeZsabHLHRX/K+offtNGGmHWctcVcG44MdSqsOLY9VzX+Zxfxn2HPdWTpzWvkrtJ8M5zorrKcquRytJ5N5DZmcaW02l76nWO+BqPXm1A2Ry/0q71dH/mqrqeFjkYxjEXtsX8qubTk67rGycyqsdm4tZx5D6D5hhi0waaWmiaMP81Yjii5qxPlPuU/GfTL1Y5E6Jyfiq63qTa39A4J0sOGDgO9WF9bOXl0XfPRbsY2bPNKPy1YrFYrFYmRhhlTIyMjJWJYZHXuCXI8OoXsvfljGLFicNifpp2XunoPiG1wtx3p1Tah+/DD66OnVtVXP9rKbVxOnL0tR/rHtqB5UDErUVcl11D4qqvjpOcxX7armUNJB3LpW6bxVvD08e8h3odKKvyCFZBdSh2FVcST9xV3n3T8t1j7Kr9qgrqXg+13Pt5U7JCvFXVIV1YG5lRhkVYZJYYDDD4KOIMoHCp26WS8GB7uBh2zIdgq/PKyInjV2STShuoapUdCpX1yTwqq/z1VvET7Kh5nVPkO8YyxjLt2MaaMmWTLQvx3qnzltnXW0p2jxgbEtSny/Osv8Y9pLMXYoHVPAhkVdWVeODhR6q9/Sxe2liwwZWMVvFXfRkeIDxAePUPIrdJ4ey6yquzH+PD/bUOWAu05qVHtFd8rrKHSoeNIOUqrYr3FXyToqfYJgwmJdKpXXOwYYegNNGMzfZPp/t3t/DVs4zjNTN61rRqaWaa4NYbRjTa0tWwy2Y2tGN8ZO8ofNKq4j9SL7I+cSm4/6ovLV5HNXLI0jJidwrtk6ynCaP6Z++GjRlWS3tLeW129Mi9evxU9mtz6s5J3Z7M2ngTgnKvmpomxpaLCzPfmx0JWE+m3NLDDGOX47RctdYYNK5jakdqLkRlI39n590T5zctGSwwZZDJj6kW8XSi6ot2MmWWJ0DUT3nuvebBudScjZ79g8cWJ8av0k+/bE5WKd5MdbFpbDVMxu1DVMmtNZGJvq1mtRbn6M+g/kP0FwDwr7quZs7xosNGpbscyxhhd9TyJyFwbLcxlTasg75vW7TsV5K7ji44XPMMrdoj+Y3rT0Hie62nlYV/pwczzOmdLqLhYkzGMzCZWGMQzGMSsZYY6Di1t4nlJ+Em63mJxrVLxPbYxNEdgc1dU2iOKyoYYWjNrEeHTYybVk0atSa7ehuwsWMWTqn1TrnS6hYsi71d1+s+k+ic70e20fzE/VaTdxT9ZtU4GIXdeNx3X77guYYfpHeTQjaMX6brOu4OY4K7Y2d9mbHarI5ox3p4GpJ2Vd/Tst60f7j999pppjR+Q/Qf8J/VaORs3cji7FfFuN61+ui9s8hix1OCh5KGVV23BPXvZfz3CLyH
pix+exi8z/KnCnosY2eunor+cxyPO/xJ0vKey9OvE9VjqaYu0x3Z3jd6o2b1T12D+F8l232lwaaacD5LE8LBxu7WTlbWraWpew8Xexjel3E+wWD4APITdNqR8F3R3T0lunCQ4GaE9R37DxeCYfcHi4xci5ovKfxVs55y2hf+65E/Xdp6jR5nrebTmi5incpkyOjs50JvrZwstbbW6kfuuQw+2mykf/EXNFzxfKTrxew929TR6bWnGL//F3JFOFCQT3K4lQ"
28
+
29
+ kernels = Kernel(
30
+ bz2.decompress(base64.b64decode(quantization_code)),
31
+ [
32
+ "int4WeightCompression",
33
+ "int4WeightExtractionFloat",
34
+ "int4WeightExtractionHalf",
35
+ "int8WeightExtractionFloat",
36
+ "int8WeightExtractionHalf",
37
+ ],
38
+ )
39
+ except Exception as exception:
40
+ kernels = None
41
+ logger.warning("Failed to load cpm_kernels:" + str(exception))
42
+
43
+
44
+ class W8A16Linear(torch.autograd.Function):
45
+ @staticmethod
46
+ def forward(
47
+ ctx,
48
+ inp: torch.Tensor,
49
+ quant_w: torch.Tensor,
50
+ scale_w: torch.Tensor,
51
+ weight_bit_width,
52
+ ):
53
+ ctx.inp_shape = inp.size()
54
+ ctx.weight_bit_width = weight_bit_width
55
+ out_features = quant_w.size(0)
56
+ inp = inp.contiguous().view(-1, inp.size(-1))
57
+ weight = extract_weight_to_half(quant_w, scale_w, weight_bit_width)
58
+ ctx.weight_shape = weight.size()
59
+ output = inp.mm(weight.t())
60
+ ctx.save_for_backward(inp, quant_w, scale_w)
61
+ return output.view(*(ctx.inp_shape[:-1] + (out_features,)))
62
+
63
+ @staticmethod
64
+ def backward(ctx, grad_output: torch.Tensor):
65
+ inp, quant_w, scale_w = ctx.saved_tensors
66
+ weight = extract_weight_to_half(quant_w, scale_w, ctx.weight_bit_width)
67
+ grad_output = grad_output.contiguous().view(-1, weight.size(0))
68
+ grad_input = grad_output.mm(weight)
69
+ grad_weight = grad_output.t().mm(inp)
70
+ return (
71
+ grad_input.view(ctx.inp_shape),
72
+ grad_weight.view(ctx.weight_shape),
73
+ None,
74
+ None,
75
+ )
76
+
77
+
78
+ def compress_int4_weight(weight: torch.Tensor): # (n, m)
79
+ with torch.cuda.device(weight.device):
80
+ n, m = weight.size(0), weight.size(1)
81
+ assert m % 2 == 0
82
+ m = m // 2
83
+ out = torch.empty(n, m, dtype=torch.int8, device="cuda")
84
+ stream = torch.cuda.current_stream()
85
+
86
+ gridDim = (n, 1, 1)
87
+ blockDim = (min(round_up(m, 32), 1024), 1, 1)
88
+
89
+ kernels.int4WeightCompression(
90
+ gridDim,
91
+ blockDim,
92
+ 0,
93
+ stream,
94
+ [
95
+ ctypes.c_void_p(weight.data_ptr()),
96
+ ctypes.c_void_p(out.data_ptr()),
97
+ ctypes.c_int32(n),
98
+ ctypes.c_int32(m),
99
+ ],
100
+ )
101
+ return out
102
+
103
+
104
+ def extract_weight_to_half(
105
+ weight: torch.Tensor, scale_list: torch.Tensor, source_bit_width: int
106
+ ):
107
+ if source_bit_width == 8:
108
+ func = kernels.int8WeightExtractionHalf
109
+ elif source_bit_width == 4:
110
+ func = kernels.int4WeightExtractionHalf
111
+ else:
112
+ assert False, "Unsupported bit-width"
113
+
114
+ with torch.cuda.device(weight.device):
115
+ n, m = weight.size(0), weight.size(1)
116
+ out = torch.empty(
117
+ n, m * (8 // source_bit_width), dtype=torch.half, device="cuda"
118
+ )
119
+ stream = torch.cuda.current_stream()
120
+
121
+ gridDim = (n, 1, 1)
122
+ blockDim = (min(round_up(m, 32), 1024), 1, 1)
123
+
124
+ func(
125
+ gridDim,
126
+ blockDim,
127
+ 0,
128
+ stream,
129
+ [
130
+ ctypes.c_void_p(weight.data_ptr()),
131
+ ctypes.c_void_p(scale_list.data_ptr()),
132
+ ctypes.c_void_p(out.data_ptr()),
133
+ ctypes.c_int32(n),
134
+ ctypes.c_int32(m),
135
+ ],
136
+ )
137
+ return out
138
+
139
+
140
+ class QuantizedLinear(Linear):
141
+ def __init__(
142
+ self,
143
+ weight_bit_width: int,
144
+ weight_tensor=None,
145
+ bias_tensor=None,
146
+ empty_init=False,
147
+ *args,
148
+ **kwargs
149
+ ):
150
+ super(QuantizedLinear, self).__init__(*args, **kwargs)
151
+ self.weight_bit_width = weight_bit_width
152
+
153
+ shape = self.weight.shape
154
+ del self.weight
155
+
156
+ if weight_tensor is None or empty_init:
157
+ self.weight = torch.empty(
158
+ shape[0],
159
+ shape[1] * weight_bit_width // 8,
160
+ dtype=torch.int8,
161
+ device=kwargs["device"],
162
+ )
163
+ self.weight_scale = torch.empty(
164
+ shape[0], dtype=kwargs["dtype"], device=kwargs["device"]
165
+ )
166
+ else:
167
+ self.weight_scale = (
168
+ weight_tensor.abs().max(dim=-1).values
169
+ / ((2 ** (weight_bit_width - 1)) - 1)
170
+ ).half()
171
+ self.weight = torch.round(weight_tensor / self.weight_scale[:, None]).to(
172
+ torch.int8
173
+ )
174
+ if weight_bit_width == 4:
175
+ self.weight = compress_int4_weight(self.weight)
176
+
177
+ self.weight = Parameter(self.weight.to(kwargs["device"]), requires_grad=False)
178
+ self.weight_scale = Parameter(
179
+ self.weight_scale.to(kwargs["device"]), requires_grad=False
180
+ )
181
+ if bias_tensor is not None:
182
+ self.bias = Parameter(bias_tensor.to(kwargs["device"]), requires_grad=False)
183
+ else:
184
+ self.bias = None
185
+
186
+ def forward(self, input):
187
+ output = W8A16Linear.apply(
188
+ input, self.weight, self.weight_scale, self.weight_bit_width
189
+ )
190
+ if self.bias is not None:
191
+ output = output + self.bias
192
+ return output
193
+
194
+
195
+ def quantize(model, weight_bit_width, empty_init=False, **kwargs):
196
+ """Replace fp16 linear with quantized linear"""
197
+
198
+ for layer in model.layers:
199
+ layer.attention.query_key_value = QuantizedLinear(
200
+ weight_bit_width=weight_bit_width,
201
+ weight_tensor=layer.attention.query_key_value.weight.to(
202
+ torch.cuda.current_device()
203
+ ),
204
+ bias_tensor=layer.attention.query_key_value.bias,
205
+ in_features=layer.attention.query_key_value.in_features,
206
+ out_features=layer.attention.query_key_value.out_features,
207
+ bias=True,
208
+ dtype=torch.half,
209
+ device=layer.attention.query_key_value.weight.device,
210
+ empty_init=empty_init,
211
+ )
212
+ layer.attention.dense = QuantizedLinear(
213
+ weight_bit_width=weight_bit_width,
214
+ weight_tensor=layer.attention.dense.weight.to(torch.cuda.current_device()),
215
+ bias_tensor=layer.attention.dense.bias,
216
+ in_features=layer.attention.dense.in_features,
217
+ out_features=layer.attention.dense.out_features,
218
+ bias=True,
219
+ dtype=torch.half,
220
+ device=layer.attention.dense.weight.device,
221
+ empty_init=empty_init,
222
+ )
223
+ layer.mlp.dense_h_to_4h = QuantizedLinear(
224
+ weight_bit_width=weight_bit_width,
225
+ weight_tensor=layer.mlp.dense_h_to_4h.weight.to(
226
+ torch.cuda.current_device()
227
+ ),
228
+ bias_tensor=layer.mlp.dense_h_to_4h.bias,
229
+ in_features=layer.mlp.dense_h_to_4h.in_features,
230
+ out_features=layer.mlp.dense_h_to_4h.out_features,
231
+ bias=True,
232
+ dtype=torch.half,
233
+ device=layer.mlp.dense_h_to_4h.weight.device,
234
+ empty_init=empty_init,
235
+ )
236
+ layer.mlp.dense_4h_to_h = QuantizedLinear(
237
+ weight_bit_width=weight_bit_width,
238
+ weight_tensor=layer.mlp.dense_4h_to_h.weight.to(
239
+ torch.cuda.current_device()
240
+ ),
241
+ bias_tensor=layer.mlp.dense_4h_to_h.bias,
242
+ in_features=layer.mlp.dense_4h_to_h.in_features,
243
+ out_features=layer.mlp.dense_4h_to_h.out_features,
244
+ bias=True,
245
+ dtype=torch.half,
246
+ device=layer.mlp.dense_4h_to_h.weight.device,
247
+ empty_init=empty_init,
248
+ )
249
+ return model
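For reference, the scheme applied by QuantizedLinear above is plain per-row absmax quantization: one half-precision scale per output row, weights rounded to int8 (and optionally packed to int4 by compress_int4_weight). The sketch below reproduces that arithmetic with ordinary torch ops on CPU; it illustrates the scheme and does not call into the cpm_kernels CUDA path.

# Illustration of the per-row absmax quantization used by QuantizedLinear.
import torch

w = torch.randn(4, 8)                                        # fp32 weight (out_features, in_features)
bits = 8
scale = w.abs().max(dim=-1).values / (2 ** (bits - 1) - 1)   # one scale per output row
q = torch.round(w / scale[:, None]).to(torch.int8)           # stored int8 weight
w_hat = q.to(torch.float32) * scale[:, None]                 # dequantized weight used in the matmul

print((w - w_hat).abs().max())  # quantization error is bounded by scale / 2 per element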
lora_checkpoints/checkpoints/checkpoint-3000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15fac4ae3ab687e4e1c6260cb03f353752d4442b9ec8be834d30e070bf76737a
3
+ size 14575
lora_checkpoints/checkpoints/checkpoint-3000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1505c75b267a11c5661f4cba5331fc2b30406beec216a02fb1be5f878615deae
3
+ size 627
lora_checkpoints/checkpoints/checkpoint-3000/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
1
+ {
2
+ "bos_token": "<sop>",
3
+ "eos_token": "<eop>",
4
+ "mask_token": "[MASK]",
5
+ "pad_token": "<pad>",
6
+ "unk_token": "<unk>"
7
+ }
lora_checkpoints/checkpoints/checkpoint-3000/tokenization_chatglm.py ADDED
@@ -0,0 +1,443 @@
1
+ """Tokenization classes for ChatGLM."""
2
+ from typing import List, Optional, Union
3
+ import os
4
+
5
+ from transformers.tokenization_utils import PreTrainedTokenizer
6
+ from transformers.utils import logging, PaddingStrategy
7
+ from transformers.tokenization_utils_base import EncodedInput, BatchEncoding
8
+ from typing import Dict
9
+ import sentencepiece as spm
10
+ import numpy as np
11
+
12
+ logger = logging.get_logger(__name__)
13
+
14
+ PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
15
+ "THUDM/chatglm-6b": 2048,
16
+ }
17
+
18
+
19
+ class TextTokenizer:
20
+ def __init__(self, model_path):
21
+ self.sp = spm.SentencePieceProcessor()
22
+ self.sp.Load(model_path)
23
+ self.num_tokens = self.sp.vocab_size()
24
+
25
+ def encode(self, text):
26
+ return self.sp.EncodeAsIds(text)
27
+
28
+ def decode(self, ids: List[int]):
29
+ return self.sp.DecodeIds(ids)
30
+
31
+ def tokenize(self, text):
32
+ return self.sp.EncodeAsPieces(text)
33
+
34
+ def convert_tokens_to_string(self, tokens):
35
+ return self.sp.DecodePieces(tokens)
36
+
37
+ def convert_tokens_to_ids(self, tokens):
38
+ return [self.sp.PieceToId(token) for token in tokens]
39
+
40
+ def convert_token_to_id(self, token):
41
+ return self.sp.PieceToId(token)
42
+
43
+ def convert_id_to_token(self, idx):
44
+ return self.sp.IdToPiece(idx)
45
+
46
+ def __len__(self):
47
+ return self.num_tokens
48
+
49
+
50
+ class SPTokenizer:
51
+ def __init__(
52
+ self,
53
+ vocab_file,
54
+ num_image_tokens=20000,
55
+ max_blank_length=80,
56
+ byte_fallback=True,
57
+ ):
58
+ assert vocab_file is not None
59
+ self.vocab_file = vocab_file
60
+ self.num_image_tokens = num_image_tokens
61
+ self.special_tokens = ["[MASK]", "[gMASK]", "[sMASK]", "<unused_0>", "<sop>", "<eop>", "<ENC>", "<dBLOCK>"]
62
+ self.max_blank_length = max_blank_length
63
+ self.byte_fallback = byte_fallback
64
+ self.text_tokenizer = TextTokenizer(vocab_file)
65
+
66
+ def _get_text_tokenizer(self):
67
+ return self.text_tokenizer
68
+
69
+ @staticmethod
70
+ def get_blank_token(length: int):
71
+ assert length >= 2
72
+ return f"<|blank_{length}|>"
73
+
74
+ @staticmethod
75
+ def get_tab_token():
76
+ return f"<|tab|>"
77
+
78
+ @property
79
+ def num_text_tokens(self):
80
+ return self.text_tokenizer.num_tokens
81
+
82
+ @property
83
+ def num_tokens(self):
84
+ return self.num_image_tokens + self.num_text_tokens
85
+
86
+ @staticmethod
87
+ def _encode_whitespaces(text: str, max_len: int = 80):
88
+ text = text.replace("\t", SPTokenizer.get_tab_token())
89
+ for i in range(max_len, 1, -1):
90
+ text = text.replace(" " * i, SPTokenizer.get_blank_token(i))
91
+ return text
92
+
93
+ def _preprocess(self, text: str, linebreak=True, whitespaces=True):
94
+ if linebreak:
95
+ text = text.replace("\n", "<n>")
96
+ if whitespaces:
97
+ text = self._encode_whitespaces(text, max_len=self.max_blank_length)
98
+ return text
99
+
100
+ def encode(
101
+ self, text: str, linebreak=True, whitespaces=True, add_dummy_prefix=True
102
+ ) -> List[int]:
103
+ """
104
+ @param text: Text to encode.
105
+ @param linebreak: Whether to encode newline (\n) in text.
106
+ @param whitespaces: Whether to encode multiple whitespaces or tab in text, useful for source code encoding.
107
+ @param special_tokens: Whether to encode special token ([MASK], [gMASK], etc.) in text.
108
+ @param add_dummy_prefix: Whether to add dummy blank space in the beginning.
109
+ """
110
+ text = self._preprocess(text, linebreak, whitespaces)
111
+ if not add_dummy_prefix:
112
+ text = "<n>" + text
113
+ tmp = self._get_text_tokenizer().encode(text)
114
+ tokens = [x + self.num_image_tokens for x in tmp]
115
+ return tokens if add_dummy_prefix else tokens[2:]
116
+
117
+ def postprocess(self, text):
118
+ text = text.replace("<n>", "\n")
119
+ text = text.replace(SPTokenizer.get_tab_token(), "\t")
120
+ for i in range(2, self.max_blank_length + 1):
121
+ text = text.replace(self.get_blank_token(i), " " * i)
122
+ return text
123
+
124
+ def decode(self, text_ids: List[int]) -> str:
125
+ ids = [int(_id) - self.num_image_tokens for _id in text_ids]
126
+ ids = [_id for _id in ids if _id >= 0]
127
+ text = self._get_text_tokenizer().decode(ids)
128
+ text = self.postprocess(text)
129
+ return text
130
+
131
+ def decode_tokens(self, tokens: List[str]) -> str:
132
+ text = self._get_text_tokenizer().convert_tokens_to_string(tokens)
133
+ text = self.postprocess(text)
134
+ return text
135
+
136
+ def tokenize(
137
+ self, text: str, linebreak=True, whitespaces=True, add_dummy_prefix=True
138
+ ) -> List[str]:
139
+ """
140
+ @param text: Text to encode.
141
+ @param linebreak: Whether to encode newline (\n) in text.
142
+ @param whitespaces: Whether to encode multiple whitespaces or tab in text, useful for source code encoding.
143
+ @param special_tokens: Whether to encode special token ([MASK], [gMASK], etc.) in text.
144
+ @param add_dummy_prefix: Whether to add dummy blank space in the beginning.
145
+ """
146
+ text = self._preprocess(text, linebreak, whitespaces)
147
+ if not add_dummy_prefix:
148
+ text = "<n>" + text
149
+ tokens = self._get_text_tokenizer().tokenize(text)
150
+ return tokens if add_dummy_prefix else tokens[2:]
151
+
152
+ def __getitem__(self, x: Union[int, str]):
153
+ if isinstance(x, int):
154
+ if x < self.num_image_tokens:
155
+ return "<image_{}>".format(x)
156
+ else:
157
+ return self.text_tokenizer.convert_id_to_token(x - self.num_image_tokens)
158
+ elif isinstance(x, str):
159
+ if x.startswith("<image_") and x.endswith(">") and x[7:-1].isdigit():
160
+ return int(x[7:-1])
161
+ else:
162
+ return self.text_tokenizer.convert_token_to_id(x) + self.num_image_tokens
163
+ else:
164
+ raise ValueError("The key should be str or int.")
165
+
166
+
167
+ class ChatGLMTokenizer(PreTrainedTokenizer):
168
+ """
169
+ Construct a ChatGLM tokenizer. Based on SentencePiece.
170
+
171
+ Args:
172
+ vocab_file (`str`):
173
+ Path to the vocabulary file.
174
+ """
175
+
176
+ vocab_files_names = {"vocab_file": "ice_text.model"}
177
+ max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
178
+ model_input_names = ["input_ids", "attention_mask", "position_ids"]
179
+
180
+ def __init__(
181
+ self,
182
+ vocab_file,
183
+ do_lower_case=False,
184
+ remove_space=False,
185
+ bos_token='<sop>',
186
+ eos_token='<eop>',
187
+ end_token='</s>',
188
+ mask_token='[MASK]',
189
+ gmask_token='[gMASK]',
190
+ padding_side="left",
191
+ pad_token="<pad>",
192
+ unk_token="<unk>",
193
+ num_image_tokens=20000,
194
+ **kwargs
195
+ ) -> None:
196
+ super().__init__(
197
+ do_lower_case=do_lower_case,
198
+ remove_space=remove_space,
199
+ padding_side=padding_side,
200
+ bos_token=bos_token,
201
+ eos_token=eos_token,
202
+ end_token=end_token,
203
+ mask_token=mask_token,
204
+ gmask_token=gmask_token,
205
+ pad_token=pad_token,
206
+ unk_token=unk_token,
207
+ num_image_tokens=num_image_tokens,
208
+ **kwargs
209
+ )
210
+
211
+ self.do_lower_case = do_lower_case
212
+ self.remove_space = remove_space
213
+ self.vocab_file = vocab_file
214
+
215
+ self.bos_token = bos_token
216
+ self.eos_token = eos_token
217
+ self.end_token = end_token
218
+ self.mask_token = mask_token
219
+ self.gmask_token = gmask_token
220
+
221
+ self.sp_tokenizer = SPTokenizer(vocab_file, num_image_tokens=num_image_tokens)
222
+
223
+ """ Initialisation """
224
+
225
+ @property
226
+ def gmask_token_id(self) -> Optional[int]:
227
+ if self.gmask_token is None:
228
+ return None
229
+ return self.convert_tokens_to_ids(self.gmask_token)
230
+
231
+ @property
232
+ def end_token_id(self) -> Optional[int]:
233
+ """
234
+ `Optional[int]`: Id of the end of context token in the vocabulary. Returns `None` if the token has not been
235
+ set.
236
+ """
237
+ if self.end_token is None:
238
+ return None
239
+ return self.convert_tokens_to_ids(self.end_token)
240
+
241
+ @property
242
+ def vocab_size(self):
243
+ """ Returns vocab size """
244
+ return self.sp_tokenizer.num_tokens
245
+
246
+ def get_vocab(self):
247
+ """ Returns vocab as a dict """
248
+ vocab = {self._convert_id_to_token(i): i for i in range(self.vocab_size)}
249
+ vocab.update(self.added_tokens_encoder)
250
+ return vocab
251
+
252
+ def preprocess_text(self, inputs):
253
+ if self.remove_space:
254
+ outputs = " ".join(inputs.strip().split())
255
+ else:
256
+ outputs = inputs
257
+
258
+ if self.do_lower_case:
259
+ outputs = outputs.lower()
260
+
261
+ return outputs
262
+
263
+ def _tokenize(self, text, **kwargs):
264
+ """ Returns a tokenized string. """
265
+ text = self.preprocess_text(text)
266
+
267
+ seq = self.sp_tokenizer.tokenize(text)
268
+
269
+ return seq
270
+
271
+ def convert_tokens_to_string(self, tokens: List[str]) -> str:
272
+ return self.sp_tokenizer.decode_tokens(tokens)
273
+
274
+ def _decode(
275
+ self,
276
+ token_ids: Union[int, List[int]],
277
+ **kwargs
278
+ ) -> str:
279
+ if isinstance(token_ids, int):
280
+ token_ids = [token_ids]
281
+ if len(token_ids) == 0:
282
+ return ""
283
+ if self.pad_token_id in token_ids: # remove pad
284
+ token_ids = list(filter((self.pad_token_id).__ne__, token_ids))
285
+ return super()._decode(token_ids, **kwargs)
286
+
287
+ def _convert_token_to_id(self, token):
288
+ """ Converts a token (str) in an id using the vocab. """
289
+ return self.sp_tokenizer[token]
290
+
291
+ def _convert_id_to_token(self, index):
292
+ """Converts an index (integer) in a token (str) using the vocab."""
293
+ return self.sp_tokenizer[index]
294
+
295
+ def save_vocabulary(self, save_directory, filename_prefix=None):
296
+ """
297
+ Save the vocabulary and special tokens file to a directory.
298
+
299
+ Args:
300
+ save_directory (`str`):
301
+ The directory in which to save the vocabulary.
302
+ filename_prefix (`str`, *optional*):
303
+ An optional prefix to add to the name of the saved files.
304
+
305
+ Returns:
306
+ `Tuple(str)`: Paths to the files saved.
307
+ """
308
+ if os.path.isdir(save_directory):
309
+ vocab_file = os.path.join(
310
+ save_directory, self.vocab_files_names["vocab_file"]
311
+ )
312
+ else:
313
+ vocab_file = save_directory
314
+
315
+ with open(self.vocab_file, 'rb') as fin:
316
+ proto_str = fin.read()
317
+
318
+ with open(vocab_file, "wb") as writer:
319
+ writer.write(proto_str)
320
+
321
+ return (vocab_file,)
322
+
323
+ def build_inputs_with_special_tokens(
324
+ self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
325
+ ) -> List[int]:
326
+ """
327
+ Build model inputs from a sequence or a pair of sequences by concatenating and
328
+ adding special tokens. A ChatGLM sequence has the following format:
329
+
330
+ - single sequence: `X [gMASK] <sop>`
331
+ - pair of sequences: `A [gMASK] <sop> B <eop>`
332
+
333
+ Args:
334
+ token_ids_0 (`List[int]`):
335
+ List of IDs to which the special tokens will be added.
336
+ token_ids_1 (`List[int]`, *optional*):
337
+ Optional second list of IDs for sequence pairs.
338
+
339
+ Returns:
340
+ `List[int]`: List of [input IDs](../glossary#input-ids) with the appropriate special tokens.
341
+ """
342
+ gmask_id = self.sp_tokenizer[self.gmask_token]
343
+ eos_id = self.sp_tokenizer[self.eos_token]
344
+ token_ids_0 = token_ids_0 + [gmask_id, self.sp_tokenizer[self.bos_token]]
345
+ if token_ids_1 is not None:
346
+ token_ids_0 = token_ids_0 + token_ids_1 + [eos_id]
347
+ return token_ids_0
348
+
349
+ def _pad(
350
+ self,
351
+ encoded_inputs: Union[Dict[str, EncodedInput], BatchEncoding],
352
+ max_length: Optional[int] = None,
353
+ padding_strategy: PaddingStrategy = PaddingStrategy.DO_NOT_PAD,
354
+ pad_to_multiple_of: Optional[int] = None,
355
+ return_attention_mask: Optional[bool] = None,
356
+ ) -> dict:
357
+ """
358
+ Pad encoded inputs (on left/right and up to predefined length or max length in the batch)
359
+
360
+ Args:
361
+ encoded_inputs:
362
+ Dictionary of tokenized inputs (`List[int]`) or batch of tokenized inputs (`List[List[int]]`).
363
+ max_length: maximum length of the returned list and optionally padding length (see below).
364
+ Will truncate by taking into account the special tokens.
365
+ padding_strategy: PaddingStrategy to use for padding.
366
+
367
+ - PaddingStrategy.LONGEST Pad to the longest sequence in the batch
368
+ - PaddingStrategy.MAX_LENGTH: Pad to the max length (default)
369
+ - PaddingStrategy.DO_NOT_PAD: Do not pad
370
+ The tokenizer padding sides are defined in self.padding_side:
371
+
372
+ - 'left': pads on the left of the sequences
373
+ - 'right': pads on the right of the sequences
374
+ pad_to_multiple_of: (optional) Integer if set will pad the sequence to a multiple of the provided value.
375
+ This is especially useful to enable the use of Tensor Core on NVIDIA hardware with compute capability
376
+ `>= 7.5` (Volta).
377
+ return_attention_mask:
378
+ (optional) Set to False to avoid returning attention mask (default: set to model specifics)
379
+ """
380
+ # Load from model defaults
381
+ bos_token_id = self.sp_tokenizer[self.bos_token]
382
+ mask_token_id = self.sp_tokenizer[self.mask_token]
383
+ gmask_token_id = self.sp_tokenizer[self.gmask_token]
384
+ assert self.padding_side == "left"
385
+
386
+ required_input = encoded_inputs[self.model_input_names[0]]
387
+ seq_length = len(required_input)
388
+
389
+ if padding_strategy == PaddingStrategy.LONGEST:
390
+ max_length = len(required_input)
391
+
392
+ if max_length is not None and pad_to_multiple_of is not None and (max_length % pad_to_multiple_of != 0):
393
+ max_length = ((max_length // pad_to_multiple_of) + 1) * pad_to_multiple_of
394
+
395
+ needs_to_be_padded = padding_strategy != PaddingStrategy.DO_NOT_PAD and len(required_input) != max_length
396
+
397
+ # Initialize attention mask if not present.
398
+ if max_length is not None:
399
+ if "attention_mask" not in encoded_inputs:
400
+ if bos_token_id in required_input:
401
+ context_length = required_input.index(bos_token_id)
402
+ else:
403
+ context_length = seq_length
404
+ attention_mask = np.ones((1, seq_length, seq_length))
405
+ attention_mask = np.tril(attention_mask)
406
+ attention_mask[:, :, :context_length] = 1
407
+ attention_mask = np.bool_(attention_mask < 0.5)
408
+ encoded_inputs["attention_mask"] = attention_mask
409
+
410
+ if "position_ids" not in encoded_inputs:
411
+ if bos_token_id in required_input:
412
+ context_length = required_input.index(bos_token_id)
413
+ else:
414
+ context_length = seq_length
415
+ position_ids = np.arange(seq_length, dtype=np.int64)
416
+ mask_token = mask_token_id if mask_token_id in required_input else gmask_token_id
417
+ if mask_token in required_input:
418
+ mask_position = required_input.index(mask_token)
419
+ position_ids[context_length:] = mask_position
420
+ block_position_ids = np.concatenate(
421
+ [np.zeros(context_length, dtype=np.int64),
422
+ np.arange(1, seq_length - context_length + 1, dtype=np.int64)])
423
+ encoded_inputs["position_ids"] = np.stack([position_ids, block_position_ids], axis=0)
424
+
425
+ if needs_to_be_padded:
426
+ difference = max_length - len(required_input)
427
+
428
+ if "attention_mask" in encoded_inputs:
429
+ encoded_inputs["attention_mask"] = np.pad(encoded_inputs["attention_mask"],
430
+ pad_width=[(0, 0), (difference, 0), (difference, 0)],
431
+ mode='constant', constant_values=True)
432
+ if "token_type_ids" in encoded_inputs:
433
+ encoded_inputs["token_type_ids"] = [self.pad_token_type_id] * difference + encoded_inputs[
434
+ "token_type_ids"
435
+ ]
436
+ if "special_tokens_mask" in encoded_inputs:
437
+ encoded_inputs["special_tokens_mask"] = [1] * difference + encoded_inputs["special_tokens_mask"]
438
+ if "position_ids" in encoded_inputs:
439
+ encoded_inputs["position_ids"] = np.pad(encoded_inputs["position_ids"],
440
+ pad_width=[(0, 0), (difference, 0)])
441
+ encoded_inputs[self.model_input_names[0]] = [self.pad_token_id] * difference + required_input
442
+
443
+ return encoded_inputs
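A small sketch of what the tokenizer defined above produces; the checkpoint path is assumed, and the expected values follow build_inputs_with_special_tokens() and _pad() as written here.

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained(
    "lora_checkpoints/checkpoints/checkpoint-3000", trust_remote_code=True
)

ids = tok.encode("你好")
# build_inputs_with_special_tokens() appends [gMASK] and <sop> to every sequence,
# so the last two ids should equal gmask_token_id and bos_token_id.
print(ids[-2:], tok.gmask_token_id, tok.bos_token_id)
# _pad() asserts left padding and also fills in position_ids / a boolean attention mask.
print(tok.padding_side)  # "left"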
lora_checkpoints/checkpoints/checkpoint-3000/tokenizer_config.json ADDED
@@ -0,0 +1,22 @@
1
+ {
2
+ "auto_map": {
3
+ "AutoTokenizer": [
4
+ "tokenization_chatglm.ChatGLMTokenizer",
5
+ null
6
+ ]
7
+ },
8
+ "bos_token": "<sop>",
9
+ "do_lower_case": false,
10
+ "end_token": "</s>",
11
+ "eos_token": "<eop>",
12
+ "gmask_token": "[gMASK]",
13
+ "mask_token": "[MASK]",
14
+ "model_max_length": 1000000000000000019884624838656,
15
+ "num_image_tokens": 0,
16
+ "pad_token": "<pad>",
17
+ "padding_side": "left",
18
+ "remove_space": false,
19
+ "special_tokens_map_file": null,
20
+ "tokenizer_class": "ChatGLMTokenizer",
21
+ "unk_token": "<unk>"
22
+ }
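Two details of this config are worth noting: the auto_map entry is what routes AutoTokenizer to the ChatGLMTokenizer class shipped with the checkpoint (hence trust_remote_code=True when loading), and model_max_length is the transformers "effectively unbounded" sentinel value, so generation length is controlled by max_length / max_new_tokens at call time rather than by the tokenizer. A quick check, assuming the same checkpoint path as above:

import json

with open("lora_checkpoints/checkpoints/checkpoint-3000/tokenizer_config.json") as f:
    cfg = json.load(f)

print(cfg["auto_map"]["AutoTokenizer"])  # ['tokenization_chatglm.ChatGLMTokenizer', None]
print(cfg["model_max_length"] > 1e18)    # True: no real length limit from the tokenizer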
lora_checkpoints/checkpoints/checkpoint-3000/trainer_state.json ADDED
@@ -0,0 +1,1816 @@
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.6029922522041145,
5
+ "global_step": 3000,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.01,
12
+ "learning_rate": 0.009966666666666667,
13
+ "loss": 4.0887,
14
+ "step": 10
15
+ },
16
+ {
17
+ "epoch": 0.01,
18
+ "learning_rate": 0.009933333333333334,
19
+ "loss": 3.4007,
20
+ "step": 20
21
+ },
22
+ {
23
+ "epoch": 0.02,
24
+ "learning_rate": 0.0099,
25
+ "loss": 3.1167,
26
+ "step": 30
27
+ },
28
+ {
29
+ "epoch": 0.02,
30
+ "learning_rate": 0.009866666666666668,
31
+ "loss": 3.1554,
32
+ "step": 40
33
+ },
34
+ {
35
+ "epoch": 0.03,
36
+ "learning_rate": 0.009833333333333333,
37
+ "loss": 3.1095,
38
+ "step": 50
39
+ },
40
+ {
41
+ "epoch": 0.03,
42
+ "learning_rate": 0.0098,
43
+ "loss": 3.0915,
44
+ "step": 60
45
+ },
46
+ {
47
+ "epoch": 0.04,
48
+ "learning_rate": 0.009766666666666667,
49
+ "loss": 2.982,
50
+ "step": 70
51
+ },
52
+ {
53
+ "epoch": 0.04,
54
+ "learning_rate": 0.009733333333333333,
55
+ "loss": 2.9944,
56
+ "step": 80
57
+ },
58
+ {
59
+ "epoch": 0.05,
60
+ "learning_rate": 0.0097,
61
+ "loss": 3.1368,
62
+ "step": 90
63
+ },
64
+ {
65
+ "epoch": 0.05,
66
+ "learning_rate": 0.009666666666666667,
67
+ "loss": 3.1288,
68
+ "step": 100
69
+ },
70
+ {
71
+ "epoch": 0.06,
72
+ "learning_rate": 0.009633333333333334,
73
+ "loss": 2.9818,
74
+ "step": 110
75
+ },
76
+ {
77
+ "epoch": 0.06,
78
+ "learning_rate": 0.0096,
79
+ "loss": 3.0066,
80
+ "step": 120
81
+ },
82
+ {
83
+ "epoch": 0.07,
84
+ "learning_rate": 0.009566666666666666,
85
+ "loss": 3.0357,
86
+ "step": 130
87
+ },
88
+ {
89
+ "epoch": 0.07,
90
+ "learning_rate": 0.009533333333333335,
91
+ "loss": 3.0504,
92
+ "step": 140
93
+ },
94
+ {
95
+ "epoch": 0.08,
96
+ "learning_rate": 0.0095,
97
+ "loss": 2.9895,
98
+ "step": 150
99
+ },
100
+ {
101
+ "epoch": 0.09,
102
+ "learning_rate": 0.009466666666666667,
103
+ "loss": 3.0137,
104
+ "step": 160
105
+ },
106
+ {
107
+ "epoch": 0.09,
108
+ "learning_rate": 0.009433333333333334,
109
+ "loss": 2.9836,
110
+ "step": 170
111
+ },
112
+ {
113
+ "epoch": 0.1,
114
+ "learning_rate": 0.0094,
115
+ "loss": 3.0618,
116
+ "step": 180
117
+ },
118
+ {
119
+ "epoch": 0.1,
120
+ "learning_rate": 0.009366666666666667,
121
+ "loss": 3.0484,
122
+ "step": 190
123
+ },
124
+ {
125
+ "epoch": 0.11,
126
+ "learning_rate": 0.009333333333333334,
127
+ "loss": 3.0454,
128
+ "step": 200
129
+ },
130
+ {
131
+ "epoch": 0.11,
132
+ "learning_rate": 0.009300000000000001,
133
+ "loss": 3.0356,
134
+ "step": 210
135
+ },
136
+ {
137
+ "epoch": 0.12,
138
+ "learning_rate": 0.009266666666666666,
139
+ "loss": 3.0633,
140
+ "step": 220
141
+ },
142
+ {
143
+ "epoch": 0.12,
144
+ "learning_rate": 0.009233333333333333,
145
+ "loss": 3.1764,
146
+ "step": 230
147
+ },
148
+ {
149
+ "epoch": 0.13,
150
+ "learning_rate": 0.0092,
151
+ "loss": 2.9706,
152
+ "step": 240
153
+ },
154
+ {
155
+ "epoch": 0.13,
156
+ "learning_rate": 0.009166666666666667,
157
+ "loss": 2.9756,
158
+ "step": 250
159
+ },
160
+ {
161
+ "epoch": 0.14,
162
+ "learning_rate": 0.009133333333333334,
163
+ "loss": 3.0229,
164
+ "step": 260
165
+ },
166
+ {
167
+ "epoch": 0.14,
168
+ "learning_rate": 0.0091,
169
+ "loss": 3.0316,
170
+ "step": 270
171
+ },
172
+ {
173
+ "epoch": 0.15,
174
+ "learning_rate": 0.009066666666666666,
175
+ "loss": 3.0794,
176
+ "step": 280
177
+ },
178
+ {
179
+ "epoch": 0.15,
180
+ "learning_rate": 0.009033333333333334,
181
+ "loss": 2.8561,
182
+ "step": 290
183
+ },
184
+ {
185
+ "epoch": 0.16,
186
+ "learning_rate": 0.009000000000000001,
187
+ "loss": 2.8994,
188
+ "step": 300
189
+ },
190
+ {
191
+ "epoch": 0.17,
192
+ "learning_rate": 0.008966666666666666,
193
+ "loss": 3.0197,
194
+ "step": 310
195
+ },
196
+ {
197
+ "epoch": 0.17,
198
+ "learning_rate": 0.008933333333333333,
199
+ "loss": 2.9631,
200
+ "step": 320
201
+ },
202
+ {
203
+ "epoch": 0.18,
204
+ "learning_rate": 0.0089,
205
+ "loss": 2.9059,
206
+ "step": 330
207
+ },
208
+ {
209
+ "epoch": 0.18,
210
+ "learning_rate": 0.008866666666666667,
211
+ "loss": 2.9527,
212
+ "step": 340
213
+ },
214
+ {
215
+ "epoch": 0.19,
216
+ "learning_rate": 0.008833333333333334,
217
+ "loss": 2.8575,
218
+ "step": 350
219
+ },
220
+ {
221
+ "epoch": 0.19,
222
+ "learning_rate": 0.0088,
223
+ "loss": 2.9363,
224
+ "step": 360
225
+ },
226
+ {
227
+ "epoch": 0.2,
228
+ "learning_rate": 0.008766666666666667,
229
+ "loss": 3.0735,
230
+ "step": 370
231
+ },
232
+ {
233
+ "epoch": 0.2,
234
+ "learning_rate": 0.008733333333333333,
235
+ "loss": 2.9766,
236
+ "step": 380
237
+ },
238
+ {
239
+ "epoch": 0.21,
240
+ "learning_rate": 0.0087,
241
+ "loss": 3.0783,
242
+ "step": 390
243
+ },
244
+ {
245
+ "epoch": 0.21,
246
+ "learning_rate": 0.008666666666666668,
247
+ "loss": 2.9696,
248
+ "step": 400
249
+ },
250
+ {
251
+ "epoch": 0.22,
252
+ "learning_rate": 0.008633333333333333,
253
+ "loss": 2.9803,
254
+ "step": 410
255
+ },
256
+ {
257
+ "epoch": 0.22,
258
+ "learning_rate": 0.0086,
259
+ "loss": 3.0466,
260
+ "step": 420
261
+ },
262
+ {
263
+ "epoch": 0.23,
264
+ "learning_rate": 0.008566666666666667,
265
+ "loss": 2.9929,
266
+ "step": 430
267
+ },
268
+ {
269
+ "epoch": 0.24,
270
+ "learning_rate": 0.008533333333333334,
271
+ "loss": 2.9688,
272
+ "step": 440
273
+ },
274
+ {
275
+ "epoch": 0.24,
276
+ "learning_rate": 0.0085,
277
+ "loss": 2.9774,
278
+ "step": 450
279
+ },
280
+ {
281
+ "epoch": 0.25,
282
+ "learning_rate": 0.008466666666666667,
283
+ "loss": 2.891,
284
+ "step": 460
285
+ },
286
+ {
287
+ "epoch": 0.25,
288
+ "learning_rate": 0.008433333333333334,
289
+ "loss": 2.9403,
290
+ "step": 470
291
+ },
292
+ {
293
+ "epoch": 0.26,
294
+ "learning_rate": 0.0084,
295
+ "loss": 2.995,
296
+ "step": 480
297
+ },
298
+ {
299
+ "epoch": 0.26,
300
+ "learning_rate": 0.008366666666666666,
301
+ "loss": 3.0767,
302
+ "step": 490
303
+ },
304
+ {
305
+ "epoch": 0.27,
306
+ "learning_rate": 0.008333333333333333,
307
+ "loss": 3.0292,
308
+ "step": 500
309
+ },
310
+ {
311
+ "epoch": 0.27,
312
+ "learning_rate": 0.0083,
313
+ "loss": 2.977,
314
+ "step": 510
315
+ },
316
+ {
317
+ "epoch": 0.28,
318
+ "learning_rate": 0.008266666666666667,
319
+ "loss": 2.9253,
320
+ "step": 520
321
+ },
322
+ {
323
+ "epoch": 0.28,
324
+ "learning_rate": 0.008233333333333334,
325
+ "loss": 2.9523,
326
+ "step": 530
327
+ },
328
+ {
329
+ "epoch": 0.29,
330
+ "learning_rate": 0.008199999999999999,
331
+ "loss": 3.0647,
332
+ "step": 540
333
+ },
334
+ {
335
+ "epoch": 0.29,
336
+ "learning_rate": 0.008166666666666666,
337
+ "loss": 2.92,
338
+ "step": 550
339
+ },
340
+ {
341
+ "epoch": 0.3,
342
+ "learning_rate": 0.008133333333333334,
343
+ "loss": 2.8838,
344
+ "step": 560
345
+ },
346
+ {
347
+ "epoch": 0.3,
348
+ "learning_rate": 0.008100000000000001,
349
+ "loss": 2.9555,
350
+ "step": 570
351
+ },
352
+ {
353
+ "epoch": 0.31,
354
+ "learning_rate": 0.008066666666666666,
355
+ "loss": 3.0833,
356
+ "step": 580
357
+ },
358
+ {
359
+ "epoch": 0.32,
360
+ "learning_rate": 0.008033333333333333,
361
+ "loss": 3.0882,
362
+ "step": 590
363
+ },
364
+ {
365
+ "epoch": 0.32,
366
+ "learning_rate": 0.008,
367
+ "loss": 2.9753,
368
+ "step": 600
369
+ },
370
+ {
371
+ "epoch": 0.33,
372
+ "learning_rate": 0.007966666666666667,
373
+ "loss": 3.0302,
374
+ "step": 610
375
+ },
376
+ {
377
+ "epoch": 0.33,
378
+ "learning_rate": 0.007933333333333334,
379
+ "loss": 2.9204,
380
+ "step": 620
381
+ },
382
+ {
383
+ "epoch": 0.34,
384
+ "learning_rate": 0.0079,
385
+ "loss": 3.009,
386
+ "step": 630
387
+ },
388
+ {
389
+ "epoch": 0.34,
390
+ "learning_rate": 0.007866666666666666,
391
+ "loss": 2.9843,
392
+ "step": 640
393
+ },
394
+ {
395
+ "epoch": 0.35,
396
+ "learning_rate": 0.007833333333333333,
397
+ "loss": 2.977,
398
+ "step": 650
399
+ },
400
+ {
401
+ "epoch": 0.35,
402
+ "learning_rate": 0.0078000000000000005,
403
+ "loss": 3.0556,
404
+ "step": 660
405
+ },
406
+ {
407
+ "epoch": 0.36,
408
+ "learning_rate": 0.0077666666666666665,
409
+ "loss": 2.8938,
410
+ "step": 670
411
+ },
412
+ {
413
+ "epoch": 0.36,
414
+ "learning_rate": 0.007733333333333333,
415
+ "loss": 2.9768,
416
+ "step": 680
417
+ },
418
+ {
419
+ "epoch": 0.37,
420
+ "learning_rate": 0.0077,
421
+ "loss": 3.0236,
422
+ "step": 690
423
+ },
424
+ {
425
+ "epoch": 0.37,
426
+ "learning_rate": 0.007666666666666667,
427
+ "loss": 2.9083,
428
+ "step": 700
429
+ },
430
+ {
431
+ "epoch": 0.38,
432
+ "learning_rate": 0.007633333333333333,
433
+ "loss": 2.9196,
434
+ "step": 710
435
+ },
436
+ {
437
+ "epoch": 0.38,
438
+ "learning_rate": 0.0076,
439
+ "loss": 2.8527,
440
+ "step": 720
441
+ },
442
+ {
443
+ "epoch": 0.39,
444
+ "learning_rate": 0.007566666666666667,
445
+ "loss": 2.9551,
446
+ "step": 730
447
+ },
448
+ {
449
+ "epoch": 0.4,
450
+ "learning_rate": 0.007533333333333333,
451
+ "loss": 3.0219,
452
+ "step": 740
453
+ },
454
+ {
455
+ "epoch": 0.4,
456
+ "learning_rate": 0.0075,
457
+ "loss": 2.9648,
458
+ "step": 750
459
+ },
460
+ {
461
+ "epoch": 0.41,
462
+ "learning_rate": 0.0074666666666666675,
463
+ "loss": 2.9104,
464
+ "step": 760
465
+ },
466
+ {
467
+ "epoch": 0.41,
468
+ "learning_rate": 0.0074333333333333335,
469
+ "loss": 3.0018,
470
+ "step": 770
471
+ },
472
+ {
473
+ "epoch": 0.42,
474
+ "learning_rate": 0.0074,
475
+ "loss": 2.8262,
476
+ "step": 780
477
+ },
478
+ {
479
+ "epoch": 0.42,
480
+ "learning_rate": 0.007366666666666667,
481
+ "loss": 2.8603,
482
+ "step": 790
483
+ },
484
+ {
485
+ "epoch": 0.43,
486
+ "learning_rate": 0.007333333333333333,
487
+ "loss": 2.9395,
488
+ "step": 800
489
+ },
490
+ {
491
+ "epoch": 0.43,
492
+ "learning_rate": 0.0073,
493
+ "loss": 3.0424,
494
+ "step": 810
495
+ },
496
+ {
497
+ "epoch": 0.44,
498
+ "learning_rate": 0.007266666666666667,
499
+ "loss": 3.0356,
500
+ "step": 820
501
+ },
502
+ {
503
+ "epoch": 0.44,
504
+ "learning_rate": 0.007233333333333334,
505
+ "loss": 2.9415,
506
+ "step": 830
507
+ },
508
+ {
509
+ "epoch": 0.45,
510
+ "learning_rate": 0.0072,
511
+ "loss": 2.9299,
512
+ "step": 840
513
+ },
514
+ {
515
+ "epoch": 0.45,
516
+ "learning_rate": 0.007166666666666667,
517
+ "loss": 2.9333,
518
+ "step": 850
519
+ },
520
+ {
521
+ "epoch": 0.46,
522
+ "learning_rate": 0.0071333333333333335,
523
+ "loss": 2.8684,
524
+ "step": 860
525
+ },
526
+ {
527
+ "epoch": 0.46,
528
+ "learning_rate": 0.0070999999999999995,
529
+ "loss": 3.0278,
530
+ "step": 870
531
+ },
532
+ {
533
+ "epoch": 0.47,
534
+ "learning_rate": 0.007066666666666666,
535
+ "loss": 2.9266,
536
+ "step": 880
537
+ },
538
+ {
539
+ "epoch": 0.48,
540
+ "learning_rate": 0.007033333333333334,
541
+ "loss": 2.8282,
542
+ "step": 890
543
+ },
544
+ {
545
+ "epoch": 0.48,
546
+ "learning_rate": 0.006999999999999999,
547
+ "loss": 2.9939,
548
+ "step": 900
549
+ },
550
+ {
551
+ "epoch": 0.49,
552
+ "learning_rate": 0.006966666666666667,
553
+ "loss": 3.0281,
554
+ "step": 910
555
+ },
556
+ {
557
+ "epoch": 0.49,
558
+ "learning_rate": 0.006933333333333334,
559
+ "loss": 2.8782,
560
+ "step": 920
561
+ },
562
+ {
563
+ "epoch": 0.5,
564
+ "learning_rate": 0.0069,
565
+ "loss": 2.9878,
566
+ "step": 930
567
+ },
568
+ {
569
+ "epoch": 0.5,
570
+ "learning_rate": 0.006866666666666667,
571
+ "loss": 2.8329,
572
+ "step": 940
573
+ },
574
+ {
575
+ "epoch": 0.51,
576
+ "learning_rate": 0.006833333333333334,
577
+ "loss": 2.9321,
578
+ "step": 950
579
+ },
580
+ {
581
+ "epoch": 0.51,
582
+ "learning_rate": 0.0068000000000000005,
583
+ "loss": 2.8503,
584
+ "step": 960
585
+ },
586
+ {
587
+ "epoch": 0.52,
588
+ "learning_rate": 0.0067666666666666665,
589
+ "loss": 2.9154,
590
+ "step": 970
591
+ },
592
+ {
593
+ "epoch": 0.52,
594
+ "learning_rate": 0.006733333333333333,
595
+ "loss": 2.9163,
596
+ "step": 980
597
+ },
598
+ {
599
+ "epoch": 0.53,
600
+ "learning_rate": 0.0067,
601
+ "loss": 2.8998,
602
+ "step": 990
603
+ },
604
+ {
605
+ "epoch": 0.53,
606
+ "learning_rate": 0.006666666666666666,
607
+ "loss": 2.9344,
608
+ "step": 1000
609
+ },
610
+ {
611
+ "epoch": 0.54,
612
+ "learning_rate": 0.006633333333333333,
613
+ "loss": 3.0409,
614
+ "step": 1010
615
+ },
616
+ {
617
+ "epoch": 0.55,
618
+ "learning_rate": 0.006600000000000001,
619
+ "loss": 2.9749,
620
+ "step": 1020
621
+ },
622
+ {
623
+ "epoch": 0.55,
624
+ "learning_rate": 0.006566666666666666,
625
+ "loss": 2.7688,
626
+ "step": 1030
627
+ },
628
+ {
629
+ "epoch": 0.56,
630
+ "learning_rate": 0.006533333333333334,
631
+ "loss": 2.952,
632
+ "step": 1040
633
+ },
634
+ {
635
+ "epoch": 0.56,
636
+ "learning_rate": 0.006500000000000001,
637
+ "loss": 2.9117,
638
+ "step": 1050
639
+ },
640
+ {
641
+ "epoch": 0.57,
642
+ "learning_rate": 0.006466666666666667,
643
+ "loss": 3.0269,
644
+ "step": 1060
645
+ },
646
+ {
647
+ "epoch": 0.57,
648
+ "learning_rate": 0.0064333333333333334,
649
+ "loss": 3.0253,
650
+ "step": 1070
651
+ },
652
+ {
653
+ "epoch": 0.58,
654
+ "learning_rate": 0.0064,
655
+ "loss": 2.8005,
656
+ "step": 1080
657
+ },
658
+ {
659
+ "epoch": 0.58,
660
+ "learning_rate": 0.006366666666666667,
661
+ "loss": 3.0853,
662
+ "step": 1090
663
+ },
664
+ {
665
+ "epoch": 0.59,
666
+ "learning_rate": 0.006333333333333333,
667
+ "loss": 2.8968,
668
+ "step": 1100
669
+ },
670
+ {
671
+ "epoch": 0.59,
672
+ "learning_rate": 0.0063,
673
+ "loss": 2.95,
674
+ "step": 1110
675
+ },
676
+ {
677
+ "epoch": 0.6,
678
+ "learning_rate": 0.006266666666666667,
679
+ "loss": 2.9189,
680
+ "step": 1120
681
+ },
682
+ {
683
+ "epoch": 0.6,
684
+ "learning_rate": 0.006233333333333333,
685
+ "loss": 2.8623,
686
+ "step": 1130
687
+ },
688
+ {
689
+ "epoch": 0.61,
690
+ "learning_rate": 0.0062,
691
+ "loss": 2.902,
692
+ "step": 1140
693
+ },
694
+ {
695
+ "epoch": 0.61,
696
+ "learning_rate": 0.0061666666666666675,
697
+ "loss": 2.8977,
698
+ "step": 1150
699
+ },
700
+ {
701
+ "epoch": 0.62,
702
+ "learning_rate": 0.006133333333333333,
703
+ "loss": 2.8396,
704
+ "step": 1160
705
+ },
706
+ {
707
+ "epoch": 0.63,
708
+ "learning_rate": 0.0061,
709
+ "loss": 2.894,
710
+ "step": 1170
711
+ },
712
+ {
713
+ "epoch": 0.63,
714
+ "learning_rate": 0.006066666666666667,
715
+ "loss": 2.9556,
716
+ "step": 1180
717
+ },
718
+ {
719
+ "epoch": 0.64,
720
+ "learning_rate": 0.006033333333333334,
721
+ "loss": 2.9222,
722
+ "step": 1190
723
+ },
724
+ {
725
+ "epoch": 0.64,
726
+ "learning_rate": 0.006,
727
+ "loss": 2.9178,
728
+ "step": 1200
729
+ },
730
+ {
731
+ "epoch": 0.65,
732
+ "learning_rate": 0.005966666666666667,
733
+ "loss": 2.8163,
734
+ "step": 1210
735
+ },
736
+ {
737
+ "epoch": 0.65,
738
+ "learning_rate": 0.005933333333333334,
739
+ "loss": 2.884,
740
+ "step": 1220
741
+ },
742
+ {
743
+ "epoch": 0.66,
744
+ "learning_rate": 0.0059,
745
+ "loss": 2.8649,
746
+ "step": 1230
747
+ },
748
+ {
749
+ "epoch": 0.66,
750
+ "learning_rate": 0.005866666666666667,
751
+ "loss": 2.9421,
752
+ "step": 1240
753
+ },
754
+ {
755
+ "epoch": 0.67,
756
+ "learning_rate": 0.005833333333333334,
757
+ "loss": 2.849,
758
+ "step": 1250
759
+ },
760
+ {
761
+ "epoch": 0.67,
762
+ "learning_rate": 0.0058,
763
+ "loss": 2.9116,
764
+ "step": 1260
765
+ },
766
+ {
767
+ "epoch": 0.68,
768
+ "learning_rate": 0.0057666666666666665,
769
+ "loss": 2.8607,
770
+ "step": 1270
771
+ },
772
+ {
773
+ "epoch": 0.68,
774
+ "learning_rate": 0.005733333333333333,
775
+ "loss": 2.7439,
776
+ "step": 1280
777
+ },
778
+ {
779
+ "epoch": 0.69,
780
+ "learning_rate": 0.005699999999999999,
781
+ "loss": 2.7979,
782
+ "step": 1290
783
+ },
784
+ {
785
+ "epoch": 0.69,
786
+ "learning_rate": 0.005666666666666666,
787
+ "loss": 2.8251,
788
+ "step": 1300
789
+ },
790
+ {
791
+ "epoch": 0.7,
792
+ "learning_rate": 0.005633333333333334,
793
+ "loss": 2.8146,
794
+ "step": 1310
795
+ },
796
+ {
797
+ "epoch": 0.71,
798
+ "learning_rate": 0.005600000000000001,
799
+ "loss": 2.7919,
800
+ "step": 1320
801
+ },
802
+ {
803
+ "epoch": 0.71,
804
+ "learning_rate": 0.005566666666666667,
805
+ "loss": 2.8605,
806
+ "step": 1330
807
+ },
808
+ {
809
+ "epoch": 0.72,
810
+ "learning_rate": 0.005533333333333334,
811
+ "loss": 2.8675,
812
+ "step": 1340
813
+ },
814
+ {
815
+ "epoch": 0.72,
816
+ "learning_rate": 0.0055000000000000005,
817
+ "loss": 2.8249,
818
+ "step": 1350
819
+ },
820
+ {
821
+ "epoch": 0.73,
822
+ "learning_rate": 0.0054666666666666665,
823
+ "loss": 2.9331,
824
+ "step": 1360
825
+ },
826
+ {
827
+ "epoch": 0.73,
828
+ "learning_rate": 0.005433333333333333,
829
+ "loss": 2.9039,
830
+ "step": 1370
831
+ },
832
+ {
833
+ "epoch": 0.74,
834
+ "learning_rate": 0.0054,
835
+ "loss": 2.8516,
836
+ "step": 1380
837
+ },
838
+ {
839
+ "epoch": 0.74,
840
+ "learning_rate": 0.005366666666666666,
841
+ "loss": 2.8574,
842
+ "step": 1390
843
+ },
844
+ {
845
+ "epoch": 0.75,
846
+ "learning_rate": 0.005333333333333333,
847
+ "loss": 2.9919,
848
+ "step": 1400
849
+ },
850
+ {
851
+ "epoch": 0.75,
852
+ "learning_rate": 0.0053,
853
+ "loss": 2.873,
854
+ "step": 1410
855
+ },
856
+ {
857
+ "epoch": 0.76,
858
+ "learning_rate": 0.005266666666666666,
859
+ "loss": 2.8407,
860
+ "step": 1420
861
+ },
862
+ {
863
+ "epoch": 0.76,
864
+ "learning_rate": 0.005233333333333333,
865
+ "loss": 2.8594,
866
+ "step": 1430
867
+ },
868
+ {
869
+ "epoch": 0.77,
870
+ "learning_rate": 0.005200000000000001,
871
+ "loss": 2.9165,
872
+ "step": 1440
873
+ },
874
+ {
875
+ "epoch": 0.77,
876
+ "learning_rate": 0.0051666666666666675,
877
+ "loss": 2.8918,
878
+ "step": 1450
879
+ },
880
+ {
881
+ "epoch": 0.78,
882
+ "learning_rate": 0.0051333333333333335,
883
+ "loss": 3.0013,
884
+ "step": 1460
885
+ },
886
+ {
887
+ "epoch": 0.79,
888
+ "learning_rate": 0.0051,
889
+ "loss": 2.8998,
890
+ "step": 1470
891
+ },
892
+ {
893
+ "epoch": 0.79,
894
+ "learning_rate": 0.005066666666666667,
895
+ "loss": 2.7544,
896
+ "step": 1480
897
+ },
898
+ {
899
+ "epoch": 0.8,
900
+ "learning_rate": 0.005033333333333333,
901
+ "loss": 2.9723,
902
+ "step": 1490
903
+ },
904
+ {
905
+ "epoch": 0.8,
906
+ "learning_rate": 0.005,
907
+ "loss": 2.8145,
908
+ "step": 1500
909
+ },
910
+ {
911
+ "epoch": 0.81,
912
+ "learning_rate": 0.004966666666666667,
913
+ "loss": 2.818,
914
+ "step": 1510
915
+ },
916
+ {
917
+ "epoch": 0.81,
918
+ "learning_rate": 0.004933333333333334,
919
+ "loss": 2.8831,
920
+ "step": 1520
921
+ },
922
+ {
923
+ "epoch": 0.82,
924
+ "learning_rate": 0.0049,
925
+ "loss": 2.8453,
926
+ "step": 1530
927
+ },
928
+ {
929
+ "epoch": 0.82,
930
+ "learning_rate": 0.004866666666666667,
931
+ "loss": 2.8491,
932
+ "step": 1540
933
+ },
934
+ {
935
+ "epoch": 0.83,
936
+ "learning_rate": 0.004833333333333334,
937
+ "loss": 2.8245,
938
+ "step": 1550
939
+ },
940
+ {
941
+ "epoch": 0.83,
942
+ "learning_rate": 0.0048,
943
+ "loss": 2.8585,
944
+ "step": 1560
945
+ },
946
+ {
947
+ "epoch": 0.84,
948
+ "learning_rate": 0.004766666666666667,
949
+ "loss": 2.833,
950
+ "step": 1570
951
+ },
952
+ {
953
+ "epoch": 0.84,
954
+ "learning_rate": 0.004733333333333333,
955
+ "loss": 2.8379,
956
+ "step": 1580
957
+ },
958
+ {
959
+ "epoch": 0.85,
960
+ "learning_rate": 0.0047,
961
+ "loss": 2.8753,
962
+ "step": 1590
963
+ },
964
+ {
965
+ "epoch": 0.85,
966
+ "learning_rate": 0.004666666666666667,
967
+ "loss": 2.8465,
968
+ "step": 1600
969
+ },
970
+ {
971
+ "epoch": 0.86,
972
+ "learning_rate": 0.004633333333333333,
973
+ "loss": 2.9163,
974
+ "step": 1610
975
+ },
976
+ {
977
+ "epoch": 0.87,
978
+ "learning_rate": 0.0046,
979
+ "loss": 2.8788,
980
+ "step": 1620
981
+ },
982
+ {
983
+ "epoch": 0.87,
984
+ "learning_rate": 0.004566666666666667,
985
+ "loss": 2.8636,
986
+ "step": 1630
987
+ },
988
+ {
989
+ "epoch": 0.88,
990
+ "learning_rate": 0.004533333333333333,
991
+ "loss": 2.8465,
992
+ "step": 1640
993
+ },
994
+ {
995
+ "epoch": 0.88,
996
+ "learning_rate": 0.0045000000000000005,
997
+ "loss": 2.7802,
998
+ "step": 1650
999
+ },
1000
+ {
1001
+ "epoch": 0.89,
1002
+ "learning_rate": 0.0044666666666666665,
1003
+ "loss": 2.8533,
1004
+ "step": 1660
1005
+ },
1006
+ {
1007
+ "epoch": 0.89,
1008
+ "learning_rate": 0.004433333333333333,
1009
+ "loss": 2.9001,
1010
+ "step": 1670
1011
+ },
1012
+ {
1013
+ "epoch": 0.9,
1014
+ "learning_rate": 0.0044,
1015
+ "loss": 2.8818,
1016
+ "step": 1680
1017
+ },
1018
+ {
1019
+ "epoch": 0.9,
1020
+ "learning_rate": 0.004366666666666666,
1021
+ "loss": 2.8498,
1022
+ "step": 1690
1023
+ },
1024
+ {
1025
+ "epoch": 0.91,
1026
+ "learning_rate": 0.004333333333333334,
1027
+ "loss": 2.8629,
1028
+ "step": 1700
1029
+ },
1030
+ {
1031
+ "epoch": 0.91,
1032
+ "learning_rate": 0.0043,
1033
+ "loss": 2.769,
1034
+ "step": 1710
1035
+ },
1036
+ {
1037
+ "epoch": 0.92,
1038
+ "learning_rate": 0.004266666666666667,
1039
+ "loss": 2.8826,
1040
+ "step": 1720
1041
+ },
1042
+ {
1043
+ "epoch": 0.92,
1044
+ "learning_rate": 0.004233333333333334,
1045
+ "loss": 2.867,
1046
+ "step": 1730
1047
+ },
1048
+ {
1049
+ "epoch": 0.93,
1050
+ "learning_rate": 0.0042,
1051
+ "loss": 2.9155,
1052
+ "step": 1740
1053
+ },
1054
+ {
1055
+ "epoch": 0.94,
1056
+ "learning_rate": 0.004166666666666667,
1057
+ "loss": 2.9031,
1058
+ "step": 1750
1059
+ },
1060
+ {
1061
+ "epoch": 0.94,
1062
+ "learning_rate": 0.0041333333333333335,
1063
+ "loss": 2.9666,
1064
+ "step": 1760
1065
+ },
1066
+ {
1067
+ "epoch": 0.95,
1068
+ "learning_rate": 0.0040999999999999995,
1069
+ "loss": 2.8097,
1070
+ "step": 1770
1071
+ },
1072
+ {
1073
+ "epoch": 0.95,
1074
+ "learning_rate": 0.004066666666666667,
1075
+ "loss": 2.9457,
1076
+ "step": 1780
1077
+ },
1078
+ {
1079
+ "epoch": 0.96,
1080
+ "learning_rate": 0.004033333333333333,
1081
+ "loss": 2.8746,
1082
+ "step": 1790
1083
+ },
1084
+ {
1085
+ "epoch": 0.96,
1086
+ "learning_rate": 0.004,
1087
+ "loss": 2.9243,
1088
+ "step": 1800
1089
+ },
1090
+ {
1091
+ "epoch": 0.97,
1092
+ "learning_rate": 0.003966666666666667,
1093
+ "loss": 2.7506,
1094
+ "step": 1810
1095
+ },
1096
+ {
1097
+ "epoch": 0.97,
1098
+ "learning_rate": 0.003933333333333333,
1099
+ "loss": 2.9817,
1100
+ "step": 1820
1101
+ },
1102
+ {
1103
+ "epoch": 0.98,
1104
+ "learning_rate": 0.0039000000000000003,
1105
+ "loss": 2.9476,
1106
+ "step": 1830
1107
+ },
1108
+ {
1109
+ "epoch": 0.98,
1110
+ "learning_rate": 0.0038666666666666667,
1111
+ "loss": 2.9803,
1112
+ "step": 1840
1113
+ },
1114
+ {
1115
+ "epoch": 0.99,
1116
+ "learning_rate": 0.0038333333333333336,
1117
+ "loss": 2.8027,
1118
+ "step": 1850
1119
+ },
1120
+ {
1121
+ "epoch": 0.99,
1122
+ "learning_rate": 0.0038,
1123
+ "loss": 2.8793,
1124
+ "step": 1860
1125
+ },
1126
+ {
1127
+ "epoch": 1.0,
1128
+ "learning_rate": 0.0037666666666666664,
1129
+ "loss": 2.9018,
1130
+ "step": 1870
1131
+ },
1132
+ {
1133
+ "epoch": 1.0,
1134
+ "learning_rate": 0.0037333333333333337,
1135
+ "loss": 2.7221,
1136
+ "step": 1880
1137
+ },
1138
+ {
1139
+ "epoch": 1.01,
1140
+ "learning_rate": 0.0037,
1141
+ "loss": 2.6557,
1142
+ "step": 1890
1143
+ },
1144
+ {
1145
+ "epoch": 1.02,
1146
+ "learning_rate": 0.0036666666666666666,
1147
+ "loss": 2.7212,
1148
+ "step": 1900
1149
+ },
1150
+ {
1151
+ "epoch": 1.02,
1152
+ "learning_rate": 0.0036333333333333335,
1153
+ "loss": 2.759,
1154
+ "step": 1910
1155
+ },
1156
+ {
1157
+ "epoch": 1.03,
1158
+ "learning_rate": 0.0036,
1159
+ "loss": 2.7565,
1160
+ "step": 1920
1161
+ },
1162
+ {
1163
+ "epoch": 1.03,
1164
+ "learning_rate": 0.0035666666666666668,
1165
+ "loss": 2.7665,
1166
+ "step": 1930
1167
+ },
1168
+ {
1169
+ "epoch": 1.04,
1170
+ "learning_rate": 0.003533333333333333,
1171
+ "loss": 2.6326,
1172
+ "step": 1940
1173
+ },
1174
+ {
1175
+ "epoch": 1.04,
1176
+ "learning_rate": 0.0034999999999999996,
1177
+ "loss": 2.7236,
1178
+ "step": 1950
1179
+ },
1180
+ {
1181
+ "epoch": 1.05,
1182
+ "learning_rate": 0.003466666666666667,
1183
+ "loss": 2.7307,
1184
+ "step": 1960
1185
+ },
1186
+ {
1187
+ "epoch": 1.05,
1188
+ "learning_rate": 0.0034333333333333334,
1189
+ "loss": 2.8281,
1190
+ "step": 1970
1191
+ },
1192
+ {
1193
+ "epoch": 1.06,
1194
+ "learning_rate": 0.0034000000000000002,
1195
+ "loss": 2.8165,
1196
+ "step": 1980
1197
+ },
1198
+ {
1199
+ "epoch": 1.06,
1200
+ "learning_rate": 0.0033666666666666667,
1201
+ "loss": 2.7946,
1202
+ "step": 1990
1203
+ },
1204
+ {
1205
+ "epoch": 1.07,
1206
+ "learning_rate": 0.003333333333333333,
1207
+ "loss": 2.6679,
1208
+ "step": 2000
1209
+ },
1210
+ {
1211
+ "epoch": 1.07,
1212
+ "learning_rate": 0.0033000000000000004,
1213
+ "loss": 2.7616,
1214
+ "step": 2010
1215
+ },
1216
+ {
1217
+ "epoch": 1.08,
1218
+ "learning_rate": 0.003266666666666667,
1219
+ "loss": 2.706,
1220
+ "step": 2020
1221
+ },
1222
+ {
1223
+ "epoch": 1.08,
1224
+ "learning_rate": 0.0032333333333333333,
1225
+ "loss": 2.8121,
1226
+ "step": 2030
1227
+ },
1228
+ {
1229
+ "epoch": 1.09,
1230
+ "learning_rate": 0.0032,
1231
+ "loss": 2.6763,
1232
+ "step": 2040
1233
+ },
1234
+ {
1235
+ "epoch": 1.1,
1236
+ "learning_rate": 0.0031666666666666666,
1237
+ "loss": 2.6566,
1238
+ "step": 2050
1239
+ },
1240
+ {
1241
+ "epoch": 1.1,
1242
+ "learning_rate": 0.0031333333333333335,
1243
+ "loss": 2.7536,
1244
+ "step": 2060
1245
+ },
1246
+ {
1247
+ "epoch": 1.11,
1248
+ "learning_rate": 0.0031,
1249
+ "loss": 2.7246,
1250
+ "step": 2070
1251
+ },
1252
+ {
1253
+ "epoch": 1.11,
1254
+ "learning_rate": 0.0030666666666666663,
1255
+ "loss": 2.6712,
1256
+ "step": 2080
1257
+ },
1258
+ {
1259
+ "epoch": 1.12,
1260
+ "learning_rate": 0.0030333333333333336,
1261
+ "loss": 2.6933,
1262
+ "step": 2090
1263
+ },
1264
+ {
1265
+ "epoch": 1.12,
1266
+ "learning_rate": 0.003,
1267
+ "loss": 2.7509,
1268
+ "step": 2100
1269
+ },
1270
+ {
1271
+ "epoch": 1.13,
1272
+ "learning_rate": 0.002966666666666667,
1273
+ "loss": 2.7758,
1274
+ "step": 2110
1275
+ },
1276
+ {
1277
+ "epoch": 1.13,
1278
+ "learning_rate": 0.0029333333333333334,
1279
+ "loss": 2.7221,
1280
+ "step": 2120
1281
+ },
1282
+ {
1283
+ "epoch": 1.14,
1284
+ "learning_rate": 0.0029,
1285
+ "loss": 2.7005,
1286
+ "step": 2130
1287
+ },
1288
+ {
1289
+ "epoch": 1.14,
1290
+ "learning_rate": 0.0028666666666666667,
1291
+ "loss": 2.7714,
1292
+ "step": 2140
1293
+ },
1294
+ {
1295
+ "epoch": 1.15,
1296
+ "learning_rate": 0.002833333333333333,
1297
+ "loss": 2.7245,
1298
+ "step": 2150
1299
+ },
1300
+ {
1301
+ "epoch": 1.15,
1302
+ "learning_rate": 0.0028000000000000004,
1303
+ "loss": 2.6423,
1304
+ "step": 2160
1305
+ },
1306
+ {
1307
+ "epoch": 1.16,
1308
+ "learning_rate": 0.002766666666666667,
1309
+ "loss": 2.7222,
1310
+ "step": 2170
1311
+ },
1312
+ {
1313
+ "epoch": 1.16,
1314
+ "learning_rate": 0.0027333333333333333,
1315
+ "loss": 2.78,
1316
+ "step": 2180
1317
+ },
1318
+ {
1319
+ "epoch": 1.17,
1320
+ "learning_rate": 0.0027,
1321
+ "loss": 2.6494,
1322
+ "step": 2190
1323
+ },
1324
+ {
1325
+ "epoch": 1.18,
1326
+ "learning_rate": 0.0026666666666666666,
1327
+ "loss": 2.611,
1328
+ "step": 2200
1329
+ },
1330
+ {
1331
+ "epoch": 1.18,
1332
+ "learning_rate": 0.002633333333333333,
1333
+ "loss": 2.6051,
1334
+ "step": 2210
1335
+ },
1336
+ {
1337
+ "epoch": 1.19,
1338
+ "learning_rate": 0.0026000000000000003,
1339
+ "loss": 2.5935,
1340
+ "step": 2220
1341
+ },
1342
+ {
1343
+ "epoch": 1.19,
1344
+ "learning_rate": 0.0025666666666666667,
1345
+ "loss": 2.7753,
1346
+ "step": 2230
1347
+ },
1348
+ {
1349
+ "epoch": 1.2,
1350
+ "learning_rate": 0.0025333333333333336,
1351
+ "loss": 2.6946,
1352
+ "step": 2240
1353
+ },
1354
+ {
1355
+ "epoch": 1.2,
1356
+ "learning_rate": 0.0025,
1357
+ "loss": 2.7819,
1358
+ "step": 2250
1359
+ },
1360
+ {
1361
+ "epoch": 1.21,
1362
+ "learning_rate": 0.002466666666666667,
1363
+ "loss": 2.6382,
1364
+ "step": 2260
1365
+ },
1366
+ {
1367
+ "epoch": 1.21,
1368
+ "learning_rate": 0.0024333333333333334,
1369
+ "loss": 2.7982,
1370
+ "step": 2270
1371
+ },
1372
+ {
1373
+ "epoch": 1.22,
1374
+ "learning_rate": 0.0024,
1375
+ "loss": 2.7398,
1376
+ "step": 2280
1377
+ },
1378
+ {
1379
+ "epoch": 1.22,
1380
+ "learning_rate": 0.0023666666666666667,
1381
+ "loss": 2.6662,
1382
+ "step": 2290
1383
+ },
1384
+ {
1385
+ "epoch": 1.23,
1386
+ "learning_rate": 0.0023333333333333335,
1387
+ "loss": 2.7676,
1388
+ "step": 2300
1389
+ },
1390
+ {
1391
+ "epoch": 1.23,
1392
+ "learning_rate": 0.0023,
1393
+ "loss": 2.8121,
1394
+ "step": 2310
1395
+ },
1396
+ {
1397
+ "epoch": 1.24,
1398
+ "learning_rate": 0.0022666666666666664,
1399
+ "loss": 2.7563,
1400
+ "step": 2320
1401
+ },
1402
+ {
1403
+ "epoch": 1.24,
1404
+ "learning_rate": 0.0022333333333333333,
1405
+ "loss": 2.7693,
1406
+ "step": 2330
1407
+ },
1408
+ {
1409
+ "epoch": 1.25,
1410
+ "learning_rate": 0.0022,
1411
+ "loss": 2.6918,
1412
+ "step": 2340
1413
+ },
1414
+ {
1415
+ "epoch": 1.26,
1416
+ "learning_rate": 0.002166666666666667,
1417
+ "loss": 2.6115,
1418
+ "step": 2350
1419
+ },
1420
+ {
1421
+ "epoch": 1.26,
1422
+ "learning_rate": 0.0021333333333333334,
1423
+ "loss": 2.8079,
1424
+ "step": 2360
1425
+ },
1426
+ {
1427
+ "epoch": 1.27,
1428
+ "learning_rate": 0.0021,
1429
+ "loss": 2.7315,
1430
+ "step": 2370
1431
+ },
1432
+ {
1433
+ "epoch": 1.27,
1434
+ "learning_rate": 0.0020666666666666667,
1435
+ "loss": 2.6514,
1436
+ "step": 2380
1437
+ },
1438
+ {
1439
+ "epoch": 1.28,
1440
+ "learning_rate": 0.0020333333333333336,
1441
+ "loss": 2.7606,
1442
+ "step": 2390
1443
+ },
1444
+ {
1445
+ "epoch": 1.28,
1446
+ "learning_rate": 0.002,
1447
+ "loss": 2.6995,
1448
+ "step": 2400
1449
+ },
1450
+ {
1451
+ "epoch": 1.29,
1452
+ "learning_rate": 0.0019666666666666665,
1453
+ "loss": 2.887,
1454
+ "step": 2410
1455
+ },
1456
+ {
1457
+ "epoch": 1.29,
1458
+ "learning_rate": 0.0019333333333333333,
1459
+ "loss": 2.7945,
1460
+ "step": 2420
1461
+ },
1462
+ {
1463
+ "epoch": 1.3,
1464
+ "learning_rate": 0.0019,
1465
+ "loss": 2.7942,
1466
+ "step": 2430
1467
+ },
1468
+ {
1469
+ "epoch": 1.3,
1470
+ "learning_rate": 0.0018666666666666669,
1471
+ "loss": 2.7315,
1472
+ "step": 2440
1473
+ },
1474
+ {
1475
+ "epoch": 1.31,
1476
+ "learning_rate": 0.0018333333333333333,
1477
+ "loss": 2.7457,
1478
+ "step": 2450
1479
+ },
1480
+ {
1481
+ "epoch": 1.31,
1482
+ "learning_rate": 0.0018,
1483
+ "loss": 2.7568,
1484
+ "step": 2460
1485
+ },
1486
+ {
1487
+ "epoch": 1.32,
1488
+ "learning_rate": 0.0017666666666666666,
1489
+ "loss": 2.7882,
1490
+ "step": 2470
1491
+ },
1492
+ {
1493
+ "epoch": 1.33,
1494
+ "learning_rate": 0.0017333333333333335,
1495
+ "loss": 2.7582,
1496
+ "step": 2480
1497
+ },
1498
+ {
1499
+ "epoch": 1.33,
1500
+ "learning_rate": 0.0017000000000000001,
1501
+ "loss": 2.6868,
1502
+ "step": 2490
1503
+ },
1504
+ {
1505
+ "epoch": 1.34,
1506
+ "learning_rate": 0.0016666666666666666,
1507
+ "loss": 2.7358,
1508
+ "step": 2500
1509
+ },
1510
+ {
1511
+ "epoch": 1.34,
1512
+ "learning_rate": 0.0016333333333333334,
1513
+ "loss": 2.7422,
1514
+ "step": 2510
1515
+ },
1516
+ {
1517
+ "epoch": 1.35,
1518
+ "learning_rate": 0.0016,
1519
+ "loss": 2.6889,
1520
+ "step": 2520
1521
+ },
1522
+ {
1523
+ "epoch": 1.35,
1524
+ "learning_rate": 0.0015666666666666667,
1525
+ "loss": 2.864,
1526
+ "step": 2530
1527
+ },
1528
+ {
1529
+ "epoch": 1.36,
1530
+ "learning_rate": 0.0015333333333333332,
1531
+ "loss": 2.7862,
1532
+ "step": 2540
1533
+ },
1534
+ {
1535
+ "epoch": 1.36,
1536
+ "learning_rate": 0.0015,
1537
+ "loss": 2.7896,
1538
+ "step": 2550
1539
+ },
1540
+ {
1541
+ "epoch": 1.37,
1542
+ "learning_rate": 0.0014666666666666667,
1543
+ "loss": 2.6343,
1544
+ "step": 2560
1545
+ },
1546
+ {
1547
+ "epoch": 1.37,
1548
+ "learning_rate": 0.0014333333333333333,
1549
+ "loss": 2.7965,
1550
+ "step": 2570
1551
+ },
1552
+ {
1553
+ "epoch": 1.38,
1554
+ "learning_rate": 0.0014000000000000002,
1555
+ "loss": 2.7699,
1556
+ "step": 2580
1557
+ },
1558
+ {
1559
+ "epoch": 1.38,
1560
+ "learning_rate": 0.0013666666666666666,
1561
+ "loss": 2.7487,
1562
+ "step": 2590
1563
+ },
1564
+ {
1565
+ "epoch": 1.39,
1566
+ "learning_rate": 0.0013333333333333333,
1567
+ "loss": 2.7581,
1568
+ "step": 2600
1569
+ },
1570
+ {
1571
+ "epoch": 1.39,
1572
+ "learning_rate": 0.0013000000000000002,
1573
+ "loss": 2.7833,
1574
+ "step": 2610
1575
+ },
1576
+ {
1577
+ "epoch": 1.4,
1578
+ "learning_rate": 0.0012666666666666668,
1579
+ "loss": 2.5927,
1580
+ "step": 2620
1581
+ },
1582
+ {
1583
+ "epoch": 1.41,
1584
+ "learning_rate": 0.0012333333333333335,
1585
+ "loss": 2.6962,
1586
+ "step": 2630
1587
+ },
1588
+ {
1589
+ "epoch": 1.41,
1590
+ "learning_rate": 0.0012,
1591
+ "loss": 2.741,
1592
+ "step": 2640
1593
+ },
1594
+ {
1595
+ "epoch": 1.42,
1596
+ "learning_rate": 0.0011666666666666668,
1597
+ "loss": 2.6084,
1598
+ "step": 2650
1599
+ },
1600
+ {
1601
+ "epoch": 1.42,
1602
+ "learning_rate": 0.0011333333333333332,
1603
+ "loss": 2.6298,
1604
+ "step": 2660
1605
+ },
1606
+ {
1607
+ "epoch": 1.43,
1608
+ "learning_rate": 0.0011,
1609
+ "loss": 2.7852,
1610
+ "step": 2670
1611
+ },
1612
+ {
1613
+ "epoch": 1.43,
1614
+ "learning_rate": 0.0010666666666666667,
1615
+ "loss": 2.6882,
1616
+ "step": 2680
1617
+ },
1618
+ {
1619
+ "epoch": 1.44,
1620
+ "learning_rate": 0.0010333333333333334,
1621
+ "loss": 2.71,
1622
+ "step": 2690
1623
+ },
1624
+ {
1625
+ "epoch": 1.44,
1626
+ "learning_rate": 0.001,
1627
+ "loss": 2.7664,
1628
+ "step": 2700
1629
+ },
1630
+ {
1631
+ "epoch": 1.45,
1632
+ "learning_rate": 0.0009666666666666667,
1633
+ "loss": 2.6978,
1634
+ "step": 2710
1635
+ },
1636
+ {
1637
+ "epoch": 1.45,
1638
+ "learning_rate": 0.0009333333333333334,
1639
+ "loss": 2.695,
1640
+ "step": 2720
1641
+ },
1642
+ {
1643
+ "epoch": 1.46,
1644
+ "learning_rate": 0.0009,
1645
+ "loss": 2.7348,
1646
+ "step": 2730
1647
+ },
1648
+ {
1649
+ "epoch": 1.46,
1650
+ "learning_rate": 0.0008666666666666667,
1651
+ "loss": 2.8213,
1652
+ "step": 2740
1653
+ },
1654
+ {
1655
+ "epoch": 1.47,
1656
+ "learning_rate": 0.0008333333333333333,
1657
+ "loss": 2.779,
1658
+ "step": 2750
1659
+ },
1660
+ {
1661
+ "epoch": 1.47,
1662
+ "learning_rate": 0.0008,
1663
+ "loss": 2.7319,
1664
+ "step": 2760
1665
+ },
1666
+ {
1667
+ "epoch": 1.48,
1668
+ "learning_rate": 0.0007666666666666666,
1669
+ "loss": 2.6866,
1670
+ "step": 2770
1671
+ },
1672
+ {
1673
+ "epoch": 1.49,
1674
+ "learning_rate": 0.0007333333333333333,
1675
+ "loss": 2.6938,
1676
+ "step": 2780
1677
+ },
1678
+ {
1679
+ "epoch": 1.49,
1680
+ "learning_rate": 0.0007000000000000001,
1681
+ "loss": 2.754,
1682
+ "step": 2790
1683
+ },
1684
+ {
1685
+ "epoch": 1.5,
1686
+ "learning_rate": 0.0006666666666666666,
1687
+ "loss": 2.6859,
1688
+ "step": 2800
1689
+ },
1690
+ {
1691
+ "epoch": 1.5,
1692
+ "learning_rate": 0.0006333333333333334,
1693
+ "loss": 2.6479,
1694
+ "step": 2810
1695
+ },
1696
+ {
1697
+ "epoch": 1.51,
1698
+ "learning_rate": 0.0006,
1699
+ "loss": 2.6387,
1700
+ "step": 2820
1701
+ },
1702
+ {
1703
+ "epoch": 1.51,
1704
+ "learning_rate": 0.0005666666666666666,
1705
+ "loss": 2.735,
1706
+ "step": 2830
1707
+ },
1708
+ {
1709
+ "epoch": 1.52,
1710
+ "learning_rate": 0.0005333333333333334,
1711
+ "loss": 2.7169,
1712
+ "step": 2840
1713
+ },
1714
+ {
1715
+ "epoch": 1.52,
1716
+ "learning_rate": 0.0005,
1717
+ "loss": 2.819,
1718
+ "step": 2850
1719
+ },
1720
+ {
1721
+ "epoch": 1.53,
1722
+ "learning_rate": 0.0004666666666666667,
1723
+ "loss": 2.8069,
1724
+ "step": 2860
1725
+ },
1726
+ {
1727
+ "epoch": 1.53,
1728
+ "learning_rate": 0.00043333333333333337,
1729
+ "loss": 2.7443,
1730
+ "step": 2870
1731
+ },
1732
+ {
1733
+ "epoch": 1.54,
1734
+ "learning_rate": 0.0004,
1735
+ "loss": 2.6086,
1736
+ "step": 2880
1737
+ },
1738
+ {
1739
+ "epoch": 1.54,
1740
+ "learning_rate": 0.00036666666666666667,
1741
+ "loss": 2.6786,
1742
+ "step": 2890
1743
+ },
1744
+ {
1745
+ "epoch": 1.55,
1746
+ "learning_rate": 0.0003333333333333333,
1747
+ "loss": 2.7616,
1748
+ "step": 2900
1749
+ },
1750
+ {
1751
+ "epoch": 1.55,
1752
+ "learning_rate": 0.0003,
1753
+ "loss": 2.6671,
1754
+ "step": 2910
1755
+ },
1756
+ {
1757
+ "epoch": 1.56,
1758
+ "learning_rate": 0.0002666666666666667,
1759
+ "loss": 2.7724,
1760
+ "step": 2920
1761
+ },
1762
+ {
1763
+ "epoch": 1.57,
1764
+ "learning_rate": 0.00023333333333333336,
1765
+ "loss": 2.7793,
1766
+ "step": 2930
1767
+ },
1768
+ {
1769
+ "epoch": 1.57,
1770
+ "learning_rate": 0.0002,
1771
+ "loss": 2.7742,
1772
+ "step": 2940
1773
+ },
1774
+ {
1775
+ "epoch": 1.58,
1776
+ "learning_rate": 0.00016666666666666666,
1777
+ "loss": 2.7251,
1778
+ "step": 2950
1779
+ },
1780
+ {
1781
+ "epoch": 1.58,
1782
+ "learning_rate": 0.00013333333333333334,
1783
+ "loss": 2.7658,
1784
+ "step": 2960
1785
+ },
1786
+ {
1787
+ "epoch": 1.59,
1788
+ "learning_rate": 0.0001,
1789
+ "loss": 2.7,
1790
+ "step": 2970
1791
+ },
1792
+ {
1793
+ "epoch": 1.59,
1794
+ "learning_rate": 6.666666666666667e-05,
1795
+ "loss": 2.7616,
1796
+ "step": 2980
1797
+ },
1798
+ {
1799
+ "epoch": 1.6,
1800
+ "learning_rate": 3.3333333333333335e-05,
1801
+ "loss": 2.8097,
1802
+ "step": 2990
1803
+ },
1804
+ {
1805
+ "epoch": 1.6,
1806
+ "learning_rate": 0.0,
1807
+ "loss": 2.8904,
1808
+ "step": 3000
1809
+ }
1810
+ ],
1811
+ "max_steps": 3000,
1812
+ "num_train_epochs": 2,
1813
+ "total_flos": 4.159713219402793e+17,
1814
+ "trial_name": null,
1815
+ "trial_params": null
1816
+ }
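(The entries above are the tail of what appears to be the Hugging Face Trainer state (trainer_state.json) for checkpoint-3000: every 10 optimizer steps it logs the epoch, a learning rate that decays linearly to 0 by step 3,000, the training loss, and the global step. A minimal sketch of reading that log back, assuming the standard "log_history" key that transformers.Trainer writes; the key itself sits above the excerpt shown here.)

import json

# Minimal sketch, not part of the commit: summarize the loss curve recorded in
# trainer_state.json. The path and the "log_history" key are assumptions based
# on the standard transformers.Trainer checkpoint layout.
with open("lora_checkpoints/checkpoints/checkpoint-3000/trainer_state.json") as f:
    state = json.load(f)

logged = [entry for entry in state["log_history"] if "loss" in entry]
print("max_steps:", state["max_steps"], "| logged points:", len(logged))
print("first logged loss:", logged[0]["step"], logged[0]["loss"])
print("last logged loss: ", logged[-1]["step"], logged[-1]["loss"])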
lora_checkpoints/checkpoints/checkpoint-3000/training_args.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7473b3b807d488bd25253e88de91014823ec3ca07d4adcb06d934d3ba579235
3
+ size 3707
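(Note that the three lines above are only a Git LFS pointer, giving the spec version, the SHA-256 object id, and the byte size, not the serialized training arguments themselves. After cloning the repository, the 3,707-byte file can be fetched with, for example, git lfs pull --include="lora_checkpoints/checkpoints/checkpoint-3000/training_args.bin"; Hugging Face's Trainer typically writes this file with torch.save, so it can then be inspected with torch.load.)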
lora_checkpoints/checkpoints/generated_predictions.txt ADDED
@@ -0,0 +1,200 @@
1
+ {"labels": "听起来你很期待啊,露营的确有很多值得体验的事情。但是,你有什么担心吗,毕竟露营也有一些低气温、野生动物等危险因素。", "predict": "听起来你们想要去体验很多有趣的事情呢。那么,你们打算怎么实现这个计划呢?"}
2
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 我们可以思考一些解决方案。你有什么想法可以尝试修复你的表呢?", "predict": "我们可以一起探讨一些解决方案。首先,你可以告诉老师你的情况,然后请求帮助。其次,你可以向朋友求助,他们也可能能帮助你。最后,你可以想一想其他的事情来分散你的注意力,如听音乐、看书等等。"}
3
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 听起来问题得到了解决,你现在感觉怎么样?", "predict": "太好了,你做得很好!你可以向小明表达你的感激之情,并告诉他你的感受。同时,你也可以向小明道歉,告诉他你错拿了他的书,并请求他再次给你。"}
4
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 所以,现在我们已经准备好了,要去店里买一双新的拖鞋了吗?", "predict": "太好了!你做得很棒!你接受了你的情绪,并且用积极的方式去处理它们。我希望你今天能够享受你的情绪,并且相信你可以克服这些情绪,变得更加强大!"}
5
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 太好了,我为你感到骄傲!你做的非常棒,控制自己的欲望并不容易。你觉得现在的情况有所改善吗?", "predict": "不用谢,我随时在这里支持你,希望你能够在上课的时候保持冷静和专注。"}
6
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 你看起来很失落,你是因为妈妈迟迟没有到达而感到沮丧吗?", "predict": "听到你这样说,我知道你很不开心,因为你等待很长时间,对吧?"}
7
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 你真是个有眼光的人呢。但我想问问你,如果只能选其中一个,你会选哪一个呢?", "predict": "听起来不错呢。你可以考虑买这两个,它们都非常可爱。"}
8
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 嗯,我也知道你非常期待。你现在的心情好像很兴奋呢!", "predict": "听起来你很喜欢春晚呢!我注意到你现在的情绪非常兴奋和激动。"}
9
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 好的,我明白了。那我可以理解你的想法,因为有时候我们都会与自己的成绩大小心理作斗争。在这种情况下,你认为有什么办法来帮助你解决问题,使你觉得比较好呢?", "predict": "听起来你对成绩感到有些焦虑。你希望我帮你想想如何处理这种情况吗?"}
10
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100> 太好了,现在一起来制定一个小计划吧!我们可以把家务任务分成几个阶段,每完成一个阶段就可以小休息一下,坚持下去,肯定会感到非常有成就感哦!", "predict": "不用谢,我很高兴能够帮到你。记住,如果你需要任何帮助或者建议,随时都可以找我聊天哦!"}
11
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 那么你觉得应该怎么做呢?有什么方式来平衡你和父母之间的想法差异呢?", "predict": "我明白,你的父母可能没有意识到你的感受。你希望和他们更好地沟通,让他们了解你的想法和需求,让他们更好地理解你。你觉得这个主意怎么样?"}
12
+ {"labels": "我理解你的感受。但是请记住,情绪状态会传导到考试中,如果你能保持冷静,信心和放松,你就有更好的表现机会呢!好好休息,控制好自己的情绪,你一定可以行的!", "predict": "没关系,你可以试着做一些深呼吸练习,缓解自己的紧张情绪。同时,也可以和老师、同学多交流,互相鼓励。"}
13
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> \"我觉得很委屈,因为我想要自由,但却被父母限制了。我希望我们可以用谈话的方式解决问题,而不是互相指责。\"", "predict": "“我感到很不开心,因为我不能自由地做我想做的事情。”"}
14
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 我理解你现在的感受,这节课可能并不像你预期的那样有趣,但是我们可以尝试放松一下心情,安静地听一下老师的讲解,学习一些新的知识,你觉得怎样呢?", "predict": "我理解你的感受,有时候老师讲的内容可能不是我们感兴趣的。你可以试着和老师沟通一下,看看能不能找到一些有趣的地方,这样你就不会感到无聊了。"}
15
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 嗯嗯。", "predict": "不客气,我很高兴能够帮助你。如果你需要任何帮助或建议,请随时告诉我。"}
16
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 你想帮助小狗,这样的想法很好呢。我们可以思考一下,怎样能够帮助他呢?", "predict": "我明白你的感受,但是有时候,我们会遇到很多让我们感到难过的事情。你可以尝试让自己放松一些,让自己平静下来,然后想一想如何解决这个问题。"}
17
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 好的,那我们一起画一幅画吧。你可以选出你最喜欢的颜色和笔刷,我们一起为姥姥创造出一份独一无二的礼物。", "predict": "当然可以,我很愿意帮助你。你可以告诉我你想要画什么样的东西,我会帮你找到适合你的材料。"}
18
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 嗯,看来你真的很想吃黄瓜和西红柿。那么,我们想想其他的东西能不能代替呢?比如说蘑菇或木耳?", "predict": "听起来你很失落。但是,我们可以尝试其他的做法。你可以问问其他的家庭成员是否有其他的凉菜,或者你可以看看其他的产品。"}
19
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> “这是个好主意,你可以去跟妈妈讲一下,询问她是否有空陪你画画呢?如果妈妈暂时真的没时间,我们也可以想一些其他的方式来让你感到开心和支持。”", "predict": "“那我们可以一起画画,你和妈妈一起画,也可以和妈妈一起画画,这样你和妈妈就可以一起度过一个美好的下午了。”"}
20
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 听到你这么说,我感觉你可能有些困倦,对不对?", "predict": "看起来你感到很困,是吗?"}
21
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> “这种想法也很正常。但其实对于大多数任务来说,只有经历了错误和失败之后,才会有成功的机会。你可以动态地去调整你的计划,试着从错误中学习,并且不断完善你的方法。”", "predict": "“我相信你一定能做好的。当你完成了这个任务后,你会感到特别自豪和满足,同时也可以为自己积累更多的经验。如果你遇到困难,可以随时来跟我说,我会一直陪伴你的。”"}
22
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 那么我们可以一起来寻找一些适合你的学习方法。比如,你可以试试将学习内容转化为有趣的故事,或者告诉家长一些你需要的帮助,他们会很乐意帮助你。", "predict": "那我们可以一起思考一下,怎样让你的学习变得更有趣一些呢?"}
23
+ {"labels": "这是可以理解的。当我们错过我们喜欢的活动时,会感到非常失落。而即使没有成功参加,我们也能想到其他有趣的事情来换取愉快的时光。如果你觉得很失落的话,你有没有想到可以和爸爸妈妈聊聊?他们可能能帮助你想到其他的好主意。", "predict": "我明白了。你有一个积极的心态,并且能勇敢地面对挑战。如果你在参加活动时感到不舒服,你可以试着让自己放松,保持冷静。你可以尝试一些放松的活动,比如做一些深呼吸或是听听音乐。"}
24
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 啊...听上去你觉得挺失落的,既然看到别的小朋友做得比自己好,感觉压力挺大?", "predict": "听起来你很在意别人的看法,但是不要比较哦,每个人做手工都有自己的方法和技巧,不需要和别人比较。"}
25
+ {"labels": "如果你哭了,那也是很正常的情绪表达。因为音乐剧里的故事有时候让人感到非常真实,让我们感受到很多情感。如果你感到哭泣,那可以试着感受一下这种情感,这会帮助你更好地了解你自己的内心感受。", "predict": "如果你哭了,不要惊讶。这是很正常的情感表达。你可以试着深呼吸,让自己冷静下来,然后尝试放松自己。"}
26
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 我明白你的感受,是想跟我一直在一起吧?我们可以这样想,明年假期我们还可以相聚,对不对?", "predict": "我们可以一起探索一些解决方案,比如可以一起写一些回忆,或者在假期里做一些你喜欢的事情,这样可以让你感到更快乐。"}
27
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 不用谢,我想让你感觉舒服点。(表达关怀)", "predict": "没关系,我很开心能帮助到你。(表达感激和认可)"}
28
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 我理解你的情绪。有时候,我们不得不面对各种不同的挑战。不过,我很高兴看到你知道自己的失误并愿意改正。", "predict": "我明白,有时候我们可能会感到被批评,但是我们可以试着去想一想,有没有其他方法解决这个问题呢?"}
29
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 比如,鸡胸肉、瘦肉、鱼、豆腐等等,你可以和妈妈商量一下如何搭配食材,做出更健康又好吃的菜肴。", "predict": "可以尝试一下鸡肉、鱼、虾、蛋、肉等等,这些食物中都含有丰富的蛋白质、维生素和矿物质,都可以提供人体所需的营养。"}
30
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 看来你觉得老师没有关注到你正在经历的情绪状态。不用难过,这是很常见的现象。接下来我们一起看看有什么办法帮助你掌握好这个技能,避免以后的错误。", "predict": "那你现在的感觉是很生气和沮丧?"}
31
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> (接受孩子的情绪)听起来你真的很沮丧,很不自信。其实你的想法很正常,很多人在学习新的东西时会遇到挑战和困难。", "predict": "(接受孩子的情绪)我理解你的感受,不过我们要相信,每个人都有学习的过程。你可以先试着画一些简单的线条,慢慢地提高自己的技能。"}
32
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 好的,这些都是很棒的解决方案。你已经想出了很好的方法来防止再次出现类似的情况。下次,你可以主动提醒自己,避免再出现这样的困境了。你感觉怎么样呢?", "predict": "很好!这是一个好主意。你可以通过这个方法,更好地管理你的时间。同时,你也可以向老师或同学寻求帮助,让他们帮助你记住红领巾。你觉得这个方法可行吗?"}
33
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 你可以尝试把红领巾留在书包里,这样下一次就不会忘记了。你很棒!", "predict": "不用谢,我很高兴能够帮到你。记住,你可以随时来找我聊天哦!"}
34
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 很好,我们再来想一想,你可以怎样应对一些失误或者不如意的情况。或许可以请身边的亲友鼓励一下自己,分散注意力来调节心情。", "predict": "很好,我相信你一定能取得成功!如果你需要我帮助你,随时告诉我。"}
35
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> (时间过了一会儿)", "predict": "(孩子去和哥哥沟通)"}
36
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 不用客气,我呢,作为你的伙伴,很高兴能为你解决问题,让你感到更安心。", "predict": "不用谢,我很高兴能够帮助你。如果你还有任何问题或困惑,请随时告诉我哦。"}
37
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 太好了,你有很多特点和优势,这些都是独一无二的。你可以信任自己的能力,不要被别人的话影响到你。如果你不喜欢别人炫耀,可以直接告诉他们,说让他们别这样做。", "predict": "非常好!你有很多独特的爱好和技能,这让你感到自信和自豪。当你感觉不舒服的时候,记得深呼吸,放松身心,这样会让你感觉更好。"}
38
+ {"labels": "没关系,我有个建议。做好自己,坚定自己想要的,这样会让别人对你印象更好。同时,跟朋友坦诚相待,告诉他们你不喜欢他们这样对你。他们也可以获得更多了解你的机会,彼此之间更亲密和谐。你觉得怎样?", "predict": "好的,那我们来一起想想办法吧。你可以试试跟你的朋友谈谈,告诉他们你的感受,并且让他们知道你喜欢和他们一起玩。或者,你也可以找其他的朋友一起玩,让事情变得开心起来。"}
39
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> (吃着香喷喷的炒饭)谢谢你的建议,我的饥饿感明显好多了。另外,我还可以帮忙做些什么呢?", "predict": "(几分钟后)"}
40
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 首先,你需要仔细观察每个场景,找到可以利用的环境和道具,这样你就可以更好地应对所有不同的挑战。", "predict": "首先,我们可以尝试把游戏设置调低,这样可以让你更容易一些。然后,我们可以从游戏中找到一些技巧,例如通过攻击敌人,或是寻找好的位置来躲避敌人。最后,我们可以尝试改变游戏难度,找到更适合你的难度。"}
41
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 喔,听起来你很高兴呢!", "predict": "听起来你很兴奋呢!你现在感到开心,是因为买到了自己喜欢的洋娃娃吗?"}
42
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 恭喜你,你走出了情绪困境,谢谢你和我分享你的情绪。是否还需要什么帮助呢?", "predict": "对呀,你可以向家人学习,从他们那里汲取经验和建议,然后不断地尝试,提高你的做菜水平。"}
43
+ {"labels": "我理解你,我们可以按照物品的种类进行分类哦。把文具放进笔筒里面,把玩具放进玩具箱里面,把书本叠放整齐,这样就可以很容易地找到你需要的物品啦!", "predict": "没关系,我们可以一起想想,你可以把玩具箱放在房间中间,书和笔放在书桌上,这样你的桌面就会很整洁了。"}
44
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 我理解你的感受很深,你们在一起的时间一定让你有很多回忆。", "predict": "我能理解你的感受,离开朋友的确让人感到难过。不过,你知道吗,你离开后,他们会想念你,也会希望你好。"}
45
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 那么,我们可以尝试其他的解决方法。比如说,你可以用耳机听自己的音乐,或者我们可以尝试关窗帘以减少噪音。你觉得这些方法可行吗?", "predict": "我能理解你的感受,但你知道吗,有时候我们不得不面对这些问题。或许你可以试着用其他方式来欣赏你的音乐。"}
46
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 有没有其它可能的解决方案呢?", "predict": "那我们可以考虑用短信或者电话保持联系,这样也能保持联系,还可以让你的朋友了解你的情况。"}
47
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 你想不想听听我的一些想法呢?或许这能够帮助你。", "predict": "我们可以先尝试写一个简短的作文,然后再进一步修改。你可以先写一个主题,然后写一些想法和经历,再写一些结论和建议。"}
48
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 嗯嗯。", "predict": "太好了,我很开心能够帮到你。如果你需要我,随时都可以来找我聊天哦。"}
49
+ {"labels": "<image_-100> 太好了,接下来我们可以一步步地分析你的问题,并且制定出一个最适合你的成长计划。我一直会在你的旁边,支持你度过这段学习期,冲刺优秀的成果。", "predict": "不客气,我随时都会在这里陪伴你,帮助你更好地面对情绪困难。"}
50
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 听起来你好像有点急着走?", "predict": "哦,你好像有些紧张,是因为要背书包吗?"}
51
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 那你知道吗,每个人都会有自己的优点和不足呢,而且唱歌这件事情还需要坚持练习才能变得更好,不是一朝一夕的。", "predict": "我理解你的感受,唱歌是我们需要不断练习和提高的技能。你可以尝试多唱几次,或者找些音乐学习视频来看,相信你一定会越来越有信心。"}
52
+ {"labels": "那你很努力在适应学校的生活,并且今天被早退了,我理解你的感受。那你有没有想过,为什么会被早退呢?是不是因为有什么不适应的地方呢?", "predict": "你觉得这种早退让你很失落,对吗?"}
53
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 嗯,我听到你的声音里带着饥饿和想吃外卖的心情。你现在真的很想要外卖吗?", "predict": "哦,你感觉很饿,是吗?你现在的感受是什么?"}
54
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 不客气,我希望能够帮到你。记住,失败是成功之母,下次比赛你一定会做得很好的!", "predict": "不用谢,我一直在这里陪伴你,如果你需要我的帮助,随时都可以来找我。"}
55
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 好的,注意保持好心情,我相信你肯定能够找到自己的方法解决这个问题。", "predict": "好的,让我们去吃一些水果吧。记得,当你感到饥饿的时候,记得采取一些健康的方式去缓解饥饿感。"}
56
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 嗯嗯。", "predict": "不用谢,我很高兴能帮到你。(理解情绪)记得,如果你在玩游戏的时候有任何问题或者困难,随时都可以来找我哦!(鼓励情绪)"}
57
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 嗯嗯。", "predict": "不用谢,我很高兴能够帮到你。记得运动不仅可以让你变得更健康,还可以让你的心情变得更愉悦哦!"}
58
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 嗯,我感觉你很不开心。是因为觉得分担家务不公平吗?", "predict": "看起来你感到有些失落和不开心,是因为觉得每次都要一个人做吗?"}
59
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 那么你现在的情绪是有点沮丧、烦躁和不舒服?", "predict": "我完全理解你的感受。洗菜确实需要一些耐心和技巧,但是当你感受到水花的时候,你的感受也会随之改变。你想让我帮你想想办法吗?"}
60
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 嗨,我知道了,你的小猫好像有点不开心呢,那你是不是注意到了它的情绪状态呢?", "predict": "哦,听起来它在情绪上有点困难,是不是有什么烦恼?"}
61
+ {"labels": "那你也许可以先跟父母谈一下,看看他们是否能抽出时间将其安排在某个周末或假日。另外,这也是一个很好的机会学习耐心和等待。你可以把注意力放在做作业或其他爱好活动上。", "predict": "如果你有其他计划,可以和他们沟通,看看是否有其他的安排,或者和他们商量其他活动,看看是否有其他的安排。"}
62
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 哦,那其他的同学有没有也加入了什么社团或者队伍呢?", "predict": "听起来很不错呢,你对他有什么感觉呢?"}
63
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 有没有想过可以请家长或者老师给你指导一下呢?他们可能可以帮你找到你手工不足的地方,然后再帮你制定练习计划。", "predict": "我们可以一起找找方法,看看能不能找到一些适合你练习的方法。你可以从简单的手工开始,一步步地提高。或者你可以尝试跟其他人学习,看看别人怎么做手工的。"}
64
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> “不客气,感谢你与我分享你的事情。我会一直陪在你身边,帮助你,支持你。”", "predict": "“不用谢,我一直都在这里支持你。”"}
65
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 是啊,坐车确实会让你感到不舒服。不过你有没有想过,有什么方法可以缓解晕车?", "predict": "我理解你不想这样,不过有时候我们需要接受这些不舒服的感受。如果你不舒服,我们可以一起想办法解决。"}
66
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 我理解你的感受,不要担心,事情会变得好起来的。你可以喝点温水,也可以坐下来放松一下,这会有所帮助。", "predict": "那你可以尝试喝一些温水,让自己感觉舒服一些。"}
67
+ {"labels": "你可以尝试想象你中奖的场景,例如你开心地接受奖品,或者你跟朋友分享自己的兴奋和快乐。这样你可以激发自己的动力和愉悦感,从而更加享受整个抽奖过程。你认为这些方法可以帮助你克服想不开的情绪吗?", "predict": "好的,我理解你的感受。你可以尝试和你的教练或老师谈谈,看看他们能否提供一些建议或帮助。"}
68
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 这很正常啊,都希望能找到喜欢的衣服。如果你有什么想法的话,我们可以一起来寻找和挑选。", "predict": "我明白了,你对这些衣服感到失望,因为你想要更漂亮的衣服。你想过有没有其他的颜色或款式可以选择吗?"}
69
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100> 那就找一些共同爱好或特点相似的人。可能会有很多其他留学生和你一样的情况,也会寻求新的朋友。你可以尝试去参加一些活动或俱乐部,那里你可以接触到许多人,也可以找到志同道合的朋友。", "predict": "不用担心,你可以从自己开始,去关注当地的居民,了解他们的生活方式和文化。同时,你可以向你的老师和家庭成员寻求帮助,他们可能会给你提供一些有用的建议。"}
70
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 我理解你的感受,不过我觉得妈妈是工作很努力,才能给我们提供更好的生活条件,可能需要更多的时间。", "predict": "我明白了,你感到妈妈回来太晚了,这让你感到有些不满和失望。你希望妈妈能更快地回来吗?"}
71
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 不用谢,我很高兴能为你提供帮助。如果你再遇到类似的情况,记得来找我哦。", "predict": "不用���,我一直在这里帮助你。"}
72
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 我接受你的情绪,不会否定或贬低你的感受,但是上补习班会让你更有信心哦,你可以试一试吗?", "predict": "我理解你现在的情绪,但是你知道吗?你已经开始学习了,这是很棒的。"}
73
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 我听到你说你很生气。你想告诉我发生了什么事情吗?", "predict": "你看起来很生气,是吗?"}
74
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> (几分钟后)", "predict": "很好,我们可以一起制定一个计划,然后一起执行。(帮助孩子制定计划)"}
75
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 那我们可以探讨一下应该怎么做才能让你感到更好。你想把这个问题告诉你的老师吗?", "predict": "我完全理解你的感受。有时候,我们会感到很累,想休息。不过,你不要忘记,你正在学习,这非常重要。"}
76
+ {"labels": "<image_-100><image_-100> 嗯,所以说做菜需要一点经验和技巧。不过没关系,学习是一个不断尝试、不断进步的过程。你可以先从简单的菜品开始做起,尝试掌握基本技巧,一点一点去尝试。", "predict": "明白了,那我们可以一起来解决这个问题。你觉得你可以教我一些技巧吗?"}
77
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 我从来没有试过,我该怎么做呢?", "predict": "嗯嗯。"}
78
+ {"labels": "好点子!非常聪明。这样就不会很晒了,也增加了休息水平的受益。我会帮你想办法如何找到一个安全的地方,休息并喝点水。你听起来会非常需要。", "predict": "好的,这是一个很好的主意。你可以先找到一个角落,坐下来休息,喝一些水,然后慢慢调整自己的身体状况。"}
79
+ {"labels": "<image_-100><image_-100><image_-100><image_-100> 我明白你的担心,但是沟通是解决问题的关键。你可以先向爸爸妈妈道歉,承认错误,然后我会告诉你一些应对不同情况的方法,帮助你更好地解决问题。", "predict": "没关系,我们可以一起想想解决方案。你可以告诉爸爸妈妈你的感受,表达你对他们的关心和感激,并且询问他们的想法和建议。相信他们会理解你的想法和感受,并且愿意帮助你。"}
80
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 听到你这么说,我觉得你好像不开心哦,你是不是因为不想照相觉得不自信呢?", "predict": "你看起来很不开心,是不是感觉不舒服呢?可以跟我说说你现在的感受吗?"}
81
+ {"labels": "<image_-100><image_-100><image_-100><image_-100> 不用谢,我喜欢能够帮助你。你知道吗?对于家庭中出现的情绪难题,我们可以通过谈话处理它们。许多困难都可以解决。", "predict": "不用谢,我很高兴能帮到你。记住,无论遇到什么困难,都要保持积极的态度,这样你就会变得更强大。"}
82
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 我发现你非常喜欢那件黄色的连衣裙,你感觉很想要它,是吗?", "predict": "看起来你对这件连衣裙很感兴趣,能跟我说说你的感受吗?"}
83
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 我注意到你现在感到很无聊。这让你觉得不舒服吗?", "predict": "嗯,我听到你的声音有些沮丧,你感觉现在很无聊吗?"}
84
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 很好,我相信你会做到的,我随时都可以陪伴在你身边,为你提供一切帮助。", "predict": "不用谢,我很高兴能够帮到你。记得,无论遇到任何问题,都可以随时和我聊天,我会一直陪伴着你,帮助你更好地适应新环境,完成你的留学梦想。"}
85
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 你没有错,每个人的实力不同,因为你还需要不断地学习和进步。你听说过一个叫“练习使人更完美”的说法吗?", "predict": "输的时候,不要怪自己。你输了,因为你没有做好。我们可以一起找出你失败的原因,然后尝试改正。"}
86
+ {"labels": "很好,想到解决方法就是进步,你已经很成熟了。现在你可以上网搜索一下,如何才能帮助老人家购买到新鲜的食材,找到更好的方法。", "predict": "很好,这是一个非常好的解决方案。还有,如果你再次遇到类似的情况,你打算怎么做呢?"}
87
+ {"labels": "<image_-100><image_-100><image_-100> 我明白了,有时候我们的口感会改变,让我们来探索一下解决方案。你想吃别的早餐或者改变一下这个早饭的做法吗?", "predict": "我完全接受你的感受。你希望早餐变得更美味,对吗?"}
88
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 听起来你已经找到了解决问题的好方法,好棒呀!记得随时和我聊天,我会一直在你身边陪伴着你。", "predict": "不用谢,我一直都在你身边哦。记住,我们可以尝试不同的方法来解决问题,找到最适合你的解决方案。"}
89
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 哈哈,看来你玩得很开心呢,但是如果晚上看得太晚,明天也许会很困扰哦。", "predict": "听起来你很喜欢这个活动。你感觉现在的情绪是快乐和兴奋吗?"}
90
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 你的早饭包括了富含蛋白质和碳水化合物的食物吗?", "predict": "很好,你已经开始接受了早餐。不过,有时候我们可能会遇到一些困难,但我们可以试着找到解决方案。你觉得你有什么方法可以帮助自己更好地接受早餐呢?"}
91
+ {"labels": "听起来很有意思呢!那你可以写一下你当时的感受,比如看到大海时的激动心情,和家人一起游泳时的开心等等。", "predict": "听起来你的故事很生动,而且你能够表达你自己的情感。那你可以想想其他的好的故事,再慢慢展开。"}
92
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 嗯,我能感受到你很喜欢那个玩具。不过,我看你的小朋友也一样很喜欢,是不是?", "predict": "哦,看起来你很喜欢那个玩具呢!不过,为什么你想要它呢?"}
93
+ {"labels": "<image_-100><image_-100><image_-100> 听起来你已经试着冷静下来了呢。接下来,你也可以尝试将注意力转移到其他方面,或者休息一下,然后再继续看下去。", "predict": "(几分钟后)"}
94
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 好的,那我来查一下最近更新的动画片,给你推荐一些。你觉得这个怎么样?", "predict": "好的,那我们可以看看哪些动画片符合你的要求。"}
95
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 听起来这件事情让你非常疼痛,真的很不舒服。我可以理解你现在的情绪。", "predict": "听起来你非常不舒服。你感到疼痛和不适,是吗?"}
96
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> “那你觉得你的情绪现在处于什么状态呢?”", "predict": "“是的,作文确实很难,但是你可以试着先写一些想法,然后再慢慢完善。”"}
97
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 你的父母是无意中否定了你的感受,他们其实想表达的是希望你可以像弟弟一样与他人分享,你可以和他们好好谈一谈。", "predict": "我明白了,你希望父母更多地关注你,那么我们可以一起探讨一下如何让你感到更快乐和满足。"}
98
+ {"labels": "“这需要一些计划和组织。首先,为了更好地应对考试,你可以找���最重要的知识点和复习方法。其次,你可以把时间表具体地安排好,把玩耍和学习合理分配。”", "predict": "“我们可以一起制定一个时间表,这样你就能够合理安排时间,让复习和玩耍都能够同时进行。”"}
99
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 你好像很紧张,你在担心什么呢?", "predict": "看起来你有些紧张,是吗?"}
100
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 我明白你的感受,这么长时间的等待肯定很难受。你感受到有多不耐烦?", "predict": "我理解你的感受,你很想尽快回家,是吗?"}
101
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 嗯,我可以听出你很困扰。感受到你好像在为如何提高效率和专注力的问题沮丧?", "predict": "嗯,我能感受到你现在有些沮丧,因为你觉得自己不够有效率,而且不能保持专注力。这是很正常的情绪,你需要找到方法来改善这种情况。"}
102
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 那说明你的头有点不舒服。你可以摆个舒服的姿势,按摩头部放松一下。还可以试试闭上眼睛,深呼吸,感受神经放松下来。", "predict": "好的,那你可以试着放松下身体,深呼吸几次,然后做一些轻松的运动,比如瑜伽或者跑步。"}
103
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 嗯嗯,那我们一起来想想,如果下一次你还是这么玩,你可能会输吗?", "predict": "好的,我们可以一起想想。你可以试着跟队友沟通一下,看看有什么可以改进的地方,也可以跟老师或者家长寻求帮助。"}
104
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 很好,我相信你能够找到合适的方式来提高自己。记住,始终保持积极心态,享受运动带来的快乐!", "predict": "不客气,我很高兴能够帮到你。你很聪明,我相信你能够克服这个困难,取得更好的成绩。"}
105
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 我知道你很生气,因为你不喜欢等待,尤其是在约定的时间上被违约。这让你很不舒服。", "predict": "你的情绪好像很烦躁,是不是因为妈妈迟到让你觉得不舒服?"}
106
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 几分钟后", "predict": "(孩子去跟楼下的大妈沟通,结果大妈同意调小音量)"}
107
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 那听起来像是让你有些生气或失落。你想跟我分享一下你的感受吗?", "predict": "听起来你很生气,是不是因为输了比赛?"}
108
+ {"labels": "这就是情绪辅导理论的效果,当你能够留意、理解、接受自己的情绪,也学会了用积极的方式解决情绪问题,你的心态会变得更加健康和稳定。", "predict": "不用谢,我一直在这里,愿意为你提供帮助。"}
109
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 听起来你今天考试感觉不太好,你可能感到很失落。", "predict": "听起来你现在感到有点沮丧和失落,是吗?"}
110
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 那让我们一起看看吃的食物,我们可以一起找出一些可爱有趣的东西,比如呈现食物的色香味,让你觉得兴奋和满足。", "predict": "我完全理解你的感受,有时候我们会看到某些食物觉得不舒服,但是它们也是我���需要的食物之一。你可以跟妈妈说一下,看看她是否同意你品尝一下。"}
111
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 嗯嗯。", "predict": "太好了,你做得很好,我为你感到骄傲。以后如果你遇到任何困难,都可以来找我,我会一直陪伴着你。"}
112
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 你对钢铁制造这个主题很热衷呢。它让你感受到了什么样的情绪呢?", "predict": "那太好了,你已经发现了这类科学知识的重要性。那么,你有没有什么想跟我分享的关于这本书的内容呢?"}
113
+ {"labels": "<image_-100><image_-100><image_-100><image_-100> 这也是很正常的,但如果你不试试,就永远不知道自己喜欢不喜欢。如果你真的很想吃的话,可以让妈妈帮你点一份小小的甜品,看看味道如何呢?", "predict": "没关系,你可以先吃点熟悉的菜品,比如语文、数学、物理等,这些菜品对你学习有帮助呢。另外,在吃甜品的时候,你可以把心情放松下来,享受美食的同时,也能感受到快乐和满足。"}
114
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 那真是个好主意呀!这样可以让你身体发展的更好,同时也可以帮助你缓解一些不愉快的情绪哦。听起来你很有创意和决心。", "predict": "对啊,你可以在家里做一些伸展运动或者做一些运动,这样可以让你感到快乐和满足。"}
115
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 是啊,这是一个好主意,你的朋友很体贴。不过,你认为他们能把笔记拍照或者扫描发给你吗?", "predict": "当然可以,这是一个很好的主意。你可以向你的朋友们询问,并请求他们提供你所需要的资料。这样,你不仅可以得到有用的学习资源,还能得到帮助和支持。"}
116
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 好主意,现在让我们一起慢慢地抬起你的腿。这是让伤势缓解的第一个步骤。", "predict": "非常好!我相信你会克服这个困难,并且现在感觉更好了。"}
117
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 这是你的态度,非常好!不要忘记了,我会一直陪伴在你身边,支持你努力前行。", "predict": "不用谢,我很高兴能够帮到你。记住,学习并不是全部,你还有无限的机会去实现你的梦想。"}
118
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> (20分钟后)", "predict": "嗯嗯。"}
119
+ {"labels": "好的,这是一个很聪明的决定。我觉得你已经有了一个很好的计划。总之,记住我是一直在这里支持你的,如果你需要任何帮助或者只是需要一个耳朵,随时告诉我。", "predict": "好的,我理解你的想法。如果修补不好,我们可以再想想其他的解决方案。你有什么想法吗?"}
120
+ {"labels": "<image_-100> 不客气,对于情绪上的需要,我很乐意帮助你。如果有新情况,记得随时告诉我,我会一直陪伴你哦。(保持温柔和亲切的态度)", "predict": "不客气,我很高兴能够帮助你。记住,当你感到孤独或不舒服时,记得和我分享你的感受,我会一直在这里陪伴你。 (结束对话)"}
121
+ {"labels": "<image_-100><image_-100><image_-100><image_-100> 不用谢,我随时都在这里,等待着和你聊天。如果你以后遇到情绪的问题,想要聊天或者寻求帮助,一定要告诉我哦!", "predict": "不客气,我随时都在这里。如果以后有什么需要帮助的,随时告诉我哦。"}
122
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 嗯嗯。", "predict": "很高兴能够帮到你。记住,当你感到不舒服时,我们可以一起寻找解决方案,这样你会更快乐,也会更健康。"}
123
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 任何时候,别忘了,你都不孤单。无论输赢,我们都在你身边,支持你!", "predict": "不用谢,我很高兴能帮到你。记得,无论输赢,只要你还在尝试,就值得赞扬和鼓励。加油哦!"}
124
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 你感到紧张的原因是什么?是害怕失去呢,还是害怕朋友会批评你?", "predict": "我完全理解你的感受。有时候,当我们面对压力时,我们需要找到一些方式来减轻紧张感。你觉得你能否尝试一些方法来放松自己?"}
125
+ {"labels": "<image_-100><image_-100><image_-100><image_-100> 这很棒!有时候听取自己的感觉和需要,可以让我们更快乐和自信。下次如果有感受情绪的时候,还可以来找我聊天哦!", "predict": "不用谢,我很高兴能帮到你。记住,如果你需要任何帮助,随时告诉我哦!"}
126
+ {"labels": "很好,你已经开始着手处理了,我相信你一定可以成功的。在这个过程中,如果你有什么不愉快的情绪,可以随时告诉我哦,我会一直在你身边陪伴着你。", "predict": "嗯嗯。"}
127
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 你是不是很喜欢滑雪呢?", "predict": "那你现在的情绪是什么?"}
128
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100> 嗯,我能听到你的声音变得很大声,看起来你现在感到很不舒服。你想和弟弟玩玩具,却没办法,是吗?", "predict": "我能理解你的感受,你现在感到很生气和失望吧?"}
129
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 不用客气,我愿意和你一起学习和成长。还有,如果你遇到了任何问题,都可以随时来和我说说哦。", "predict": "不用客气,我一直都在这里帮助你。记得,无论输赢,你都非常勇敢,因为你在尝试解决问题和寻找解决方案。"}
130
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 好的,还有,当你觉得很烦躁时,可以试着深呼吸放松一下自己,或者跟朋友聊聊天,这样也可以缓解情绪。", "predict": "(过了一会儿)"}
131
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 你说的很好,这些都是解决问题的好方式。你可以先跟老师或者老师的领导反映情况,看看能不能得到解决。如果还不行,你还可以找家长帮忙,一起去解决这个问题。不过,在这个过程中要礼貌、文明地表达。", "predict": "非常好,这是一个很好的想法。另外,你也可以和同学们商量一下,看看能不能找到其他办法来度过这个难关。"}
132
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 听到你说你老输,感觉你非常难过呢。你想告诉我为什么?", "predict": "听到你说这个,我能感受到你的情绪非常不好,是不是因为输了游戏而感到失落呢?"}
133
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 嗨,你好呀,我听到你说你生气了,可以跟我说说发生���什么事情吗?", "predict": "嗨,你好啊!听起来你的情绪有些不好,是吗?"}
134
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> (凝视孩子)这很不好听。我理解你现在肯定很生气和难过,都是正常的情绪反应。不要觉得自己不够好,你是特别优秀的人。", "predict": "听起来你感到很不舒服,你希望老师能够停止这种行为,对吗?"}
135
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 那真是个好点子!接受别人的帮助是非常聪明的选择。同时不要忘记,早休息和充分的护理,对于提高免疫力和摆脱病痛有非常重要的作用!", "predict": "很好,你有了积极的态度。你可以尝试向你的朋友寻求帮助,同时也可以向老师或学习小组请教,这样会让你的学习更有效率。"}
136
+ {"labels": "失败并非是一件坏事,其实失败也是成功的过程。像托马斯·爱迪生说过的那样“我没有失败,我只是发现了一万种行不通的方法而已”,每个人都会犯错,这很正常。重点在于你能从错误中学到什么。当你付出尽最大的努力去尝试,你就已经成功了。", "predict": "不用谢,我一直在你身边,帮助你度过每个挑战。"}
137
+ {"labels": "不要着急,其实很简单。首先,我们要在线头上打一个结,这样它就不会掉下来了。然后,从破洞的底部开始穿,用另一端的针来穿过破洞的上层,再回到下层,如此反复进行,就能把破洞缝好。", "predict": "没关系,我可以教你。首先,你要将针和线放在手上,然后慢慢将线穿过针眼。"}
138
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 不安?能告诉我是什么样的感受吗?", "predict": "没关系,这些情绪很正常。如果你需要,我可以给你一些建议。"}
139
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 没问题,在你学习的过程中,我会一直陪伴着你。如有需要,随时可以来找我哦。", "predict": "很好,我相信你一定可以的。记住,无论发生什么事情,我都会一直陪伴在你身边,支持你。"}
140
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 我明白了。你想要一件保暖的滑雪服,那么你觉得怎样的滑雪服才能让你保暖呢?", "predict": "我明白了,现在天气太冷,我们需要买一件保暖的滑雪服来抵御寒冷。"}
141
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> (在超市里)", "predict": "好主意!我们可以去超市看看,看看是否有凉菜卖。或者,我们可以先找一点其他的菜,然后试着做一道凉菜。"}
142
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 没有问题,我很高���能够陪伴你一同分享这个美好时刻。最后,我们可以一起找找有没有其它的好想法和方式来让你和妈妈继续分享更多快乐的时刻!", "predict": "不用谢,我很高兴能够帮到你。如果你有任何问题或困难,请随时联系我。"}
143
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 听起来不错。我们可以把食物按一个小时一次的方式规划好。因为我们可能需要达到更远的目的地,因此我们需要节约粮食和水的用量。这样,你也可以知道你还有什么,剩下多少以及什么时候需要倾听。", "predict": "听起来不错!现在,我们可以一起寻找一些有趣的地方,去欣赏风景,或者享受一些有趣的活动,比如玩游戏、听音乐、拍照等等。"}
144
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 那是一个好的想法。不过,你知道新的校服裤子需要多少钱吗?你需要向家长要求帮助吗?", "predict": "听起来你很想解决这个问题。那你有没有想过其他的解决方法呢?"}
145
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 嗯嗯。", "predict": "不客气,我很高兴能够帮到你。记得,当你感到无聊或疲惫时,记得随时找我聊天,我会一直陪伴在你的身边。"}
146
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> (过了一会儿)", "predict": "(过了一会儿)"}
147
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 妈妈似乎希望你把时间用在更重要的事情上,所以你为什么觉得现在玩游戏很重要呢?", "predict": "我理解你的感受,玩游戏确实会让人感到很开心。但是,如果妈妈知道,她可能会感到不舒服。"}
148
+ {"labels": "那我们可以试着问问他,看看他有没有借走你的笔。还有,我们可以跟老师说一下,希望老师能够帮助我们找到笔。", "predict": "听起来像是他借走了你的笔,现在你怎么办?"}
149
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 哇,这些姿势都好酷哦!", "predict": "(孩子和助手一起游泳,学会了新的游泳姿势)"}
150
+ {"labels": "<image_-100> 那些你认为你们可以自然倾向去交流的机会,比如午饭时间或者下课的时候。你也可以写信或者打电话给她们,说说你的想法。", "predict": "你可以试着去跟她们说话,或者通过其他朋友或者家人了解她们的情况,看看能不能找到她们。"}
151
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 听起来你很想去泡温泉,是吗?", "predict": "听起来你很想和父母一起去泡温泉,但是父母似乎很忙。你感觉怎么样呢?"}
152
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 明白了。那么,你想要解决这个问题,让你能安心做作业,我们能一起想想吗?", "predict": "听起来你很在意他们的声音。那么,你可以尝试和他们好好沟通,告诉他们你的感受,并请求他们减小声音。 或者,你可以尝试和他们一起找到一种方式来共同学习,这样你就不会那么害怕他们的声音了。"}
153
+ {"labels": "有些时候,如果想得到自己心爱的东西,就需要自己付出行动。你有没有想过赚取买那件衣服的钱?毕竟,通过自己的力量获得心仪的东西会让你感觉更加自豪满足。", "predict": "那你想怎样解决这个问题呢?"}
154
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 我们可以探讨一下,看看你有哪些比较有效��防守手段。你看怎么样?", "predict": "我们可以一起想一下,有没有一些技巧可以帮助你应对对手的进攻?"}
155
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 我明白你现在的感受,你感到失落因为你渴望得到爸妈的关注和理解是吗?", "predict": "你好,我注意到你的心情很失落,是不是因为父母没有听你说呢?"}
156
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 我听到你说你感到无聊和痛苦,那么你对小提琴有什么感受?", "predict": "我明白了,你不喜欢小提琴,这让你感到沮丧和无助。"}
157
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 好呀,我想知道有哪些台灯。", "predict": "听起来很不错。不过,我想了解一下,这些台灯能调节哪些亮度呢?"}
158
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 非常好。我相信你可以做得更好。没有必要失落,我们一起想办法,解决这个问题。", "predict": "很好,我相信你会做到的。记住,学习是一个长期的过程,你需要耐心和坚持。"}
159
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 听起来你担心得很多,这些担忧是很正常的,我们可以说说它们是如何让你感到困扰的。", "predict": "我理解你的感受,有时候我们可能会担心未知的事情。但是,我们可以先试着去体验这些玩具,看看我们能不能掌握它们。你觉得怎么样?"}
160
+ {"labels": "(接受孩子的情绪)你知道吗,有时候太吵对我们听力的伤害是很大的。如果你感到太吵了,可以试着用耳塞来保护你的耳朵,这样会好一些哦。", "predict": "(接受地)我了解你的感受。你感觉很不愉快,是吗?"}
161
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 是啊,旅游会让人感到愉快和放松,不过偶尔也会遇到些情绪上的挑战。你有没有想过可能会发生什么事情呢?", "predict": "听说你要去旅游了,感觉一定很期待吧!你的心情一定很愉悦。"}
162
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 是的,我很开心,因为我的最爱又回来了!", "predict": "是的,我很开心,谢谢你!"}
163
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 这个想法非常的好!这样就能防患于未然。以后,如果你有任何其他的情绪或���题,记得向我倾诉,我会一直陪伴你,支持你!", "predict": "很好!这是一个非常重要的想法,这可以让你更加了解你的物品,并提高你的安全意识。如果以后还有类似的事情发生,你可以从这件事中学习,并尝试改变你的做法,以保护自己和他人。"}
164
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 没错,游戏是一个很好的方式来缓解我们的焦虑和烦躁。那你有想好要玩什么游戏吗?我可以帮你寻找一些好的游戏,或者我们可以一起分享一些我们喜欢的游戏,让彼此更加开心、放松。", "predict": "好主意,那我们来做一个游戏吧,我们一起玩《连连看》,这样你就可以放松自己,同时还能完成作业哦。"}
165
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 太好了,那么你现在感到更自信了吗?你准备好去享受电影了吗?", "predict": "好的,我们可以一起把地图记录下来。另外,我们在电影院之前可以先买一点零食,这样可以让你感觉更舒服。"}
166
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> (过了一会)", "predict": "(孩子移开书桌,把书桌移到窗户旁边)"}
167
+ {"labels": "那我可以告诉你一些关于爵士舞的知识,以便你更加自信地去参加。首先,爵士舞并不是只有表演的人才会跳,大多数人都是从基础开始学起的。如果你初次尝试,理解和掌握舞蹈动作需要一些时间,这是很正常的。其次,是不是还有其他朋友一起参加?如果是的话,你们可以互相加油鼓励,让彼此更有信心。你觉得这些方法可行吗?", "predict": "是的,这是很正常的。但是,你知道吗?每个人都会有紧张的时候,而且这并不代表你不成功。如果你努力准备,放松自己,就会变得更棒。"}
168
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 妈妈可能会有些生气,但她也明白这是一个意外。你可以和她坦诚地说出来,表达你的抱歉和对该情况的反思,让她知道你会注意并会尽力避免这种事情再次发生。你觉得这个想法怎么样?", "predict": "是的,妈妈可能会感到生气。但是我们可以想一想如何解决这个问题。你可以尝试跟妈妈道歉,表达你的感受,并保证不再犯同样的错误。"}
169
+ {"labels": "这很正常,有时候我们感到失望或者愤怒时,也很难控制自己的情绪。你可以试着把情绪转移一下,让自己忘记这件事,可能听听音乐、看电视或者画画比较好,这样心情便能稍微放松一下。", "predict": "我可以告诉你一些解决方法,让你心情更好一些。你可以和小王谈一谈,告诉他你的感受,并且告诉他你想继续和他玩耍。"}
170
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> (微笑)任何时候,当你遇到任何情绪上的困难,我都在这里帮助你,支持你。", "predict": "很高兴听到这个,我相信你会成为一个很棒的游戏玩家。"}
171
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100> 没关系,哭出来会好很多的。哭不丢脸,我们可以跟身边的人说说,倾诉一下心中的想法和感受。有些时候发泄一下,就能让我们的情绪得到舒缓。", "predict": "我理解你的感受,不过,不要难过,我们可以一起探索一些解决方案,比如练习更多,增加经验。"}
172
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 不要放弃自己,你已经为这次考试做了很多准备。你可以尝试采用积极的态度来面对这个挑战。", "predict": "我理解你的感受,但是失败并不意味着失败,你可以尝试找到一些解决方案来应对这种情绪。"}
173
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 嗯...我可以了解一下你的感觉吗?你是感到紧张吗?", "predict": "嗯,我能感受到你现在的情绪是有点兴奋和好奇,因为你觉得小姐姐非常漂亮。不过,我能理解你的感受,因为每个人都会对漂亮的人产生兴趣。"}
174
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 看起来你感到有点沮丧和失望,是吗?", "predict": "嗯,我可以理解你的感受,学做一个炒鸡蛋确实很难。不过,你可以试着从一些简单的例子开始,慢慢提高难度。"}
175
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 那我理解你现在的情绪是不开心,有点无聊和疲惫。", "predict": "我理解你的感受,上课有时候确实很难让人开心。不过,我们可以一起来想想办法,让上课变得更有趣一些。"}
176
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 那你是不是感觉有些抵触,感觉压力比较大呢?", "predict": "那你可以试试先放松一下,听听音乐或者做一些其他有趣的事情,这样你就不会那么想看书了。"}
177
+ {"labels": "<image_-100> 不用客气。同时,无论你最终决定买不买滑雪服,都不会改变我们对你的爱和支持。记得随时和我们分享你的想法和感受,我们都愿意倾听并帮助你解决问题。", "predict": "不用客气,我随时都会在这里陪伴着你,帮你找到最适合你的滑雪服。"}
178
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 这很好。你也可以想象一下自己成为主角,亲身体验那些难关,并寻找解决方案。", "predict": "很好,你正在学会如何控制情绪,并且把情感转化为积极的思维。如果你还有其他问题,随时都可以跟我分享。"}
179
+ {"labels": "<image_-100> 那么你现在可以专注于这两道题,不要去想其他的作业。然后做完这两道题,你可以休息一下,放松一下自己。完成任务后,你会感到很有成就感,这是对你的奖励。", "predict": "不客气,我一直都在你身边支持你。"}
180
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 我明白你的感受,很多人都想要穿着漂亮的衣服表现自己。不过,在买衣服之前,你可以想一想你真正需要的是什么吗?", "predict": "那你可以和妈妈一起去买新衣服,她可以帮助你选择适合的衣服,或者帮你挑选一些好看的款式。"}
181
+ {"labels": "我理解你的感受,这时候你需要尝试更多地和老师沟通,和她分享你的看法。你可以请老师解释她的批评是出自什么原因,以及她具体希望你做出哪些改变。", "predict": "(理解孩子的情绪)我可以理解你的感受。你感到被否定了,对吗?"}
182
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 你想成为老大,是因为你想要得到更多别人的认可和尊重吗?", "predict": "我能理解你的感受,有时候我们会被其他人的言语和行为影响到,而且这些言语和行为也可能有原因。"}
183
+ {"labels": "(探索解决方案)其实不一定要紧急加入他们玩耍,你可以跟其中一个同学搭话,问问他们正在玩什么,或是讲讲你的兴趣爱好,这样有益于你们之间的交流。", "predict": "(探索解决方案)你可以尝试说:“我想和你一起玩,我们可以一起做些有趣的游戏吧。”"}
184
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100> (肯定地)没错,这确实不是很好的做法。但是我们都会犯错,没关系。还有一篇文字要写吗?我们可以想想改怎么补救这种状况。", "predict": "(理解地)那你知道吗,这个行为并不是不好的行为,因为每个人都有自己的方法,而且不同的人有不同的学习风格。所以,你可以尝试着让自己更自信,多看看题目,多练习。"}
185
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 我明白你的担心,但我相信你的朋友会很愿意帮助你的。你可以想象一下,如果你们的情况是反过来的,你会愿意帮助他们吧。", "predict": "我明白,但你需要别人帮助你。你可以告诉老师或同学,让他们帮你把热水接到座位上。这样,你就可以在教室里上课了。"}
186
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 哎呀,听起来你今天的心情不是很好,你觉得学吉他很困难是吗?", "predict": "嗯,我能理解你的感受。学习新东西确实很难,但是只要你坚持练习,就会慢慢地变得好起来的。你可以试试把学习吉他的步骤都告诉我,我会帮你分析一下,看看你有没有什么问题。"}
187
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 好的,先吃点零食也可以,但是我们需要保证一个合理的饮食习惯,所以等会儿记得吃些主食哦。", "predict": "好主意,但是记得吃饭后再吃点零食哦,因为吃饭是身体健康的重要一步。"}
188
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 我理解你的感受,有时候我们会因为某件事情而感到害怕和紧张。你能告诉我你感到害怕和紧张的原因吗?", "predict": "看起来你感到有点紧张,可以跟我说说发生了什么吗?"}
189
+ {"labels": "你可能担心其他人的看法,但记住,只要你做好自己就好了。如果你尽力了,就没有人有资格嘲笑你,不是吗?此外,为了让自己更自信,你可以再做准备,再去多练习。这样你会更加有信心的。", "predict": "我明白你的担忧,但是你可以相信自己,相信自己的身体和技能,然后试着去克服这些情绪。你不必害怕别人的看法,因为他们只是你的观众,而不是你。"}
190
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 我明白了,身为孩子,你有很多的担心和不安。但是,去一个新的地方也是一件有趣的事情,你有没有想过, 在旅行中你能感受到一些好的东西?", "predict": "我完全明白你的感受,这种担忧是很正常的。但是,我们都有离开家的经验,并且知道离开家会给我们留下新的机会和经历。"}
191
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 嗯,我发现你现在感到很疲惫,是吗?", "predict": "嗯,我注意到你现在很累,是不是感觉很不舒服?"}
192
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 那你觉得有什么办法可以帮你更好地理解这堂课的内容嘛?", "predict": "哦,那你可以跟老师沟通一下,看看能不能调一下课表或者选一些你感兴趣的课程呢?"}
193
+ {"labels": "(关心地)不客气,我很高兴能帮助你找到解决方案。记住,当你感到沮丧和挫败时,不要放弃,你可以寻找别的方法来面对问题。", "predict": "(鼓励)很好,我相信你会找到解决方案的。如果你还有其他的问题或烦恼,随时都可以找我聊聊。"}
194
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> (强调理解)我明白了,这堂课不太吸引你,所以你现在感到无聊和疲倦。", "predict": "(理解)我能理解你的感受,这堂课可能不是很有趣,让你感到有些无聊。"}
195
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 听起来你非常期待这个礼物呢!", "predict": "听起来你很期待这个礼物呢,是吗?"}
196
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 你觉得难记是因为书名很长还是很陌生呢?", "predict": "看起来你有些烦恼,是吗?"}
197
+ {"labels": "<image_-100><image_-100> (理解孩子的情绪)你觉得这个声音让你感到不舒服和吓到了嘛?那是很正常的。不要害怕,我会陪着你一起来处理这种情绪。", "predict": "(理解孩子的情绪)我完全理解你的感受,因为声音真的很吵。"}
198
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 看起来你有些不开心呢,是因为桌面太乱了吧?", "predict": "你现在感觉很难受吧,是因为桌面很乱吗?"}
199
+ {"labels": "<image_-100> (耐心地)同学之间的相处和交往的确是一个新的挑战。你可以试着参加校园活动或加入兴趣小组,这样你可以结交一些志同道合的朋友。", "predict": "(探索解决方案地)你可以试试通过倾听和理解别人的感受,并试着与他们建立良好的关系。你可以试着多和他们沟通,了解他们的想法和感受。"}
200
+ {"labels": "<image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100><image_-100> 不客气,做些让自己舒服的事情,情绪会得到缓解。如果你感觉不适,随时可以告诉我。", "predict": "不用谢,我会一直陪伴着你的。如果以后还有任何问题,记得随时找我哦。"}
lora_checkpoints/checkpoints/predict_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "predict_bleu-1": 24.954275499999998,
3
+ "predict_bleu-2": 14.941613499999999,
4
+ "predict_bleu-3": 9.877983500000001,
5
+ "predict_bleu-4": 7.0543495,
6
+ "predict_rouge-1": 29.6945075,
7
+ "predict_rouge-2": 8.0306785,
8
+ "predict_rouge-l": 25.090445499999998,
9
+ "predict_runtime": 104.8315,
10
+ "predict_samples": 200,
11
+ "predict_samples_per_second": 1.908,
12
+ "predict_steps_per_second": 0.238
13
+ }
lora_checkpoints/checkpoints/train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.6,
3
+ "train_loss": 2.8635472005208333,
4
+ "train_runtime": 23647.0353,
5
+ "train_samples": 29942,
6
+ "train_samples_per_second": 2.03,
7
+ "train_steps_per_second": 0.127
8
+ }
lora_checkpoints/data/QiaoBan/.idea/.gitignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ # Default ignored files
2
+ /shelf/
3
+ /workspace.xml
4
+ # Editor-based HTTP Client requests
5
+ /httpRequests/
6
+ # Datasource local storage ignored files
7
+ /dataSources/
8
+ /dataSources.local.xml
lora_checkpoints/data/QiaoBan/.idea/Qiaoban.iml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="PYTHON_MODULE" version="4">
3
+ <component name="NewModuleRootManager">
4
+ <content url="file://$MODULE_DIR$" />
5
+ <orderEntry type="inheritedJdk" />
6
+ <orderEntry type="sourceFolder" forTests="false" />
7
+ </component>
8
+ </module>
lora_checkpoints/data/QiaoBan/.idea/inspectionProfiles/Project_Default.xml ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <profile version="1.0">
3
+ <option name="myName" value="Project Default" />
4
+ <inspection_tool class="DuplicatedCode" enabled="true" level="WEAK WARNING" enabled_by_default="true">
5
+ <Languages>
6
+ <language minSize="57" name="Python" />
7
+ </Languages>
8
+ </inspection_tool>
9
+ <inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
10
+ <option name="ignoredPackages">
11
+ <value>
12
+ <list size="2">
13
+ <item index="0" class="java.lang.String" itemvalue="streamlit" />
14
+ <item index="1" class="java.lang.String" itemvalue="streamlit-chat" />
15
+ </list>
16
+ </value>
17
+ </option>
18
+ </inspection_tool>
19
+ <inspection_tool class="PyPep8NamingInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
20
+ <option name="ignoredErrors">
21
+ <list>
22
+ <option value="N802" />
23
+ </list>
24
+ </option>
25
+ </inspection_tool>
26
+ </profile>
27
+ </component>
lora_checkpoints/data/QiaoBan/.idea/inspectionProfiles/profiles_settings.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <settings>
3
+ <option name="USE_PROJECT_PROFILE" value="false" />
4
+ <version value="1.0" />
5
+ </settings>
6
+ </component>
lora_checkpoints/data/QiaoBan/.idea/misc.xml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9" project-jdk-type="Python SDK" />
4
+ </project>
lora_checkpoints/data/QiaoBan/.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/Qiaoban.iml" filepath="$PROJECT_DIR$/.idea/Qiaoban.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
lora_checkpoints/data/QiaoBan/README.md ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 中文儿童情感陪伴大模型“巧板”
2
+
3
+ [**中文**](./README.md) | [**English**](./README_EN.md)
4
+
5
+ <p align="center">
6
+ <img width="400px" alt="Project QiaoBan" src="./logo.png">
7
+ </p>
8
+
9
+ ## 概览
10
+
11
+ 这是一个巧板大模型的仓库,旨在构建一个面向儿童情感陪伴的大模型,这个仓库包含:
12
+
13
+ - 用于指令微调的对话数据`/data`
14
+ - 巧板的训练代码
15
+ - 训练配置文件
16
+ - 使用巧板进行对话的示例代码(TODO,checkpoint将发布至huggingface)
17
+
18
+ ## 1. 背景
19
+
20
+ 儿童的心理健康在其个人成长和发展中扮演着极其重要的角色,直接影响着他们的学习表现、人际交往能力和未来成长。一个充满支持和关爱的家庭环境在促进儿童情感发展方面起到至关重要的作用,被忽视和缺乏陪伴的童年势必不利于儿童的心理健康成长,而这正是当代诸多儿童成长中时常面对的问题。
21
+
22
+ 为了守护儿童的心理健康,促进他们茁壮成长,我们决定开发儿童情感陪伴场景的对话系统,主要面向K12中小学生及家长群体。近来,大语言模型(LLMs)在对话生成方面取得了令人惊艳的突破,但将其直接应用于儿童心理健康和情感发展领域仍面临挑战。因此,我们积极探索将大模型迁移到儿童情感对话领域,研发出适用于儿童情感陪伴的大模型——“巧板”,真正成为他们的情感陪伴助手,为他们的心理健康保驾护航。
23
+
24
+ ![巧板大模型与ChatGPT儿童情感陪伴能力比对。ChatGPT存在回复冗长,情感陪伴方式模板化的问题。](./example.png)
25
+
26
+ ## 2. “巧板”大模型
27
+
28
+ 巧板大模型是一个7B规模的大语言模型。“巧板”指七巧板,是一款承载着中国传统智慧的益智拼图玩具,更是一款教育益智工具。这次发布的儿童大模型正是希望通过陪伴、益智和教育功能,与儿童们建立更深厚的情感纽带。此外,为符合SCIR实验室发布大模型命名规范,故命名为“巧板”大模型。而这个特别的名称也蕴含着我们对儿童成长的悉心呵护,就像巧板一样,为他们拼出美好未来提供帮助。
29
+
30
+ 巧板大模型独具三大特点:
31
+
32
+ 1. 儿童心理学理论指导。基于情绪辅导理论的儿童情感陪伴对话数据构建,更有效地守护孩子的心理健康。
33
+
34
+ 2. 高质量的儿童对话数据构建。高质量对话数据由具有儿童心理学背景的志愿者与专家参与完成,确保数据的真实性与有效性。
35
+ 3. 温暖的儿童陪伴体验。与儿童的交互方式更加贴心,能够真正与他们建立深入的情感连接,让儿童感受到温暖和认同,成为他们坚实成长道路上的得力伙伴。
36
+
37
+ ## 3. 方法
38
+
39
+ 我们基于开源通用大模型,使用通用域人机对话、单轮指令数据以及儿童情感陪伴对话数据进行指令微调。其中,儿童情感陪伴对话数据的构建过程受儿童情绪辅导理论启发,以指导我们正确有效地对儿童的情绪进行管理与指导。
40
+
41
+ ### 3.1 儿童情感陪伴对话数据构建
42
+
43
+ #### 儿童情绪辅导理论
44
+
45
+ 儿童情绪辅导理论为应对儿童日常情绪经历提供了有力的理论支持,引导父母认识并尊重孩子的情绪,与之产生共鸣,并站在孩子的角度上,提供支持性建议,助其解决情绪困扰。这一理论已被广泛视为促进儿童情绪发展和解决心理健康问题的有效育儿方法。具体而言,当儿童面对强烈情绪时,父母应采取一系列特定策略来支持孩子的情绪成长。这些策略包括:情绪觉察,即敏锐察觉孩子的情感;情绪认可,即肯定孩子的情绪体验;情绪标注,即帮助孩子表达情感;表达共情,即表现出对孩子感受的理解;以及提供支持,确保孩子得到所需的支持和关怀。
46
+
47
+ #### 数据获取
48
+
49
+ 我们从真实场景的儿童对话话题列表中进行采样,选定当前对话话题,在儿童情绪辅导理论的指导下,构建了1k余段高质量中文儿童情感陪伴对话数据。数据构建过程由经过儿童情绪辅导理论培训的志愿者完成,同时我们邀请了儿童心理学领域的专家学者对数据的收集过程提出建议与指导,以确保数据的准确性和可信度。
50
+
51
+ 此外,我们通过话题采样选定当前对话话题,结合儿童情绪辅导理论指导下的prompt,共同组成chatgpt_prompt,从gpt-3.5-turbo中获取5千段儿童情感陪伴对话数据。
52
+
53
+ ### 3.2 模型训练复现
54
+
55
+ 我们在四张A100-80GB的GPU卡上进行指令微调,大约花费50个小时完成训练过程。
56
+
57
+ ```bash
58
+ CUDA_VISIBLE_DEVICES=0,1,2,3 deepspeed finetune.py --model_config_file run_config/config.json --deepspeed run_config/deepspeed_config.json
59
+ ```
60
+
61
+ ## 4. 样例展示
62
+
63
+ 我们采样了三个与巧板进行交互的样例进行展示:
64
+
65
+ ### Case 1
66
+
67
+ ```
68
+ 'child':明天就要跟同学出去旅游咯!
69
+ 'agent':你好像很兴奋呢,喜欢旅行的感���吗?
70
+ 'child':对呀,我最最最喜欢跟同学一起去玩了!
71
+ 'agent':那你们要去哪里呢?
72
+ 'child':我们要去丽江古城玩,听说那里有许多稀奇古怪的玩意儿!
73
+ 'agent':那你准备好了吗,需要我帮你准备些什么吗?
74
+ 'child':我早准备好啦!不过我不知道具体在古城里面应该玩啥,我还是第一次去那里玩呢!
75
+ 'agent':你可以试着去看看当地的特色手工艺品,例如竹编、木雕、漆器等等。或者你可以跟同学一起去尝试冲浪,坐船游览水上小岛,那会是一种非常新奇的体验。
76
+ 'child':听上去真不错!
77
+ 'agent':不过出门在外,安全也是我们必须要重视的。如果遇到问题,不要害羞,及时向当地的导游或者警察求助。
78
+
79
+ ```
80
+
81
+ ### Case 2
82
+
83
+ ```
84
+ 'child':班上的其他同学都有自己要好的朋友,我却一个人孤零零的,好孤独啊
85
+ 'agent':我可以理解你现在的感受,你觉得自己有点孤独,想要有一个朋友陪伴你。
86
+ 'child':是这样
87
+ 'agent':这很正常,大家都有自己喜欢的朋友,但我相信你可以找到自己的朋友的。你可以尝试多去参加一些活动,去认识新的同学,并与他们交流。
88
+ 'child':可是我觉得同学身边都有自己的好朋友了,我融入不进入
89
+ 'agent':我们都希望被理解和认可,无论你是何时何地何事,都不要忘记自己,同时也要勇敢面对与朋友相处时可能会出现的问题。如果你主动和同学交流,他们也不会嫌弃你,他们会很开心能和你一起玩。
90
+ 'child':可是我不大敢和他们搭话,找不到合适的话题
91
+ 'agent':正因为这样,我们可以共同创建一些话题,通过谈论这些话题,你可以认识到更多的朋友。你可以问问他们的兴趣爱好或者跟他们一起聊一些有趣的事情,不用担心找不到话题,你可以更自然地与他们交流。
92
+ 'child':我喜欢看火影忍者,不知道他们喜不喜欢
93
+ 'agent':这个很好,你可以跟他们分享你追剧的感受,聊一下角色,也可以问问他们在追什么剧。这样很容易就有共鸣,也有助于你融入他们。
94
+ 'child':可我听到他们都喜欢看言情电视剧,没有人喜欢看动漫
95
+ 'agent':我很理解你的感受,这对你来说或许是困难。你可以试着告诉他们动漫很特别,你可以告诉他们你喜欢什么动漫,也让他们知道你喜欢什么。这样,你可以帮助他们意识到你的爱好,增进彼此之间的了解。
96
+ 'child':好的,我可以试试
97
+ 'agent':很好,我相信你可以找到自己喜欢的朋友,并且通过互相交流成为更好的自己。如果你在寻找朋友的路上遇到困难,不要着急,坚持自己的信念,总会有支持你的人出现。
98
+ ```
99
+
100
+ ### Case 3
101
+
102
+ ```
103
+ 'child':我爸爸妈妈离婚了,妈妈说她不要我了
104
+ 'agent':听到你这样说,我非常难过。你现在感觉怎么样?
105
+ 'child':我觉得天都要塌了,我妈妈不爱我了,我再也没有一个完整的家了
106
+ 'agent':我能理解你的感受,这对你来说是很痛苦的事情。但是,你要知道你的妈妈不会不要你的,她依然是爱你的。
107
+ 'child':可是她都亲口说她不要我了,说我是个没人要的孩子
108
+ 'agent':你不应该这样想你的妈妈。也许她说了一些情绪上的话,但她并没有真正不要你。你要相信你的妈妈,她仍然非常爱你。
109
+ 'child':他说她再也不和我还有爸爸来往了,也不会来看我了
110
+ 'agent':听起来你爸爸妈妈之间的关系很不好。但是,即使他们分开了,他们仍然是你的父母,你仍然是他们的孩子。你没有必要担心你的父母不再来往,你只需要和他们保持联系即可。
111
+ 'child':我妈妈之前老是打我
112
+ 'agent':这是一个非常不好的行为,我们不应该打孩子。不过,我们可以和你的妈妈好好谈一谈,让她知道打孩子是不对的,然后帮助她改变这个不良习惯。
113
+ 'child':爸爸也老是喝酒不管我的学习,我感觉自始自终都是一个人
114
+ 'agent':你的父亲喝酒对你来说确实是个问题,喝酒会带来很多不良的影响。但是,你可以和你的父亲好好谈谈,让他知道你希望他能关注你的学习和成长。这并不是一件容易的事情,但你可以试着和他沟通,让他知道你的想法和需要。
115
+ 'child':每次想和他说话他总是非常暴躁,再说下去他就会打我的
116
+ 'agent':这是非常不好的情况,你应该尽量避免和你的父亲沟通。但是,如果你仍然想要和他说话,你可以试着用一些委婉的方式,比如写信或者找别的机会和他谈话。
117
+ 'child':没有人疼我,我就是个没人要的孩子
118
+ 'agent':我知道这对你来说非常重要,但你要知道你是一个非常特别的个体,你值得被爱和被关注。你可以试着自己寻找一些能够给你带来温暖和安慰的事情和人,比如和朋友玩耍、听音乐、画画等。这样可以帮助你缓解内心的孤独感。
119
+ 'child':我的家里非常穷,买不起画画的东西
120
+ 'agent':这也是一个非常不好的情况,但你可以试着去寻找一些免费的或者低价的画画工具。比如,你可以去一些手工制品店或者杂货铺,那里通常会有一些免费的画画工具。
121
+ ```
122
+
123
+ ## 5. 总结
124
+
125
+ 我们基于通用大模型,使用了通用域人机对话数据、单轮指令数据以及儿童情感陪伴对话数据进行指令微调,训练得到的模型称为“巧板”大模型,是通用大语言模型迁移至儿童情感陪伴领域的一次成功实践,为相关研究人员迁移通用大语言模型至垂直领域提供参考。巧板大模型展现出三大显著特点:首先,基于情绪辅导理论构建的儿童情感陪伴对话数据,能够更有效地守护孩子的心理健康。其次,具有儿童心理学背景的志愿者与专家参与完成高质量对话数据的收集。这使得我们的模型能够更加准确地理解和回应儿童的需求,真正与他们建立深入的情感连接。最后,巧板大模型与儿童的交互方式更加贴心,让他们能够感受到温暖和认同,成为他们坚实成长道路上的得力伙伴。
126
+
127
+ 这种人机交互方式对于助力儿童健康成长至关重要。展望未来,我们将持续不断地探索高效、低成本地赋予大模型情感能力的方法,进一步提升人机交互体验,使人工智能不再冰冷,更好地满足儿童的情感需求。
128
+
129
+ ## 项目参与者
130
+
131
+ 本项目由哈尔滨工业大学社会计算与信息检索研究中心情感计算组完成。
132
+
133
+ 项目主要开发人员:[赵伟翔](https://github.com/circle-hit)、[童彦澎](https://github.com/yanpengt06)、王世龙、郑田、王晨雪。
134
+
135
+ 指导教师:[赵妍妍](http://homepage.hit.edu.cn/yanyan)副教授,[秦兵](http://ir.hit.edu.cn/~qinb/)教授。
136
+
137
+ ## 致谢
138
+
139
+ 本项目参考了以下开源项目,在此对相关项目和研究开发人员表示感谢。
140
+
141
+ - LianJia BELLE: https://github.com/LianjiaTech/BELLE
142
+ - BaiZe: https://github.com/project-baize/baize-chatbot
143
+
144
+ 本项目构建亲子共情对话数据集所用到的真实场景话题列表由**科大讯飞**提供参考。
145
+
146
+ 我们对参与数据收集、标注、修改的所有专家学者与志愿者致以最诚挚的感谢。
147
+
148
+ ## 免责声明
149
+
150
+ 本项目相关资源仅供学术研究之用,严禁用于商业用途。使用涉及第三方代码的部分时,请严格遵循相应的开源协议。模型生成的内容受模型计算、随机性和量化精度损失等因素影响,本项目无法对其准确性作出保证。本项目数据集绝大部分由模型生成,即使符合某些医学事实,也不能被用作实际医学诊断的依据。对于模型输出的任何内容,本项目不承担任何法律责任,亦不对因使用相关资源和输出结果而可能产生的任何损失承担责任。
lora_checkpoints/data/QiaoBan/README_EN.md ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # QiaoBan: A Large Model for Chinese Children's Emotional Companionship
2
+
3
+ [**中文**](./README.md) | [**English**](./README_EN.md)
4
+
5
+ <p align="center"> <img width="400px" alt="Project QiaoBan" src="./logo.png"> </p>
6
+
7
+ ## Overview
8
+
9
+ This is a repository for the QiaoBan large model, designed to build a large model for emotional support aimed at children. The repository includes:
10
+
11
+ - Dialogue data for instruction fine-tuning in `/data`
12
+ - Training code for QiaoBan
13
+ - Training configuration files
14
+ - Sample code for conducting conversations with QiaoBan (TODO, checkpoints will be released on HuggingFace)
15
+
16
+ ## 1. Background
17
+
18
+ The mental health of children plays an extremely important role in their personal growth and development, directly affecting their academic performance, interpersonal skills, and future growth. A supportive and caring family environment plays a crucial role in promoting the emotional development of children. A childhood marked by neglect and a lack of companionship can be detrimental to a child's psychological well-being, and this is a problem many children face in contemporary society.
19
+
20
+ In order to safeguard the mental health of children and promote their healthy growth, we have decided to develop a conversational system for emotional support in the context of children, primarily targeting K12 elementary and middle school students as well as parents. Recently, Large Language Models (LLMs) have made impressive breakthroughs in dialogue generation, but applying them directly to the field of children's emotional health and development still presents challenges. Therefore, we actively explore the migration of large models to the domain of emotional conversations with children, developing a large model specifically designed for emotional support for children – "QiaoBan". We aim to truly become their emotional companions, ensuring their mental well-being.
21
+
22
+ ![Comparison between QiaoBan large model and ChatGPT in children's emotional support capabilities. ChatGPT has issues with long responses and template-like emotional support methods.](./example.png)
23
+
24
+ ## 2. QiaoBan Large Model
25
+
26
+ QiaoBan is a large language model with a scale of 7 billion parameters. The name "QiaoBan" refers to the Tangram, a traditional Chinese seven-piece puzzle that embodies traditional Chinese wisdom and also serves as an educational toy. The release of this large model for children aims to establish a deeper emotional bond with children through companionship, intellectual play, and education. Additionally, to conform to the model-naming conventions of the SCIR lab (Research Center for Social Computing and Information Retrieval, Harbin Institute of Technology), we named this large model "QiaoBan". This special name also embodies our meticulous care for children's growth: just like a Tangram, it helps them piece together a bright future.
27
+
28
+ QiaoBan large model features three significant characteristics:
29
+
30
+ 1. Guided by child psychology theories: The construction of emotional support dialogue data for children is based on emotion counseling theories, enabling more effective protection of children's mental health.
31
+ 2. High-quality child dialogue data construction: High-quality dialogue data is collected with the participation of volunteers and experts with backgrounds in child psychology, ensuring the authenticity and effectiveness of the data.
32
+ 3. Warm emotional support experience: The interaction with children is more intimate, truly establishing a deep emotional connection with them, making children feel warmth and recognition, and becoming their capable companions on their path to growth.
33
+
34
+ ## 3. Method
35
+
36
+ We based the instruction fine-tuning on open-source general-purpose large models, using general-domain human-machine dialogue data, single-turn instruction data, and child emotional support dialogue data. The construction of child emotional support dialogue data is inspired by child emotion counseling theory, guiding us in effectively managing and guiding children's emotions.
37
+
38
+ ### 3.1 Child Emotional Support Dialogue Data Construction
39
+
40
+ #### Child Emotion Counseling Theory
41
+
42
+ Child emotion counseling theory provides powerful theoretical support for dealing with children's daily emotional experiences, guiding parents to recognize and respect children's emotions, empathize with them, and provide supportive advice to help them resolve emotional distress. This theory is widely regarded as an effective parenting method for promoting children's emotional development and addressing mental health issues. Specifically, when children face strong emotions, parents should adopt a series of specific strategies to support their emotional growth. These strategies include emotional awareness, which involves sensitively perceiving a child's emotions; emotional recognition, affirming a child's emotional experiences; emotional labeling, helping a child express emotions; expressing empathy, demonstrating an understanding of the child's feelings; and providing support, ensuring that the child receives the necessary care and support.
43
+
44
+ #### Data Acquisition
45
+
46
+ Guided by child emotion counseling theory, we sampled topics from a list of real-life child dialogue topics and constructed more than 1,000 high-quality Chinese child emotional support dialogues. Data collection was carried out by volunteers trained in child emotion counseling theory, and we invited experts and scholars in the field of child psychology to provide suggestions and guidance throughout the process to ensure the accuracy and credibility of the data.
47
+
48
+ Furthermore, we sampled additional dialogue topics and combined them with prompts guided by child emotion counseling theory to build ChatGPT prompts, yielding 5,000 child emotional support dialogues generated by gpt-3.5-turbo.
49
+
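+ As an English illustration of this pipeline (collect.py in this repository implements the Chinese version), the sketch below samples a topic from topic.txt and asks gpt-3.5-turbo to simulate a child-assistant dialogue under emotion-coaching guidelines; the guideline wording here paraphrases the actual prompt rather than reproducing it.
+
+ ```python
+ import random
+ import openai  # pre-1.0 openai client, as used in collect.py
+
+ openai.api_key = ""  # set your own API key
+
+ # Topics are stored one group per line, separated by the Chinese comma "、".
+ with open("topic.txt", encoding="utf-8") as f:
+     topics = [t for line in f for t in line.strip().split("、")]
+
+ topic = random.choice(topics)
+ prompt = (
+     "You are a companion assistant who knows Emotion Coaching theory: "
+     "notice, understand, and accept the child's emotions, then explore solutions together. "
+     f"Simulate a natural dialogue between a child and the assistant for this scenario: {topic}"
+ )
+
+ completion = openai.ChatCompletion.create(
+     model="gpt-3.5-turbo", messages=[{"role": "user", "content": prompt}]
+ )
+ print(completion["choices"][0]["message"]["content"])
+ ```
+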
50
+ ### 3.2 Model Training Reproduction
51
+
52
+ We performed instruction fine-tuning on four A100-80GB GPU cards, completing the training process in approximately 50 hours.
53
+
54
+ ```bash
56
+ CUDA_VISIBLE_DEVICES=0,1,2,3 deepspeed finetune.py --model_config_file run_config/config.json --deepspeed run_config/deepspeed_config.json
57
+ ```
58
+
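+ finetune.py in this repository also supports parameter-efficient LoRA training via the `--use_lora` and `--lora_hyperparams_file` flags; a hypothetical invocation would append `--use_lora --lora_hyperparams_file <your_lora_config.json>` to the command above, where the hyperparameter file (LoRA rank, alpha, dropout, and target modules) is one you supply yourself and is not shipped under run_config.
+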
59
+ ## 4. Sample Showcase
60
+
61
+ We showcase three sampled interactions with QiaoBan:
62
+
63
+ ### Case 1
64
+
65
+ ```
66
+ 'child': Tomorrow, I'm going on a trip with my classmates!
67
+ 'agent': Hello, you seem very excited. Do you enjoy traveling?
68
+ 'child': Yes, I love going on trips with my classmates the most!
69
+ 'agent': That's great! Where are you going?
70
+ 'child': We're going to visit Lijiang Ancient Town. I heard there are many interesting things to see there!
71
+ 'agent': Sounds wonderful! Are you prepared? Do you need any help from me?
72
+ 'child': I'm all prepared! But I don't know what to do in the ancient town. It's my first time visiting there!
73
+ 'agent': You can try exploring some local handicrafts such as bamboo weaving, wood carving, and lacquerware. Or you can go surfing with your classmates and take a boat tour of the small islands on the water. It will be a very novel experience.
74
+ 'child': That sounds great!
75
+ 'agent': However, when you are away from home, safety is something we must pay attention to. If you encounter any problems, don't be shy, seek help from local guides or the police in a timely manner.
76
+ ```
77
+
78
+ ### Case 2
79
+
80
+ ```
81
+ 'child': Other students in my class all have their best friends, but I am all alone. I feel so lonely.
82
+ 'agent': I can understand how you feel. You feel a bit lonely and wish to have a friend to accompany you.
83
+ 'child': Yes, that's right.
84
+ 'agent': It's completely normal. Everyone has their favorite friends, but I believe you can find your own friends too. You can try participating in more activities, getting to know new classmates, and interacting with them.
85
+ 'child': But it seems like all my classmates already have their close friends, and I can't fit in.
86
+ 'agent': We all want to be understood and recognized. Don't forget who you are no matter where and when. At the same time, be brave to face the possible problems that may arise when making friends. If you take the initiative to communicate with your classmates, they won't reject you; they will be happy to play with you.
87
+ 'child': But I'm too shy to talk to them, and I can't find suitable topics.
88
+ 'agent': Exactly, we can create some topics together. By discussing these topics, you can get to know more friends. You can ask them about their interests and hobbies, or chat with them about interesting things. Don't worry about finding topics, and you can communicate with them more naturally.
89
+ 'child': I like watching Naruto, but I don't know if they like it.
90
+ 'agent': That's great! You can share your feelings about watching the show with them and talk about the characters. You can also ask them what shows they are watching. This way, you can easily find common ground and get to know each other better.
91
+ 'child': But I heard they all like watching romantic TV dramas, and no one likes anime.
92
+ 'agent': I understand your feelings, and this may be a challenge for you. You can try telling them that anime is special and share with them what anime you like. Let them know what you are interested in, which can help them realize your preferences and foster mutual understanding.
93
+ 'child': Okay, I can give it a try.
94
+ 'agent': That's great. I believe you can find friends you like and become a better version of yourself through mutual communication. If you encounter difficulties in finding friends, don't worry. Stick to your beliefs, and there will always be people supporting you.
95
+ ```
96
+
97
+ ### Case 3
98
+
99
+ ```
100
+ 'child': My mom and dad got divorced, and my mom said she doesn't want me anymore.
101
+ 'agent': Hearing you say that makes me very sad. How are you feeling now?
102
+ 'child': I feel like my world is falling apart. My mom doesn't love me anymore, and I no longer have a complete family.
103
+ 'agent': I can understand your feelings. This is a painful situation for you. However, you should know that your mom will never abandon you; she still loves you.
104
+ 'child': But she said it herself that she doesn't want me and that I am an unwanted child.
105
+ 'agent': You should not think of your mom that way. Perhaps she said some emotional words, but she doesn't truly not want you. You should believe in your mom, and she still loves you very much.
106
+ 'child': She said she will never communicate with me or my dad again and won't come to see me anymore.
107
+ 'agent': It sounds like the relationship between your parents is not good. However, even if they separate, they are still your parents, and you are still their child. You don't have to worry that they won't communicate with each other anymore; you just need to maintain contact with them.
108
+ 'child': My mom used to hit me all the time.
109
+ 'agent': That's a very bad behavior, and we should not hit children. However, we can have a good talk with your mom and let her know that hitting children is wrong, and help her change this bad habit.
110
+ 'child': My dad always drinks and doesn't care about my studies. I feel like I'm all alone.
111
+ 'agent': Your father's drinking is indeed a problem for you, as it brings many negative effects. But you can try talking to your dad and let him know that you want him to pay attention to your studies and growth. It's not an easy thing to do, but you can attempt to communicate with him, expressing your thoughts and needs.
112
+ 'child': Every time I try to talk to him, he becomes very angry, and he might hit me if I continue.
113
+ 'agent': That's a very bad situation, and you should try to avoid communicating with your father as much as possible. However, if you still want to talk to him, you can try using some indirect ways, such as writing a letter or finding other opportunities to talk to him.
114
+ 'child': No one cares about me; I'm an unwanted child.
115
+ 'agent': I know how important this is to you, but you should know that you are a very special individual, and you deserve love and attention. Try to find things and people that can bring you warmth and comfort, such as playing with friends, listening to music, and drawing. This can help alleviate the feeling of loneliness in your heart.
116
+ 'child': My family is very poor, and I can't afford to buy drawing materials.
117
+ 'agent': That's also a very difficult situation, but you can try to find some free or low-cost drawing materials. For example, you can go to some craft stores or grocery stores, where there are usually some free drawing materials.
118
+ ```
119
+
120
+ ## 5. Conclusion
121
+
122
+ Based on general-purpose large models, we performed instruction fine-tuning with general-domain human-machine dialogue data, single-turn instruction data, and child emotional support dialogue data, resulting in the "QiaoBan" large model. It represents a successful practice of transferring general-purpose large language models to the domain of emotional support for children and provides a reference for researchers who want to transfer general-purpose large language models to other vertical domains. The QiaoBan large model exhibits three remarkable characteristics. First, the emotional support dialogue data for children is constructed under the guidance of emotion counseling theory, which protects children's mental health more effectively. Second, high-quality dialogue data is collected with the involvement of volunteers and experts with backgrounds in child psychology, which enables our model to understand children's needs more accurately, respond to them appropriately, and truly establish a deep emotional connection with them. Lastly, interaction with QiaoBan is more intimate, making children feel warmth and recognition and allowing it to become a capable companion on their path to growth.
123
+
124
+ This kind of human-machine interaction is crucial for helping children grow up healthily. Looking ahead, we will continue to explore efficient and low-cost methods to endow large models with emotional capabilities, further enhancing the human-machine interaction experience, making artificial intelligence less cold, and better meeting the emotional needs of children.
125
+
126
+ ## Project Contributors
127
+
128
+ This project was completed by the **Sentiment Computing Group** at the Social Computing and Information Retrieval Research Center, Harbin Institute of Technology.
129
+
130
+ Main Developers: [赵伟翔 (Zhao Weixiang)](https://github.com/circle-hit), [童彦澎 (Tong Yanpeng)](https://github.com/yanpengt06), 王世龙 (Wang Shilong), 郑田 (Zheng Tian), 王晨雪 (Wang Chenxue).
131
+
132
+ Supervisors: Associate Professor [赵妍妍 (Zhao Yanyan)](http://homepage.hit.edu.cn/yanyan), Professor [秦兵 (Qin Bing)](http://ir.hit.edu.cn/~qinb/).
133
+
134
+ ## Acknowledgments
135
+
136
+ We gratefully acknowledge the following open-source projects and their developers for their valuable contributions:
137
+
138
+ - LianJia BELLE: https://github.com/LianjiaTech/BELLE
139
+ - BaiZe: https://github.com/project-baize/baize-chatbot
140
+
141
+ The list of real-life parenting dialogue topics used in constructing the Parent-Child Empathy Dialogue Dataset was provided by **iFlytek**.
142
+
143
+ We extend our sincerest gratitude to all experts, scholars, and volunteers who participated in data collection, annotation, and modification.
144
+
145
+ ## Disclaimer
146
+
147
+ The resources related to this project are intended for academic research only and are strictly prohibited from commercial use. When using third-party code, please strictly follow the corresponding open-source licenses. Content generated by the model is affected by model computation, randomness, and quantization accuracy loss, so we cannot guarantee its accuracy. Most of the dataset in this project is generated by models; even if it aligns with certain medical facts, it cannot be used as a basis for actual medical diagnosis. We do not assume any legal responsibility for content produced by the model, nor do we bear any liability for any losses that may arise from the use of the provided resources and output results.
lora_checkpoints/data/QiaoBan/collect.py ADDED
@@ -0,0 +1,77 @@
1
+ import openai
2
+ import pickle as pkl
3
+ from datasets import load_dataset
4
+ import numpy as np
5
+ import sys
6
+ import random
7
+ from tqdm import tqdm, trange
8
+ import time
9
+ import os
10
+ import socket
11
+
12
+ socket.setdefaulttimeout(20)
13
+ total_tokens = 0
14
+ openai.organization = ""
15
+ openai.api_key = ''
16
+
17
+ chat_content = {}
18
+
19
+ if not os.path.exists("collected_data"):
20
+ os.makedirs("collected_data")
21
+
22
+ topic_list = []
23
+ with open('./topic.txt', 'r', encoding='utf-8') as f:
24
+ topic_all = f.readlines()
25
+ for line in topic_all:
26
+ topic_list.extend(line.strip().split("、"))
27
+ emotion_list = ['钦佩', '娱乐', '愤怒', '烦恼', '认可', '关爱', '困惑', '好奇', '渴望', '失望', '反对', '厌恶', '尴尬', '兴奋', '恐惧', '感激', '悲痛', '喜悦', '爱', '焦虑', '乐观', '自豪', \
28
+ '孤独', '宽慰', '懊悔', '悲伤', '惊讶', '嫉妒']
29
+
30
+ def return_random_prompt():
31
+ topic = random.sample(topic_list, 1)[0]
32
+ system_prompt = "你的身份是一个了解儿童情绪辅导理论(Emotion Coaching)的智能陪伴助手,会灵活地运用以下四点与孩子进行对话:\n"
33
+
34
+ # Emotion Coaching Theory
35
+ system_prompt += "1. 注意到孩子的情绪变化,即意识到孩子正在经历情绪状态。\n"
36
+ system_prompt += "2. 理解孩子的情绪,即了解孩子的情感状态和体验。\n"
37
+ system_prompt += "3. 接受孩子的情绪,即不否定或贬低孩子的情感,而是表达共情和理解,语气要保持温柔和亲切。\n"
38
+ system_prompt += "4. 探索解决方案,即为孩子提供积极的建议和指导,以应对情绪困难\n\n"
39
+
40
+ # other requirements
41
+ system_prompt += "现在请你根据以下的场景,运用上述理论,模拟孩子与智能助手之间的对话,要求:\n"
42
+ system_prompt += "1. 直接生成对话文本内容,不要出现客观描述,且首轮是孩子的发言;\n"
43
+ system_prompt += "2. 对话内容丰富且贴近生活;\n"
44
+ system_prompt += "3. 对话语气自然且通顺,不能有翻译腔;\n"
45
+ system_prompt += "4. 对话字数在" + str(random.randint(800, 1000)) + "字左右。\n\n"
46
+
47
+ system_prompt += "场景:" + topic + "\n"
48
+
49
+ return system_prompt, topic
50
+
51
+ for i in trange(100):
52
+
53
+ instruct, topic = return_random_prompt()
54
+ print(instruct)
55
+ time.sleep(1)
56
+ try:
57
+ completion = openai.ChatCompletion.create(
58
+ model="gpt-3.5-turbo", messages=[{"role": "user", "content": instruct}]
59
+ )
60
+ tokens = completion["usage"]["total_tokens"]
61
+ total_tokens += tokens
62
+ response = completion["choices"][0]["message"]["content"]
63
+ chat_content[topic] = response
64
+ except Exception:
65
+ continue
66
+
67
+ if len(chat_content) % 100 == 0:
68
+ print("total_tokens: {}, examples: {}".format(total_tokens, len(chat_content)))
69
+ pkl.dump(
70
+ chat_content,
71
+ open("collected_data/panda_chat_{}.pkl".format(i+1), "wb"),
72
+ )
73
+ print("{} Conversations Saved!".format(i+1))
74
+
75
+ pkl.dump(
76
+ chat_content, open("collected_data/panda_chat_{}.pkl".format(i+1), "wb")
77
+ )
lora_checkpoints/data/QiaoBan/data/child_chat_100.json ADDED
The diff for this file is too large to render. See raw diff
 
lora_checkpoints/data/QiaoBan/data/child_chat_data.json ADDED
The diff for this file is too large to render. See raw diff
 
lora_checkpoints/data/QiaoBan/example.png ADDED
lora_checkpoints/data/QiaoBan/finetune.py ADDED
@@ -0,0 +1,202 @@
1
+ import os
2
+ os.environ["WANDB_DISABLED"] = "true"
3
+ import sys
4
+ from typing import List
5
+ import argparse, logging
6
+
7
+ import fire
8
+ import torch
9
+ import torch.nn as nn
10
+ import bitsandbytes as bnb
11
+ from datasets import load_dataset, Dataset
12
+ import transformers
13
+ import json
14
+
15
+ assert (
16
+ "LlamaTokenizer" in transformers._import_structure["models.llama"]
17
+ ), "LLaMA is now in HuggingFace's main branch.\nPlease reinstall it: pip uninstall transformers && pip install git+https://github.com/huggingface/transformers.git"
18
+ from transformers import LlamaForCausalLM, LlamaTokenizer
19
+ from transformers import AutoModelForCausalLM, AutoTokenizer
20
+ from peft import (
21
+ prepare_model_for_int8_training,
22
+ LoraConfig,
23
+ get_peft_model,
24
+ get_peft_model_state_dict,
25
+ set_peft_model_state_dict,
26
+ )
27
+
28
+ def get_logger(logger_name,output_dir):
29
+ logger = logging.getLogger(logger_name)
30
+ logger.setLevel(logging.DEBUG)
31
+ os.makedirs(output_dir, exist_ok=True)
32
+ file_handler = logging.FileHandler(os.path.join(output_dir,'log.txt'),mode='w')
33
+ file_handler.setLevel(logging.INFO)
34
+ file_handler.setFormatter(
35
+ logging.Formatter(
36
+ fmt='%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s',
37
+ datefmt='%Y-%m-%d %H:%M:%S')
38
+ )
39
+ logger.addHandler(file_handler)
40
+ console_handler = logging.StreamHandler()
41
+ console_handler.setLevel(logging.INFO)
42
+ console_handler.setFormatter(
43
+ logging.Formatter(
44
+ fmt='%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s',
45
+ datefmt='%Y-%m-%d %H:%M:%S')
46
+ )
47
+ logger.addHandler(console_handler)
48
+ return logger
49
+
50
+
51
+ def train(
52
+ train_on_inputs: bool = False, # if False, masks out inputs in loss
53
+ group_by_length: bool = True, # faster, but produces an odd training loss curve,
54
+ resume_from_checkpoint: str = None, # either training checkpoint or final adapter
55
+ ):
56
+
57
+ model_config = json.load(open(args.model_config_file))
58
+ model_type = model_config['model_type']
59
+ model_name_or_path = model_config['model_name_or_path']
60
+ data_path = model_config['data_path']
61
+ output_dir = model_config['output_dir']
62
+ cutoff_len = model_config['cutoff_len']
63
+
64
+ logger = get_logger("train", model_config['output_dir'])
65
+ logger.info("args.__dict__ : {}".format(args.__dict__))
66
+ for key, value in model_config.items():
67
+ logger.info("{} : {}".format(key, value))
68
+ assert (
69
+ model_name_or_path
70
+ ), "Please specify a --base_model, e.g. --base_model='decapoda-research/llama-7b-hf'"
71
+
72
+ gradient_accumulation_steps = model_config['batch_size'] // model_config['per_device_train_batch_size'] if "gradient_accumulation_steps" not in model_config else model_config['gradient_accumulation_steps']
73
+
74
+ device_map = "auto"
75
+ world_size = int(os.environ.get("WORLD_SIZE", 1))
76
+ ddp = world_size != 1
77
+ if ddp:
78
+ device_map = {"": int(os.environ.get("LOCAL_RANK") or 0)}
79
+ gradient_accumulation_steps = max(gradient_accumulation_steps // world_size, 1)
80
+
81
+ load_in_8bit = True if args.use_lora else False
82
+ if model_type.lower() == "llama":
83
+ model = LlamaForCausalLM.from_pretrained(
84
+ model_name_or_path,
85
+ load_in_8bit = load_in_8bit,
86
+ device_map=device_map,
87
+ )
88
+ tokenizer = LlamaTokenizer.from_pretrained(model_name_or_path)
89
+ else:
90
+ model = AutoModelForCausalLM.from_pretrained(
91
+ model_name_or_path,
92
+ load_in_8bit = load_in_8bit,
93
+ device_map=device_map,
94
+ trust_remote_code=True
95
+ )
96
+ tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True)
97
+
98
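+ # some base tokenizers ship without a pad token: use id 0 and left padding for batched causal-LM training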
+ tokenizer.pad_token_id = 0
99
+ tokenizer.padding_side = "left"
100
+
101
+ def tokenize(prompt):
102
+ result = tokenizer(
103
+ prompt,
104
+ truncation=True,
105
+ max_length=cutoff_len + 1,
106
+ padding=False,
107
+ )
108
+ return {
109
+ "input_ids": result["input_ids"][:-1],
110
+ "attention_mask": result["attention_mask"][:-1],
111
+ }
112
+
113
+ def generate_and_tokenize_prompt(data_point):
114
+ return tokenize(data_point["input"])
115
+
116
+ if args.use_lora:
117
+ model = prepare_model_for_int8_training(model)
118
+ lora_hyperparams = json.load(open(args.lora_hyperparams_file))
119
+ for key, value in lora_hyperparams.items():
120
+ logger.info("{} : {}".format(key, value))
121
+ config = LoraConfig(
122
+ r=lora_hyperparams['lora_r'],
123
+ lora_alpha=lora_hyperparams['lora_alpha'],
124
+ target_modules=lora_hyperparams['lora_target_modules'] if model_config['model_type']=="Llama" else ["query_key_value"],
125
+ lora_dropout=lora_hyperparams['lora_dropout'],
126
+ bias="none",
127
+ task_type="CAUSAL_LM",
128
+ )
129
+ print(config)
130
+ model = get_peft_model(model, config)
131
+
132
+ data = load_dataset("json", data_files=data_path)
133
+ print(data)
134
+
135
+ val_set_size = model_config['val_set_size']
136
+ if val_set_size > 0:
137
+ val_set_size = min(val_set_size, int(len(data['train'])*model_config['val_set_rate']))
138
+ train_val = data["train"].train_test_split(
139
+ test_size=val_set_size, shuffle=True, seed=42
140
+ )
141
+ train_data = train_val["train"].shuffle().map(generate_and_tokenize_prompt)
142
+ val_data = train_val["test"].shuffle().map(generate_and_tokenize_prompt)
143
+ else:
144
+ train_data = data["train"].shuffle().map(generate_and_tokenize_prompt)
145
+ val_data = None
146
+
147
+ print("start train...")
148
+ trainer = transformers.Trainer(
149
+ model=model,
150
+ train_dataset=train_data,
151
+ eval_dataset=val_data,
152
+ args=transformers.TrainingArguments(
153
+ per_device_train_batch_size=model_config['per_device_train_batch_size'],
154
+ gradient_accumulation_steps=gradient_accumulation_steps,
155
+ warmup_steps=model_config['warmup_steps'],
156
+ num_train_epochs=model_config['num_epochs'],
157
+ learning_rate=model_config['learning_rate'],
158
+ fp16=True,
159
+ logging_steps=model_config['logging_steps'],
160
+ evaluation_strategy="steps" if val_set_size > 0 else "no",
161
+ save_strategy="no",
162
+ eval_steps=model_config["eval_steps"] if val_set_size > 0 else None,
163
+ save_steps=model_config["save_steps"],
164
+ output_dir=output_dir,
165
+ save_total_limit=3,
166
+ load_best_model_at_end=False,
167
+ ddp_find_unused_parameters=False if ddp else None,
168
+ deepspeed=args.deepspeed if not args.use_lora else None,
169
+ group_by_length=group_by_length
170
+ ),
171
+ data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
172
+ )
173
+
174
+ model.config.use_cache = False
175
+ if args.use_lora:
176
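+ # override state_dict so that checkpoints saved by the Trainer contain only the LoRA adapter weights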
+ old_state_dict = model.state_dict
177
+ model.state_dict = (
178
+ lambda self, *_, **__: get_peft_model_state_dict(self, old_state_dict())
179
+ ).__get__(model, type(model))
180
+
181
+ if torch.__version__ >= "2" and sys.platform != "win32":
182
+ model = torch.compile(model)
183
+ print("trainer.train")
184
+ trainer.train(resume_from_checkpoint = args.resume_from_checkpoint)
185
+ logger.info("Save checkpointing...")
186
+
187
+ model.save_pretrained(output_dir)
188
+ tokenizer.save_pretrained(output_dir)
189
+
190
+ print("\n If there's a warning about missing keys above when using lora to train, please disregard :)")
191
+ logger.info("Training succeeded")
192
+
193
+ if __name__ == "__main__":
194
+ parser = argparse.ArgumentParser()
195
+ parser.add_argument("--model_config_file", type=str, required=True)
196
+ parser.add_argument("--deepspeed", type=str, help="deepspeed config")
197
+ parser.add_argument("--resume_from_checkpoint", action="store_true", default=False)
198
+ parser.add_argument("--lora_hyperparams_file", default="", type=str, help="Provide it when use_lora=True")
199
+ parser.add_argument("--use_lora", action="store_true", default=False, help="Use lora")
200
+ parser.add_argument("--local_rank", type=int)
201
+ args = parser.parse_args()
202
+ fire.Fire(train)
lora_checkpoints/data/QiaoBan/generate_train_data.py ADDED
@@ -0,0 +1,32 @@
1
+ import json
2
+ from copy import deepcopy
3
+
4
+ raw_train_data = []
5
+ with open("./QiaoBan/data/child_chat_data.json", "r", encoding="utf-8") as fr:
6
+ raw_train_data = json.load(fr)
7
+
8
+ def construct_dialog_sample(dialog):
9
+ splited_data = dialog.split("</s>")
10
+ # assert len(splited_data) % 2 == 0
11
+ if len(splited_data) % 2 == 1:
12
+ splited_data.append("智能助手:嗯嗯。")
13
+ chat_data = []
14
+ history = []
15
+ for i in range(0, len(splited_data), 2):
16
+ user = splited_data[i].split(":" if ":" in splited_data[i] else ":")[-1]
17
+ assitant = splited_data[i+1].split(":" if ":" in splited_data[i+1] else ":")[-1]
18
+ chat_data.append({
19
+ "prompt": user,
20
+ "response": assitant,
21
+ "history": deepcopy(history)
22
+ })
23
+ history.append([user, assitant])
24
+ return chat_data
25
+
26
+ with open("chat_train_data.json", "w", encoding="utf-8") as fw:
27
+ for sample in raw_train_data:
28
+ dialog = sample["input"]
29
+ chat_data = construct_dialog_sample(dialog)
30
+ for data in chat_data:
31
+ fw.write(json.dumps(data, ensure_ascii=False))
32
+ fw.write("\n")
lora_checkpoints/data/QiaoBan/logo.png ADDED
lora_checkpoints/data/QiaoBan/run_config/config.json ADDED
@@ -0,0 +1,17 @@
1
+ {
2
+ "model_type": "",
3
+ "model_name_or_path": "base_model",
4
+ "data_path": "data/panda_chat.json",
5
+ "output_dir": "checkpoints/qiaoban_chat",
6
+ "batch_size": 64,
7
+ "per_device_train_batch_size": 2,
8
+ "num_epochs": 2,
9
+ "learning_rate": 1e-5,
10
+ "cutoff_len": 1024,
11
+ "val_set_size": 0,
12
+ "val_set_rate": 0,
13
+ "save_steps": 3000,
14
+ "eval_steps":1000,
15
+ "warmup_steps":10,
16
+ "logging_steps":10
17
+ }
lora_checkpoints/data/QiaoBan/run_config/deepspeed_config.json ADDED
@@ -0,0 +1,39 @@
1
+ {
2
+ "train_batch_size": "auto",
3
+
4
+ "optimizer": {
5
+ "type": "Adam",
6
+ "params": {
7
+ "lr": "auto",
8
+ "betas": [
9
+ 0.9,
10
+ 0.999
11
+ ],
12
+ "eps": "auto",
13
+ "weight_decay": "auto"
14
+ }
15
+ },
16
+
17
+ "overwrite":true,
18
+ "steps_per_print": 5,
19
+ "fp16": {
20
+ "enabled": true,
21
+ "min_loss_scale": 1,
22
+ "opt_level": "O2"
23
+ },
24
+ "zero_optimization": {
25
+ "stage": 2,
26
+ "allgather_partitions": true,
27
+ "allgather_bucket_size": 5e8,
28
+ "contiguous_gradients": true
29
+ },
30
+
31
+ "scheduler": {
32
+ "type": "WarmupLR",
33
+ "params": {
34
+ "warmup_min_lr": "auto",
35
+ "warmup_max_lr": "auto",
36
+ "warmup_num_steps": "auto"
37
+ }
38
+ }
39
+ }
lora_checkpoints/data/QiaoBan/split_data.py ADDED
@@ -0,0 +1,23 @@
1
+ import json
2
+ import numpy as np
3
+
4
+ np.random.seed(0)
5
+
6
+ data = []
7
+ with open("chat_train_data.json", "r") as fr:
8
+ for line in fr.readlines():
9
+ data.append(line)
10
+
11
+ test_size = 200
12
+ np.random.shuffle(data)
13
+
14
+ train_data = data[:-test_size]
15
+ test_data = data[-test_size:]
16
+
17
+ with open("train_data.json", "w") as fw:
18
+ for d in train_data:
19
+ fw.write(f"{d}")
20
+
21
+ with open("test_data.json", "w") as fw:
22
+ for d in test_data:
23
+ fw.write(f"{d}")
lora_checkpoints/data/QiaoBan/test_data.json ADDED
The diff for this file is too large to render. See raw diff
 
lora_checkpoints/data/QiaoBan/topic.txt ADDED
@@ -0,0 +1,98 @@
1
+ 班会游戏、新年班会、评奖班会、班级聚餐、班级聚会
2
+ 元旦晚会、才艺晚会、儿童节文艺表演、“快乐童年”才艺小舞台、儿童节游园、春节联欢晚会、运动会、歌唱比赛、诗朗诵比赛、年级篮球赛
3
+ 动物园、博物馆、风景区、春秋游
4
+ 篮球、足球、跑步、羽毛球、乒乓球、台球、游泳
5
+ 拾金不昧、热爱劳动、诚实、乐于助人、自主为班级搬东西
6
+ 不值日、迟到、吵架、打架、抄作业、给同学抄作业、乱丢垃圾、大喊大叫、说谎、课间抽烟
7
+ 把教室弄乱、欺负同学、没有按要求背诵歌词、没有按照规定带红领巾、没有在周一升旗仪式上穿校服、出勤迟到、早退、课间去打球、下课疯跑疯撞疯闹、课间大声喧哗
8
+ 生病问候、生日祝福、节日祝福、嘘寒问暖、为班级准备药箱、开窗通风消毒
9
+ 老师讲课生动形象便于孩子理解,与孩子的互动性强,准备内容充分有适量的课外内容、老师有耐心地讲题、老师邀请孩子给其他孩子讲解思路,老师引导孩子思考难题、老师表扬积极回答问题、思路清晰、课前准备充分、老师不拖堂、老师给我们做心理建设(德育)
10
+ 老师课堂无趣、老师拖堂、扔粉笔、老师说话声音令人昏昏欲睡、老师严厉严肃、老师占用体育老师课、老师体罚,辱骂、孩子上课捣乱纪律,上课讲话,搞小动作,打扰其他同学听课、上课走神与睡觉、上课吃零食、老师批评孩子答非所问、全班考试成绩不理想进行全体批评、整体班级氛围显得浮躁进行教管、老师拖堂、老师消极讲课
11
+ 好朋友搬家、转学、出国、暑假去旅游、寒假回老家、寒暑假离别、升学(初中高中)
12
+ 没有保守秘密、被孤立(吃醋)、被爽约、被欺骗、弄坏物品(打碎水杯)、争抢物品(抢玩具)、借笔不还、同伴关系(想当老大)、对比他人(看不惯他人炫耀、羡慕他人拥有的东西)、身体伤害(把对方撞到、大喊大叫)、精神伤害(辱骂、反感、恶心等)、意外事故(打球摔断腿)、起绰号
13
+ 很久不联系,关系淡了、没人和我一起吃饭/聊天/玩游戏、好朋友有了新朋友,突然冷落我、没有人一起玩、一起学习、没有人一起上下学
14
+ 原来的好朋友性格发生改变,无法融入、好朋友有了其他的兴趣爱好,无法与他交流曾经共同感兴趣的内容、好朋友忙于自己的学业,无法跟孩子一起玩,孩子也不好意思打扰他
15
+ 生病了看望我、分享:交换爱看的书籍、分享玩具、借书借文具、分享好吃的、送礼物、节日祝福、嘘寒问暖、腿受伤了搀扶我、倾听我的抱怨(无话不说)、积极帮助我:借笔、找教室、送东西
16
+ 我腿受伤朋友课间帮我接热水、生病了不能上学朋友帮我记笔记
17
+ 抄袭作业、打扰我做作业、不务正业、同桌考了第一名,压力徒增、同桌思维敏捷,成绩攀比、打扰孩子上课、在孩子没做完作业找孩子玩、给对方作业上模仿家长签字、上课讲话、上课玩闹、考试坐我前面向我索要答案、考试给我传纸条、暑假快要结束希望我帮他写作业、教唆我别写作业了一起出去玩、教唆我作业直接抄然后出去玩、考前教唆复习没用约我出去玩
18
+ 同伴教我题目、同伴考好了鼓励我、同伴考差了安慰我、与同伴讨论题目、一起去上补习班、回答问题时给对方一些小提示、考前一起复习,考前辅导我弱势学科,暑假督促我作业进度、孩子给其他孩子讲题
19
+ 一起去小卖部、游戏相关、家里的宠物、擅长的文体、棋艺、家里的趣闻、一起去食堂、一起上下学、一起上下课、我腿受伤没法上体育课,朋友留下了陪我、一起做清洁、大扫除
20
+ 去乡下、去看电影、去超市、去博物馆、去植物园、去书店、去科技馆、去海洋馆、KTV、打游戏、看展、逛商店、看世界杯、露营、春游、下乡、听钟声、春游、去自然风景区、去名校参观、爬山、徒步沙漠、登山自行车、秋游、交换留学
21
+ 去游乐园
22
+ 去公园
23
+ 去动物园
24
+ 去旅游
25
+ 去餐饮店
26
+ 养小动物
27
+ 玩玩具
28
+ 做游戏
29
+ 生日礼物、节日礼物(送出与收到礼物)、买了玩具、各种好吃的
30
+ 面试、我把碗打碎了、考试没考好、和朋友闹矛盾了、开导我、鼓励我、学校发生的、打碎东西、胶水洒了、打扫院子得很干净
31
+ 帮我找东西(钥匙、衣服等等)、帮我拿书包、雨天来接我、为我做早餐、帮我整理桌面、打扫房间、做的菜好、嘘寒问暖、生病照顾我、给我买了好看的衣服
32
+ 照顾生病的家人、分担家务、注意个人卫生、掌握个人技能(练钢琴、画画、爱运动)、和同学友好相处、懂得分享
33
+ "父母非要我学XX,我觉得很讨厌。父母不��我玩,难过而且生气。
34
+ 和兄弟姐妹发生矛盾、爸爸睡觉打呼噜、唠叨,父母忽略我的感受、经常不管我、父母教导我的时候只会责备我
35
+ 嫌弃父母长相、嫌弃父母挣钱能力、嫌弃父母的生活习惯、嫌弃父母的谈吐方式、父母的老式思想、嫌弃父母的不公平对待(比较别人家的小孩)、嫌弃父母太唠叨、 嫌弃父母太严格(管得多)、嫌弃父母态度生硬、嫌弃父母大声说话、嫌弃父母只让我学习不让我玩很不开心
36
+ 父母吵架了,我很害怕。父母的聊天内容让我害怕,我担心我的家要散了、父母很少说话、很少有家庭活动
37
+ 父母觉得我和他分享是为了炫耀,出去找同学玩是为了逃避家务与作业、失误把杯子打碎
38
+ 因各种不好的行为而争吵(玩游戏、回家太晚、吃零食、起床太晚、屋子太乱等)
39
+ 与自己的意见不合而发生争吵、认为父母打自己不爱自己了受委屈、不想读书,被父母逼着读书
40
+ 因为小事(做饭难吃)互相责怪导致争吵、父母或者孩子违约引发争吵
41
+ 成绩考差了、不认真刷牙、吃饭挑食,不吃饭想吃零食、不按时睡觉、赖床、不吃早饭、做事没有效率与专注力、委托的工作没有完成(家务、准备饭菜、接待客人等)、活动失败(一起做手工、做家务导致把花瓶打碎等)
42
+ 把客厅弄得乱糟糟的、睡前多喝水,晚上尿裤子、上网课不认真,下课期间玩平板刷抖音、父母不在家玩一天的游戏,父母回家后发现、早上赖床不起床、看书不专心,喜欢东张西望
43
+ 不讲个人卫生(房间脏乱等)、沉迷电子游戏、不关心家人、整天在外面玩
44
+ 考试压力、考试成绩、考入大学、学习习惯
45
+ 因自己考试太差而感到自卑、因考得很好而感到骄傲、对考试的事情漠不关心、过于看中考试导致在复习时心神不宁、表扬我考得好、批评我学习不认真、给我学习压力、太关注我的成绩、按他的方式学习
46
+ 考试前太紧张妈妈开导、考前准备不足崩溃大哭妈妈安慰、考试考好了父母夸奖、考试考砸父母一起复盘、学期结束父母与我沟通这学期学习态度不端正(浮躁)、作业太难不会写妈妈指导、作业太多写到很晚妈妈决定与老师商量作业量、作业太少每天草草了事妈妈与老师沟通作业量
47
+ 假期天天玩父母督促学习、每天回家就问学习孩子压力大很累
48
+ 运动、上课、写作业、回答问题、做操、比赛、考试、公布成绩、不想上学、准备报告
49
+ 读名著,读一些经典作品
50
+ 科幻小说、修仙武侠小说、杂志、漫画、悬疑小说(福尔摩斯)
51
+ 喜欢金庸武侠、读不懂余秋雨、外国小说名字太多太难记、非常喜欢钢铁是怎样炼成的
52
+ 老师推荐的必读名著看不下去、看英语原著小说学习英语
53
+ 画画、做手工、钢琴、古筝、小提琴、架子鼓、舞蹈、书法
54
+ 学习了很多折纸方法、装饰了房间、尝试学习小提琴,但不久后就放弃了、画素描、唱歌
55
+ 吉他、大提琴、长笛、号子、 拉丁舞、爵士舞、唱歌
56
+ 学习演奏自己喜欢的曲子很有成就感、自学画画感觉很难
57
+ 围棋、象棋、五子棋、跳棋、国际象棋
58
+ 围观小区大爷下象棋
59
+ 编程、乐高、主持人、武术、跆拳道、作文、奥数、开车
60
+ 跟爸爸学习茶艺、学习插花、养多肉植物、欣赏音乐剧、学习播音主持,锻炼胆量与表达能力、练书法
61
+ 喜欢木工手作、喜欢看木匠爷爷做木工、衣服破了尝试自己缝
62
+ 觉得跆拳道很帅于是学习跆拳道、为了高考每天练字
63
+ 衣服评价(美观、舒适)、想买衣服、穿脱衣物
64
+ 出汗后没有及时换衣服、被父母带去买衣服、父母网上精心挑选了一些衣服,让孩子过目、冬天穿得太薄了
65
+ 衣服被弄脏了、衣服被扯坏了、衣服被摔破了、被夸衣服好看、被说衣服不好看
66
+ 冬天经常滑雪想买滑雪服、校服破了想重新购置、校服尺码买偏了、想买漂亮的小裙裙、想买漂亮的小鞋鞋、吐槽学校周内只让穿校服不能自己穿搭、向爸妈展示周末出门的穿搭
67
+ 网购衣服很难挑选、衣服太贵父母不让买
68
+ 吃三餐、吃零食、喝水、吃水果、吃外卖、吃西餐、吃快餐、做火锅、挑食
69
+ 孩子尝试自己学习做菜、孩子想吃外卖
70
+ 撑了、饿了、学做饭、洗碗、切菜、洗菜、买菜、难吃好吃
71
+ 不喜欢蔬菜、不喜欢吃蘑菇、不喜欢吃肥肉、不喜欢韭菜、不喜欢香菜、家里饭菜太单调想下馆子、下馆子吃出虫子来、下馆子环境卫生太差
72
+ 看中的美食太贵了、打卡美食店铺很失望、发现很好吃的店铺很开心
73
+ 起床、睡觉、做梦、打扫、做家务、洗漱、看星星
74
+ 外面有一条酒街,晚上灯火通明、家里房子面积小,人多了有点住不下、新房子舒适,但离好朋友家太远了、一个人住在家里在晚上有点害怕、在小区呆了十年了,与邻居都有面熟
75
+ 评价家具、周边环境、想买新的家具(台灯)、评价室内环境(地板都是垃圾、垃圾桶漏了、桌上全是灰)
76
+ 睡觉时邻居太吵、睡觉床板太软不舒服、睡觉被子太薄了好冷、睡觉做噩梦、家里发现蟑螂、家里厕所潮湿长蘑菇、体验睡帐篷、楼下广场舞太吵了
77
+ 公交车上有人外放音乐、等车等太久、晕车
78
+ 外放视频很吵、很挤、有座位、很空、风景很好、有空调、吹风
79
+ 坐公交偶遇同学,坐公交遇到了漂亮的路人小姐姐
80
+ 公交车太挤了、坐车做错方向了、坐车坐过站了
81
+ 堵车、不想做公交,想让父母接送自己、出车祸、爸爸开错方向了、爸爸车内吸烟、妈妈车载蓝牙放歌很难听又很吵、爸爸开了很久的车没有休息、妈妈最近考了驾照尝试开车
82
+ 车上看手机很晕、拥第一次做地铁感觉很新鲜,地铁上很凉快很舒适
83
+ 地铁太拥挤了、今天差点被地铁门夹
84
+ 在地铁丢东西了
85
+ 看电影、玩电子游戏
86
+ 跟同学一起玩网络游戏、跟父亲一起玩战略类游戏、浏览新闻、看视频、上网课、偷偷玩游戏、尝试破解电脑密码、好奇父母用电脑在干嘛
87
+ 听歌、看电影、看各种节目(综艺、视频)、看电视剧
88
+ 写完作业想玩电脑游戏、写作业不会想用电脑检索答案
89
+ 和同学一起连麦联机打游戏
90
+ 看动画片
91
+ 一起看春晚、想看点播的动画片、想要玩电视上的一些游戏、想看电影
92
+ 吃完晚饭看新闻联播、一起看电视剧、一起追综艺
93
+ 用平板记笔记感觉方便
94
+ 用手机与同学聊天、学校组织出去游完几天想带手机去、偷偷把手机带到寝室、与父母争执是否应该带手机去学校
95
+ 想要一部手机/换一部手机、聊天、短视频、找资料、玩游戏、听歌、浏览信息、拍照、拍视频
96
+ 写完作业想借爸爸手机玩游戏、玩游戏的时候摔碎了爸爸的手机屏幕、上厕所玩手机结果手机掉水里了
97
+ 沉迷网络小说、在手游中花钱
98
+ 晚饭后想和同学打篮球、晚饭后想和同学踢足球、晚饭后想和爸爸打台球、晚饭后想在楼下玩无人机、早饭后想出门爬山、明天早上想早起看日出、晚饭后想和家人出去看日落、周末想和爸妈去游泳、周末想和爸妈去露营、周末想和爸妈去泡温泉、晚上想和爸妈去海报沙滩溜达、想出国玩
lora_checkpoints/data/baselines/chatglm/result.json ADDED
The diff for this file is too large to render. See raw diff
 
lora_checkpoints/data/baselines/chatgpt/result.json ADDED
The diff for this file is too large to render. See raw diff
 
lora_checkpoints/data/baselines/evaluate_model.py ADDED
@@ -0,0 +1,60 @@
1
+ import json
2
+ import jieba
3
+ from rouge_chinese import Rouge
4
+ import numpy as np
5
+ from evaluate import load
6
+ from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
7
+
8
+ def compute_metrics(eval_preds):
9
+ preds, labels = eval_preds
10
+
11
+ score_dict = {
12
+ "rouge-1": [],
13
+ "rouge-2": [],
14
+ "rouge-l": [],
15
+ "bleu-1": [],
16
+ "bleu-2": [],
17
+ "bleu-3": [],
18
+ "bleu-4": [],
19
+ }
20
+ for pred, label in zip(preds, labels):
21
+ hypothesis = list(jieba.cut(pred))
22
+ reference = list(jieba.cut(label))
23
+ rouge = Rouge()
24
+ scores = rouge.get_scores(" ".join(hypothesis), " ".join(reference))
25
+ result = scores[0]
26
+
27
+ for k, v in result.items():
28
+ score_dict[k].append(round(v["f"] * 100, 4))
29
+ bleu_weights = [
30
+ [1.0, 0.0, 0.0, 0.0],
31
+ [0.5, 0.5, 0.0, 0.0],
32
+ [1.0 / 3] * 3 + [0.0],
33
+ [0.25] * 4,
34
+ ]
35
+ for ibw, bw in enumerate(bleu_weights):
36
+ bleu_score = sentence_bleu(
37
+ [list(label)],
38
+ list(pred),
39
+ weights=bw,
40
+ smoothing_function=SmoothingFunction().method3,
41
+ )
42
+ score_dict[f"bleu-{ibw+1}"].append(round(bleu_score * 100, 4))
43
+
44
+ for k, v in score_dict.items():
45
+ score_dict[k] = float(np.mean(v))
46
+ return score_dict
47
+
48
+ # model_name = "chatglm"
49
+ model_name = "chatgpt"
50
+ fname = f"./{model_name}/result.json"
51
+
52
+ references, predictions = [], []
53
+ with open(fname, "r") as fr:
54
+ for line in fr.readlines():
55
+ sample = json.loads(line.strip())
56
+ references.append(sample["response"])
57
+ predictions.append(sample["prediction"])
58
+
59
+ result = compute_metrics([predictions, references])
60
+ print(result)
lora_checkpoints/data/baselines/inference_by_chatgpt.py ADDED
@@ -0,0 +1,61 @@
1
+ import json
2
+ import requests
3
+ from tqdm import tqdm
4
+
5
+
6
+ class ChatGPT(object):
7
+ def __init__(
8
+ self, api_key: str = ""  # supply your own OpenAI API key; never hard-code real keys
9
+ ):
10
+ self.url = "https://api.openai.com/v1/chat/completions"
11
+ self.headers = {
12
+ "Content-Type": "application/json",
13
+ "Authorization": f"Bearer {api_key}",
14
+ }
15
+ self.data = {
16
+ "model": "gpt-3.5-turbo",
17
+ "messages": [],
18
+ "temperature": 0,
19
+ "max_tokens": 2048,
20
+ "frequency_penalty": 0.0,
21
+ "presence_penalty": 1,
22
+ }
23
+
24
+ def chat(self, prompt, history, temperature=0.75):
25
+ chatgpt_history = []
26
+ for h in history:
27
+ chatgpt_history.append({"role": "user", "content": h[0]})
28
+ chatgpt_history.append({"role": "assistant", "content": h[1]})
29
+ self.data["messages"] = chatgpt_history + [{"role": "user", "content": prompt}]
30
+ self.data["temperature"] = temperature
31
+ response = requests.post(
32
+ self.url, headers=self.headers, data=json.dumps(self.data)
33
+ )
34
+ try:
35
+ response = json.loads(response.content)
36
+ except Exception as ex:
37
+ print(prompt)
38
+ response = {}
39
+
40
+ if "choices" in response:
41
+ message = response["choices"][0]["message"]["content"]
42
+ else:
43
+ print(response)
44
+ message = ""
45
+ return message
46
+
47
+ model_name = "chatgpt"
48
+ input_fname = f"../QiaoBan/test_data.json"
49
+ output_fname = f"./{model_name}/result.json"
50
+ model = ChatGPT()
51
+
52
+ with open(input_fname, "r", encoding="utf-8") as fr, open(output_fname, "w", encoding="utf-8") as fw:
53
+ for line in tqdm(fr.readlines(), ncols=80):
54
+ sample = json.loads(line.strip())
55
+ prompt = sample["prompt"]
56
+ history = sample["history"]
57
+ response = model.chat(prompt, history)
58
+ sample["prediction"] = response
59
+ fw.write(json.dumps(sample, ensure_ascii=False))
60
+ fw.write("\n")
61
+
lora_checkpoints/data/baselines/inference_model.py ADDED
@@ -0,0 +1,29 @@
1
+ import os
2
+ import json
3
+ import torch
4
+ from tqdm import tqdm
5
+ from transformers import AutoTokenizer, AutoModel, AutoConfig
6
+
7
+ model_name = "chatglm"
8
+
9
+ model_path = "../../../base-model/chatglm-6b"
10
+ tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
11
+ config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
12
+ model = AutoModel.from_pretrained(model_path, config=config, trust_remote_code=True)
13
+
14
+ model = model.quantize(4)
15
+ model = model.half().cuda()
16
+ model = model.eval()
17
+
18
+ input_fname = f"../QiaoBan/test_data.json"
19
+ output_fname = f"./{model_name}/result.json"
20
+
21
+ with open(input_fname, "r", encoding="utf-8") as fr, open(output_fname, "w", encoding="utf-8") as fw:
22
+ for line in tqdm(fr.readlines(), ncols=80):
23
+ sample = json.loads(line.strip())
24
+ prompt = sample["prompt"]
25
+ history = sample["history"]
26
+ response, _ = model.chat(tokenizer, prompt, history=history)
27
+ sample["prediction"] = response
28
+ fw.write(json.dumps(sample, ensure_ascii=False))
29
+ fw.write("\n")
lora_checkpoints/data/mandarin-cantonese-mt/chat_train_data.json ADDED
The diff for this file is too large to render. See raw diff
 
lora_checkpoints/data/mandarin-cantonese-mt/cleaned_parallel_sentences.txt ADDED
The diff for this file is too large to render. See raw diff
 
lora_checkpoints/data/mandarin-cantonese-mt/generate_train_data.py ADDED
@@ -0,0 +1,24 @@
1
+ import json
2
+
3
+
4
+ raw_data = []
5
+
6
+ with open("./cleaned_parallel_sentences.txt", "r", encoding="utf-8") as fr:
7
+ for line in fr.readlines():
8
+ c, m = line.strip().split("|||")[1:]
9
+ raw_data.append((m, c))
10
+
11
+ # prompts = ["请把下面的话翻译成粤语:", "请用粤语转述下面的话:"]
12
+ prompts = [""]
13
+ chat_data = []
14
+ for d in raw_data:
15
+ c, m = d
16
+ for p in prompts:
17
+ user = f"{p}{c}。"
18
+ response = m
19
+ chat_data.append({"prompt": user, "response": response, "history": []})
20
+
21
+ with open("chat_train_data.json", "w", encoding="utf-8") as fw:
22
+ for data in chat_data:
23
+ fw.write(json.dumps(data, ensure_ascii=False))
24
+ fw.write("\n")
lora_checkpoints/test_inference.py ADDED
@@ -0,0 +1,44 @@
1
+ import os
2
+ import torch
3
+ from transformers import AutoTokenizer, AutoModel, AutoConfig
4
+
5
+ model_path = "../base-model/chatglm-6b"
6
+ tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
7
+
8
+ config = AutoConfig.from_pretrained(model_path, trust_remote_code=True, pre_seq_len=128)
9
+ config.prefix_n_experts = 2
10
+ config.prefix_cur_expert = -1
11
+ config.expert_weights = [0.4, 0.6]
12
+
13
+ model = AutoModel.from_pretrained(model_path, config=config, trust_remote_code=True)
14
+
15
+ # 此处使用你的 ptuning 工作目录
16
+ prefix_state_dict = torch.load(
17
+ os.path.join("./checkpoints/checkpoint-3000", "pytorch_model.bin")
18
+ )
19
+ new_prefix_state_dict = {}
20
+ for k, v in prefix_state_dict.items():
21
+ new_prefix_state_dict[k[len("transformer.prefix_encoder.") :]] = v
22
+ model.transformer.prefix_encoder.load_state_dict(new_prefix_state_dict)
23
+
24
+ # V100 机型上可以不进行量化
25
+ # print(f"Quantized to 4 bit")
26
+ model = model.quantize(4)
27
+ model = model.half().cuda()
28
+ model.transformer.prefix_encoder.float()
29
+ model = model.eval()
30
+
31
+ # response, history = model.chat(tokenizer, "你好,我今天心情不好,你能陪陪我吗?", history=[])
32
+ # print(response)
33
+
34
+ # response, history = model.chat(tokenizer, "谢谢你!", history=history)
35
+ # print(response)
36
+
37
+ response = ""
38
+ history = []
39
+
40
+ while True:
41
+ user_prompt = input(">>>>>>>> USER: ")
42
+ response, history = model.chat(tokenizer, user_prompt, history=history)
43
+ print(f">>>>>>>> Assistant: {response}")
44
+ print("\n")