| ==================================================================================================== | |
| import os | |
| import sys | |
| with open(sys.argv[0]) as f: | |
| code = f.read() # read the code of this file ASAP, for logging | |
| import uuid | |
| import glob | |
| import time | |
| from dataclasses import dataclass | |
| import numpy as np | |
| import torch | |
| from torch import nn | |
| import torch.nn.functional as F | |
| import torch.distributed as dist | |
| import torch._inductor.config as config | |
| from torch.nn.parallel import DistributedDataParallel as DDP | |
| # ----------------------------------------------------------------------------- | |
| # Muon optimizer | |
def zeropower_via_svd(G, steps=None):
    """Orthogonalize a real matrix exactly through its singular value decomposition.

    Args:
        G: Matrix to orthogonalize.
        steps: Ignored. Accepted only so this backend can be called with the same
            arguments as the iterative backend.

    Returns:
        ``U @ V^T`` where ``U S V^T`` is the reduced SVD of ``G``.
    """
    # torch.linalg.svd replaces the deprecated Tensor.svd(); it hands back V^T directly,
    # so no explicit transpose is needed. full_matrices=False matches the reduced
    # factorization that Tensor.svd() produced by default.
    U, _, Vh = torch.linalg.svd(G, full_matrices=False)
    return U @ Vh
| @torch.compile | |
| def zeropower_via_newtonschulz5(G, steps=10, eps=1e-7): | |
| """ | |
| Newton-Schulz iteration to compute the zeroth power / orthogonalization of G. We opt to use a | |
| quintic iteration whose coefficients are selected to maximize the slope at zero. For the purpose | |
| of minimizing steps, it turns out to be empirically effective to keep increasing the slope at | |
| zero even beyond the point where the iteration no longer converges all the way to one everywhere | |
| on the interval. This iteration therefore does not produce UV^T but rather something like US'V^T | |
| where S' is diagonal with S_{ii}' \sim Uniform(0.5, 1.5), which turns out not to hurt model | |
| performance at all relative to UV^T, where USV^T = G is the SVD. | |
| """ | |
| assert len(G.shape) == 2 | |
| a, b, c = (3.4445, -4.7750, 2.0315) | |
| X = G.bfloat16() | |
| X /= (X.norm() + eps) # ensure top singular value <= 1 | |
| if G.size(0) > G.size(1): | |
| X = X.T | |
| for _ in range(steps): | |
| A = X @ X.T | |
| B = b * A + c * A @ A # adapted from suggestion by @jxbz, @leloykun, and @YouJiacheng | |
| X = a * X + B @ X | |
| if G.size(0) > G.size(1): | |
| X = X.T | |
| return X | |
# Registry of orthogonalization backends, keyed by the name passed as Muon(backend=...).
zeropower_backends = {
    'svd': zeropower_via_svd,
    'newtonschulz5': zeropower_via_newtonschulz5,
}
class Muon(torch.optim.Optimizer):
    """
    Muon - MomentUm Orthogonalized by Newton-schulz

    Muon internally runs standard SGD-momentum, and then performs an orthogonalization post-
    processing step, in which each 2D parameter's update is replaced with the nearest orthogonal
    matrix. To efficiently orthogonalize each update, we use a Newton-Schulz iteration, which has
    the advantage that it can be stably run in bfloat16 on the GPU.

    Some warnings:
    - This optimizer assumes that all parameters passed in are 2D.
    - It should not be used for the embedding layer, the final fully connected layer, or any {0,1}-D
    parameters; those should all be optimized by a standard method (e.g., AdamW).
    - To use it with 4D convolutional filters, it works well to just flatten their last 3 dimensions.
    - We believe it is unlikely to work well for training with small batch size.
    - We believe it may not work well for finetuning pretrained models, but we haven't tested this.
    - We have not yet tried this optimizer for training scenarios larger than NanoGPT (124M).

    Arguments:
        lr: The learning rate used by the internal SGD.
        momentum: The momentum used by the internal SGD.
        nesterov: Whether to use Nesterov-style momentum in the internal SGD. (recommended)
        backend: The chosen backend for the orthogonalization step. (recommended: 'newtonschulz5')
        backend_steps: The number of iteration steps to use in the backend, if it is iterative.
    """
    def __init__(self, params, lr=0.02, momentum=0.95, nesterov=True,
                 backend='newtonschulz5', backend_steps=5):
        defaults = dict(lr=lr, momentum=momentum, nesterov=nesterov, backend=backend, backend_steps=backend_steps)
        super().__init__(params, defaults)

    def step(self):
        """Apply one Muon update to every parameter group.

        Each rank computes the orthogonalized momentum update only for the parameters whose
        index ``i`` satisfies ``i % WORLD_SIZE == RANK`` and writes it into that parameter's
        slice of a flat bfloat16 buffer, leaving every other slice zero. A SUM all-reduce
        then gives every rank the complete set of updates, which are applied with ``-lr``.

        Reads the ``WORLD_SIZE`` and ``RANK`` environment variables and calls
        ``dist.all_reduce``, so a process group must already be initialized.
        """
        # The rank layout cannot change during a step, so parse it once instead of once
        # per parameter.
        world_size = int(os.environ['WORLD_SIZE'])
        rank = int(os.environ['RANK'])
        for group in self.param_groups:
            lr = group['lr']
            momentum = group['momentum']
            zeropower_backend = zeropower_backends[group['backend']]
            # generate weight updates in distributed fashion
            total_params = sum(p.numel() for p in group['params'])
            updates_flat = torch.zeros(total_params, device='cuda', dtype=torch.bfloat16)
            curr_idx = 0
            for i, p in enumerate(group['params']):
                # luckily this will perfectly distribute a transformer with multiple of 4 layers to 8 GPUs
                if i % world_size == rank:
                    g = p.grad
                    assert g is not None
                    state = self.state[p]
                    if 'momentum_buffer' not in state:
                        state['momentum_buffer'] = torch.zeros_like(g)
                    buf = state['momentum_buffer']
                    buf.mul_(momentum).add_(g)
                    if group['nesterov']:
                        g = g.add(buf, alpha=momentum)
                    g = zeropower_backend(g, steps=group['backend_steps'])
                    # scale up the update of matrices that have more rows than columns
                    g *= max(1, g.size(0)/g.size(1))**0.5
                    updates_flat[curr_idx:curr_idx+p.numel()] = g.flatten()
                # advance past this parameter's slice whether or not this rank filled it
                curr_idx += p.numel()
            # sync updates across devices. we are not memory-constrained so can do this simple deserialization
            dist.all_reduce(updates_flat, op=dist.ReduceOp.SUM)
            # deserialize and apply updates
            curr_idx = 0
            for p in group['params']:
                g = updates_flat[curr_idx:curr_idx+p.numel()].view_as(p.data).type_as(p.data)
                p.data.add_(g, alpha=-lr)
                curr_idx += p.numel()
| # ----------------------------------------------------------------------------- | |
| # PyTorch nn.Module definitions for the GPT-2 model | |
class Rotary(torch.nn.Module):
    """Produces the cos/sin tables used for rotary position embeddings.

    The tables are cached and rebuilt only when the sequence length (dimension 1 of the
    input) differs from the one they were last built for.
    """

    def __init__(self, dim, base=10000):
        super().__init__()
        exponents = torch.arange(0, dim, 2).float() / dim
        # kept as a plain tensor attribute (no register_buffer call)
        self.inv_freq = 1.0 / (base ** exponents)
        self.seq_len_cached = None
        self.cos_cached = None
        self.sin_cached = None

    def forward(self, x):
        """Return ``(cos, sin)`` in bfloat16, each indexed as ``[None, position, None, frequency]``."""
        n_positions = x.shape[1]
        if self.seq_len_cached != n_positions:
            self.seq_len_cached = n_positions
            positions = torch.arange(n_positions, device=x.device).type_as(self.inv_freq)
            angles = torch.outer(positions, self.inv_freq).to(x.device)
            self.cos_cached = angles.cos().bfloat16()
            self.sin_cached = angles.sin().bfloat16()
        cos = self.cos_cached[None, :, None, :]
        sin = self.sin_cached[None, :, None, :]
        return cos, sin
def apply_rotary_emb(x, cos, sin):
    """Rotate the two halves of the last dimension of ``x`` by the given cos/sin tables.

    ``x`` must be 4-dimensional. Its last dimension is split at the midpoint into
    ``(x1, x2)`` and the result is ``cat(x1*cos + x2*sin, -x1*sin + x2*cos)``, cast back
    to the dtype of ``x``.
    """
    assert x.ndim == 4 # multihead attention
    half = x.shape[3] // 2
    first, second = x[..., :half], x[..., half:]
    rotated_first = first * cos + second * sin
    rotated_second = first * (-sin) + second * cos
    rotated = torch.cat([rotated_first, rotated_second], dim=3)
    return rotated.type_as(x)
class CastedLinear(nn.Linear):
    """``nn.Linear`` whose parameters are cast to the input's dtype on every forward pass.

    This lets the layer keep its parameters in one dtype while being fed activations in
    another.
    """

    def forward(self, x):
        # nn.Linear creates a bias unless bias=False is passed; honour it instead of
        # silently dropping it. With bias=False this is exactly F.linear(x, weight).
        bias = None if self.bias is None else self.bias.to(x.dtype)
        return F.linear(x, self.weight.to(x.dtype), bias)
class CausalSelfAttention(nn.Module):
    """Causal multi-head self-attention with QK RMS-norm, rotary embeddings and value mixing.

    The value tensor produced by the first block (``v1``) is blended into this block's own
    values with the learnable scalar ``lamb``.
    """

    def __init__(self, config):
        super().__init__()
        self.n_head = config.n_head
        self.n_embd = config.n_embd
        self.head_dim = self.n_embd // self.n_head
        assert self.n_embd % self.n_head == 0
        width = self.n_embd
        self.c_q = CastedLinear(width, width, bias=False)
        self.c_k = CastedLinear(width, width, bias=False)
        self.c_v = CastedLinear(width, width, bias=False)
        # output projection
        self.c_proj = CastedLinear(width, width, bias=False)
        self.c_proj.weight.data.zero_() # zero init suggested by @Grad62304977
        self.rotary = Rotary(self.head_dim)
        self.lamb = nn.Parameter(torch.tensor(0.5)) # @Grad62304977

    def forward(self, x, v1=None):
        """Attend over ``x`` of shape (B, T, n_embd); return ``(output, v1)``.

        When ``v1`` is None this block's own values are used as ``v1`` and returned so
        that later blocks can reuse them.
        """
        B, T, C = x.size() # batch size, sequence length, embedding dimensionality (n_embd)
        per_head = (B, T, self.n_head, self.head_dim)
        q = self.c_q(x).view(per_head)
        k = self.c_k(x).view(per_head)
        v = self.c_v(x).view(per_head)
        if v1 is None:
            v1 = v # This happens if we are in the first block. v needs to be accessed by subsequent blocks
        v = (1 - self.lamb) * v + self.lamb * v1.view_as(v) # @Grad62304977
        cos, sin = self.rotary(q)
        norm_shape = (self.head_dim,)
        # QK norm suggested by @Grad62304977, followed by the rotary position embedding
        q = apply_rotary_emb(F.rms_norm(q, norm_shape), cos, sin)
        k = apply_rotary_emb(F.rms_norm(k, norm_shape), cos, sin)
        attended = F.scaled_dot_product_attention(q.transpose(1, 2), k.transpose(1, 2), v.transpose(1, 2), is_causal=True)
        # re-assemble all head outputs side by side
        merged = attended.transpose(1, 2).contiguous().view_as(x)
        return self.c_proj(merged), v1
class MLP(nn.Module):
    """Feed-forward block: expand to 4x the embedding width, apply ReLU squared, project back."""

    def __init__(self, config):
        super().__init__()
        hidden_width = 4 * config.n_embd
        self.c_fc = CastedLinear(config.n_embd, hidden_width, bias=False)
        self.c_proj = CastedLinear(hidden_width, config.n_embd, bias=False)
        self.c_proj.weight.data.zero_() # zero init suggested by @Grad62304977

    def forward(self, x):
        # https://arxiv.org/abs/2109.08668v2; ~1-2% better than GELU; suggested by @SKYLINEZ007 and @Grad62304977
        activated = F.relu(self.c_fc(x)).square()
        return self.c_proj(activated)
class Block(nn.Module):
    """One transformer layer: a learnable mix with the initial embedding, then attention and MLP residuals."""

    def __init__(self, config):
        super().__init__()
        self.attn = CausalSelfAttention(config)
        self.mlp = MLP(config)
        # mixing weights for (current stream, initial embedding x0); initialised to (1, 0)
        self.lambdas = nn.Parameter(torch.tensor([1., 0.]))

    def forward(self, x, v1, x0):
        """Return ``(x, v1)`` after this layer; ``v1`` is whatever the attention module returns."""
        stream_weight = self.lambdas[0]
        embed_weight = self.lambdas[1]
        x = stream_weight * x + embed_weight * x0
        attn_out, v1 = self.attn(F.rms_norm(x, (x.size(-1),)), v1)
        x = x + attn_out
        mlp_out = self.mlp(F.rms_norm(x, (x.size(-1),)))
        x = x + mlp_out
        return x, v1
| # ----------------------------------------------------------------------------- | |
| # The main GPT-2 model | |
@dataclass
class GPTConfig:
    """Architecture settings read by the model classes in this file."""
    vocab_size: int = 50304
    n_layer: int = 12
    n_head: int = 6  # head dim 128 suggested by @Grad62304977
    n_embd: int = 768
class GPT(nn.Module):
    """GPT-2 style language model with U-net skip connections between its two halves.

    The first ``n_layer // 2`` blocks act as an encoder whose outputs are stored; the
    remaining blocks act as a decoder, each adding one stored output (most recent first)
    scaled by a learnable weight before running.

    Raises:
        ValueError: if ``config.n_layer`` is odd.
    """

    def __init__(self, config):
        super().__init__()
        # With an odd layer count there is one more decoder block than encoder block, so
        # forward() would pop from an empty skip list. Fail here with a clear message.
        if config.n_layer % 2 != 0:
            raise ValueError(
                f"n_layer must be even so that every decoder block has an encoder skip connection, got {config.n_layer}"
            )
        self.config = config
        self.transformer = nn.ModuleDict(dict(
            wte = nn.Embedding(config.vocab_size, config.n_embd),
            h = nn.ModuleList([Block(config) for _ in range(config.n_layer)]),
        ))
        # U-net design by @brendanh0gan
        self.encoder_layers = config.n_layer // 2 # Half of the layers for encoder
        self.decoder_layers = config.n_layer - self.encoder_layers # Remaining for decoder
        # Add learnable skip connection weights for decoder layers
        self.skip_weights = nn.Parameter(torch.ones(self.decoder_layers))
        self.lm_head = CastedLinear(config.n_embd, config.vocab_size, bias=False)
        self.lm_head.weight.data.zero_() # @Grad62304977

    def forward(self, idx, target):
        """Return the float cross-entropy loss of predicting ``target`` from token ids ``idx``."""
        # forward the GPT model itself
        x = self.transformer.wte(idx) # token embeddings of shape (b, t, n_embd)
        x = F.rms_norm(x, (x.size(-1),)) # @Grad62304977
        x0 = x
        v1 = None
        # Store outputs for U-Net skip connections
        skip_connections = []
        # Encoder pass - process only the first half of the blocks
        for i in range(self.encoder_layers):
            x, v1 = self.transformer.h[i](x, v1, x0)
            skip_connections.append(x) # Store the output for skip connections
        # Decoder pass - process the remaining blocks with weighted skip connections
        for i in range(self.decoder_layers):
            skip_connection = skip_connections.pop() # Get the corresponding encoder output
            # Apply learnable weight to skip connection
            weighted_skip = self.skip_weights[i] * skip_connection
            x, v1 = self.transformer.h[self.encoder_layers + i](x + weighted_skip, v1, x0)
        x = F.rms_norm(x, (x.size(-1),))
        logits = self.lm_head(x)
        # soft-cap the logits to the open interval (-30, 30)
        logits = 30 * torch.tanh(logits / 30) # @Grad62304977
        logits = logits.float()
        loss = F.cross_entropy(logits.view(-1, logits.size(-1)), target.view(-1))
        return loss.float()
| # ----------------------------------------------------------------------------- | |
| # Our own simple Distributed Data Loader | |
| def _peek_data_shard(filename): | |
| # only reads the header, returns header data | |
| with open(filename, "rb") as f: | |
| # first read the header, which is 256 int32 integers (4 bytes each) | |
| header = np.frombuffer(f.read(256*4), dtype=np.int32) | |
| if header[0] != 20240520: | |
| print("ERROR: magic number mismatch in the data .bin file!") | |
| print("---> HINT: Are you passing in a correct file with --input_bin?") | |
| print("---> HINT: Dataset encoding changed recently, re-run data prepro or refer again to README") | |
| print("---> HINT: For example re-run: `python dev/data/tinyshakespeare.py`, then re-try") | |
| exit(1) | |
| assert header[1] == 1, "unsupported version" | |
| ntok = header[2] # number of tokens (claimed) | |
| return ntok # for now just return the number of tokens | |
| def _load_data_shard(filename): | |
| with open(filename, "rb") as f: | |
| # first read the header, which is 256 int32 integers (4 bytes each) | |
| header = np.frombuffer(f.read(256*4), dtype=np.int32) | |
| assert header[0] == 20240520, "magic number mismatch in the data .bin file" | |
| assert header[1] == 1, "unsupported version" | |
| ntok = header[2] # number of tokens (claimed) | |
| # the rest of it are tokens, stored as uint16 | |
| tokens = np.frombuffer(f.read(), dtype=np.uint16) | |
| assert len(tokens) == ntok, "number of tokens read does not match header?" | |
| return tokens | |
class DistributedDataLoader:
    """Serves (B, T) token batches from a list of shards, strided across processes.

    Within a shard, each process starts at ``process_rank * B * T`` and moves forward by
    ``B * T * num_processes`` tokens per batch. Shards are visited in sorted filename
    order and wrap around after the last one.
    """

    def __init__(self, filename_pattern, B, T, process_rank, num_processes):
        self.process_rank = process_rank
        self.num_processes = num_processes
        self.B = B
        self.T = T
        # glob files that match the pattern
        self.files = sorted(glob.glob(filename_pattern))
        assert len(self.files) > 0, f"did not find any files that match the pattern {filename_pattern}"
        # load and validate all data shards, count number of tokens in total
        running_total = 0
        for shard_path in self.files:
            shard_ntok = _peek_data_shard(shard_path)
            # every shard must hold at least one batch for every process, plus one target token
            assert shard_ntok >= num_processes * B * T + 1
            running_total += int(shard_ntok)
        self.ntok_total = running_total
        # kick things off
        self.reset()

    def _open_shard(self, shard_index):
        """Make ``shard_index`` current, rewind to this rank's first offset, and load its tokens."""
        self.current_shard = shard_index
        self.current_position = self.process_rank * self.B * self.T
        self.tokens = _load_data_shard(self.files[self.current_shard])

    def reset(self):
        """Go back to the start of the first shard."""
        self._open_shard(0)

    def advance(self): # advance to next data shard
        """Move on to the next shard, wrapping around after the last one."""
        self._open_shard((self.current_shard + 1) % len(self.files))

    def next_batch(self):
        """Return ``(inputs, targets)`` as CUDA long tensors of shape (B, T), targets shifted by one token."""
        B, T = self.B, self.T
        start = self.current_position
        window = self.tokens[start : start+B*T+1]
        window = torch.tensor(window.astype(np.int32), dtype=torch.long)
        x = window[:-1].view(B, T) # inputs
        y = window[1:].view(B, T) # targets
        # advance current position and load next shard if necessary
        stride = B * T * self.num_processes
        self.current_position += stride
        if self.current_position + (stride + 1) > len(self.tokens):
            self.advance()
        return x.cuda(), y.cuda()
| # ----------------------------------------------------------------------------- | |
| # int main | |
@dataclass
class Hyperparameters:
    """Run configuration: data locations, optimization schedule, and evaluation/logging cadence."""
    # data hyperparams
    input_bin: str = 'data/fineweb10B/fineweb_train_*.bin'  # input .bin to train on
    input_val_bin: str = 'data/fineweb10B/fineweb_val_*.bin'  # input .bin to eval validation loss on
    # optimization hyperparams
    batch_size: int = 8*64  # batch size, in sequences, across all devices
    device_batch_size: int = 64  # batch size, in sequences, per device
    sequence_length: int = 1024  # sequence length, in tokens
    num_iterations: int = 3000  # number of iterations to run
    warmup_iters: int = 0
    warmdown_iters: int = 900  # number of iterations of linear warmup/warmdown for triangular or trapezoidal schedule
    weight_decay: float = 0
    # evaluation and logging hyperparams
    val_loss_every: int = 125  # every how many steps to evaluate val loss? 0 for only at the end
    val_tokens: int = 10485760  # how many tokens of validation data? it's important to keep this fixed for consistent comparisons
    save_every: int = 0  # every how many steps to save the checkpoint? 0 for only at the end
# Script setup: read the hyperparameters, join the process group, build the data loaders,
# the model and the four optimizers. Everything here runs at module level on every rank.
args = Hyperparameters()

# set up DDP (distributed data parallel). torchrun sets this env variable
assert torch.cuda.is_available()
dist.init_process_group(backend='nccl')
ddp_rank = int(os.environ['RANK'])
ddp_local_rank = int(os.environ['LOCAL_RANK'])
ddp_world_size = int(os.environ['WORLD_SIZE'])
device = f'cuda:{ddp_local_rank}'
torch.cuda.set_device(device)
print(f"using device: {device}")
master_process = (ddp_rank == 0) # this process will do logging, checkpointing etc.

# convenience variables
B, T = args.device_batch_size, args.sequence_length
# calculate the number of steps to take in the val loop.
assert args.val_tokens % (B * T * ddp_world_size) == 0
val_steps = args.val_tokens // (B * T * ddp_world_size)
# calculate the steps of gradient accumulation required to attain the desired global batch size.
assert args.batch_size % (B * ddp_world_size) == 0
train_accumulation_steps = args.batch_size // (B * ddp_world_size)

# load tokens
train_loader = DistributedDataLoader(args.input_bin, B, T, ddp_rank, ddp_world_size)
val_loader = DistributedDataLoader(args.input_val_bin, B, T, ddp_rank, ddp_world_size)
if master_process:
    print(f"Training DataLoader: total number of tokens: {train_loader.ntok_total} across {len(train_loader.files)} files")
    print(f"Validation DataLoader: total number of tokens: {val_loader.ntok_total} across {len(val_loader.files)} files")
# fetch the batch that the first training step will consume
# NOTE(review): train_loader.reset() is called again just before the training loop, so the
# batch fetched after that reset looks like it repeats this one — confirm this is intended.
x, y = train_loader.next_batch()

# there are only 50257 unique GPT-2 tokens; we extend to nearest multiple of 128 for efficiency. suggested to me by @Grad62304977.
# this originates from Karpathy's experiments.
num_vocab = 50304
model = GPT(GPTConfig(vocab_size=num_vocab, n_layer=12, n_head=6, n_embd=768))
# cast the whole model to bfloat16, then put the CastedLinear modules back in float32
model = model.cuda().bfloat16()
for m in model.modules():
    if isinstance(m, CastedLinear):
        m.float()
# `config` here is torch._inductor.config; the flag is only set when this build has it
if hasattr(config, "coordinate_descent_tuning"):
    config.coordinate_descent_tuning = True # suggested by @Chillee
model = torch.compile(model)
# here we wrap model into DDP container
model = DDP(model, device_ids=[ddp_local_rank])
raw_model = model.module # always contains the "raw" unwrapped model

# CUDNN attention is ~4ms faster than Flash, but doesn't get selected by default in PyTorch 2.5.1
from torch.backends.cuda import enable_cudnn_sdp, enable_flash_sdp, enable_math_sdp, enable_mem_efficient_sdp
enable_cudnn_sdp(True)
enable_flash_sdp(False)
enable_mem_efficient_sdp(False)
enable_math_sdp(False)

# init the optimizer(s)
# optimizer1: token embedding, optimizer2: output head, optimizer3: 2D parameters of the
# transformer blocks (Muon), optimizer4: remaining (<2D) block parameters plus the skip weights
optimizer1 = torch.optim.Adam([raw_model.transformer.wte.weight], lr=0.6, betas=(0.9, 0.95), fused=True)
optimizer2 = torch.optim.Adam([raw_model.lm_head.weight], lr=0.008, betas=(0.9, 0.95), fused=True)
params = list(raw_model.transformer.h.parameters())
matrix_params = [p for p in params if p.ndim == 2]
scalar_params = [p for p in params if p.ndim < 2]+[raw_model.skip_weights]
optimizer3 = Muon(matrix_params, lr=0.04, momentum=0.95)
optimizer4 = torch.optim.Adam(scalar_params, lr=0.04, betas=(0.9, 0.95), fused=True) # note that this learning rate is neither sensitive nor tuned
optimizers = [optimizer1, optimizer2, optimizer3, optimizer4]
| # learning rate decay scheduler (linear warmup and warmdown) | |
def get_lr(it):
    """Return the learning-rate multiplier for iteration ``it``.

    The schedule has up to three phases, read from the global ``args``: a linear warmup
    over the first ``warmup_iters`` iterations, a constant 1.0, and a linear warmdown to
    0 over the last ``warmdown_iters`` iterations. With ``warmdown_iters == 0`` there is
    no warmdown and the multiplier stays at 1.0 through ``num_iterations``.
    """
    assert it <= args.num_iterations
    # 1) linear warmup for warmup_iters steps
    if it < args.warmup_iters:
        return (it+1) / args.warmup_iters
    # 2) constant lr for a while
    elif it < args.num_iterations - args.warmdown_iters:
        return 1.0
    # no warmdown configured: only reachable at it == num_iterations, where the ratio
    # below would otherwise divide by zero
    elif args.warmdown_iters == 0:
        return 1.0
    # 3) linear warmdown
    else:
        decay_ratio = (args.num_iterations - it) / args.warmdown_iters
        return decay_ratio
# one LambdaLR per optimizer, all driven by the same get_lr multiplier
schedulers = [torch.optim.lr_scheduler.LambdaLR(opt, get_lr) for opt in optimizers]

# begin logging
# only the master process creates the run directory and log file; run_id and logfile are
# therefore defined on that process only
if master_process:
    run_id = str(uuid.uuid4())
    logdir = 'logs/%s/' % run_id
    os.makedirs(logdir, exist_ok=True)
    logfile = 'logs/%s.txt' % run_id
    # create the log file
    with open(logfile, "w") as f:
        # begin the log by printing this file (the Python code)
        f.write('='*100 + '\n')
        f.write(code)
        f.write('='*100 + '\n')
        # log information about the hardware/software environment this is running on
        # and print the full `nvidia-smi` to file
        f.write(f"Running pytorch {torch.version.__version__} compiled for CUDA {torch.version.cuda}\nnvidia-smi:\n")
        import subprocess
        # only stdout is written to the log; stderr is captured but not used
        result = subprocess.run(['nvidia-smi'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
        f.write(f'{result.stdout}\n')
        f.write('='*100 + '\n')

# accumulated training time in milliseconds; validation and checkpointing are excluded by
# stopping and restarting the clock around them
training_time_ms = 0
# start the clock
torch.cuda.synchronize()
t0 = time.time()
# begin training
# Main loop: runs num_iterations training steps plus one extra pass that only performs the
# final validation/checkpoint before breaking out.
train_loader.reset()
for step in range(args.num_iterations + 1):
    last_step = (step == args.num_iterations)
    # This effectively ignores timing first 10 steps, which are slower for weird reasons.
    # Alternately, and slightly more correctly in terms of benchmarking, we could do 10
    # steps with dummy data first, and then re-initialize the model and reset the loader.
    if step == 10:
        training_time_ms = 0
        t0 = time.time()
    # number of steps counted since the timer restart at step 10; NaN until step 12 so the
    # step_avg fields print as "nan" instead of a misleading number
    timed_steps = float('nan') if step <= 11 else (step - 10) + 1 # <= 11 to avoid bug in val

    # once in a while evaluate the validation dataset
    if (last_step or (args.val_loss_every > 0 and step % args.val_loss_every == 0)):
        # stop the clock
        torch.cuda.synchronize()
        training_time_ms += 1000 * (time.time() - t0)
        # run validation batches
        model.eval()
        val_loader.reset()
        val_loss = 0.0
        for _ in range(val_steps):
            with torch.no_grad():
                x_val, y_val = val_loader.next_batch()
                val_loss += model(x_val, y_val)
        # average across ranks first, then across this rank's val_steps batches
        dist.all_reduce(val_loss, op=dist.ReduceOp.AVG)
        val_loss /= val_steps
        # log val loss to console and to logfile
        if master_process:
            print(f'step:{step}/{args.num_iterations} val_loss:{val_loss:.4f} train_time:{training_time_ms:.0f}ms step_avg:{training_time_ms/(timed_steps-1):.2f}ms')
            with open(logfile, "a") as f:
                f.write(f'step:{step}/{args.num_iterations} val_loss:{val_loss:.4f} train_time:{training_time_ms:.0f}ms step_avg:{training_time_ms/(timed_steps-1):.2f}ms\n')
        # start the clock again
        torch.cuda.synchronize()
        t0 = time.time()

    if master_process and (last_step or (args.save_every > 0 and step % args.save_every == 0)):
        # stop the clock
        torch.cuda.synchronize()
        training_time_ms += 1000 * (time.time() - t0)
        # save the state of the training process
        log = dict(step=step, code=code, model=raw_model.state_dict(), optimizers=[opt.state_dict() for opt in optimizers])
        torch.save(log, 'logs/%s/state_step%06d.pt' % (run_id, step))
        # start the clock again
        torch.cuda.synchronize()
        t0 = time.time()

    # bit confusing: we want to make sure to eval on 0th iteration
    # but also after the very last iteration. so we loop for step <= num_iterations
    # instead of just < num_iterations (one extra due to <=), only to do
    # the validation/sampling one last time, and then we break right here as we're done.
    if last_step:
        break

    # --------------- TRAINING SECTION BEGIN -----------------
    model.train()
    for i in range(1, train_accumulation_steps+1):
        # forward pass
        loss = model(x, y)
        # kept only for logging; after the loop it holds the loss of the last micro-batch
        train_loss = loss.detach()
        # advance the dataset for the next batch
        x, y = train_loader.next_batch()
        # backward pass
        if i < train_accumulation_steps:
            with model.no_sync(): # there's no need to sync gradients every accumulation step
                loss.backward()
        else:
            loss.backward() # just sync on the last step
    # gradients were summed over the micro-batches; turn the sum into a mean
    for p in model.parameters():
        p.grad /= train_accumulation_steps
    # momentum warmup for Muon
    # linearly interpolates the momentum from 0.85 at step 0 to 0.95 at step 500 and beyond
    frac = min(step/500, 1)
    optimizer3.param_groups[0]['momentum'] = (1 - frac) * 0.85 + frac * 0.95
    # step the optimizers and schedulers
    for opt, sched in zip(optimizers, schedulers):
        opt.step()
        sched.step()
    # null the gradients
    model.zero_grad(set_to_none=True)
    # --------------- TRAINING SECTION END -------------------
    # everything that follows now is just diagnostics, prints, logging, etc.

    #dist.all_reduce(train_loss, op=dist.ReduceOp.AVG) # all-reducing the training loss would be more correct in terms of logging, but slower
    if master_process:
        # approximate because the clock is read without a torch.cuda.synchronize() first
        approx_time = training_time_ms + 1000 * (time.time() - t0)
        print(f"step:{step+1}/{args.num_iterations} train_loss:{train_loss.item():.4f} train_time:{approx_time:.0f}ms step_avg:{approx_time/timed_steps:.2f}ms")
        with open(logfile, "a") as f:
            f.write(f"step:{step+1}/{args.num_iterations} train_loss:{train_loss.item():.4f} train_time:{approx_time:.0f}ms step_avg:{approx_time/timed_steps:.2f}ms\n")

if master_process:
    print(f"peak memory consumption: {torch.cuda.max_memory_allocated() // 1024 // 1024} MiB")

# -------------------------------------------------------------------------
# clean up nice
dist.destroy_process_group()
| ==================================================================================================== | |
| Running pytorch 2.5.1+cu124 compiled for CUDA 12.4 | |
| nvidia-smi: | |
| Sun Nov 10 19:09:29 2024 | |
| +-----------------------------------------------------------------------------------------+ | |
| | NVIDIA-SMI 555.42.06 Driver Version: 555.42.06 CUDA Version: 12.5 | | |
| |-----------------------------------------+------------------------+----------------------+ | |
| | GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC | | |
| | Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. | | |
| | | | MIG M. | | |
| |=========================================+========================+======================| | |
| | 0 NVIDIA H100 80GB HBM3 Off | 00000000:18:00.0 Off | 0 | | |
| | N/A 28C P0 113W / 700W | 5156MiB / 81559MiB | 1% Default | | |
| | | | Disabled | | |
| +-----------------------------------------+------------------------+----------------------+ | |
| | 1 NVIDIA H100 80GB HBM3 Off | 00000000:2A:00.0 Off | 0 | | |
| | N/A 31C P0 116W / 700W | 5204MiB / 81559MiB | 0% Default | | |
| | | | Disabled | | |
| +-----------------------------------------+------------------------+----------------------+ | |
| | 2 NVIDIA H100 80GB HBM3 Off | 00000000:3A:00.0 Off | 0 | | |
| | N/A 32C P0 115W / 700W | 5204MiB / 81559MiB | 7% Default | | |
| | | | Disabled | | |
| +-----------------------------------------+------------------------+----------------------+ | |
| | 3 NVIDIA H100 80GB HBM3 Off | 00000000:5D:00.0 Off | 0 | | |
| | N/A 28C P0 115W / 700W | 5204MiB / 81559MiB | 6% Default | | |
| | | | Disabled | | |
| +-----------------------------------------+------------------------+----------------------+ | |
| | 4 NVIDIA H100 80GB HBM3 Off | 00000000:84:00.0 Off | 0 | | |
| | N/A 28C P0 115W / 700W | 5204MiB / 81559MiB | 0% Default | | |
| | | | Disabled | | |
| +-----------------------------------------+------------------------+----------------------+ | |
| | 5 NVIDIA H100 80GB HBM3 Off | 00000000:8B:00.0 Off | 0 | | |
| | N/A 31C P0 115W / 700W | 5204MiB / 81559MiB | 0% Default | | |
| | | | Disabled | | |
| +-----------------------------------------+------------------------+----------------------+ | |
| | 6 NVIDIA H100 80GB HBM3 Off | 00000000:91:00.0 Off | 0 | | |
| | N/A 30C P0 114W / 700W | 5204MiB / 81559MiB | 6% Default | | |
| | | | Disabled | | |
| +-----------------------------------------+------------------------+----------------------+ | |
| | 7 NVIDIA H100 80GB HBM3 Off | 00000000:E4:00.0 Off | 0 | | |
| | N/A 28C P0 117W / 700W | 4964MiB / 81559MiB | 0% Default | | |
| | | | Disabled | | |
| +-----------------------------------------+------------------------+----------------------+ | |
| +-----------------------------------------------------------------------------------------+ | |
| | Processes: | | |
| | GPU GI CI PID Type Process name GPU Memory | | |
| | ID ID Usage | | |
| |=========================================================================================| | |
| | 0 N/A N/A 157229 C /usr/bin/python3 0MiB | | |
| | 1 N/A N/A 157230 C /usr/bin/python3 0MiB | | |
| | 2 N/A N/A 157231 C /usr/bin/python3 0MiB | | |
| | 3 N/A N/A 157232 C /usr/bin/python3 0MiB | | |
| | 4 N/A N/A 157233 C /usr/bin/python3 0MiB | | |
| | 5 N/A N/A 157234 C /usr/bin/python3 0MiB | | |
| | 6 N/A N/A 157235 C /usr/bin/python3 0MiB | | |
| | 7 N/A N/A 157236 C /usr/bin/python3 0MiB | | |
| +-----------------------------------------------------------------------------------------+ | |
| ==================================================================================================== | |
| step:0/3000 val_loss:10.8258 train_time:411ms step_avg:nanms | |
| step:1/3000 train_loss:10.8258 train_time:14600ms step_avg:nanms | |
| step:2/3000 train_loss:9.5118 train_time:14707ms step_avg:nanms | |
| step:3/3000 train_loss:8.4756 train_time:14844ms step_avg:nanms | |
| step:4/3000 train_loss:8.0097 train_time:14985ms step_avg:nanms | |
| step:5/3000 train_loss:7.5120 train_time:15126ms step_avg:nanms | |
| step:6/3000 train_loss:7.2826 train_time:15267ms step_avg:nanms | |
| step:7/3000 train_loss:6.9171 train_time:15409ms step_avg:nanms | |
| step:8/3000 train_loss:7.1124 train_time:15556ms step_avg:nanms | |
| step:9/3000 train_loss:6.7596 train_time:15706ms step_avg:nanms | |
| step:10/3000 train_loss:6.6236 train_time:15849ms step_avg:nanms | |
| step:11/3000 train_loss:6.5634 train_time:106ms step_avg:nanms | |
| step:12/3000 train_loss:6.5905 train_time:249ms step_avg:nanms | |
| step:13/3000 train_loss:6.4862 train_time:392ms step_avg:130.53ms | |
| step:14/3000 train_loss:6.3646 train_time:535ms step_avg:133.65ms | |
| step:15/3000 train_loss:6.3322 train_time:678ms step_avg:135.64ms | |
| step:16/3000 train_loss:6.2824 train_time:824ms step_avg:137.34ms | |
| step:17/3000 train_loss:6.2691 train_time:970ms step_avg:138.53ms | |
| step:18/3000 train_loss:6.3454 train_time:1115ms step_avg:139.35ms | |
| step:19/3000 train_loss:6.1535 train_time:1257ms step_avg:139.64ms | |
| step:20/3000 train_loss:6.1644 train_time:1398ms step_avg:139.84ms | |
| step:21/3000 train_loss:5.8851 train_time:1542ms step_avg:140.15ms | |
| step:22/3000 train_loss:6.1650 train_time:1687ms step_avg:140.59ms | |
| step:23/3000 train_loss:6.3783 train_time:1834ms step_avg:141.07ms | |
| step:24/3000 train_loss:6.0796 train_time:1977ms step_avg:141.23ms | |
| step:25/3000 train_loss:6.2266 train_time:2121ms step_avg:141.40ms | |
| step:26/3000 train_loss:5.9066 train_time:2264ms step_avg:141.52ms | |
| step:27/3000 train_loss:5.8187 train_time:2409ms step_avg:141.71ms | |
| step:28/3000 train_loss:6.0527 train_time:2552ms step_avg:141.80ms | |
| step:29/3000 train_loss:5.6964 train_time:2696ms step_avg:141.89ms | |
| step:30/3000 train_loss:5.9238 train_time:2840ms step_avg:141.98ms | |
| step:31/3000 train_loss:5.7351 train_time:2985ms step_avg:142.14ms | |
| step:32/3000 train_loss:5.7140 train_time:3132ms step_avg:142.38ms | |
| step:33/3000 train_loss:5.5725 train_time:3276ms step_avg:142.43ms | |
| step:34/3000 train_loss:5.8444 train_time:3420ms step_avg:142.48ms | |
| step:35/3000 train_loss:5.7806 train_time:3563ms step_avg:142.51ms | |
| step:36/3000 train_loss:5.9140 train_time:3707ms step_avg:142.57ms | |
| step:37/3000 train_loss:5.8207 train_time:3852ms step_avg:142.67ms | |
| step:38/3000 train_loss:5.7391 train_time:3997ms step_avg:142.74ms | |
| step:39/3000 train_loss:5.6048 train_time:4141ms step_avg:142.79ms | |
| step:40/3000 train_loss:5.6009 train_time:4286ms step_avg:142.88ms | |
| step:41/3000 train_loss:5.5260 train_time:4432ms step_avg:142.98ms | |
| step:42/3000 train_loss:5.5092 train_time:4575ms step_avg:142.97ms | |
| step:43/3000 train_loss:5.4288 train_time:4719ms step_avg:143.00ms | |
| step:44/3000 train_loss:5.5299 train_time:4862ms step_avg:143.00ms | |
| step:45/3000 train_loss:5.4996 train_time:5008ms step_avg:143.08ms | |
| step:46/3000 train_loss:5.6457 train_time:5153ms step_avg:143.13ms | |
| step:47/3000 train_loss:5.4348 train_time:5297ms step_avg:143.15ms | |
| step:48/3000 train_loss:5.3142 train_time:5440ms step_avg:143.15ms | |
| step:49/3000 train_loss:5.4901 train_time:5584ms step_avg:143.18ms | |
| step:50/3000 train_loss:5.3885 train_time:5730ms step_avg:143.25ms | |
| step:51/3000 train_loss:5.5243 train_time:5873ms step_avg:143.25ms | |
| step:52/3000 train_loss:5.4120 train_time:6017ms step_avg:143.25ms | |
| step:53/3000 train_loss:5.2482 train_time:6159ms step_avg:143.24ms | |
| step:54/3000 train_loss:5.3877 train_time:6305ms step_avg:143.29ms | |
| step:55/3000 train_loss:5.2555 train_time:6450ms step_avg:143.32ms | |
| step:56/3000 train_loss:5.6425 train_time:6594ms step_avg:143.36ms | |
| step:57/3000 train_loss:5.2742 train_time:6737ms step_avg:143.34ms | |
| step:58/3000 train_loss:5.1168 train_time:6882ms step_avg:143.37ms | |
| step:59/3000 train_loss:5.2245 train_time:7027ms step_avg:143.41ms | |
| step:60/3000 train_loss:5.2294 train_time:7171ms step_avg:143.42ms | |
| step:61/3000 train_loss:5.3408 train_time:7315ms step_avg:143.44ms | |
| step:62/3000 train_loss:5.0759 train_time:7457ms step_avg:143.40ms | |
| step:63/3000 train_loss:5.1987 train_time:7601ms step_avg:143.42ms | |
| step:64/3000 train_loss:5.1882 train_time:7746ms step_avg:143.44ms | |
| step:65/3000 train_loss:4.8413 train_time:7891ms step_avg:143.48ms | |
| step:66/3000 train_loss:5.0236 train_time:8036ms step_avg:143.49ms | |
| step:67/3000 train_loss:5.1505 train_time:8178ms step_avg:143.47ms | |
| step:68/3000 train_loss:5.0399 train_time:8322ms step_avg:143.49ms | |
| step:69/3000 train_loss:5.3209 train_time:8466ms step_avg:143.49ms | |
| step:70/3000 train_loss:4.9200 train_time:8612ms step_avg:143.54ms | |
| step:71/3000 train_loss:5.0072 train_time:8755ms step_avg:143.53ms | |
| step:72/3000 train_loss:5.2001 train_time:8899ms step_avg:143.54ms | |
| step:73/3000 train_loss:5.0981 train_time:9043ms step_avg:143.54ms | |
| step:74/3000 train_loss:4.9796 train_time:9188ms step_avg:143.57ms | |
| step:75/3000 train_loss:5.1104 train_time:9333ms step_avg:143.59ms | |
| step:76/3000 train_loss:5.0924 train_time:9475ms step_avg:143.57ms | |
| step:77/3000 train_loss:5.0097 train_time:9619ms step_avg:143.57ms | |
| step:78/3000 train_loss:5.1233 train_time:9762ms step_avg:143.55ms | |
| step:79/3000 train_loss:5.2696 train_time:9907ms step_avg:143.57ms | |
| step:80/3000 train_loss:4.9988 train_time:10052ms step_avg:143.60ms | |
| step:81/3000 train_loss:5.0664 train_time:10195ms step_avg:143.59ms | |
| step:82/3000 train_loss:4.8429 train_time:10339ms step_avg:143.59ms | |
| step:83/3000 train_loss:5.0232 train_time:10482ms step_avg:143.60ms | |
| step:84/3000 train_loss:4.9798 train_time:10627ms step_avg:143.61ms | |
| step:85/3000 train_loss:4.9659 train_time:10772ms step_avg:143.62ms | |
| step:86/3000 train_loss:4.8246 train_time:10916ms step_avg:143.64ms | |
| step:87/3000 train_loss:5.0222 train_time:11059ms step_avg:143.62ms | |
| step:88/3000 train_loss:4.9316 train_time:11203ms step_avg:143.62ms | |
| step:89/3000 train_loss:4.9636 train_time:11348ms step_avg:143.65ms | |
| step:90/3000 train_loss:4.9152 train_time:11492ms step_avg:143.65ms | |
| step:91/3000 train_loss:4.8617 train_time:11636ms step_avg:143.65ms | |
| step:92/3000 train_loss:4.8496 train_time:11779ms step_avg:143.64ms | |
| step:93/3000 train_loss:4.9879 train_time:11922ms step_avg:143.64ms | |
| step:94/3000 train_loss:4.8195 train_time:12067ms step_avg:143.65ms | |
| step:95/3000 train_loss:4.8462 train_time:12212ms step_avg:143.67ms | |
| step:96/3000 train_loss:4.8739 train_time:12356ms step_avg:143.67ms | |
| step:97/3000 train_loss:4.7754 train_time:12499ms step_avg:143.67ms | |
| step:98/3000 train_loss:4.8416 train_time:12642ms step_avg:143.66ms | |
| step:99/3000 train_loss:4.7600 train_time:12787ms step_avg:143.67ms | |
| step:100/3000 train_loss:4.8583 train_time:12933ms step_avg:143.70ms | |
| step:101/3000 train_loss:4.8539 train_time:13076ms step_avg:143.69ms | |
| step:102/3000 train_loss:4.7116 train_time:13220ms step_avg:143.70ms | |
| step:103/3000 train_loss:4.8685 train_time:13363ms step_avg:143.69ms | |
| step:104/3000 train_loss:4.7718 train_time:13508ms step_avg:143.70ms | |
| step:105/3000 train_loss:4.6944 train_time:13652ms step_avg:143.71ms | |
| step:106/3000 train_loss:4.7237 train_time:13796ms step_avg:143.71ms | |
| step:107/3000 train_loss:4.8175 train_time:13938ms step_avg:143.69ms | |
| step:108/3000 train_loss:4.6881 train_time:14082ms step_avg:143.70ms | |
| step:109/3000 train_loss:4.5080 train_time:14227ms step_avg:143.71ms | |
| step:110/3000 train_loss:4.6457 train_time:14372ms step_avg:143.72ms | |
| step:111/3000 train_loss:4.6387 train_time:14516ms step_avg:143.72ms | |
| step:112/3000 train_loss:4.5831 train_time:14658ms step_avg:143.71ms | |
| step:113/3000 train_loss:4.7349 train_time:14802ms step_avg:143.71ms | |
| step:114/3000 train_loss:4.6384 train_time:14946ms step_avg:143.71ms | |
| step:115/3000 train_loss:4.4999 train_time:15092ms step_avg:143.73ms | |
| step:116/3000 train_loss:4.6427 train_time:15235ms step_avg:143.73ms | |
| step:117/3000 train_loss:4.5970 train_time:15378ms step_avg:143.72ms | |
| step:118/3000 train_loss:4.5058 train_time:15522ms step_avg:143.72ms | |
| step:119/3000 train_loss:4.7097 train_time:15666ms step_avg:143.72ms | |
| step:120/3000 train_loss:4.5873 train_time:15811ms step_avg:143.74ms | |
| step:121/3000 train_loss:4.4731 train_time:15955ms step_avg:143.74ms | |
| step:122/3000 train_loss:4.4383 train_time:16099ms step_avg:143.74ms | |
| step:123/3000 train_loss:4.5754 train_time:16241ms step_avg:143.73ms | |
| step:124/3000 train_loss:4.3962 train_time:16387ms step_avg:143.74ms | |
| step:125/3000 train_loss:4.7078 train_time:16533ms step_avg:143.76ms | |
| step:125/3000 val_loss:4.5262 train_time:16570ms step_avg:144.09ms | |
| step:126/3000 train_loss:4.5741 train_time:16686ms step_avg:143.85ms | |
| step:127/3000 train_loss:4.5231 train_time:16833ms step_avg:143.88ms | |
| step:128/3000 train_loss:4.5517 train_time:16976ms step_avg:143.87ms | |
| step:129/3000 train_loss:4.4821 train_time:17118ms step_avg:143.85ms | |
| step:130/3000 train_loss:4.7820 train_time:17260ms step_avg:143.84ms | |
| step:131/3000 train_loss:4.4520 train_time:17403ms step_avg:143.83ms | |
| step:132/3000 train_loss:4.4960 train_time:17545ms step_avg:143.81ms | |
| step:133/3000 train_loss:4.4454 train_time:17693ms step_avg:143.84ms | |
| step:134/3000 train_loss:4.5549 train_time:17841ms step_avg:143.88ms | |
| step:135/3000 train_loss:4.3711 train_time:17984ms step_avg:143.87ms | |
| step:136/3000 train_loss:4.5423 train_time:18126ms step_avg:143.86ms | |
| step:137/3000 train_loss:4.3013 train_time:18269ms step_avg:143.85ms | |
| step:138/3000 train_loss:4.4656 train_time:18412ms step_avg:143.84ms | |
| step:139/3000 train_loss:4.3772 train_time:18555ms step_avg:143.83ms | |
| step:140/3000 train_loss:4.4598 train_time:18701ms step_avg:143.85ms | |
| step:141/3000 train_loss:4.5398 train_time:18845ms step_avg:143.86ms | |
| step:142/3000 train_loss:4.4025 train_time:18989ms step_avg:143.85ms | |
| step:143/3000 train_loss:4.3762 train_time:19133ms step_avg:143.85ms | |
| step:144/3000 train_loss:4.3234 train_time:19277ms step_avg:143.85ms | |
| step:145/3000 train_loss:4.4335 train_time:19420ms step_avg:143.85ms | |
| step:146/3000 train_loss:4.3911 train_time:19562ms step_avg:143.84ms | |
| step:147/3000 train_loss:4.2602 train_time:19707ms step_avg:143.84ms | |
| step:148/3000 train_loss:4.3971 train_time:19850ms step_avg:143.84ms | |
| step:149/3000 train_loss:4.4389 train_time:19997ms step_avg:143.86ms | |
| step:150/3000 train_loss:4.3793 train_time:20140ms step_avg:143.86ms | |
| step:151/3000 train_loss:4.5150 train_time:20283ms step_avg:143.85ms | |
| step:152/3000 train_loss:4.3494 train_time:20428ms step_avg:143.86ms | |
| step:153/3000 train_loss:4.3456 train_time:20570ms step_avg:143.84ms | |
| step:154/3000 train_loss:4.4266 train_time:20715ms step_avg:143.85ms | |
| step:155/3000 train_loss:4.4339 train_time:20859ms step_avg:143.86ms | |
| step:156/3000 train_loss:4.3549 train_time:21004ms step_avg:143.86ms | |
| step:157/3000 train_loss:4.4160 train_time:21146ms step_avg:143.85ms | |
| step:158/3000 train_loss:4.4827 train_time:21289ms step_avg:143.85ms | |
| step:159/3000 train_loss:4.3180 train_time:21433ms step_avg:143.85ms | |
| step:160/3000 train_loss:4.3809 train_time:21577ms step_avg:143.85ms | |
| step:161/3000 train_loss:4.1862 train_time:21721ms step_avg:143.85ms | |
| step:162/3000 train_loss:4.4194 train_time:21865ms step_avg:143.85ms | |
| step:163/3000 train_loss:4.4144 train_time:22008ms step_avg:143.84ms | |
| step:164/3000 train_loss:4.3985 train_time:22152ms step_avg:143.84ms | |
| step:165/3000 train_loss:4.2500 train_time:22297ms step_avg:143.85ms | |
| step:166/3000 train_loss:4.3339 train_time:22440ms step_avg:143.84ms | |
| step:167/3000 train_loss:4.4214 train_time:22582ms step_avg:143.84ms | |
| step:168/3000 train_loss:4.2570 train_time:22726ms step_avg:143.83ms | |
| step:169/3000 train_loss:4.3220 train_time:22870ms step_avg:143.84ms | |
| step:170/3000 train_loss:4.2154 train_time:23016ms step_avg:143.85ms | |
| step:171/3000 train_loss:4.0866 train_time:23161ms step_avg:143.86ms | |
| step:172/3000 train_loss:4.2498 train_time:23304ms step_avg:143.85ms | |
| step:173/3000 train_loss:4.2764 train_time:23446ms step_avg:143.84ms | |
| step:174/3000 train_loss:4.3114 train_time:23590ms step_avg:143.84ms | |
| step:175/3000 train_loss:4.4810 train_time:23734ms step_avg:143.84ms | |
| step:176/3000 train_loss:4.3008 train_time:23879ms step_avg:143.85ms | |
| step:177/3000 train_loss:4.1587 train_time:24023ms step_avg:143.85ms | |
| step:178/3000 train_loss:4.1317 train_time:24166ms step_avg:143.85ms | |
| step:179/3000 train_loss:4.2416 train_time:24308ms step_avg:143.84ms | |
| step:180/3000 train_loss:4.1906 train_time:24452ms step_avg:143.84ms | |
| step:181/3000 train_loss:4.1679 train_time:24597ms step_avg:143.84ms | |
| step:182/3000 train_loss:4.3506 train_time:24741ms step_avg:143.84ms | |
| step:183/3000 train_loss:4.2080 train_time:24883ms step_avg:143.84ms | |
| step:184/3000 train_loss:4.1898 train_time:25027ms step_avg:143.83ms | |
| step:185/3000 train_loss:4.1870 train_time:25171ms step_avg:143.83ms | |
| step:186/3000 train_loss:4.2641 train_time:25316ms step_avg:143.84ms | |
| step:187/3000 train_loss:4.2419 train_time:25461ms step_avg:143.85ms | |
| step:188/3000 train_loss:4.3007 train_time:25604ms step_avg:143.84ms | |
| step:189/3000 train_loss:4.2315 train_time:25864ms step_avg:144.49ms | |
| step:190/3000 train_loss:4.1682 train_time:26151ms step_avg:145.28ms | |
| step:191/3000 train_loss:4.2656 train_time:26292ms step_avg:145.26ms | |
| step:192/3000 train_loss:4.1482 train_time:26435ms step_avg:145.25ms | |
| step:193/3000 train_loss:4.0851 train_time:26577ms step_avg:145.23ms | |
| step:194/3000 train_loss:4.3162 train_time:26720ms step_avg:145.22ms | |
| step:195/3000 train_loss:4.2302 train_time:26862ms step_avg:145.20ms | |
| step:196/3000 train_loss:4.4159 train_time:27007ms step_avg:145.20ms | |
| step:197/3000 train_loss:4.2392 train_time:27156ms step_avg:145.22ms | |
| step:198/3000 train_loss:4.0918 train_time:27302ms step_avg:145.23ms | |
| step:199/3000 train_loss:4.2305 train_time:27445ms step_avg:145.21ms | |
| step:200/3000 train_loss:4.0901 train_time:27587ms step_avg:145.20ms | |
| step:201/3000 train_loss:4.1752 train_time:27729ms step_avg:145.18ms | |
| step:202/3000 train_loss:4.0694 train_time:27872ms step_avg:145.17ms | |
| step:203/3000 train_loss:4.3032 train_time:28017ms step_avg:145.17ms | |
| step:204/3000 train_loss:4.1226 train_time:28161ms step_avg:145.16ms | |
| step:205/3000 train_loss:4.2472 train_time:28306ms step_avg:145.16ms | |
| step:206/3000 train_loss:4.2976 train_time:28449ms step_avg:145.15ms | |
| step:207/3000 train_loss:3.9943 train_time:28593ms step_avg:145.14ms | |
| step:208/3000 train_loss:4.1465 train_time:28737ms step_avg:145.14ms | |
| step:209/3000 train_loss:4.1449 train_time:28879ms step_avg:145.12ms | |
| step:210/3000 train_loss:4.2933 train_time:29023ms step_avg:145.12ms | |
| step:211/3000 train_loss:4.2461 train_time:29167ms step_avg:145.11ms | |
| step:212/3000 train_loss:4.1242 train_time:29312ms step_avg:145.11ms | |
| step:213/3000 train_loss:4.1413 train_time:29457ms step_avg:145.11ms | |
| step:214/3000 train_loss:4.0978 train_time:29601ms step_avg:145.10ms | |
| step:215/3000 train_loss:4.1717 train_time:29744ms step_avg:145.09ms | |
| step:216/3000 train_loss:3.9967 train_time:29886ms step_avg:145.08ms | |
| step:217/3000 train_loss:4.0528 train_time:30031ms step_avg:145.08ms | |
| step:218/3000 train_loss:4.0577 train_time:30176ms step_avg:145.07ms | |
| step:219/3000 train_loss:4.1389 train_time:30321ms step_avg:145.08ms | |
| step:220/3000 train_loss:4.1205 train_time:30464ms step_avg:145.07ms | |
| step:221/3000 train_loss:4.1453 train_time:30608ms step_avg:145.06ms | |
| step:222/3000 train_loss:4.1676 train_time:30750ms step_avg:145.05ms | |
| step:223/3000 train_loss:4.0519 train_time:30894ms step_avg:145.04ms | |
| step:224/3000 train_loss:4.0278 train_time:31039ms step_avg:145.04ms | |
| step:225/3000 train_loss:4.3561 train_time:31182ms step_avg:145.03ms | |
| step:226/3000 train_loss:3.9796 train_time:31326ms step_avg:145.03ms | |
| step:227/3000 train_loss:4.0410 train_time:31469ms step_avg:145.02ms | |
| step:228/3000 train_loss:4.0347 train_time:31615ms step_avg:145.02ms | |
| step:229/3000 train_loss:4.1980 train_time:31759ms step_avg:145.02ms | |
| step:230/3000 train_loss:3.9792 train_time:31903ms step_avg:145.01ms | |
| step:231/3000 train_loss:4.1087 train_time:32045ms step_avg:145.00ms | |
| step:232/3000 train_loss:3.9565 train_time:32188ms step_avg:144.99ms | |
| step:233/3000 train_loss:4.0301 train_time:32333ms step_avg:144.99ms | |
| step:234/3000 train_loss:4.1562 train_time:32477ms step_avg:144.99ms | |
| step:235/3000 train_loss:4.0816 train_time:32623ms step_avg:144.99ms | |
| step:236/3000 train_loss:3.9585 train_time:32765ms step_avg:144.98ms | |
| step:237/3000 train_loss:4.1246 train_time:32908ms step_avg:144.97ms | |
| step:238/3000 train_loss:4.1341 train_time:33053ms step_avg:144.97ms | |
| step:239/3000 train_loss:3.9934 train_time:33198ms step_avg:144.97ms | |
| step:240/3000 train_loss:4.1259 train_time:33343ms step_avg:144.97ms | |
| step:241/3000 train_loss:4.1713 train_time:33485ms step_avg:144.96ms | |
| step:242/3000 train_loss:4.0157 train_time:33628ms step_avg:144.95ms | |
| step:243/3000 train_loss:4.1972 train_time:33771ms step_avg:144.94ms | |
| step:244/3000 train_loss:4.0799 train_time:33915ms step_avg:144.94ms | |
| step:245/3000 train_loss:4.1408 train_time:34059ms step_avg:144.93ms | |
| step:246/3000 train_loss:4.2061 train_time:34202ms step_avg:144.92ms | |
| step:247/3000 train_loss:4.1254 train_time:34345ms step_avg:144.92ms | |
| step:248/3000 train_loss:4.0626 train_time:34487ms step_avg:144.90ms | |
| step:249/3000 train_loss:4.1690 train_time:34630ms step_avg:144.90ms | |
| step:250/3000 train_loss:3.9779 train_time:34774ms step_avg:144.89ms | |
| step:250/3000 val_loss:4.0672 train_time:34813ms step_avg:145.05ms | |
| step:251/3000 train_loss:4.0330 train_time:34931ms step_avg:144.94ms | |
| step:252/3000 train_loss:4.1335 train_time:35076ms step_avg:144.94ms | |
| step:253/3000 train_loss:4.2245 train_time:35218ms step_avg:144.93ms | |
| step:254/3000 train_loss:4.0026 train_time:35360ms step_avg:144.92ms | |
| step:255/3000 train_loss:3.9401 train_time:35502ms step_avg:144.91ms | |
| step:256/3000 train_loss:4.1216 train_time:35646ms step_avg:144.90ms | |
| step:257/3000 train_loss:4.0343 train_time:35789ms step_avg:144.89ms | |
| step:258/3000 train_loss:4.0454 train_time:35936ms step_avg:144.90ms | |
| step:259/3000 train_loss:4.0313 train_time:36081ms step_avg:144.90ms | |
| step:260/3000 train_loss:4.0862 train_time:36226ms step_avg:144.91ms | |
| step:261/3000 train_loss:4.1122 train_time:36369ms step_avg:144.90ms | |
| step:262/3000 train_loss:4.0743 train_time:36511ms step_avg:144.89ms | |
| step:263/3000 train_loss:4.0533 train_time:36653ms step_avg:144.87ms | |
| step:264/3000 train_loss:3.9617 train_time:36795ms step_avg:144.86ms | |
| step:265/3000 train_loss:4.0402 train_time:36941ms step_avg:144.87ms | |
| step:266/3000 train_loss:3.9239 train_time:37087ms step_avg:144.87ms | |
| step:267/3000 train_loss:3.9761 train_time:37231ms step_avg:144.87ms | |
| step:268/3000 train_loss:3.9763 train_time:37373ms step_avg:144.86ms | |
| step:269/3000 train_loss:4.0133 train_time:37515ms step_avg:144.85ms | |
| step:270/3000 train_loss:3.9078 train_time:37658ms step_avg:144.84ms | |
| step:271/3000 train_loss:4.1566 train_time:37801ms step_avg:144.83ms | |
| step:272/3000 train_loss:4.0342 train_time:37946ms step_avg:144.83ms | |
| step:273/3000 train_loss:3.9743 train_time:38090ms step_avg:144.83ms | |
| step:274/3000 train_loss:4.0085 train_time:38233ms step_avg:144.82ms | |
| step:275/3000 train_loss:4.0926 train_time:38375ms step_avg:144.81ms | |
| step:276/3000 train_loss:4.1204 train_time:38518ms step_avg:144.80ms | |
| step:277/3000 train_loss:4.2916 train_time:38661ms step_avg:144.80ms | |
| step:278/3000 train_loss:4.0969 train_time:38805ms step_avg:144.80ms | |
| step:279/3000 train_loss:4.1376 train_time:38949ms step_avg:144.79ms | |
| step:280/3000 train_loss:4.0504 train_time:39092ms step_avg:144.78ms | |
| step:281/3000 train_loss:4.2219 train_time:39235ms step_avg:144.78ms | |
| step:282/3000 train_loss:4.0204 train_time:39379ms step_avg:144.77ms | |
| step:283/3000 train_loss:4.0107 train_time:39523ms step_avg:144.77ms | |
| step:284/3000 train_loss:3.9730 train_time:39668ms step_avg:144.77ms | |
| step:285/3000 train_loss:4.1174 train_time:39811ms step_avg:144.77ms | |
| step:286/3000 train_loss:4.1262 train_time:39954ms step_avg:144.76ms | |
| step:287/3000 train_loss:4.1559 train_time:40096ms step_avg:144.75ms | |
| step:288/3000 train_loss:3.9759 train_time:40240ms step_avg:144.75ms | |
| step:289/3000 train_loss:4.0760 train_time:40384ms step_avg:144.75ms | |
| step:290/3000 train_loss:3.9257 train_time:40529ms step_avg:144.75ms | |
| step:291/3000 train_loss:3.9186 train_time:40672ms step_avg:144.74ms | |
| step:292/3000 train_loss:3.9777 train_time:40816ms step_avg:144.74ms | |
| step:293/3000 train_loss:3.9181 train_time:40958ms step_avg:144.73ms | |
| step:294/3000 train_loss:3.9722 train_time:41102ms step_avg:144.72ms | |
| step:295/3000 train_loss:4.0066 train_time:41247ms step_avg:144.73ms | |
| step:296/3000 train_loss:3.8979 train_time:41390ms step_avg:144.72ms | |
| step:297/3000 train_loss:3.9242 train_time:41534ms step_avg:144.72ms | |
| step:298/3000 train_loss:3.9216 train_time:41675ms step_avg:144.70ms | |
| step:299/3000 train_loss:4.0290 train_time:41819ms step_avg:144.70ms | |
| step:300/3000 train_loss:3.8886 train_time:41963ms step_avg:144.70ms | |
| step:301/3000 train_loss:4.0130 train_time:42108ms step_avg:144.70ms | |
| step:302/3000 train_loss:4.0354 train_time:42251ms step_avg:144.69ms | |
| step:303/3000 train_loss:3.9921 train_time:42393ms step_avg:144.69ms | |
| step:304/3000 train_loss:4.0435 train_time:42537ms step_avg:144.68ms | |
| step:305/3000 train_loss:4.0155 train_time:42680ms step_avg:144.68ms | |
| step:306/3000 train_loss:4.5136 train_time:42825ms step_avg:144.68ms | |
| step:307/3000 train_loss:3.9960 train_time:42968ms step_avg:144.67ms | |
| step:308/3000 train_loss:3.8988 train_time:43111ms step_avg:144.67ms | |
| step:309/3000 train_loss:4.0459 train_time:43254ms step_avg:144.66ms | |
| step:310/3000 train_loss:3.9280 train_time:43398ms step_avg:144.66ms | |
| step:311/3000 train_loss:4.1526 train_time:43542ms step_avg:144.66ms | |
| step:312/3000 train_loss:3.9857 train_time:43686ms step_avg:144.66ms | |
| step:313/3000 train_loss:3.9275 train_time:43830ms step_avg:144.65ms | |
| step:314/3000 train_loss:3.9981 train_time:43972ms step_avg:144.64ms | |
| step:315/3000 train_loss:4.1285 train_time:44114ms step_avg:144.64ms | |
| step:316/3000 train_loss:4.0144 train_time:44257ms step_avg:144.63ms | |
| step:317/3000 train_loss:3.8540 train_time:44401ms step_avg:144.63ms | |
| step:318/3000 train_loss:3.9383 train_time:44546ms step_avg:144.63ms | |
| step:319/3000 train_loss:3.9775 train_time:44689ms step_avg:144.62ms | |
| step:320/3000 train_loss:3.9517 train_time:44832ms step_avg:144.62ms | |
| step:321/3000 train_loss:4.0685 train_time:44974ms step_avg:144.61ms | |
| step:322/3000 train_loss:4.0180 train_time:45116ms step_avg:144.60ms | |
| step:323/3000 train_loss:3.9943 train_time:45259ms step_avg:144.60ms | |
| step:324/3000 train_loss:4.0750 train_time:45404ms step_avg:144.60ms | |
| step:325/3000 train_loss:4.0229 train_time:45549ms step_avg:144.60ms | |
| step:326/3000 train_loss:4.0807 train_time:45691ms step_avg:144.59ms | |
| step:327/3000 train_loss:3.9448 train_time:45834ms step_avg:144.59ms | |
| step:328/3000 train_loss:4.4525 train_time:45977ms step_avg:144.58ms | |
| step:329/3000 train_loss:4.1320 train_time:46120ms step_avg:144.58ms | |
| step:330/3000 train_loss:3.8645 train_time:46265ms step_avg:144.58ms | |
| step:331/3000 train_loss:3.8146 train_time:46409ms step_avg:144.58ms | |
| step:332/3000 train_loss:4.0322 train_time:46551ms step_avg:144.57ms | |
| step:333/3000 train_loss:3.9700 train_time:46693ms step_avg:144.56ms | |
| step:334/3000 train_loss:3.9394 train_time:46837ms step_avg:144.56ms | |
| step:335/3000 train_loss:3.9110 train_time:46981ms step_avg:144.56ms | |
| step:336/3000 train_loss:4.0781 train_time:47126ms step_avg:144.56ms | |
| step:337/3000 train_loss:4.0181 train_time:47269ms step_avg:144.55ms | |
| step:338/3000 train_loss:4.4934 train_time:47412ms step_avg:144.55ms | |
| step:339/3000 train_loss:4.0015 train_time:47554ms step_avg:144.54ms | |
| step:340/3000 train_loss:3.9417 train_time:47697ms step_avg:144.54ms | |
| step:341/3000 train_loss:3.9904 train_time:47841ms step_avg:144.54ms | |
| step:342/3000 train_loss:3.9146 train_time:47986ms step_avg:144.53ms | |
| step:343/3000 train_loss:3.8778 train_time:48130ms step_avg:144.53ms | |
| step:344/3000 train_loss:3.9089 train_time:48272ms step_avg:144.53ms | |
| step:345/3000 train_loss:4.0592 train_time:48414ms step_avg:144.52ms | |
| step:346/3000 train_loss:3.8991 train_time:48557ms step_avg:144.51ms | |
| step:347/3000 train_loss:3.8326 train_time:48700ms step_avg:144.51ms | |
| step:348/3000 train_loss:3.8686 train_time:48844ms step_avg:144.51ms | |
| step:349/3000 train_loss:3.9286 train_time:48989ms step_avg:144.51ms | |
| step:350/3000 train_loss:3.8880 train_time:49132ms step_avg:144.51ms | |
| step:351/3000 train_loss:3.6234 train_time:49274ms step_avg:144.50ms | |
| step:352/3000 train_loss:3.8823 train_time:49417ms step_avg:144.49ms | |
| step:353/3000 train_loss:4.2428 train_time:49560ms step_avg:144.49ms | |
| step:354/3000 train_loss:3.7234 train_time:49705ms step_avg:144.49ms | |
| step:355/3000 train_loss:3.9949 train_time:49848ms step_avg:144.49ms | |
| step:356/3000 train_loss:3.8539 train_time:49991ms step_avg:144.48ms | |
| step:357/3000 train_loss:3.9579 train_time:50134ms step_avg:144.48ms | |
| step:358/3000 train_loss:3.8585 train_time:50276ms step_avg:144.47ms | |
| step:359/3000 train_loss:3.9195 train_time:50420ms step_avg:144.47ms | |
| step:360/3000 train_loss:3.8977 train_time:50566ms step_avg:144.47ms | |
| step:361/3000 train_loss:3.4966 train_time:50710ms step_avg:144.47ms | |
| step:362/3000 train_loss:4.0851 train_time:50852ms step_avg:144.47ms | |
| step:363/3000 train_loss:3.9905 train_time:50995ms step_avg:144.46ms | |
| step:364/3000 train_loss:3.9122 train_time:51139ms step_avg:144.46ms | |
| step:365/3000 train_loss:3.8094 train_time:51283ms step_avg:144.46ms | |
| step:366/3000 train_loss:3.9867 train_time:51428ms step_avg:144.46ms | |
| step:367/3000 train_loss:3.9420 train_time:51571ms step_avg:144.46ms | |
| step:368/3000 train_loss:3.9366 train_time:51714ms step_avg:144.45ms | |
| step:369/3000 train_loss:3.9155 train_time:51857ms step_avg:144.45ms | |
| step:370/3000 train_loss:3.8167 train_time:52000ms step_avg:144.45ms | |
| step:371/3000 train_loss:3.9500 train_time:52145ms step_avg:144.45ms | |
| step:372/3000 train_loss:3.8124 train_time:52288ms step_avg:144.44ms | |
| step:373/3000 train_loss:3.7675 train_time:52432ms step_avg:144.44ms | |
| step:374/3000 train_loss:3.9882 train_time:52573ms step_avg:144.43ms | |
| step:375/3000 train_loss:3.9103 train_time:52716ms step_avg:144.43ms | |
| step:375/3000 val_loss:3.9030 train_time:52755ms step_avg:144.53ms | |
| step:376/3000 train_loss:3.8811 train_time:52865ms step_avg:144.44ms | |
| step:377/3000 train_loss:3.9444 train_time:53015ms step_avg:144.46ms | |
| step:378/3000 train_loss:3.8548 train_time:53263ms step_avg:144.74ms | |
| step:379/3000 train_loss:3.9121 train_time:53415ms step_avg:144.76ms | |
| step:380/3000 train_loss:3.9269 train_time:53692ms step_avg:145.11ms | |
| step:381/3000 train_loss:4.0203 train_time:53832ms step_avg:145.10ms | |
| step:382/3000 train_loss:3.9208 train_time:53975ms step_avg:145.09ms | |
| step:383/3000 train_loss:3.8873 train_time:54118ms step_avg:145.09ms | |
| step:384/3000 train_loss:3.8669 train_time:54260ms step_avg:145.08ms | |
| step:385/3000 train_loss:3.9450 train_time:54402ms step_avg:145.07ms | |
| step:386/3000 train_loss:3.8647 train_time:54548ms step_avg:145.07ms | |
| step:387/3000 train_loss:3.9619 train_time:54699ms step_avg:145.09ms | |
| step:388/3000 train_loss:4.1448 train_time:54842ms step_avg:145.08ms | |
| step:389/3000 train_loss:3.8674 train_time:54985ms step_avg:145.08ms | |
| step:390/3000 train_loss:3.8646 train_time:55127ms step_avg:145.07ms | |
| step:391/3000 train_loss:3.9715 train_time:55271ms step_avg:145.07ms | |
| step:392/3000 train_loss:3.8868 train_time:55415ms step_avg:145.07ms | |
| step:393/3000 train_loss:3.9934 train_time:55559ms step_avg:145.06ms | |
| step:394/3000 train_loss:3.8391 train_time:55702ms step_avg:145.06ms | |
| step:395/3000 train_loss:3.9631 train_time:55845ms step_avg:145.05ms | |
| step:396/3000 train_loss:3.7131 train_time:55989ms step_avg:145.05ms | |
| step:397/3000 train_loss:3.9204 train_time:56134ms step_avg:145.05ms | |
| step:398/3000 train_loss:3.9489 train_time:56277ms step_avg:145.04ms | |
| step:399/3000 train_loss:3.9594 train_time:56420ms step_avg:145.04ms | |
| step:400/3000 train_loss:3.8573 train_time:56563ms step_avg:145.03ms | |
| step:401/3000 train_loss:3.9092 train_time:56707ms step_avg:145.03ms | |
| step:402/3000 train_loss:3.9963 train_time:56851ms step_avg:145.03ms | |
| step:403/3000 train_loss:3.9191 train_time:56997ms step_avg:145.03ms | |
| step:404/3000 train_loss:4.0395 train_time:57141ms step_avg:145.03ms | |
| step:405/3000 train_loss:3.7696 train_time:57284ms step_avg:145.02ms | |
| step:406/3000 train_loss:3.8750 train_time:57427ms step_avg:145.02ms | |
| step:407/3000 train_loss:4.1678 train_time:57572ms step_avg:145.02ms | |
| step:408/3000 train_loss:3.8667 train_time:57717ms step_avg:145.02ms | |
| step:409/3000 train_loss:3.8966 train_time:57860ms step_avg:145.01ms | |
| step:410/3000 train_loss:3.9376 train_time:58003ms step_avg:145.01ms | |
| step:411/3000 train_loss:3.8299 train_time:58147ms step_avg:145.01ms | |
| step:412/3000 train_loss:3.8434 train_time:58293ms step_avg:145.01ms | |
| step:413/3000 train_loss:4.2694 train_time:58436ms step_avg:145.00ms | |
| step:414/3000 train_loss:3.6932 train_time:58579ms step_avg:145.00ms | |
| step:415/3000 train_loss:4.0902 train_time:58722ms step_avg:144.99ms | |
| step:416/3000 train_loss:3.8317 train_time:58865ms step_avg:144.99ms | |
| step:417/3000 train_loss:3.8483 train_time:59009ms step_avg:144.99ms | |
| step:418/3000 train_loss:4.0276 train_time:59154ms step_avg:144.99ms | |
| step:419/3000 train_loss:3.7719 train_time:59299ms step_avg:144.99ms | |
| step:420/3000 train_loss:3.8917 train_time:59441ms step_avg:144.98ms | |
| step:421/3000 train_loss:3.8013 train_time:59585ms step_avg:144.97ms | |
| step:422/3000 train_loss:3.7321 train_time:59728ms step_avg:144.97ms | |
| step:423/3000 train_loss:3.8678 train_time:59874ms step_avg:144.97ms | |
| step:424/3000 train_loss:3.9546 train_time:60019ms step_avg:144.97ms | |
| step:425/3000 train_loss:3.7093 train_time:60162ms step_avg:144.97ms | |
| step:426/3000 train_loss:3.8866 train_time:60306ms step_avg:144.97ms | |
| step:427/3000 train_loss:3.7562 train_time:60449ms step_avg:144.96ms | |
| step:428/3000 train_loss:3.9874 train_time:60595ms step_avg:144.96ms | |
| step:429/3000 train_loss:3.8971 train_time:60738ms step_avg:144.96ms | |
| step:430/3000 train_loss:3.8384 train_time:60881ms step_avg:144.96ms | |
| step:431/3000 train_loss:3.7971 train_time:61024ms step_avg:144.95ms | |
| step:432/3000 train_loss:3.7073 train_time:61168ms step_avg:144.95ms | |
| step:433/3000 train_loss:3.8482 train_time:61315ms step_avg:144.95ms | |
| step:434/3000 train_loss:3.9037 train_time:61458ms step_avg:144.95ms | |
| step:435/3000 train_loss:3.8621 train_time:61602ms step_avg:144.95ms | |
| step:436/3000 train_loss:3.8997 train_time:61745ms step_avg:144.94ms | |
| step:437/3000 train_loss:3.9176 train_time:61889ms step_avg:144.94ms | |
| step:438/3000 train_loss:3.8011 train_time:62034ms step_avg:144.94ms | |
| step:439/3000 train_loss:3.8008 train_time:62178ms step_avg:144.94ms | |
| step:440/3000 train_loss:3.7982 train_time:62321ms step_avg:144.93ms | |
| step:441/3000 train_loss:3.9762 train_time:62463ms step_avg:144.93ms | |
| step:442/3000 train_loss:3.8484 train_time:62607ms step_avg:144.92ms | |
| step:443/3000 train_loss:3.8285 train_time:62751ms step_avg:144.92ms | |
| step:444/3000 train_loss:3.7297 train_time:62897ms step_avg:144.92ms | |
| step:445/3000 train_loss:4.0087 train_time:63040ms step_avg:144.92ms | |
| step:446/3000 train_loss:3.9346 train_time:63182ms step_avg:144.91ms | |
| step:447/3000 train_loss:3.9157 train_time:63325ms step_avg:144.91ms | |
| step:448/3000 train_loss:3.8439 train_time:63470ms step_avg:144.91ms | |
| step:449/3000 train_loss:3.9432 train_time:63616ms step_avg:144.91ms | |
| step:450/3000 train_loss:3.7780 train_time:63758ms step_avg:144.91ms | |
| step:451/3000 train_loss:3.8146 train_time:63902ms step_avg:144.90ms | |
| step:452/3000 train_loss:3.6739 train_time:64044ms step_avg:144.90ms | |
| step:453/3000 train_loss:3.7989 train_time:64187ms step_avg:144.89ms | |
| step:454/3000 train_loss:3.7627 train_time:64331ms step_avg:144.89ms | |
| step:455/3000 train_loss:3.7299 train_time:64475ms step_avg:144.89ms | |
| step:456/3000 train_loss:3.9363 train_time:64619ms step_avg:144.88ms | |
| step:457/3000 train_loss:3.8218 train_time:64760ms step_avg:144.88ms | |
| step:458/3000 train_loss:3.8849 train_time:64903ms step_avg:144.87ms | |
| step:459/3000 train_loss:3.9215 train_time:65047ms step_avg:144.87ms | |
| step:460/3000 train_loss:3.7308 train_time:65190ms step_avg:144.87ms | |
| step:461/3000 train_loss:3.8895 train_time:65334ms step_avg:144.86ms | |
| step:462/3000 train_loss:3.7917 train_time:65479ms step_avg:144.86ms | |
| step:463/3000 train_loss:3.8239 train_time:65621ms step_avg:144.86ms | |
| step:464/3000 train_loss:3.8650 train_time:65763ms step_avg:144.85ms | |
| step:465/3000 train_loss:3.8098 train_time:65907ms step_avg:144.85ms | |
| step:466/3000 train_loss:3.8158 train_time:66051ms step_avg:144.85ms | |
| step:467/3000 train_loss:3.8990 train_time:66196ms step_avg:144.85ms | |
| step:468/3000 train_loss:3.9165 train_time:66339ms step_avg:144.85ms | |
| step:469/3000 train_loss:3.8943 train_time:66482ms step_avg:144.84ms | |
| step:470/3000 train_loss:3.7847 train_time:66625ms step_avg:144.84ms | |
| step:471/3000 train_loss:3.8613 train_time:66769ms step_avg:144.84ms | |
| step:472/3000 train_loss:3.9168 train_time:66914ms step_avg:144.84ms | |
| step:473/3000 train_loss:3.8711 train_time:67058ms step_avg:144.83ms | |
| step:474/3000 train_loss:3.8231 train_time:67201ms step_avg:144.83ms | |
| step:475/3000 train_loss:3.6785 train_time:67344ms step_avg:144.83ms | |
| step:476/3000 train_loss:4.1131 train_time:67486ms step_avg:144.82ms | |
| step:477/3000 train_loss:3.8644 train_time:67630ms step_avg:144.82ms | |
| step:478/3000 train_loss:3.6867 train_time:67775ms step_avg:144.82ms | |
| step:479/3000 train_loss:3.9166 train_time:67919ms step_avg:144.82ms | |
| step:480/3000 train_loss:3.8623 train_time:68061ms step_avg:144.81ms | |
| step:481/3000 train_loss:4.0140 train_time:68204ms step_avg:144.81ms | |
| step:482/3000 train_loss:3.8264 train_time:68346ms step_avg:144.80ms | |
| step:483/3000 train_loss:3.6290 train_time:68491ms step_avg:144.80ms | |
| step:484/3000 train_loss:3.9039 train_time:68634ms step_avg:144.80ms | |
| step:485/3000 train_loss:3.7655 train_time:68778ms step_avg:144.80ms | |
| step:486/3000 train_loss:3.7668 train_time:68921ms step_avg:144.79ms | |
| step:487/3000 train_loss:3.6929 train_time:69063ms step_avg:144.79ms | |
| step:488/3000 train_loss:3.7718 train_time:69207ms step_avg:144.79ms | |
| step:489/3000 train_loss:3.9767 train_time:69351ms step_avg:144.78ms | |
| step:490/3000 train_loss:3.8151 train_time:69495ms step_avg:144.78ms | |
| step:491/3000 train_loss:3.6968 train_time:69638ms step_avg:144.78ms | |
| step:492/3000 train_loss:3.7164 train_time:69781ms step_avg:144.77ms | |
| step:493/3000 train_loss:3.8310 train_time:69923ms step_avg:144.77ms | |
| step:494/3000 train_loss:3.6787 train_time:70067ms step_avg:144.77ms | |
| step:495/3000 train_loss:3.8060 train_time:70211ms step_avg:144.77ms | |
| step:496/3000 train_loss:3.7575 train_time:70355ms step_avg:144.76ms | |
| step:497/3000 train_loss:3.6304 train_time:70500ms step_avg:144.76ms | |
| step:498/3000 train_loss:3.8337 train_time:70642ms step_avg:144.76ms | |
| step:499/3000 train_loss:3.9007 train_time:70785ms step_avg:144.75ms | |
| step:500/3000 train_loss:3.9358 train_time:70928ms step_avg:144.75ms | |
| step:500/3000 val_loss:3.8112 train_time:70967ms step_avg:144.83ms | |
| step:501/3000 train_loss:3.8455 train_time:71081ms step_avg:144.77ms | |
| step:502/3000 train_loss:3.9055 train_time:71229ms step_avg:144.77ms | |
| step:503/3000 train_loss:3.8445 train_time:71372ms step_avg:144.77ms | |
| step:504/3000 train_loss:3.8849 train_time:71514ms step_avg:144.77ms | |
| step:505/3000 train_loss:3.8279 train_time:71656ms step_avg:144.76ms | |
| step:506/3000 train_loss:3.9244 train_time:71797ms step_avg:144.75ms | |
| step:507/3000 train_loss:3.7504 train_time:71940ms step_avg:144.75ms | |
| step:508/3000 train_loss:3.8677 train_time:72088ms step_avg:144.76ms | |
| step:509/3000 train_loss:3.9307 train_time:72233ms step_avg:144.76ms | |
| step:510/3000 train_loss:3.8800 train_time:72375ms step_avg:144.75ms | |
| step:511/3000 train_loss:3.6895 train_time:72517ms step_avg:144.74ms | |
| step:512/3000 train_loss:3.8914 train_time:72659ms step_avg:144.74ms | |
| step:513/3000 train_loss:3.8258 train_time:72802ms step_avg:144.74ms | |
| step:514/3000 train_loss:3.7918 train_time:72946ms step_avg:144.73ms | |
| step:515/3000 train_loss:3.8924 train_time:73091ms step_avg:144.74ms | |
| step:516/3000 train_loss:3.8435 train_time:73236ms step_avg:144.73ms | |
| step:517/3000 train_loss:4.1838 train_time:73379ms step_avg:144.73ms | |
| step:518/3000 train_loss:3.7949 train_time:73522ms step_avg:144.73ms | |
| step:519/3000 train_loss:3.8942 train_time:73666ms step_avg:144.73ms | |
| step:520/3000 train_loss:3.7867 train_time:73810ms step_avg:144.72ms | |
| step:521/3000 train_loss:3.8016 train_time:73953ms step_avg:144.72ms | |
| step:522/3000 train_loss:3.7562 train_time:74097ms step_avg:144.72ms | |
| step:523/3000 train_loss:3.7682 train_time:74240ms step_avg:144.72ms | |
| step:524/3000 train_loss:4.4103 train_time:74387ms step_avg:144.72ms | |
| step:525/3000 train_loss:3.8567 train_time:74530ms step_avg:144.72ms | |
| step:526/3000 train_loss:3.7883 train_time:74673ms step_avg:144.71ms | |
| step:527/3000 train_loss:3.8045 train_time:74816ms step_avg:144.71ms | |
| step:528/3000 train_loss:3.7615 train_time:74958ms step_avg:144.71ms | |
| step:529/3000 train_loss:3.7339 train_time:75104ms step_avg:144.71ms | |
| step:530/3000 train_loss:3.9531 train_time:75250ms step_avg:144.71ms | |
| step:531/3000 train_loss:3.7542 train_time:75393ms step_avg:144.71ms | |
| step:532/3000 train_loss:4.0245 train_time:75536ms step_avg:144.71ms | |
| step:533/3000 train_loss:3.8375 train_time:75679ms step_avg:144.70ms | |
| step:534/3000 train_loss:3.7650 train_time:75823ms step_avg:144.70ms | |
| step:535/3000 train_loss:3.7885 train_time:75967ms step_avg:144.70ms | |
| step:536/3000 train_loss:3.7232 train_time:76111ms step_avg:144.70ms | |
| step:537/3000 train_loss:3.8567 train_time:76254ms step_avg:144.69ms | |
| step:538/3000 train_loss:3.8396 train_time:76397ms step_avg:144.69ms | |
| step:539/3000 train_loss:3.7337 train_time:76540ms step_avg:144.69ms | |
| step:540/3000 train_loss:4.2396 train_time:76683ms step_avg:144.69ms | |
| step:541/3000 train_loss:3.7817 train_time:76828ms step_avg:144.69ms | |
| step:542/3000 train_loss:3.8871 train_time:76971ms step_avg:144.68ms | |
| step:543/3000 train_loss:3.7129 train_time:77115ms step_avg:144.68ms | |
| step:544/3000 train_loss:3.6883 train_time:77258ms step_avg:144.68ms | |
| step:545/3000 train_loss:3.7716 train_time:77402ms step_avg:144.68ms | |
| step:546/3000 train_loss:3.7013 train_time:77545ms step_avg:144.67ms | |
| step:547/3000 train_loss:3.7515 train_time:77689ms step_avg:144.67ms | |
| step:548/3000 train_loss:3.7533 train_time:77832ms step_avg:144.67ms | |
| step:549/3000 train_loss:3.7308 train_time:77975ms step_avg:144.67ms | |
| step:550/3000 train_loss:3.8288 train_time:78119ms step_avg:144.67ms | |
| step:551/3000 train_loss:3.7214 train_time:78263ms step_avg:144.66ms | |
| step:552/3000 train_loss:3.7384 train_time:78408ms step_avg:144.66ms | |
| step:553/3000 train_loss:4.0581 train_time:78552ms step_avg:144.66ms | |
| step:554/3000 train_loss:3.8665 train_time:78694ms step_avg:144.66ms | |
| step:555/3000 train_loss:3.8214 train_time:78837ms step_avg:144.66ms | |
| step:556/3000 train_loss:3.7573 train_time:78981ms step_avg:144.65ms | |
| step:557/3000 train_loss:3.7980 train_time:79126ms step_avg:144.66ms | |
| step:558/3000 train_loss:3.4629 train_time:79270ms step_avg:144.65ms | |
| step:559/3000 train_loss:3.7186 train_time:79414ms step_avg:144.65ms | |
| step:560/3000 train_loss:3.7653 train_time:79556ms step_avg:144.65ms | |
| step:561/3000 train_loss:3.8150 train_time:79700ms step_avg:144.65ms | |
| step:562/3000 train_loss:3.7167 train_time:79842ms step_avg:144.64ms | |
| step:563/3000 train_loss:3.6638 train_time:79988ms step_avg:144.64ms | |
| step:564/3000 train_loss:3.8696 train_time:80133ms step_avg:144.64ms | |
| step:565/3000 train_loss:3.6790 train_time:80275ms step_avg:144.64ms | |
| step:566/3000 train_loss:3.7906 train_time:80418ms step_avg:144.64ms | |
| step:567/3000 train_loss:3.7432 train_time:80669ms step_avg:144.83ms | |
| step:568/3000 train_loss:3.7105 train_time:80819ms step_avg:144.84ms | |
| step:569/3000 train_loss:3.7916 train_time:80962ms step_avg:144.83ms | |
| step:570/3000 train_loss:3.7641 train_time:81235ms step_avg:145.06ms | |
| step:571/3000 train_loss:3.8003 train_time:81374ms step_avg:145.05ms | |
| step:572/3000 train_loss:3.8741 train_time:81517ms step_avg:145.05ms | |
| step:573/3000 train_loss:3.8411 train_time:81659ms step_avg:145.04ms | |
| step:574/3000 train_loss:3.8409 train_time:81802ms step_avg:145.04ms | |
| step:575/3000 train_loss:3.8797 train_time:81944ms step_avg:145.03ms | |
| step:576/3000 train_loss:3.8359 train_time:82092ms step_avg:145.04ms | |
| step:577/3000 train_loss:3.8649 train_time:82240ms step_avg:145.04ms | |
| step:578/3000 train_loss:3.7899 train_time:82383ms step_avg:145.04ms | |
| step:579/3000 train_loss:3.7953 train_time:82527ms step_avg:145.04ms | |
| step:580/3000 train_loss:3.7723 train_time:82670ms step_avg:145.04ms | |
| step:581/3000 train_loss:3.7066 train_time:82813ms step_avg:145.03ms | |
| step:582/3000 train_loss:3.7376 train_time:82955ms step_avg:145.03ms | |
| step:583/3000 train_loss:3.9555 train_time:83099ms step_avg:145.02ms | |
| step:584/3000 train_loss:3.7309 train_time:83245ms step_avg:145.03ms | |
| step:585/3000 train_loss:3.6968 train_time:83390ms step_avg:145.03ms | |
| step:586/3000 train_loss:3.8889 train_time:83533ms step_avg:145.02ms | |
| step:587/3000 train_loss:3.6392 train_time:83675ms step_avg:145.02ms | |
| step:588/3000 train_loss:3.7801 train_time:83817ms step_avg:145.01ms | |
| step:589/3000 train_loss:3.7523 train_time:83961ms step_avg:145.01ms | |
| step:590/3000 train_loss:4.1069 train_time:84106ms step_avg:145.01ms | |
| step:591/3000 train_loss:3.8856 train_time:84252ms step_avg:145.01ms | |
| step:592/3000 train_loss:3.6353 train_time:84394ms step_avg:145.01ms | |
| step:593/3000 train_loss:3.6401 train_time:84537ms step_avg:145.00ms | |
| step:594/3000 train_loss:3.6242 train_time:84680ms step_avg:145.00ms | |
| step:595/3000 train_loss:3.6721 train_time:84823ms step_avg:145.00ms | |
| step:596/3000 train_loss:4.0399 train_time:84968ms step_avg:145.00ms | |
| step:597/3000 train_loss:3.7608 train_time:85113ms step_avg:145.00ms | |
| step:598/3000 train_loss:3.6897 train_time:85255ms step_avg:144.99ms | |
| step:599/3000 train_loss:3.7700 train_time:85398ms step_avg:144.99ms | |
| step:600/3000 train_loss:3.5837 train_time:85542ms step_avg:144.99ms | |
| step:601/3000 train_loss:3.7024 train_time:85688ms step_avg:144.99ms | |
| step:602/3000 train_loss:3.7443 train_time:85832ms step_avg:144.99ms | |
| step:603/3000 train_loss:3.7665 train_time:85975ms step_avg:144.98ms | |
| step:604/3000 train_loss:3.8840 train_time:86119ms step_avg:144.98ms | |
| step:605/3000 train_loss:3.7297 train_time:86262ms step_avg:144.98ms | |
| step:606/3000 train_loss:3.7224 train_time:86407ms step_avg:144.98ms | |
| step:607/3000 train_loss:3.6874 train_time:86551ms step_avg:144.98ms | |
| step:608/3000 train_loss:3.9344 train_time:86694ms step_avg:144.97ms | |
| step:609/3000 train_loss:3.7584 train_time:86836ms step_avg:144.97ms | |
| step:610/3000 train_loss:3.7308 train_time:86979ms step_avg:144.97ms | |
| step:611/3000 train_loss:3.8236 train_time:87124ms step_avg:144.96ms | |
| step:612/3000 train_loss:3.7257 train_time:87269ms step_avg:144.96ms | |
| step:613/3000 train_loss:3.7032 train_time:87413ms step_avg:144.96ms | |
| step:614/3000 train_loss:3.8672 train_time:87554ms step_avg:144.96ms | |
| step:615/3000 train_loss:3.8222 train_time:87699ms step_avg:144.96ms | |
| step:616/3000 train_loss:3.8073 train_time:87841ms step_avg:144.95ms | |
| step:617/3000 train_loss:3.7302 train_time:87986ms step_avg:144.95ms | |
| step:618/3000 train_loss:3.6782 train_time:88131ms step_avg:144.95ms | |
| step:619/3000 train_loss:3.7845 train_time:88274ms step_avg:144.95ms | |
| step:620/3000 train_loss:3.6815 train_time:88417ms step_avg:144.95ms | |
| step:621/3000 train_loss:3.7021 train_time:88560ms step_avg:144.94ms | |
| step:622/3000 train_loss:4.0203 train_time:88705ms step_avg:144.94ms | |
| step:623/3000 train_loss:3.6994 train_time:88849ms step_avg:144.94ms | |
| step:624/3000 train_loss:3.7165 train_time:88993ms step_avg:144.94ms | |
| step:625/3000 train_loss:3.8031 train_time:89136ms step_avg:144.94ms | |
| step:625/3000 val_loss:3.7344 train_time:89175ms step_avg:145.00ms | |
| step:626/3000 train_loss:3.8252 train_time:89291ms step_avg:144.95ms | |
| step:627/3000 train_loss:3.8516 train_time:89436ms step_avg:144.95ms | |
| step:628/3000 train_loss:3.8391 train_time:89578ms step_avg:144.95ms | |
| step:629/3000 train_loss:3.8777 train_time:89720ms step_avg:144.94ms | |
| step:630/3000 train_loss:3.6986 train_time:89862ms step_avg:144.94ms | |
| step:631/3000 train_loss:3.8324 train_time:90003ms step_avg:144.93ms | |
| step:632/3000 train_loss:3.8542 train_time:90147ms step_avg:144.93ms | |
| step:633/3000 train_loss:3.7600 train_time:90296ms step_avg:144.94ms | |
| step:634/3000 train_loss:3.6982 train_time:90441ms step_avg:144.94ms | |
| step:635/3000 train_loss:3.7968 train_time:90584ms step_avg:144.94ms | |
| step:636/3000 train_loss:4.0498 train_time:90728ms step_avg:144.93ms | |
| step:637/3000 train_loss:3.6492 train_time:90871ms step_avg:144.93ms | |
| step:638/3000 train_loss:3.4664 train_time:91015ms step_avg:144.93ms | |
| step:639/3000 train_loss:3.6875 train_time:91158ms step_avg:144.93ms | |
| step:640/3000 train_loss:3.7206 train_time:91302ms step_avg:144.92ms | |
| step:641/3000 train_loss:3.6744 train_time:91446ms step_avg:144.92ms | |
| step:642/3000 train_loss:3.6915 train_time:91590ms step_avg:144.92ms | |
| step:643/3000 train_loss:3.7240 train_time:91734ms step_avg:144.92ms | |
| step:644/3000 train_loss:3.7170 train_time:91877ms step_avg:144.92ms | |
| step:645/3000 train_loss:3.6613 train_time:92019ms step_avg:144.91ms | |
| step:646/3000 train_loss:3.8836 train_time:92162ms step_avg:144.91ms | |
| step:647/3000 train_loss:3.7799 train_time:92306ms step_avg:144.91ms | |
| step:648/3000 train_loss:3.7768 train_time:92450ms step_avg:144.91ms | |
| step:649/3000 train_loss:3.8085 train_time:92595ms step_avg:144.91ms | |
| step:650/3000 train_loss:3.8711 train_time:92738ms step_avg:144.90ms | |
| step:651/3000 train_loss:3.7290 train_time:92880ms step_avg:144.90ms | |
| step:652/3000 train_loss:3.8685 train_time:93023ms step_avg:144.90ms | |
| step:653/3000 train_loss:3.6910 train_time:93166ms step_avg:144.89ms | |
| step:654/3000 train_loss:3.7742 train_time:93310ms step_avg:144.89ms | |
| step:655/3000 train_loss:3.5365 train_time:93455ms step_avg:144.89ms | |
| step:656/3000 train_loss:3.6784 train_time:93599ms step_avg:144.89ms | |
| step:657/3000 train_loss:3.6905 train_time:93741ms step_avg:144.89ms | |
| step:658/3000 train_loss:3.6187 train_time:93884ms step_avg:144.88ms | |
| step:659/3000 train_loss:3.7965 train_time:94027ms step_avg:144.88ms | |
| step:660/3000 train_loss:3.6956 train_time:94170ms step_avg:144.88ms | |
| step:661/3000 train_loss:3.7906 train_time:94315ms step_avg:144.88ms | |
| step:662/3000 train_loss:3.8608 train_time:94459ms step_avg:144.88ms | |
| step:663/3000 train_loss:3.7760 train_time:94602ms step_avg:144.87ms | |
| step:664/3000 train_loss:3.6583 train_time:94746ms step_avg:144.87ms | |
| step:665/3000 train_loss:3.7332 train_time:94889ms step_avg:144.87ms | |
| step:666/3000 train_loss:3.6039 train_time:95035ms step_avg:144.87ms | |
| step:667/3000 train_loss:3.8868 train_time:95178ms step_avg:144.87ms | |
| step:668/3000 train_loss:3.7184 train_time:95322ms step_avg:144.87ms | |
| step:669/3000 train_loss:3.7415 train_time:95463ms step_avg:144.86ms | |
| step:670/3000 train_loss:3.5869 train_time:95608ms step_avg:144.86ms | |
| step:671/3000 train_loss:3.7108 train_time:95752ms step_avg:144.86ms | |
| step:672/3000 train_loss:3.6653 train_time:95896ms step_avg:144.86ms | |
| step:673/3000 train_loss:3.6773 train_time:96039ms step_avg:144.86ms | |
| step:674/3000 train_loss:3.9568 train_time:96182ms step_avg:144.85ms | |
| step:675/3000 train_loss:3.7415 train_time:96326ms step_avg:144.85ms | |
| step:676/3000 train_loss:3.8209 train_time:96469ms step_avg:144.85ms | |
| step:677/3000 train_loss:3.6066 train_time:96615ms step_avg:144.85ms | |
| step:678/3000 train_loss:3.7015 train_time:96758ms step_avg:144.85ms | |
| step:679/3000 train_loss:3.6571 train_time:96900ms step_avg:144.84ms | |
| step:680/3000 train_loss:3.7948 train_time:97043ms step_avg:144.84ms | |
| step:681/3000 train_loss:3.6887 train_time:97187ms step_avg:144.84ms | |
| step:682/3000 train_loss:3.7171 train_time:97331ms step_avg:144.84ms | |
| step:683/3000 train_loss:3.7993 train_time:97475ms step_avg:144.84ms | |
| step:684/3000 train_loss:3.8396 train_time:97618ms step_avg:144.83ms | |
| step:685/3000 train_loss:3.7386 train_time:97760ms step_avg:144.83ms | |
| step:686/3000 train_loss:3.8086 train_time:97903ms step_avg:144.83ms | |
| step:687/3000 train_loss:3.7442 train_time:98046ms step_avg:144.82ms | |
| step:688/3000 train_loss:3.7857 train_time:98190ms step_avg:144.82ms | |
| step:689/3000 train_loss:3.3806 train_time:98334ms step_avg:144.82ms | |
| step:690/3000 train_loss:3.5234 train_time:98477ms step_avg:144.82ms | |
| step:691/3000 train_loss:3.6604 train_time:98621ms step_avg:144.82ms | |
| step:692/3000 train_loss:3.5398 train_time:98763ms step_avg:144.81ms | |
| step:693/3000 train_loss:3.7425 train_time:98905ms step_avg:144.81ms | |
| step:694/3000 train_loss:3.7731 train_time:99048ms step_avg:144.81ms | |
| step:695/3000 train_loss:3.6642 train_time:99195ms step_avg:144.81ms | |
| step:696/3000 train_loss:3.6426 train_time:99338ms step_avg:144.81ms | |
| step:697/3000 train_loss:3.9673 train_time:99481ms step_avg:144.80ms | |
| step:698/3000 train_loss:3.7065 train_time:99624ms step_avg:144.80ms | |
| step:699/3000 train_loss:3.7554 train_time:99766ms step_avg:144.80ms | |
| step:700/3000 train_loss:3.9057 train_time:99911ms step_avg:144.80ms | |
| step:701/3000 train_loss:3.6821 train_time:100055ms step_avg:144.80ms | |
| step:702/3000 train_loss:3.6459 train_time:100199ms step_avg:144.80ms | |
| step:703/3000 train_loss:3.6249 train_time:100341ms step_avg:144.79ms | |
| step:704/3000 train_loss:3.5949 train_time:100484ms step_avg:144.79ms | |
| step:705/3000 train_loss:3.6720 train_time:100629ms step_avg:144.79ms | |
| step:706/3000 train_loss:3.6729 train_time:100773ms step_avg:144.79ms | |
| step:707/3000 train_loss:3.6835 train_time:100917ms step_avg:144.79ms | |
| step:708/3000 train_loss:3.7557 train_time:101060ms step_avg:144.78ms | |
| step:709/3000 train_loss:3.7075 train_time:101202ms step_avg:144.78ms | |
| step:710/3000 train_loss:3.6913 train_time:101346ms step_avg:144.78ms | |
| step:711/3000 train_loss:3.6510 train_time:101491ms step_avg:144.78ms | |
| step:712/3000 train_loss:3.6958 train_time:101635ms step_avg:144.78ms | |
| step:713/3000 train_loss:3.7463 train_time:101778ms step_avg:144.78ms | |
| step:714/3000 train_loss:3.7627 train_time:101921ms step_avg:144.77ms | |
| step:715/3000 train_loss:3.6766 train_time:102064ms step_avg:144.77ms | |
| step:716/3000 train_loss:3.6702 train_time:102208ms step_avg:144.77ms | |
| step:717/3000 train_loss:3.6888 train_time:102354ms step_avg:144.77ms | |
| step:718/3000 train_loss:3.8420 train_time:102498ms step_avg:144.77ms | |
| step:719/3000 train_loss:3.7016 train_time:102641ms step_avg:144.77ms | |
| step:720/3000 train_loss:3.7762 train_time:102784ms step_avg:144.77ms | |
| step:721/3000 train_loss:3.9441 train_time:102929ms step_avg:144.77ms | |
| step:722/3000 train_loss:3.5687 train_time:103072ms step_avg:144.76ms | |
| step:723/3000 train_loss:3.8378 train_time:103216ms step_avg:144.76ms | |
| step:724/3000 train_loss:3.8863 train_time:103359ms step_avg:144.76ms | |
| step:725/3000 train_loss:3.6776 train_time:103502ms step_avg:144.76ms | |
| step:726/3000 train_loss:3.7565 train_time:103645ms step_avg:144.76ms | |
| step:727/3000 train_loss:3.6478 train_time:103789ms step_avg:144.75ms | |
| step:728/3000 train_loss:3.6741 train_time:103934ms step_avg:144.76ms | |
| step:729/3000 train_loss:3.8462 train_time:104078ms step_avg:144.75ms | |
| step:730/3000 train_loss:3.7879 train_time:104220ms step_avg:144.75ms | |
| step:731/3000 train_loss:3.7796 train_time:104362ms step_avg:144.75ms | |
| step:732/3000 train_loss:3.6706 train_time:104506ms step_avg:144.75ms | |
| step:733/3000 train_loss:3.6945 train_time:104650ms step_avg:144.74ms | |
| step:734/3000 train_loss:3.9351 train_time:104795ms step_avg:144.74ms | |
| step:735/3000 train_loss:3.6632 train_time:104939ms step_avg:144.74ms | |
| step:736/3000 train_loss:3.7268 train_time:105080ms step_avg:144.74ms | |
| step:737/3000 train_loss:3.8514 train_time:105224ms step_avg:144.74ms | |
| step:738/3000 train_loss:3.7704 train_time:105367ms step_avg:144.73ms | |
| step:739/3000 train_loss:3.7091 train_time:105511ms step_avg:144.73ms | |
| step:740/3000 train_loss:3.5967 train_time:105656ms step_avg:144.73ms | |
| step:741/3000 train_loss:4.2430 train_time:105798ms step_avg:144.73ms | |
| step:742/3000 train_loss:3.5963 train_time:105941ms step_avg:144.73ms | |
| step:743/3000 train_loss:3.6826 train_time:106084ms step_avg:144.73ms | |
| step:744/3000 train_loss:3.6909 train_time:106227ms step_avg:144.72ms | |
| step:745/3000 train_loss:3.7529 train_time:106371ms step_avg:144.72ms | |
| step:746/3000 train_loss:3.7230 train_time:106516ms step_avg:144.72ms | |
| step:747/3000 train_loss:3.7092 train_time:106659ms step_avg:144.72ms | |
| step:748/3000 train_loss:3.7409 train_time:106802ms step_avg:144.72ms | |
| step:749/3000 train_loss:3.6746 train_time:106944ms step_avg:144.71ms | |
| step:750/3000 train_loss:3.6631 train_time:107088ms step_avg:144.71ms | |
| step:750/3000 val_loss:3.6796 train_time:107127ms step_avg:144.77ms | |
| step:751/3000 train_loss:3.7056 train_time:107243ms step_avg:144.73ms | |
| step:752/3000 train_loss:3.6770 train_time:107386ms step_avg:144.73ms | |
| step:753/3000 train_loss:3.7131 train_time:107528ms step_avg:144.72ms | |
| step:754/3000 train_loss:3.7244 train_time:107671ms step_avg:144.72ms | |
| step:755/3000 train_loss:3.6965 train_time:107814ms step_avg:144.72ms | |
| step:756/3000 train_loss:3.7718 train_time:108063ms step_avg:144.86ms | |
| step:757/3000 train_loss:3.5966 train_time:108216ms step_avg:144.87ms | |
| step:758/3000 train_loss:3.8299 train_time:108359ms step_avg:144.87ms | |
| step:759/3000 train_loss:3.7590 train_time:108502ms step_avg:144.86ms | |
| step:760/3000 train_loss:3.6962 train_time:108775ms step_avg:145.03ms | |
| step:761/3000 train_loss:3.8105 train_time:108917ms step_avg:145.03ms | |
| step:762/3000 train_loss:3.5113 train_time:109059ms step_avg:145.03ms | |
| step:763/3000 train_loss:3.6572 train_time:109201ms step_avg:145.02ms | |
| step:764/3000 train_loss:3.7739 train_time:109342ms step_avg:145.02ms | |
| step:765/3000 train_loss:3.4314 train_time:109484ms step_avg:145.01ms | |
| step:766/3000 train_loss:3.8490 train_time:109631ms step_avg:145.01ms | |
| step:767/3000 train_loss:3.6912 train_time:109778ms step_avg:145.02ms | |
| step:768/3000 train_loss:3.6714 train_time:109925ms step_avg:145.02ms | |
| step:769/3000 train_loss:3.6911 train_time:110066ms step_avg:145.01ms | |
| step:770/3000 train_loss:3.7043 train_time:110208ms step_avg:145.01ms | |
| step:771/3000 train_loss:3.7595 train_time:110350ms step_avg:145.01ms | |
| step:772/3000 train_loss:3.9884 train_time:110492ms step_avg:145.00ms | |
| step:773/3000 train_loss:3.5677 train_time:110638ms step_avg:145.00ms | |
| step:774/3000 train_loss:3.7500 train_time:110783ms step_avg:145.00ms | |
| step:775/3000 train_loss:3.7455 train_time:110927ms step_avg:145.00ms | |
| step:776/3000 train_loss:3.7150 train_time:111070ms step_avg:145.00ms | |
| step:777/3000 train_loss:3.5051 train_time:111212ms step_avg:145.00ms | |
| step:778/3000 train_loss:3.5079 train_time:111355ms step_avg:144.99ms | |
| step:779/3000 train_loss:3.5892 train_time:111499ms step_avg:144.99ms | |
| step:780/3000 train_loss:3.6775 train_time:111643ms step_avg:144.99ms | |
| step:781/3000 train_loss:3.7065 train_time:111786ms step_avg:144.99ms | |
| step:782/3000 train_loss:3.7733 train_time:111929ms step_avg:144.99ms | |
| step:783/3000 train_loss:3.6833 train_time:112073ms step_avg:144.98ms | |
| step:784/3000 train_loss:3.6744 train_time:112217ms step_avg:144.98ms | |
| step:785/3000 train_loss:3.6926 train_time:112359ms step_avg:144.98ms | |
| step:786/3000 train_loss:3.6646 train_time:112502ms step_avg:144.98ms | |
| step:787/3000 train_loss:3.5580 train_time:112645ms step_avg:144.97ms | |
| step:788/3000 train_loss:3.8453 train_time:112789ms step_avg:144.97ms | |
| step:789/3000 train_loss:3.6143 train_time:112932ms step_avg:144.97ms | |
| step:790/3000 train_loss:3.6660 train_time:113078ms step_avg:144.97ms | |
| step:791/3000 train_loss:3.7353 train_time:113222ms step_avg:144.97ms | |
| step:792/3000 train_loss:3.8661 train_time:113365ms step_avg:144.97ms | |
| step:793/3000 train_loss:3.8719 train_time:113506ms step_avg:144.96ms | |
| step:794/3000 train_loss:3.5863 train_time:113649ms step_avg:144.96ms | |
| step:795/3000 train_loss:3.7084 train_time:113793ms step_avg:144.96ms | |
| step:796/3000 train_loss:3.7686 train_time:113938ms step_avg:144.96ms | |
| step:797/3000 train_loss:3.8690 train_time:114082ms step_avg:144.96ms | |
| step:798/3000 train_loss:3.6223 train_time:114225ms step_avg:144.96ms | |
| step:799/3000 train_loss:3.7682 train_time:114366ms step_avg:144.95ms | |
| step:800/3000 train_loss:3.6601 train_time:114509ms step_avg:144.95ms | |
| step:801/3000 train_loss:3.6421 train_time:114652ms step_avg:144.95ms | |
| step:802/3000 train_loss:3.7353 train_time:114798ms step_avg:144.95ms | |
| step:803/3000 train_loss:3.6042 train_time:114941ms step_avg:144.94ms | |
| step:804/3000 train_loss:3.6409 train_time:115084ms step_avg:144.94ms | |
| step:805/3000 train_loss:3.7402 train_time:115227ms step_avg:144.94ms | |
| step:806/3000 train_loss:3.6328 train_time:115369ms step_avg:144.94ms | |
| step:807/3000 train_loss:3.6628 train_time:115513ms step_avg:144.93ms | |
| step:808/3000 train_loss:3.7551 train_time:115657ms step_avg:144.93ms | |
| step:809/3000 train_loss:3.6762 train_time:115802ms step_avg:144.93ms | |
| step:810/3000 train_loss:3.5961 train_time:115945ms step_avg:144.93ms | |
| step:811/3000 train_loss:3.6722 train_time:116087ms step_avg:144.93ms | |
| step:812/3000 train_loss:3.7014 train_time:116230ms step_avg:144.93ms | |
| step:813/3000 train_loss:3.7049 train_time:116374ms step_avg:144.92ms | |
| step:814/3000 train_loss:3.7360 train_time:116520ms step_avg:144.93ms | |
| step:815/3000 train_loss:3.6802 train_time:116663ms step_avg:144.92ms | |
| step:816/3000 train_loss:3.6658 train_time:116806ms step_avg:144.92ms | |
| step:817/3000 train_loss:3.7787 train_time:116948ms step_avg:144.92ms | |
| step:818/3000 train_loss:3.8765 train_time:117092ms step_avg:144.92ms | |
| step:819/3000 train_loss:3.6327 train_time:117237ms step_avg:144.92ms | |
| step:820/3000 train_loss:3.8296 train_time:117381ms step_avg:144.91ms | |
| step:821/3000 train_loss:3.6119 train_time:117525ms step_avg:144.91ms | |
| step:822/3000 train_loss:3.6595 train_time:117668ms step_avg:144.91ms | |
| step:823/3000 train_loss:3.7888 train_time:117811ms step_avg:144.91ms | |
| step:824/3000 train_loss:3.6857 train_time:117955ms step_avg:144.91ms | |
| step:825/3000 train_loss:3.6199 train_time:118099ms step_avg:144.91ms | |
| step:826/3000 train_loss:3.7235 train_time:118243ms step_avg:144.91ms | |
| step:827/3000 train_loss:3.6117 train_time:118385ms step_avg:144.90ms | |
| step:828/3000 train_loss:3.8407 train_time:118529ms step_avg:144.90ms | |
| step:829/3000 train_loss:3.7244 train_time:118673ms step_avg:144.90ms | |
| step:830/3000 train_loss:3.7705 train_time:118818ms step_avg:144.90ms | |
| step:831/3000 train_loss:3.6458 train_time:118962ms step_avg:144.90ms | |
| step:832/3000 train_loss:3.6985 train_time:119105ms step_avg:144.90ms | |
| step:833/3000 train_loss:3.6254 train_time:119247ms step_avg:144.89ms | |
| step:834/3000 train_loss:3.7567 train_time:119390ms step_avg:144.89ms | |
| step:835/3000 train_loss:3.5866 train_time:119534ms step_avg:144.89ms | |
| step:836/3000 train_loss:3.5656 train_time:119680ms step_avg:144.89ms | |
| step:837/3000 train_loss:3.8262 train_time:119823ms step_avg:144.89ms | |
| step:838/3000 train_loss:3.5204 train_time:119966ms step_avg:144.89ms | |
| step:839/3000 train_loss:3.6990 train_time:120109ms step_avg:144.88ms | |
| step:840/3000 train_loss:3.5329 train_time:120252ms step_avg:144.88ms | |
| step:841/3000 train_loss:3.5810 train_time:120398ms step_avg:144.88ms | |
| step:842/3000 train_loss:3.6663 train_time:120541ms step_avg:144.88ms | |
| step:843/3000 train_loss:3.6907 train_time:120684ms step_avg:144.88ms | |
| step:844/3000 train_loss:3.6873 train_time:120826ms step_avg:144.88ms | |
| step:845/3000 train_loss:3.5372 train_time:120969ms step_avg:144.87ms | |
| step:846/3000 train_loss:3.7726 train_time:121113ms step_avg:144.87ms | |
| step:847/3000 train_loss:3.6358 train_time:121258ms step_avg:144.87ms | |
| step:848/3000 train_loss:3.6047 train_time:121402ms step_avg:144.87ms | |
| step:849/3000 train_loss:3.7383 train_time:121545ms step_avg:144.87ms | |
| step:850/3000 train_loss:3.5972 train_time:121687ms step_avg:144.87ms | |
| step:851/3000 train_loss:3.5503 train_time:121831ms step_avg:144.86ms | |
| step:852/3000 train_loss:3.8419 train_time:121975ms step_avg:144.86ms | |
| step:853/3000 train_loss:3.5562 train_time:122120ms step_avg:144.86ms | |
| step:854/3000 train_loss:3.6747 train_time:122263ms step_avg:144.86ms | |
| step:855/3000 train_loss:3.7550 train_time:122405ms step_avg:144.86ms | |
| step:856/3000 train_loss:3.6278 train_time:122548ms step_avg:144.86ms | |
| step:857/3000 train_loss:3.6558 train_time:122691ms step_avg:144.85ms | |
| step:858/3000 train_loss:3.7091 train_time:122836ms step_avg:144.85ms | |
| step:859/3000 train_loss:3.5877 train_time:122980ms step_avg:144.85ms | |
| step:860/3000 train_loss:3.6691 train_time:123124ms step_avg:144.85ms | |
| step:861/3000 train_loss:3.6988 train_time:123266ms step_avg:144.85ms | |
| step:862/3000 train_loss:3.7342 train_time:123409ms step_avg:144.85ms | |
| step:863/3000 train_loss:3.7030 train_time:123553ms step_avg:144.84ms | |
| step:864/3000 train_loss:3.6831 train_time:123698ms step_avg:144.85ms | |
| step:865/3000 train_loss:3.4979 train_time:123842ms step_avg:144.84ms | |
| step:866/3000 train_loss:3.6991 train_time:123984ms step_avg:144.84ms | |
| step:867/3000 train_loss:3.9753 train_time:124128ms step_avg:144.84ms | |
| step:868/3000 train_loss:3.5620 train_time:124271ms step_avg:144.84ms | |
| step:869/3000 train_loss:3.7495 train_time:124414ms step_avg:144.84ms | |
| step:870/3000 train_loss:3.7206 train_time:124559ms step_avg:144.84ms | |
| step:871/3000 train_loss:3.5532 train_time:124703ms step_avg:144.83ms | |
| step:872/3000 train_loss:3.5389 train_time:124846ms step_avg:144.83ms | |
| step:873/3000 train_loss:3.7728 train_time:124989ms step_avg:144.83ms | |
| step:874/3000 train_loss:3.5590 train_time:125133ms step_avg:144.83ms | |
| step:875/3000 train_loss:3.3057 train_time:125277ms step_avg:144.83ms | |
| step:875/3000 val_loss:3.6320 train_time:125315ms step_avg:144.87ms | |
| step:876/3000 train_loss:3.7507 train_time:125434ms step_avg:144.84ms | |
| step:877/3000 train_loss:3.5542 train_time:125577ms step_avg:144.84ms | |
| step:878/3000 train_loss:3.7286 train_time:125719ms step_avg:144.84ms | |
| step:879/3000 train_loss:3.5900 train_time:125861ms step_avg:144.83ms | |
| step:880/3000 train_loss:3.7687 train_time:126004ms step_avg:144.83ms | |
| step:881/3000 train_loss:3.4271 train_time:126147ms step_avg:144.83ms | |
| step:882/3000 train_loss:3.5979 train_time:126291ms step_avg:144.83ms | |
| step:883/3000 train_loss:3.7931 train_time:126437ms step_avg:144.83ms | |
| step:884/3000 train_loss:3.9568 train_time:126580ms step_avg:144.83ms | |
| step:885/3000 train_loss:3.6744 train_time:126723ms step_avg:144.83ms | |
| step:886/3000 train_loss:3.5963 train_time:126866ms step_avg:144.82ms | |
| step:887/3000 train_loss:3.6808 train_time:127010ms step_avg:144.82ms | |
| step:888/3000 train_loss:4.1923 train_time:127154ms step_avg:144.82ms | |
| step:889/3000 train_loss:3.9484 train_time:127296ms step_avg:144.82ms | |
| step:890/3000 train_loss:3.6264 train_time:127440ms step_avg:144.82ms | |
| step:891/3000 train_loss:3.6403 train_time:127584ms step_avg:144.82ms | |
| step:892/3000 train_loss:3.4710 train_time:127729ms step_avg:144.82ms | |
| step:893/3000 train_loss:3.8232 train_time:127873ms step_avg:144.82ms | |
| step:894/3000 train_loss:3.5335 train_time:128016ms step_avg:144.81ms | |
| step:895/3000 train_loss:3.7862 train_time:128158ms step_avg:144.81ms | |
| step:896/3000 train_loss:3.8029 train_time:128301ms step_avg:144.81ms | |
| step:897/3000 train_loss:3.6020 train_time:128446ms step_avg:144.81ms | |
| step:898/3000 train_loss:3.6484 train_time:128591ms step_avg:144.81ms | |
| step:899/3000 train_loss:3.6953 train_time:128736ms step_avg:144.81ms | |
| step:900/3000 train_loss:3.5806 train_time:128878ms step_avg:144.81ms | |
| step:901/3000 train_loss:3.5234 train_time:129019ms step_avg:144.80ms | |
| step:902/3000 train_loss:3.7357 train_time:129162ms step_avg:144.80ms | |
| step:903/3000 train_loss:3.7363 train_time:129306ms step_avg:144.80ms | |
| step:904/3000 train_loss:3.6439 train_time:129452ms step_avg:144.80ms | |
| step:905/3000 train_loss:3.6118 train_time:129594ms step_avg:144.80ms | |
| step:906/3000 train_loss:3.5963 train_time:129738ms step_avg:144.80ms | |
| step:907/3000 train_loss:3.8281 train_time:129881ms step_avg:144.79ms | |
| step:908/3000 train_loss:3.6152 train_time:130024ms step_avg:144.79ms | |
| step:909/3000 train_loss:3.6636 train_time:130167ms step_avg:144.79ms | |
| step:910/3000 train_loss:3.5675 train_time:130311ms step_avg:144.79ms | |
| step:911/3000 train_loss:3.6513 train_time:130455ms step_avg:144.79ms | |
| step:912/3000 train_loss:3.7282 train_time:130597ms step_avg:144.79ms | |
| step:913/3000 train_loss:3.7193 train_time:130741ms step_avg:144.79ms | |
| step:914/3000 train_loss:3.5998 train_time:130885ms step_avg:144.78ms | |
| step:915/3000 train_loss:3.8471 train_time:131030ms step_avg:144.78ms | |
| step:916/3000 train_loss:3.6447 train_time:131174ms step_avg:144.78ms | |
| step:917/3000 train_loss:3.7381 train_time:131316ms step_avg:144.78ms | |
| step:918/3000 train_loss:3.7057 train_time:131458ms step_avg:144.78ms | |
| step:919/3000 train_loss:4.9498 train_time:131602ms step_avg:144.78ms | |
| step:920/3000 train_loss:3.6206 train_time:131745ms step_avg:144.78ms | |
| step:921/3000 train_loss:3.6786 train_time:131889ms step_avg:144.77ms | |
| step:922/3000 train_loss:3.6460 train_time:132034ms step_avg:144.77ms | |
| step:923/3000 train_loss:3.6944 train_time:132177ms step_avg:144.77ms | |
| step:924/3000 train_loss:3.7061 train_time:132319ms step_avg:144.77ms | |
| step:925/3000 train_loss:3.7930 train_time:132461ms step_avg:144.77ms | |
| step:926/3000 train_loss:3.7620 train_time:132605ms step_avg:144.77ms | |
| step:927/3000 train_loss:3.6654 train_time:132750ms step_avg:144.77ms | |
| step:928/3000 train_loss:3.6498 train_time:132893ms step_avg:144.76ms | |
| step:929/3000 train_loss:3.9005 train_time:133036ms step_avg:144.76ms | |
| step:930/3000 train_loss:3.7218 train_time:133178ms step_avg:144.76ms | |
| step:931/3000 train_loss:3.5103 train_time:133321ms step_avg:144.76ms | |
| step:932/3000 train_loss:3.5993 train_time:133465ms step_avg:144.76ms | |
| step:933/3000 train_loss:3.7764 train_time:133610ms step_avg:144.76ms | |
| step:934/3000 train_loss:3.5028 train_time:133755ms step_avg:144.76ms | |
| step:935/3000 train_loss:3.6794 train_time:133896ms step_avg:144.75ms | |
| step:936/3000 train_loss:3.5525 train_time:134040ms step_avg:144.75ms | |
| step:937/3000 train_loss:3.6291 train_time:134182ms step_avg:144.75ms | |
| step:938/3000 train_loss:3.7216 train_time:134325ms step_avg:144.75ms | |
| step:939/3000 train_loss:3.6510 train_time:134470ms step_avg:144.75ms | |
| step:940/3000 train_loss:3.7996 train_time:134615ms step_avg:144.75ms | |
| step:941/3000 train_loss:3.5860 train_time:134758ms step_avg:144.75ms | |
| step:942/3000 train_loss:3.6518 train_time:134901ms step_avg:144.74ms | |
| step:943/3000 train_loss:3.4545 train_time:135045ms step_avg:144.74ms | |
| step:944/3000 train_loss:3.8088 train_time:135189ms step_avg:144.74ms | |
| step:945/3000 train_loss:3.5182 train_time:135437ms step_avg:144.85ms | |
| step:946/3000 train_loss:3.5331 train_time:135590ms step_avg:144.86ms | |
| step:947/3000 train_loss:5.1692 train_time:135733ms step_avg:144.86ms | |
| step:948/3000 train_loss:3.7147 train_time:135875ms step_avg:144.86ms | |
| step:949/3000 train_loss:3.5997 train_time:136016ms step_avg:144.85ms | |
| step:950/3000 train_loss:3.4928 train_time:136289ms step_avg:144.99ms | |
| step:951/3000 train_loss:3.5639 train_time:136430ms step_avg:144.98ms | |
| step:952/3000 train_loss:3.5144 train_time:136573ms step_avg:144.98ms | |
| step:953/3000 train_loss:3.5869 train_time:136714ms step_avg:144.98ms | |
| step:954/3000 train_loss:3.6650 train_time:136856ms step_avg:144.97ms | |
| step:955/3000 train_loss:3.5510 train_time:136997ms step_avg:144.97ms | |
| step:956/3000 train_loss:3.5866 train_time:137142ms step_avg:144.97ms | |
| step:957/3000 train_loss:3.5539 train_time:137291ms step_avg:144.98ms | |
| step:958/3000 train_loss:3.6092 train_time:137437ms step_avg:144.98ms | |
| step:959/3000 train_loss:3.6049 train_time:137578ms step_avg:144.97ms | |
| step:960/3000 train_loss:3.6185 train_time:137719ms step_avg:144.97ms | |
| step:961/3000 train_loss:3.5057 train_time:137862ms step_avg:144.97ms | |
| step:962/3000 train_loss:3.7604 train_time:138004ms step_avg:144.96ms | |
| step:963/3000 train_loss:3.7118 train_time:138151ms step_avg:144.96ms | |
| step:964/3000 train_loss:3.6286 train_time:138295ms step_avg:144.96ms | |
| step:965/3000 train_loss:3.5554 train_time:138440ms step_avg:144.96ms | |
| step:966/3000 train_loss:3.5915 train_time:138582ms step_avg:144.96ms | |
| step:967/3000 train_loss:3.8198 train_time:138724ms step_avg:144.96ms | |
| step:968/3000 train_loss:3.6436 train_time:138870ms step_avg:144.96ms | |
| step:969/3000 train_loss:3.6208 train_time:139012ms step_avg:144.96ms | |
| step:970/3000 train_loss:3.6915 train_time:139156ms step_avg:144.95ms | |
| step:971/3000 train_loss:3.4985 train_time:139300ms step_avg:144.95ms | |
| step:972/3000 train_loss:3.6509 train_time:139445ms step_avg:144.95ms | |
| step:973/3000 train_loss:3.6011 train_time:139588ms step_avg:144.95ms | |
| step:974/3000 train_loss:3.6438 train_time:139732ms step_avg:144.95ms | |
| step:975/3000 train_loss:3.7171 train_time:139875ms step_avg:144.95ms | |
| step:976/3000 train_loss:3.5969 train_time:140018ms step_avg:144.95ms | |
| step:977/3000 train_loss:3.7946 train_time:140160ms step_avg:144.94ms | |
| step:978/3000 train_loss:3.6785 train_time:140305ms step_avg:144.94ms | |
| step:979/3000 train_loss:3.4984 train_time:140450ms step_avg:144.94ms | |
| step:980/3000 train_loss:3.7964 train_time:140593ms step_avg:144.94ms | |
| step:981/3000 train_loss:3.5295 train_time:140737ms step_avg:144.94ms | |
| step:982/3000 train_loss:3.6979 train_time:140878ms step_avg:144.94ms | |
| step:983/3000 train_loss:3.6671 train_time:141022ms step_avg:144.94ms | |
| step:984/3000 train_loss:3.6724 train_time:141166ms step_avg:144.93ms | |
| step:985/3000 train_loss:3.6145 train_time:141313ms step_avg:144.94ms | |
| step:986/3000 train_loss:3.7072 train_time:141455ms step_avg:144.93ms | |
| step:987/3000 train_loss:3.5289 train_time:141597ms step_avg:144.93ms | |
| step:988/3000 train_loss:3.5971 train_time:141742ms step_avg:144.93ms | |
| step:989/3000 train_loss:3.5961 train_time:141886ms step_avg:144.93ms | |
| step:990/3000 train_loss:3.5383 train_time:142031ms step_avg:144.93ms | |
| step:991/3000 train_loss:3.7612 train_time:142175ms step_avg:144.93ms | |
| step:992/3000 train_loss:3.5772 train_time:142318ms step_avg:144.93ms | |
| step:993/3000 train_loss:3.5465 train_time:142461ms step_avg:144.92ms | |
| step:994/3000 train_loss:3.6251 train_time:142605ms step_avg:144.92ms | |
| step:995/3000 train_loss:3.7066 train_time:142750ms step_avg:144.92ms | |
| step:996/3000 train_loss:3.6515 train_time:142893ms step_avg:144.92ms | |
| step:997/3000 train_loss:3.5624 train_time:143036ms step_avg:144.92ms | |
| step:998/3000 train_loss:3.9091 train_time:143179ms step_avg:144.92ms | |
| step:999/3000 train_loss:3.5744 train_time:143323ms step_avg:144.92ms | |
| step:1000/3000 train_loss:3.7035 train_time:143467ms step_avg:144.92ms | |
| step:1000/3000 val_loss:3.5918 train_time:143506ms step_avg:144.96ms | |
| step:1001/3000 train_loss:3.5687 train_time:143619ms step_avg:144.92ms | |
| step:1002/3000 train_loss:3.6171 train_time:143765ms step_avg:144.92ms | |
| step:1003/3000 train_loss:3.4944 train_time:143910ms step_avg:144.92ms | |
| step:1004/3000 train_loss:3.6882 train_time:144052ms step_avg:144.92ms | |
| step:1005/3000 train_loss:3.7339 train_time:144194ms step_avg:144.92ms | |
| step:1006/3000 train_loss:3.5157 train_time:144336ms step_avg:144.92ms | |
| step:1007/3000 train_loss:3.5933 train_time:144480ms step_avg:144.91ms | |
| step:1008/3000 train_loss:3.5597 train_time:144626ms step_avg:144.92ms | |
| step:1009/3000 train_loss:3.6808 train_time:144771ms step_avg:144.92ms | |
| step:1010/3000 train_loss:3.7897 train_time:144915ms step_avg:144.92ms | |
| step:1011/3000 train_loss:3.6779 train_time:145057ms step_avg:144.91ms | |
| step:1012/3000 train_loss:3.6411 train_time:145199ms step_avg:144.91ms | |
| step:1013/3000 train_loss:3.5031 train_time:145341ms step_avg:144.91ms | |
| step:1014/3000 train_loss:3.6478 train_time:145484ms step_avg:144.90ms | |
| step:1015/3000 train_loss:3.7547 train_time:145630ms step_avg:144.91ms | |
| step:1016/3000 train_loss:3.4659 train_time:145774ms step_avg:144.90ms | |
| step:1017/3000 train_loss:3.5525 train_time:145918ms step_avg:144.90ms | |
| step:1018/3000 train_loss:3.5349 train_time:146060ms step_avg:144.90ms | |
| step:1019/3000 train_loss:3.5057 train_time:146203ms step_avg:144.90ms | |
| step:1020/3000 train_loss:3.6440 train_time:146346ms step_avg:144.90ms | |
| step:1021/3000 train_loss:3.5450 train_time:146492ms step_avg:144.90ms | |
| step:1022/3000 train_loss:3.4848 train_time:146636ms step_avg:144.90ms | |
| step:1023/3000 train_loss:3.5968 train_time:146779ms step_avg:144.90ms | |
| step:1024/3000 train_loss:3.6227 train_time:146922ms step_avg:144.89ms | |
| step:1025/3000 train_loss:3.6084 train_time:147065ms step_avg:144.89ms | |
| step:1026/3000 train_loss:3.6024 train_time:147210ms step_avg:144.89ms | |
| step:1027/3000 train_loss:3.7782 train_time:147353ms step_avg:144.89ms | |
| step:1028/3000 train_loss:3.4479 train_time:147496ms step_avg:144.89ms | |
| step:1029/3000 train_loss:3.5216 train_time:147639ms step_avg:144.89ms | |
| step:1030/3000 train_loss:3.4688 train_time:147784ms step_avg:144.89ms | |
| step:1031/3000 train_loss:3.6498 train_time:147929ms step_avg:144.89ms | |
| step:1032/3000 train_loss:3.6256 train_time:148073ms step_avg:144.89ms | |
| step:1033/3000 train_loss:3.8103 train_time:148216ms step_avg:144.88ms | |
| step:1034/3000 train_loss:3.6261 train_time:148358ms step_avg:144.88ms | |
| step:1035/3000 train_loss:3.5404 train_time:148501ms step_avg:144.88ms | |
| step:1036/3000 train_loss:3.5600 train_time:148644ms step_avg:144.88ms | |
| step:1037/3000 train_loss:3.6137 train_time:148789ms step_avg:144.88ms | |
| step:1038/3000 train_loss:3.9236 train_time:148933ms step_avg:144.88ms | |
| step:1039/3000 train_loss:3.7487 train_time:149075ms step_avg:144.87ms | |
| step:1040/3000 train_loss:3.6465 train_time:149219ms step_avg:144.87ms | |
| step:1041/3000 train_loss:3.5426 train_time:149360ms step_avg:144.87ms | |
| step:1042/3000 train_loss:3.6115 train_time:149502ms step_avg:144.87ms | |
| step:1043/3000 train_loss:3.6475 train_time:149647ms step_avg:144.87ms | |
| step:1044/3000 train_loss:3.5744 train_time:149793ms step_avg:144.87ms | |
| step:1045/3000 train_loss:3.5903 train_time:149936ms step_avg:144.87ms | |
| step:1046/3000 train_loss:3.6614 train_time:150079ms step_avg:144.86ms | |
| step:1047/3000 train_loss:3.5651 train_time:150222ms step_avg:144.86ms | |
| step:1048/3000 train_loss:3.7700 train_time:150366ms step_avg:144.86ms | |
| step:1049/3000 train_loss:3.6271 train_time:150510ms step_avg:144.86ms | |
| step:1050/3000 train_loss:3.5470 train_time:150654ms step_avg:144.86ms | |
| step:1051/3000 train_loss:3.5118 train_time:150798ms step_avg:144.86ms | |
| step:1052/3000 train_loss:3.6351 train_time:150939ms step_avg:144.86ms | |
| step:1053/3000 train_loss:3.5137 train_time:151082ms step_avg:144.85ms | |
| step:1054/3000 train_loss:3.8406 train_time:151226ms step_avg:144.85ms | |
| step:1055/3000 train_loss:3.6764 train_time:151370ms step_avg:144.85ms | |
| step:1056/3000 train_loss:3.5309 train_time:151514ms step_avg:144.85ms | |
| step:1057/3000 train_loss:3.6334 train_time:151657ms step_avg:144.85ms | |
| step:1058/3000 train_loss:3.7102 train_time:151799ms step_avg:144.85ms | |
| step:1059/3000 train_loss:3.4322 train_time:151942ms step_avg:144.84ms | |
| step:1060/3000 train_loss:3.5451 train_time:152086ms step_avg:144.84ms | |
| step:1061/3000 train_loss:3.5796 train_time:152230ms step_avg:144.84ms | |
| step:1062/3000 train_loss:3.5394 train_time:152373ms step_avg:144.84ms | |
| step:1063/3000 train_loss:3.5202 train_time:152516ms step_avg:144.84ms | |
| step:1064/3000 train_loss:3.6140 train_time:152659ms step_avg:144.84ms | |
| step:1065/3000 train_loss:3.5087 train_time:152802ms step_avg:144.84ms | |
| step:1066/3000 train_loss:3.5079 train_time:152945ms step_avg:144.83ms | |
| step:1067/3000 train_loss:3.5345 train_time:153092ms step_avg:144.84ms | |
| step:1068/3000 train_loss:3.4497 train_time:153236ms step_avg:144.84ms | |
| step:1069/3000 train_loss:3.5600 train_time:153379ms step_avg:144.83ms | |
| step:1070/3000 train_loss:3.4441 train_time:153522ms step_avg:144.83ms | |
| step:1071/3000 train_loss:3.6953 train_time:153665ms step_avg:144.83ms | |
| step:1072/3000 train_loss:3.6397 train_time:153810ms step_avg:144.83ms | |
| step:1073/3000 train_loss:3.5860 train_time:153955ms step_avg:144.83ms | |
| step:1074/3000 train_loss:3.6602 train_time:154097ms step_avg:144.83ms | |
| step:1075/3000 train_loss:3.5867 train_time:154239ms step_avg:144.83ms | |
| step:1076/3000 train_loss:3.5343 train_time:154382ms step_avg:144.82ms | |
| step:1077/3000 train_loss:3.9323 train_time:154526ms step_avg:144.82ms | |
| step:1078/3000 train_loss:3.5951 train_time:154671ms step_avg:144.82ms | |
| step:1079/3000 train_loss:3.2674 train_time:154815ms step_avg:144.82ms | |
| step:1080/3000 train_loss:3.6669 train_time:154958ms step_avg:144.82ms | |
| step:1081/3000 train_loss:3.5876 train_time:155100ms step_avg:144.82ms | |
| step:1082/3000 train_loss:3.6405 train_time:155244ms step_avg:144.82ms | |
| step:1083/3000 train_loss:3.7491 train_time:155388ms step_avg:144.82ms | |
| step:1084/3000 train_loss:3.6477 train_time:155532ms step_avg:144.82ms | |
| step:1085/3000 train_loss:3.6132 train_time:155675ms step_avg:144.81ms | |
| step:1086/3000 train_loss:3.5814 train_time:155818ms step_avg:144.81ms | |
| step:1087/3000 train_loss:3.7716 train_time:155960ms step_avg:144.81ms | |
| step:1088/3000 train_loss:3.6619 train_time:156103ms step_avg:144.81ms | |
| step:1089/3000 train_loss:3.4969 train_time:156248ms step_avg:144.81ms | |
| step:1090/3000 train_loss:3.5221 train_time:156393ms step_avg:144.81ms | |
| step:1091/3000 train_loss:3.6354 train_time:156536ms step_avg:144.81ms | |
| step:1092/3000 train_loss:3.4346 train_time:156678ms step_avg:144.80ms | |
| step:1093/3000 train_loss:3.6301 train_time:156823ms step_avg:144.80ms | |
| step:1094/3000 train_loss:3.7629 train_time:156966ms step_avg:144.80ms | |
| step:1095/3000 train_loss:3.6002 train_time:157112ms step_avg:144.80ms | |
| step:1096/3000 train_loss:3.5530 train_time:157256ms step_avg:144.80ms | |
| step:1097/3000 train_loss:3.5676 train_time:157399ms step_avg:144.80ms | |
| step:1098/3000 train_loss:3.6266 train_time:157541ms step_avg:144.80ms | |
| step:1099/3000 train_loss:3.7044 train_time:157685ms step_avg:144.80ms | |
| step:1100/3000 train_loss:3.6529 train_time:157829ms step_avg:144.80ms | |
| step:1101/3000 train_loss:3.5823 train_time:157972ms step_avg:144.80ms | |
| step:1102/3000 train_loss:3.4368 train_time:158116ms step_avg:144.80ms | |
| step:1103/3000 train_loss:3.5052 train_time:158259ms step_avg:144.79ms | |
| step:1104/3000 train_loss:3.5908 train_time:158402ms step_avg:144.79ms | |
| step:1105/3000 train_loss:3.4703 train_time:158545ms step_avg:144.79ms | |
| step:1106/3000 train_loss:4.2285 train_time:158689ms step_avg:144.79ms | |
| step:1107/3000 train_loss:3.3741 train_time:158833ms step_avg:144.79ms | |
| step:1108/3000 train_loss:3.7160 train_time:158976ms step_avg:144.79ms | |
| step:1109/3000 train_loss:3.4980 train_time:159119ms step_avg:144.79ms | |
| step:1110/3000 train_loss:3.6382 train_time:159262ms step_avg:144.78ms | |
| step:1111/3000 train_loss:3.5743 train_time:159405ms step_avg:144.78ms | |
| step:1112/3000 train_loss:3.6205 train_time:159551ms step_avg:144.78ms | |
| step:1113/3000 train_loss:3.7087 train_time:159695ms step_avg:144.78ms | |
| step:1114/3000 train_loss:3.5707 train_time:159838ms step_avg:144.78ms | |
| step:1115/3000 train_loss:3.5078 train_time:159980ms step_avg:144.78ms | |
| step:1116/3000 train_loss:3.4113 train_time:160124ms step_avg:144.78ms | |
| step:1117/3000 train_loss:3.5794 train_time:160268ms step_avg:144.78ms | |
| step:1118/3000 train_loss:3.7409 train_time:160412ms step_avg:144.78ms | |
| step:1119/3000 train_loss:3.7706 train_time:160556ms step_avg:144.78ms | |
| step:1120/3000 train_loss:3.6160 train_time:160698ms step_avg:144.77ms | |
| step:1121/3000 train_loss:3.6393 train_time:160842ms step_avg:144.77ms | |
| step:1122/3000 train_loss:3.5394 train_time:160985ms step_avg:144.77ms | |
| step:1123/3000 train_loss:3.5971 train_time:161130ms step_avg:144.77ms | |
| step:1124/3000 train_loss:3.7361 train_time:161274ms step_avg:144.77ms | |
| step:1125/3000 train_loss:3.5038 train_time:161417ms step_avg:144.77ms | |
| step:1125/3000 val_loss:3.5650 train_time:161454ms step_avg:144.80ms | |
| step:1126/3000 train_loss:3.4039 train_time:161569ms step_avg:144.78ms | |
| step:1127/3000 train_loss:3.6312 train_time:161716ms step_avg:144.78ms | |
| step:1128/3000 train_loss:3.8378 train_time:161859ms step_avg:144.78ms | |
| step:1129/3000 train_loss:3.3810 train_time:162003ms step_avg:144.77ms | |
| step:1130/3000 train_loss:3.7079 train_time:162145ms step_avg:144.77ms | |
| step:1131/3000 train_loss:3.5380 train_time:162287ms step_avg:144.77ms | |
| step:1132/3000 train_loss:3.5665 train_time:162429ms step_avg:144.77ms | |
| step:1133/3000 train_loss:3.5127 train_time:162574ms step_avg:144.77ms | |
| step:1134/3000 train_loss:3.6699 train_time:162825ms step_avg:144.86ms | |
| step:1135/3000 train_loss:3.6072 train_time:162976ms step_avg:144.87ms | |
| step:1136/3000 train_loss:3.6582 train_time:163120ms step_avg:144.87ms | |
| step:1137/3000 train_loss:3.7026 train_time:163263ms step_avg:144.87ms | |
| step:1138/3000 train_loss:3.6105 train_time:163406ms step_avg:144.86ms | |
| step:1139/3000 train_loss:3.5097 train_time:163548ms step_avg:144.86ms | |
| step:1140/3000 train_loss:3.8093 train_time:163823ms step_avg:144.98ms | |
| step:1141/3000 train_loss:3.6215 train_time:163963ms step_avg:144.97ms | |
| step:1142/3000 train_loss:3.7299 train_time:164106ms step_avg:144.97ms | |
| step:1143/3000 train_loss:3.5995 train_time:164248ms step_avg:144.97ms | |
| step:1144/3000 train_loss:3.5134 train_time:164389ms step_avg:144.96ms | |
| step:1145/3000 train_loss:3.6203 train_time:164531ms step_avg:144.96ms | |
| step:1146/3000 train_loss:3.7366 train_time:164676ms step_avg:144.96ms | |
| step:1147/3000 train_loss:3.7184 train_time:164825ms step_avg:144.96ms | |
| step:1148/3000 train_loss:3.6381 train_time:164969ms step_avg:144.96ms | |
| step:1149/3000 train_loss:3.6501 train_time:165111ms step_avg:144.96ms | |
| step:1150/3000 train_loss:3.5026 train_time:165252ms step_avg:144.96ms | |
| step:1151/3000 train_loss:3.5227 train_time:165395ms step_avg:144.96ms | |
| step:1152/3000 train_loss:3.4869 train_time:165538ms step_avg:144.95ms | |
| step:1153/3000 train_loss:3.6259 train_time:165683ms step_avg:144.95ms | |
| step:1154/3000 train_loss:3.6117 train_time:165829ms step_avg:144.96ms | |
| step:1155/3000 train_loss:3.6690 train_time:165972ms step_avg:144.95ms | |
| step:1156/3000 train_loss:3.5193 train_time:166117ms step_avg:144.95ms | |
| step:1157/3000 train_loss:3.6876 train_time:166261ms step_avg:144.95ms | |
| step:1158/3000 train_loss:3.6443 train_time:166405ms step_avg:144.95ms | |
| step:1159/3000 train_loss:3.4499 train_time:166549ms step_avg:144.95ms | |
| step:1160/3000 train_loss:3.4973 train_time:166692ms step_avg:144.95ms | |
| step:1161/3000 train_loss:3.4843 train_time:166837ms step_avg:144.95ms | |
| step:1162/3000 train_loss:3.3123 train_time:166982ms step_avg:144.95ms | |
| step:1163/3000 train_loss:3.5979 train_time:167126ms step_avg:144.95ms | |
| step:1164/3000 train_loss:3.5701 train_time:167270ms step_avg:144.95ms | |
| step:1165/3000 train_loss:3.4296 train_time:167411ms step_avg:144.94ms | |
| step:1166/3000 train_loss:3.4140 train_time:167556ms step_avg:144.94ms | |
| step:1167/3000 train_loss:3.5366 train_time:167702ms step_avg:144.95ms | |
| step:1168/3000 train_loss:3.5522 train_time:167847ms step_avg:144.95ms | |
| step:1169/3000 train_loss:3.8661 train_time:167990ms step_avg:144.94ms | |
| step:1170/3000 train_loss:3.5505 train_time:168133ms step_avg:144.94ms | |
| step:1171/3000 train_loss:3.5585 train_time:168276ms step_avg:144.94ms | |
| step:1172/3000 train_loss:3.4732 train_time:168420ms step_avg:144.94ms | |
| step:1173/3000 train_loss:3.5728 train_time:168564ms step_avg:144.94ms | |
| step:1174/3000 train_loss:3.7021 train_time:168708ms step_avg:144.94ms | |
| step:1175/3000 train_loss:3.5402 train_time:168851ms step_avg:144.94ms | |
| step:1176/3000 train_loss:3.5556 train_time:168995ms step_avg:144.94ms | |
| step:1177/3000 train_loss:3.6080 train_time:169139ms step_avg:144.93ms | |
| step:1178/3000 train_loss:3.5978 train_time:169285ms step_avg:144.94ms | |
| step:1179/3000 train_loss:3.6533 train_time:169428ms step_avg:144.93ms | |
| step:1180/3000 train_loss:3.5602 train_time:169571ms step_avg:144.93ms | |
| step:1181/3000 train_loss:3.5563 train_time:169715ms step_avg:144.93ms | |
| step:1182/3000 train_loss:3.5059 train_time:169859ms step_avg:144.93ms | |
| step:1183/3000 train_loss:3.5644 train_time:170004ms step_avg:144.93ms | |
| step:1184/3000 train_loss:3.4943 train_time:170148ms step_avg:144.93ms | |
| step:1185/3000 train_loss:3.6563 train_time:170291ms step_avg:144.93ms | |
| step:1186/3000 train_loss:3.7201 train_time:170434ms step_avg:144.93ms | |
| step:1187/3000 train_loss:3.5217 train_time:170577ms step_avg:144.93ms | |
| step:1188/3000 train_loss:3.5742 train_time:170723ms step_avg:144.93ms | |
| step:1189/3000 train_loss:3.6009 train_time:170867ms step_avg:144.93ms | |
| step:1190/3000 train_loss:3.4389 train_time:171010ms step_avg:144.92ms | |
| step:1191/3000 train_loss:3.6122 train_time:171153ms step_avg:144.92ms | |
| step:1192/3000 train_loss:3.7563 train_time:171298ms step_avg:144.92ms | |
| step:1193/3000 train_loss:3.5499 train_time:171443ms step_avg:144.92ms | |
| step:1194/3000 train_loss:3.4381 train_time:171586ms step_avg:144.92ms | |
| step:1195/3000 train_loss:3.7236 train_time:171729ms step_avg:144.92ms | |
| step:1196/3000 train_loss:3.5369 train_time:171871ms step_avg:144.92ms | |
| step:1197/3000 train_loss:3.5460 train_time:172014ms step_avg:144.91ms | |
| step:1198/3000 train_loss:3.4437 train_time:172158ms step_avg:144.91ms | |
| step:1199/3000 train_loss:3.4597 train_time:172305ms step_avg:144.92ms | |
| step:1200/3000 train_loss:3.5083 train_time:172449ms step_avg:144.91ms | |
| step:1201/3000 train_loss:3.5942 train_time:172591ms step_avg:144.91ms | |
| step:1202/3000 train_loss:3.6607 train_time:172735ms step_avg:144.91ms | |
| step:1203/3000 train_loss:3.7305 train_time:172878ms step_avg:144.91ms | |
| step:1204/3000 train_loss:3.5729 train_time:173023ms step_avg:144.91ms | |
| step:1205/3000 train_loss:3.4958 train_time:173167ms step_avg:144.91ms | |
| step:1206/3000 train_loss:3.5876 train_time:173310ms step_avg:144.91ms | |
| step:1207/3000 train_loss:3.6464 train_time:173455ms step_avg:144.91ms | |
| step:1208/3000 train_loss:3.6796 train_time:173600ms step_avg:144.91ms | |
| step:1209/3000 train_loss:3.5597 train_time:173745ms step_avg:144.91ms | |
| step:1210/3000 train_loss:3.4204 train_time:173888ms step_avg:144.91ms | |
| step:1211/3000 train_loss:3.4660 train_time:174031ms step_avg:144.91ms | |
| step:1212/3000 train_loss:3.5659 train_time:174173ms step_avg:144.90ms | |
| step:1213/3000 train_loss:3.5887 train_time:174318ms step_avg:144.90ms | |
| step:1214/3000 train_loss:3.6157 train_time:174465ms step_avg:144.90ms | |
| step:1215/3000 train_loss:3.4926 train_time:174609ms step_avg:144.90ms | |
| step:1216/3000 train_loss:3.5629 train_time:174751ms step_avg:144.90ms | |
| step:1217/3000 train_loss:3.5090 train_time:174895ms step_avg:144.90ms | |
| step:1218/3000 train_loss:3.4909 train_time:175039ms step_avg:144.90ms | |
| step:1219/3000 train_loss:3.5952 train_time:175184ms step_avg:144.90ms | |
| step:1220/3000 train_loss:3.4460 train_time:175329ms step_avg:144.90ms | |
| step:1221/3000 train_loss:3.6608 train_time:175472ms step_avg:144.90ms | |
| step:1222/3000 train_loss:3.6821 train_time:175614ms step_avg:144.90ms | |
| step:1223/3000 train_loss:3.6122 train_time:175758ms step_avg:144.89ms | |
| step:1224/3000 train_loss:3.4688 train_time:175904ms step_avg:144.90ms | |
| step:1225/3000 train_loss:3.4443 train_time:176047ms step_avg:144.89ms | |
| step:1226/3000 train_loss:3.5316 train_time:176189ms step_avg:144.89ms | |
| step:1227/3000 train_loss:3.5215 train_time:176333ms step_avg:144.89ms | |
| step:1228/3000 train_loss:3.4564 train_time:176476ms step_avg:144.89ms | |
| step:1229/3000 train_loss:3.6264 train_time:176621ms step_avg:144.89ms | |
| step:1230/3000 train_loss:3.5444 train_time:176765ms step_avg:144.89ms | |
| step:1231/3000 train_loss:3.5995 train_time:176908ms step_avg:144.89ms | |
| step:1232/3000 train_loss:3.7561 train_time:177051ms step_avg:144.89ms | |
| step:1233/3000 train_loss:3.6570 train_time:177193ms step_avg:144.88ms | |
| step:1234/3000 train_loss:3.5946 train_time:177338ms step_avg:144.88ms | |
| step:1235/3000 train_loss:3.7563 train_time:177484ms step_avg:144.89ms | |
| step:1236/3000 train_loss:3.5067 train_time:177628ms step_avg:144.88ms | |
| step:1237/3000 train_loss:3.4837 train_time:177771ms step_avg:144.88ms | |
| step:1238/3000 train_loss:3.4281 train_time:177913ms step_avg:144.88ms | |
| step:1239/3000 train_loss:3.4999 train_time:178057ms step_avg:144.88ms | |
| step:1240/3000 train_loss:3.5096 train_time:178201ms step_avg:144.88ms | |
| step:1241/3000 train_loss:3.5556 train_time:178346ms step_avg:144.88ms | |
| step:1242/3000 train_loss:3.6101 train_time:178489ms step_avg:144.88ms | |
| step:1243/3000 train_loss:3.4815 train_time:178632ms step_avg:144.88ms | |
| step:1244/3000 train_loss:3.5774 train_time:178774ms step_avg:144.87ms | |
| step:1245/3000 train_loss:3.5926 train_time:178918ms step_avg:144.87ms | |
| step:1246/3000 train_loss:3.5923 train_time:179063ms step_avg:144.87ms | |
| step:1247/3000 train_loss:3.4175 train_time:179207ms step_avg:144.87ms | |
| step:1248/3000 train_loss:3.5559 train_time:179350ms step_avg:144.87ms | |
| step:1249/3000 train_loss:3.6154 train_time:179492ms step_avg:144.87ms | |
| step:1250/3000 train_loss:3.5866 train_time:179636ms step_avg:144.87ms | |
| step:1250/3000 val_loss:3.5382 train_time:179673ms step_avg:144.90ms | |
| step:1251/3000 train_loss:3.4857 train_time:179790ms step_avg:144.87ms | |
| step:1252/3000 train_loss:3.6839 train_time:179933ms step_avg:144.87ms | |
| step:1253/3000 train_loss:3.5527 train_time:180076ms step_avg:144.87ms | |
| step:1254/3000 train_loss:3.4864 train_time:180219ms step_avg:144.87ms | |
| step:1255/3000 train_loss:3.6154 train_time:180360ms step_avg:144.87ms | |
| step:1256/3000 train_loss:3.6871 train_time:180502ms step_avg:144.87ms | |
| step:1257/3000 train_loss:3.4893 train_time:180646ms step_avg:144.86ms | |
| step:1258/3000 train_loss:3.5279 train_time:180794ms step_avg:144.87ms | |
| step:1259/3000 train_loss:3.5483 train_time:180940ms step_avg:144.87ms | |
| step:1260/3000 train_loss:3.5217 train_time:181083ms step_avg:144.87ms | |
| step:1261/3000 train_loss:3.3786 train_time:181225ms step_avg:144.86ms | |
| step:1262/3000 train_loss:3.4835 train_time:181366ms step_avg:144.86ms | |
| step:1263/3000 train_loss:3.5509 train_time:181509ms step_avg:144.86ms | |
| step:1264/3000 train_loss:3.4017 train_time:181653ms step_avg:144.86ms | |
| step:1265/3000 train_loss:3.6184 train_time:181799ms step_avg:144.86ms | |
| step:1266/3000 train_loss:3.6025 train_time:181942ms step_avg:144.86ms | |
| step:1267/3000 train_loss:3.6116 train_time:182085ms step_avg:144.86ms | |
| step:1268/3000 train_loss:3.5504 train_time:182228ms step_avg:144.86ms | |
| step:1269/3000 train_loss:3.5870 train_time:182369ms step_avg:144.85ms | |
| step:1270/3000 train_loss:3.4359 train_time:182513ms step_avg:144.85ms | |
| step:1271/3000 train_loss:3.2919 train_time:182658ms step_avg:144.85ms | |
| step:1272/3000 train_loss:3.5697 train_time:182803ms step_avg:144.85ms | |
| step:1273/3000 train_loss:3.5289 train_time:182945ms step_avg:144.85ms | |
| step:1274/3000 train_loss:3.5868 train_time:183089ms step_avg:144.85ms | |
| step:1275/3000 train_loss:3.5341 train_time:183233ms step_avg:144.85ms | |
| step:1276/3000 train_loss:3.6226 train_time:183379ms step_avg:144.85ms | |
| step:1277/3000 train_loss:3.6528 train_time:183522ms step_avg:144.85ms | |
| step:1278/3000 train_loss:3.6059 train_time:183665ms step_avg:144.85ms | |
| step:1279/3000 train_loss:3.6034 train_time:183808ms step_avg:144.84ms | |
| step:1280/3000 train_loss:3.4347 train_time:183951ms step_avg:144.84ms | |
| step:1281/3000 train_loss:3.5525 train_time:184096ms step_avg:144.84ms | |
| step:1282/3000 train_loss:3.6132 train_time:184240ms step_avg:144.84ms | |
| step:1283/3000 train_loss:3.6507 train_time:184383ms step_avg:144.84ms | |
| step:1284/3000 train_loss:3.5314 train_time:184526ms step_avg:144.84ms | |
| step:1285/3000 train_loss:3.5541 train_time:184670ms step_avg:144.84ms | |
| step:1286/3000 train_loss:3.5441 train_time:184814ms step_avg:144.84ms | |
| step:1287/3000 train_loss:3.5211 train_time:184959ms step_avg:144.84ms | |
| step:1288/3000 train_loss:3.6612 train_time:185103ms step_avg:144.84ms | |
| step:1289/3000 train_loss:3.4859 train_time:185246ms step_avg:144.84ms | |
| step:1290/3000 train_loss:3.5751 train_time:185389ms step_avg:144.83ms | |
| step:1291/3000 train_loss:3.6417 train_time:185532ms step_avg:144.83ms | |
| step:1292/3000 train_loss:3.5692 train_time:185677ms step_avg:144.83ms | |
| step:1293/3000 train_loss:3.6762 train_time:185820ms step_avg:144.83ms | |
| step:1294/3000 train_loss:3.6893 train_time:185963ms step_avg:144.83ms | |
| step:1295/3000 train_loss:3.6588 train_time:186107ms step_avg:144.83ms | |
| step:1296/3000 train_loss:3.4693 train_time:186250ms step_avg:144.83ms | |
| step:1297/3000 train_loss:3.5411 train_time:186394ms step_avg:144.83ms | |
| step:1298/3000 train_loss:3.4546 train_time:186538ms step_avg:144.83ms | |
| step:1299/3000 train_loss:3.5081 train_time:186682ms step_avg:144.83ms | |
| step:1300/3000 train_loss:3.5866 train_time:186825ms step_avg:144.83ms | |
| step:1301/3000 train_loss:3.5837 train_time:186967ms step_avg:144.82ms | |
| step:1302/3000 train_loss:3.5898 train_time:187111ms step_avg:144.82ms | |
| step:1303/3000 train_loss:3.7525 train_time:187254ms step_avg:144.82ms | |
| step:1304/3000 train_loss:3.5171 train_time:187399ms step_avg:144.82ms | |
| step:1305/3000 train_loss:3.7214 train_time:187542ms step_avg:144.82ms | |
| step:1306/3000 train_loss:3.4497 train_time:187684ms step_avg:144.82ms | |
| step:1307/3000 train_loss:3.6443 train_time:187827ms step_avg:144.82ms | |
| step:1308/3000 train_loss:3.6426 train_time:187971ms step_avg:144.82ms | |
| step:1309/3000 train_loss:3.5001 train_time:188115ms step_avg:144.82ms | |
| step:1310/3000 train_loss:3.4925 train_time:188259ms step_avg:144.81ms | |
| step:1311/3000 train_loss:3.5215 train_time:188403ms step_avg:144.81ms | |
| step:1312/3000 train_loss:3.4758 train_time:188545ms step_avg:144.81ms | |
| step:1313/3000 train_loss:3.5890 train_time:188687ms step_avg:144.81ms | |
| step:1314/3000 train_loss:3.5364 train_time:188831ms step_avg:144.81ms | |
| step:1315/3000 train_loss:3.2576 train_time:188978ms step_avg:144.81ms | |
| step:1316/3000 train_loss:3.4877 train_time:189121ms step_avg:144.81ms | |
| step:1317/3000 train_loss:3.5682 train_time:189263ms step_avg:144.81ms | |
| step:1318/3000 train_loss:3.6014 train_time:189407ms step_avg:144.81ms | |
| step:1319/3000 train_loss:3.4743 train_time:189549ms step_avg:144.80ms | |
| step:1320/3000 train_loss:3.6120 train_time:189694ms step_avg:144.80ms | |
| step:1321/3000 train_loss:3.6646 train_time:189838ms step_avg:144.80ms | |
| step:1322/3000 train_loss:3.5522 train_time:189982ms step_avg:144.80ms | |
| step:1323/3000 train_loss:3.4968 train_time:190234ms step_avg:144.89ms | |
| step:1324/3000 train_loss:3.5250 train_time:190386ms step_avg:144.89ms | |
| step:1325/3000 train_loss:3.6199 train_time:190527ms step_avg:144.89ms | |
| step:1326/3000 train_loss:3.6778 train_time:190668ms step_avg:144.88ms | |
| step:1327/3000 train_loss:3.4308 train_time:190810ms step_avg:144.88ms | |
| step:1328/3000 train_loss:3.3581 train_time:190953ms step_avg:144.88ms | |
| step:1329/3000 train_loss:3.6720 train_time:191097ms step_avg:144.88ms | |
| step:1330/3000 train_loss:3.5010 train_time:191405ms step_avg:145.00ms | |
| step:1331/3000 train_loss:3.6353 train_time:191558ms step_avg:145.01ms | |
| step:1332/3000 train_loss:3.5343 train_time:191699ms step_avg:145.01ms | |
| step:1333/3000 train_loss:3.9462 train_time:191842ms step_avg:145.01ms | |
| step:1334/3000 train_loss:3.6445 train_time:191983ms step_avg:145.00ms | |
| step:1335/3000 train_loss:3.5581 train_time:192124ms step_avg:145.00ms | |
| step:1336/3000 train_loss:3.4949 train_time:192267ms step_avg:145.00ms | |
| step:1337/3000 train_loss:3.4854 train_time:192416ms step_avg:145.00ms | |
| step:1338/3000 train_loss:3.7470 train_time:192560ms step_avg:145.00ms | |
| step:1339/3000 train_loss:3.6879 train_time:192703ms step_avg:145.00ms | |
| step:1340/3000 train_loss:3.5328 train_time:192844ms step_avg:145.00ms | |
| step:1341/3000 train_loss:3.4878 train_time:192986ms step_avg:144.99ms | |
| step:1342/3000 train_loss:3.7850 train_time:193128ms step_avg:144.99ms | |
| step:1343/3000 train_loss:3.5563 train_time:193271ms step_avg:144.99ms | |
| step:1344/3000 train_loss:3.5596 train_time:193419ms step_avg:144.99ms | |
| step:1345/3000 train_loss:3.6212 train_time:193562ms step_avg:144.99ms | |
| step:1346/3000 train_loss:3.5821 train_time:193706ms step_avg:144.99ms | |
| step:1347/3000 train_loss:3.4852 train_time:193847ms step_avg:144.99ms | |
| step:1348/3000 train_loss:3.4386 train_time:193990ms step_avg:144.98ms | |
| step:1349/3000 train_loss:3.5350 train_time:194135ms step_avg:144.98ms | |
| step:1350/3000 train_loss:3.4581 train_time:194279ms step_avg:144.98ms | |
| step:1351/3000 train_loss:3.5916 train_time:194423ms step_avg:144.98ms | |
| step:1352/3000 train_loss:3.4471 train_time:194566ms step_avg:144.98ms | |
| step:1353/3000 train_loss:3.5048 train_time:194710ms step_avg:144.98ms | |
| step:1354/3000 train_loss:3.6031 train_time:194854ms step_avg:144.98ms | |
| step:1355/3000 train_loss:3.4463 train_time:194998ms step_avg:144.98ms | |
| step:1356/3000 train_loss:3.3778 train_time:195142ms step_avg:144.98ms | |
| step:1357/3000 train_loss:3.7203 train_time:195284ms step_avg:144.98ms | |
| step:1358/3000 train_loss:3.6443 train_time:195427ms step_avg:144.98ms | |
| step:1359/3000 train_loss:3.3638 train_time:195572ms step_avg:144.98ms | |
| step:1360/3000 train_loss:3.6411 train_time:195716ms step_avg:144.97ms | |
| step:1361/3000 train_loss:3.5341 train_time:195860ms step_avg:144.97ms | |
| step:1362/3000 train_loss:3.3825 train_time:196003ms step_avg:144.97ms | |
| step:1363/3000 train_loss:3.5766 train_time:196145ms step_avg:144.97ms | |
| step:1364/3000 train_loss:3.4722 train_time:196287ms step_avg:144.97ms | |
| step:1365/3000 train_loss:3.4913 train_time:196431ms step_avg:144.97ms | |
| step:1366/3000 train_loss:3.5096 train_time:196576ms step_avg:144.97ms | |
| step:1367/3000 train_loss:3.6117 train_time:196720ms step_avg:144.97ms | |
| step:1368/3000 train_loss:3.5980 train_time:196863ms step_avg:144.97ms | |
| step:1369/3000 train_loss:3.5505 train_time:197005ms step_avg:144.96ms | |
| step:1370/3000 train_loss:3.4735 train_time:197148ms step_avg:144.96ms | |
| step:1371/3000 train_loss:3.7918 train_time:197291ms step_avg:144.96ms | |
| step:1372/3000 train_loss:3.5188 train_time:197435ms step_avg:144.96ms | |
| step:1373/3000 train_loss:3.5628 train_time:197580ms step_avg:144.96ms | |
| step:1374/3000 train_loss:3.5644 train_time:197723ms step_avg:144.96ms | |
| step:1375/3000 train_loss:3.3569 train_time:197865ms step_avg:144.96ms | |
| step:1375/3000 val_loss:3.5165 train_time:197903ms step_avg:144.98ms | |
| step:1376/3000 train_loss:3.7579 train_time:198020ms step_avg:144.96ms | |
| step:1377/3000 train_loss:3.5400 train_time:198166ms step_avg:144.96ms | |
| step:1378/3000 train_loss:3.6806 train_time:198309ms step_avg:144.96ms | |
| step:1379/3000 train_loss:3.7187 train_time:198451ms step_avg:144.96ms | |
| step:1380/3000 train_loss:3.3791 train_time:198592ms step_avg:144.96ms | |
| step:1381/3000 train_loss:3.5230 train_time:198733ms step_avg:144.95ms | |
| step:1382/3000 train_loss:3.9833 train_time:198877ms step_avg:144.95ms | |
| step:1383/3000 train_loss:3.4354 train_time:199024ms step_avg:144.96ms | |
| step:1384/3000 train_loss:3.5945 train_time:199171ms step_avg:144.96ms | |
| step:1385/3000 train_loss:3.6754 train_time:199313ms step_avg:144.95ms | |
| step:1386/3000 train_loss:3.5875 train_time:199456ms step_avg:144.95ms | |
| step:1387/3000 train_loss:3.5687 train_time:199598ms step_avg:144.95ms | |
| step:1388/3000 train_loss:3.4040 train_time:199740ms step_avg:144.95ms | |
| step:1389/3000 train_loss:3.5475 train_time:199886ms step_avg:144.95ms | |
| step:1390/3000 train_loss:3.5163 train_time:200031ms step_avg:144.95ms | |
| step:1391/3000 train_loss:3.7787 train_time:200175ms step_avg:144.95ms | |
| step:1392/3000 train_loss:3.4925 train_time:200318ms step_avg:144.95ms | |
| step:1393/3000 train_loss:3.4847 train_time:200461ms step_avg:144.95ms | |
| step:1394/3000 train_loss:3.4479 train_time:200604ms step_avg:144.95ms | |
| step:1395/3000 train_loss:3.7306 train_time:200748ms step_avg:144.94ms | |
| step:1396/3000 train_loss:3.6249 train_time:200891ms step_avg:144.94ms | |
| step:1397/3000 train_loss:3.6313 train_time:201033ms step_avg:144.94ms | |
| step:1398/3000 train_loss:3.5007 train_time:201177ms step_avg:144.94ms | |
| step:1399/3000 train_loss:3.4738 train_time:201321ms step_avg:144.94ms | |
| step:1400/3000 train_loss:3.5288 train_time:201466ms step_avg:144.94ms | |
| step:1401/3000 train_loss:3.5145 train_time:201610ms step_avg:144.94ms | |
| step:1402/3000 train_loss:3.5386 train_time:201751ms step_avg:144.94ms | |
| step:1403/3000 train_loss:3.5013 train_time:201894ms step_avg:144.93ms | |
| step:1404/3000 train_loss:3.7329 train_time:202038ms step_avg:144.93ms | |
| step:1405/3000 train_loss:3.4734 train_time:202183ms step_avg:144.93ms | |
| step:1406/3000 train_loss:3.5196 train_time:202327ms step_avg:144.93ms | |
| step:1407/3000 train_loss:3.5086 train_time:202472ms step_avg:144.93ms | |
| step:1408/3000 train_loss:3.3812 train_time:202614ms step_avg:144.93ms | |
| step:1409/3000 train_loss:3.5093 train_time:202757ms step_avg:144.93ms | |
| step:1410/3000 train_loss:3.4824 train_time:202900ms step_avg:144.93ms | |
| step:1411/3000 train_loss:3.4875 train_time:203046ms step_avg:144.93ms | |
| step:1412/3000 train_loss:3.5708 train_time:203190ms step_avg:144.93ms | |
| step:1413/3000 train_loss:3.5147 train_time:203333ms step_avg:144.93ms | |
| step:1414/3000 train_loss:3.5565 train_time:203476ms step_avg:144.93ms | |
| step:1415/3000 train_loss:3.5427 train_time:203619ms step_avg:144.92ms | |
| step:1416/3000 train_loss:3.6198 train_time:203763ms step_avg:144.92ms | |
| step:1417/3000 train_loss:3.4304 train_time:203907ms step_avg:144.92ms | |
| step:1418/3000 train_loss:3.4850 train_time:204051ms step_avg:144.92ms | |
| step:1419/3000 train_loss:3.5862 train_time:204194ms step_avg:144.92ms | |
| step:1420/3000 train_loss:3.6074 train_time:204338ms step_avg:144.92ms | |
| step:1421/3000 train_loss:3.5914 train_time:204482ms step_avg:144.92ms | |
| step:1422/3000 train_loss:3.5765 train_time:204627ms step_avg:144.92ms | |
| step:1423/3000 train_loss:3.5586 train_time:204771ms step_avg:144.92ms | |
| step:1424/3000 train_loss:3.5407 train_time:204913ms step_avg:144.92ms | |
| step:1425/3000 train_loss:3.5455 train_time:205057ms step_avg:144.92ms | |
| step:1426/3000 train_loss:3.4225 train_time:205201ms step_avg:144.92ms | |
| step:1427/3000 train_loss:3.5280 train_time:205347ms step_avg:144.92ms | |
| step:1428/3000 train_loss:3.4775 train_time:205490ms step_avg:144.92ms | |
| step:1429/3000 train_loss:3.5855 train_time:205633ms step_avg:144.91ms | |
| step:1430/3000 train_loss:3.5446 train_time:205776ms step_avg:144.91ms | |
| step:1431/3000 train_loss:3.4740 train_time:205918ms step_avg:144.91ms | |
| step:1432/3000 train_loss:3.5247 train_time:206063ms step_avg:144.91ms | |
| step:1433/3000 train_loss:3.5584 train_time:206207ms step_avg:144.91ms | |
| step:1434/3000 train_loss:3.4030 train_time:206350ms step_avg:144.91ms | |
| step:1435/3000 train_loss:3.5294 train_time:206493ms step_avg:144.91ms | |
| step:1436/3000 train_loss:3.3476 train_time:206636ms step_avg:144.91ms | |
| step:1437/3000 train_loss:3.4232 train_time:206780ms step_avg:144.91ms | |
| step:1438/3000 train_loss:3.6110 train_time:206923ms step_avg:144.90ms | |
| step:1439/3000 train_loss:3.5742 train_time:207068ms step_avg:144.90ms | |
| step:1440/3000 train_loss:3.5283 train_time:207211ms step_avg:144.90ms | |
| step:1441/3000 train_loss:3.3756 train_time:207354ms step_avg:144.90ms | |
| step:1442/3000 train_loss:3.5503 train_time:207496ms step_avg:144.90ms | |
| step:1443/3000 train_loss:3.6112 train_time:207640ms step_avg:144.90ms | |
| step:1444/3000 train_loss:3.6955 train_time:207786ms step_avg:144.90ms | |
| step:1445/3000 train_loss:3.6546 train_time:207929ms step_avg:144.90ms | |
| step:1446/3000 train_loss:3.5426 train_time:208072ms step_avg:144.90ms | |
| step:1447/3000 train_loss:3.4058 train_time:208215ms step_avg:144.90ms | |
| step:1448/3000 train_loss:3.4884 train_time:208357ms step_avg:144.89ms | |
| step:1449/3000 train_loss:3.5060 train_time:208501ms step_avg:144.89ms | |
| step:1450/3000 train_loss:3.6216 train_time:208647ms step_avg:144.89ms | |
| step:1451/3000 train_loss:3.6063 train_time:208790ms step_avg:144.89ms | |
| step:1452/3000 train_loss:3.4198 train_time:208933ms step_avg:144.89ms | |
| step:1453/3000 train_loss:3.5388 train_time:209076ms step_avg:144.89ms | |
| step:1454/3000 train_loss:3.4537 train_time:209219ms step_avg:144.89ms | |
| step:1455/3000 train_loss:3.4918 train_time:209363ms step_avg:144.89ms | |
| step:1456/3000 train_loss:3.5345 train_time:209507ms step_avg:144.89ms | |
| step:1457/3000 train_loss:3.4732 train_time:209651ms step_avg:144.89ms | |
| step:1458/3000 train_loss:3.3662 train_time:209793ms step_avg:144.88ms | |
| step:1459/3000 train_loss:3.6133 train_time:209935ms step_avg:144.88ms | |
| step:1460/3000 train_loss:3.4755 train_time:210080ms step_avg:144.88ms | |
| step:1461/3000 train_loss:3.5328 train_time:210223ms step_avg:144.88ms | |
| step:1462/3000 train_loss:3.6502 train_time:210368ms step_avg:144.88ms | |
| step:1463/3000 train_loss:3.4761 train_time:210510ms step_avg:144.88ms | |
| step:1464/3000 train_loss:3.6659 train_time:210653ms step_avg:144.88ms | |
| step:1465/3000 train_loss:3.5551 train_time:210796ms step_avg:144.88ms | |
| step:1466/3000 train_loss:3.5661 train_time:210940ms step_avg:144.88ms | |
| step:1467/3000 train_loss:3.4819 train_time:211085ms step_avg:144.88ms | |
| step:1468/3000 train_loss:3.6383 train_time:211229ms step_avg:144.88ms | |
| step:1469/3000 train_loss:3.5022 train_time:211372ms step_avg:144.87ms | |
| step:1470/3000 train_loss:3.4795 train_time:211514ms step_avg:144.87ms | |
| step:1471/3000 train_loss:3.5301 train_time:211656ms step_avg:144.87ms | |
| step:1472/3000 train_loss:3.4507 train_time:211801ms step_avg:144.87ms | |
| step:1473/3000 train_loss:3.5448 train_time:211946ms step_avg:144.87ms | |
| step:1474/3000 train_loss:3.6360 train_time:212090ms step_avg:144.87ms | |
| step:1475/3000 train_loss:3.5092 train_time:212232ms step_avg:144.87ms | |
| step:1476/3000 train_loss:3.3475 train_time:212375ms step_avg:144.87ms | |
| step:1477/3000 train_loss:3.4669 train_time:212519ms step_avg:144.87ms | |
| step:1478/3000 train_loss:3.4359 train_time:212663ms step_avg:144.87ms | |
| step:1479/3000 train_loss:3.5227 train_time:212808ms step_avg:144.87ms | |
| step:1480/3000 train_loss:3.6090 train_time:212951ms step_avg:144.86ms | |
| step:1481/3000 train_loss:3.4730 train_time:213093ms step_avg:144.86ms | |
| step:1482/3000 train_loss:3.6538 train_time:213236ms step_avg:144.86ms | |
| step:1483/3000 train_loss:3.5792 train_time:213379ms step_avg:144.86ms | |
| step:1484/3000 train_loss:3.4751 train_time:213523ms step_avg:144.86ms | |
| step:1485/3000 train_loss:3.4744 train_time:213667ms step_avg:144.86ms | |
| step:1486/3000 train_loss:3.4711 train_time:213810ms step_avg:144.86ms | |
| step:1487/3000 train_loss:3.4492 train_time:213953ms step_avg:144.86ms | |
| step:1488/3000 train_loss:3.5337 train_time:214096ms step_avg:144.86ms | |
| step:1489/3000 train_loss:3.4446 train_time:214239ms step_avg:144.85ms | |
| step:1490/3000 train_loss:3.5301 train_time:214384ms step_avg:144.85ms | |
| step:1491/3000 train_loss:3.4674 train_time:214528ms step_avg:144.85ms | |
| step:1492/3000 train_loss:3.3842 train_time:214671ms step_avg:144.85ms | |
| step:1493/3000 train_loss:3.4648 train_time:214814ms step_avg:144.85ms | |
| step:1494/3000 train_loss:3.6427 train_time:214956ms step_avg:144.85ms | |
| step:1495/3000 train_loss:3.4905 train_time:215100ms step_avg:144.85ms | |
| step:1496/3000 train_loss:3.2503 train_time:215246ms step_avg:144.85ms | |
| step:1497/3000 train_loss:3.5517 train_time:215391ms step_avg:144.85ms | |
| step:1498/3000 train_loss:3.5169 train_time:215532ms step_avg:144.85ms | |
| step:1499/3000 train_loss:3.5678 train_time:215676ms step_avg:144.85ms | |
| step:1500/3000 train_loss:3.5144 train_time:215820ms step_avg:144.85ms | |
| step:1500/3000 val_loss:3.4969 train_time:215858ms step_avg:144.87ms | |
| step:1501/3000 train_loss:3.5006 train_time:215974ms step_avg:144.85ms | |
| step:1502/3000 train_loss:3.2913 train_time:216120ms step_avg:144.85ms | |
| step:1503/3000 train_loss:3.5730 train_time:216263ms step_avg:144.85ms | |
| step:1504/3000 train_loss:3.4447 train_time:216405ms step_avg:144.85ms | |
| step:1505/3000 train_loss:3.4571 train_time:216546ms step_avg:144.85ms | |
| step:1506/3000 train_loss:3.4138 train_time:216688ms step_avg:144.84ms | |
| step:1507/3000 train_loss:3.4971 train_time:216831ms step_avg:144.84ms | |
| step:1508/3000 train_loss:3.4135 train_time:216979ms step_avg:144.85ms | |
| step:1509/3000 train_loss:3.7242 train_time:217125ms step_avg:144.85ms | |
| step:1510/3000 train_loss:3.4723 train_time:217268ms step_avg:144.85ms | |
| step:1511/3000 train_loss:3.4720 train_time:217410ms step_avg:144.84ms | |
| step:1512/3000 train_loss:3.6068 train_time:217663ms step_avg:144.92ms | |
| step:1513/3000 train_loss:3.6299 train_time:217815ms step_avg:144.92ms | |
| step:1514/3000 train_loss:3.4857 train_time:217960ms step_avg:144.92ms | |
| step:1515/3000 train_loss:3.3147 train_time:218102ms step_avg:144.92ms | |
| step:1516/3000 train_loss:3.4475 train_time:218244ms step_avg:144.92ms | |
| step:1517/3000 train_loss:3.4503 train_time:218385ms step_avg:144.91ms | |
| step:1518/3000 train_loss:3.5272 train_time:218529ms step_avg:144.91ms | |
| step:1519/3000 train_loss:3.4143 train_time:218677ms step_avg:144.92ms | |
| step:1520/3000 train_loss:3.7109 train_time:218950ms step_avg:145.00ms | |
| step:1521/3000 train_loss:3.3666 train_time:219091ms step_avg:145.00ms | |
| step:1522/3000 train_loss:3.4275 train_time:219232ms step_avg:144.99ms | |
| step:1523/3000 train_loss:3.5803 train_time:219375ms step_avg:144.99ms | |
| step:1524/3000 train_loss:3.4353 train_time:219519ms step_avg:144.99ms | |
| step:1525/3000 train_loss:3.5387 train_time:219662ms step_avg:144.99ms | |
| step:1526/3000 train_loss:3.5260 train_time:219807ms step_avg:144.99ms | |
| step:1527/3000 train_loss:3.4915 train_time:219957ms step_avg:144.99ms | |
| step:1528/3000 train_loss:3.4908 train_time:220102ms step_avg:145.00ms | |
| step:1529/3000 train_loss:3.6447 train_time:220244ms step_avg:144.99ms | |
| step:1530/3000 train_loss:3.6104 train_time:220387ms step_avg:144.99ms | |
| step:1531/3000 train_loss:3.4502 train_time:220529ms step_avg:144.99ms | |
| step:1532/3000 train_loss:3.3988 train_time:220671ms step_avg:144.99ms | |
| step:1533/3000 train_loss:3.5665 train_time:220816ms step_avg:144.99ms | |
| step:1534/3000 train_loss:3.5106 train_time:220963ms step_avg:144.99ms | |
| step:1535/3000 train_loss:3.5014 train_time:221107ms step_avg:144.99ms | |
| step:1536/3000 train_loss:3.4960 train_time:221249ms step_avg:144.99ms | |
| step:1537/3000 train_loss:3.4381 train_time:221392ms step_avg:144.99ms | |
| step:1538/3000 train_loss:3.4903 train_time:221536ms step_avg:144.98ms | |
| step:1539/3000 train_loss:3.6578 train_time:221680ms step_avg:144.98ms | |
| step:1540/3000 train_loss:3.6034 train_time:221824ms step_avg:144.98ms | |
| step:1541/3000 train_loss:3.5084 train_time:221968ms step_avg:144.98ms | |
| step:1542/3000 train_loss:3.4558 train_time:222111ms step_avg:144.98ms | |
| step:1543/3000 train_loss:3.4651 train_time:222256ms step_avg:144.98ms | |
| step:1544/3000 train_loss:3.4082 train_time:222402ms step_avg:144.98ms | |
| step:1545/3000 train_loss:3.5111 train_time:222544ms step_avg:144.98ms | |
| step:1546/3000 train_loss:3.4771 train_time:222687ms step_avg:144.98ms | |
| step:1547/3000 train_loss:3.4585 train_time:222830ms step_avg:144.98ms | |
| step:1548/3000 train_loss:3.4136 train_time:222975ms step_avg:144.98ms | |
| step:1549/3000 train_loss:3.4566 train_time:223120ms step_avg:144.98ms | |
| step:1550/3000 train_loss:3.5671 train_time:223264ms step_avg:144.98ms | |
| step:1551/3000 train_loss:3.4919 train_time:223408ms step_avg:144.98ms | |
| step:1552/3000 train_loss:3.4261 train_time:223550ms step_avg:144.97ms | |
| step:1553/3000 train_loss:3.4279 train_time:223695ms step_avg:144.97ms | |
| step:1554/3000 train_loss:3.4233 train_time:223841ms step_avg:144.97ms | |
| step:1555/3000 train_loss:3.5505 train_time:223985ms step_avg:144.97ms | |
| step:1556/3000 train_loss:3.5526 train_time:224127ms step_avg:144.97ms | |
| step:1557/3000 train_loss:3.4857 train_time:224271ms step_avg:144.97ms | |
| step:1558/3000 train_loss:3.5398 train_time:224415ms step_avg:144.97ms | |
| step:1559/3000 train_loss:3.4629 train_time:224560ms step_avg:144.97ms | |
| step:1560/3000 train_loss:3.3721 train_time:224703ms step_avg:144.97ms | |
| step:1561/3000 train_loss:3.6267 train_time:224846ms step_avg:144.97ms | |
| step:1562/3000 train_loss:3.4342 train_time:224989ms step_avg:144.97ms | |
| step:1563/3000 train_loss:3.4226 train_time:225132ms step_avg:144.97ms | |
| step:1564/3000 train_loss:3.5414 train_time:225276ms step_avg:144.97ms | |
| step:1565/3000 train_loss:3.3723 train_time:225421ms step_avg:144.97ms | |
| step:1566/3000 train_loss:3.4183 train_time:225565ms step_avg:144.96ms | |
| step:1567/3000 train_loss:3.5816 train_time:225708ms step_avg:144.96ms | |
| step:1568/3000 train_loss:3.4516 train_time:225851ms step_avg:144.96ms | |
| step:1569/3000 train_loss:3.4387 train_time:225995ms step_avg:144.96ms | |
| step:1570/3000 train_loss:3.5368 train_time:226141ms step_avg:144.96ms | |
| step:1571/3000 train_loss:3.5437 train_time:226285ms step_avg:144.96ms | |
| step:1572/3000 train_loss:3.3723 train_time:226427ms step_avg:144.96ms | |
| step:1573/3000 train_loss:3.4039 train_time:226570ms step_avg:144.96ms | |
| step:1574/3000 train_loss:3.5287 train_time:226714ms step_avg:144.96ms | |
| step:1575/3000 train_loss:3.3972 train_time:226859ms step_avg:144.96ms | |
| step:1576/3000 train_loss:3.5426 train_time:227004ms step_avg:144.96ms | |
| step:1577/3000 train_loss:3.4418 train_time:227147ms step_avg:144.96ms | |
| step:1578/3000 train_loss:3.4997 train_time:227290ms step_avg:144.96ms | |
| step:1579/3000 train_loss:3.4816 train_time:227433ms step_avg:144.95ms | |
| step:1580/3000 train_loss:3.4407 train_time:227578ms step_avg:144.95ms | |
| step:1581/3000 train_loss:3.4147 train_time:227722ms step_avg:144.95ms | |
| step:1582/3000 train_loss:3.6642 train_time:227865ms step_avg:144.95ms | |
| step:1583/3000 train_loss:3.4275 train_time:228008ms step_avg:144.95ms | |
| step:1584/3000 train_loss:3.5886 train_time:228152ms step_avg:144.95ms | |
| step:1585/3000 train_loss:3.4160 train_time:228296ms step_avg:144.95ms | |
| step:1586/3000 train_loss:3.5741 train_time:228440ms step_avg:144.95ms | |
| step:1587/3000 train_loss:3.3649 train_time:228584ms step_avg:144.95ms | |
| step:1588/3000 train_loss:3.5584 train_time:228727ms step_avg:144.95ms | |
| step:1589/3000 train_loss:3.4645 train_time:228869ms step_avg:144.95ms | |
| step:1590/3000 train_loss:3.6214 train_time:229014ms step_avg:144.95ms | |
| step:1591/3000 train_loss:3.4397 train_time:229159ms step_avg:144.95ms | |
| step:1592/3000 train_loss:3.4554 train_time:229304ms step_avg:144.95ms | |
| step:1593/3000 train_loss:3.5221 train_time:229446ms step_avg:144.94ms | |
| step:1594/3000 train_loss:3.4995 train_time:229589ms step_avg:144.94ms | |
| step:1595/3000 train_loss:3.4713 train_time:229732ms step_avg:144.94ms | |
| step:1596/3000 train_loss:3.6162 train_time:229876ms step_avg:144.94ms | |
| step:1597/3000 train_loss:3.3410 train_time:230022ms step_avg:144.94ms | |
| step:1598/3000 train_loss:3.5076 train_time:230165ms step_avg:144.94ms | |
| step:1599/3000 train_loss:3.5494 train_time:230308ms step_avg:144.94ms | |
| step:1600/3000 train_loss:3.5999 train_time:230453ms step_avg:144.94ms | |
| step:1601/3000 train_loss:3.4478 train_time:230596ms step_avg:144.94ms | |
| step:1602/3000 train_loss:3.7441 train_time:230741ms step_avg:144.94ms | |
| step:1603/3000 train_loss:3.6296 train_time:230884ms step_avg:144.94ms | |
| step:1604/3000 train_loss:3.4166 train_time:231027ms step_avg:144.94ms | |
| step:1605/3000 train_loss:3.4499 train_time:231170ms step_avg:144.93ms | |
| step:1606/3000 train_loss:3.3351 train_time:231312ms step_avg:144.93ms | |
| step:1607/3000 train_loss:3.6546 train_time:231458ms step_avg:144.93ms | |
| step:1608/3000 train_loss:3.4648 train_time:231604ms step_avg:144.93ms | |
| step:1609/3000 train_loss:3.4780 train_time:231745ms step_avg:144.93ms | |
| step:1610/3000 train_loss:3.4307 train_time:231888ms step_avg:144.93ms | |
| step:1611/3000 train_loss:4.0332 train_time:232031ms step_avg:144.93ms | |
| step:1612/3000 train_loss:3.6665 train_time:232177ms step_avg:144.93ms | |
| step:1613/3000 train_loss:3.5704 train_time:232321ms step_avg:144.93ms | |
| step:1614/3000 train_loss:3.4430 train_time:232466ms step_avg:144.93ms | |
| step:1615/3000 train_loss:3.4864 train_time:232610ms step_avg:144.93ms | |
| step:1616/3000 train_loss:3.4792 train_time:232753ms step_avg:144.93ms | |
| step:1617/3000 train_loss:3.4458 train_time:232897ms step_avg:144.93ms | |
| step:1618/3000 train_loss:3.5284 train_time:233042ms step_avg:144.93ms | |
| step:1619/3000 train_loss:3.4673 train_time:233186ms step_avg:144.93ms | |
| step:1620/3000 train_loss:3.3674 train_time:233328ms step_avg:144.92ms | |
| step:1621/3000 train_loss:3.6301 train_time:233472ms step_avg:144.92ms | |
| step:1622/3000 train_loss:3.5491 train_time:233616ms step_avg:144.92ms | |
| step:1623/3000 train_loss:3.3353 train_time:233762ms step_avg:144.92ms | |
| step:1624/3000 train_loss:3.4506 train_time:233905ms step_avg:144.92ms | |
| step:1625/3000 train_loss:3.4037 train_time:234048ms step_avg:144.92ms | |
| step:1625/3000 val_loss:3.4792 train_time:234086ms step_avg:144.94ms | |
| step:1626/3000 train_loss:3.4899 train_time:234200ms step_avg:144.93ms | |
| step:1627/3000 train_loss:3.4462 train_time:234346ms step_avg:144.93ms | |
| step:1628/3000 train_loss:3.4151 train_time:234490ms step_avg:144.93ms | |
| step:1629/3000 train_loss:3.5236 train_time:234633ms step_avg:144.92ms | |
| step:1630/3000 train_loss:3.4200 train_time:234775ms step_avg:144.92ms | |
| step:1631/3000 train_loss:3.4797 train_time:234918ms step_avg:144.92ms | |
| step:1632/3000 train_loss:3.3564 train_time:235060ms step_avg:144.92ms | |
| step:1633/3000 train_loss:3.3276 train_time:235205ms step_avg:144.92ms | |
| step:1634/3000 train_loss:3.4924 train_time:235351ms step_avg:144.92ms | |
| step:1635/3000 train_loss:3.4796 train_time:235495ms step_avg:144.92ms | |
| step:1636/3000 train_loss:3.4180 train_time:235638ms step_avg:144.92ms | |
| step:1637/3000 train_loss:3.4985 train_time:235780ms step_avg:144.92ms | |
| step:1638/3000 train_loss:3.5489 train_time:235921ms step_avg:144.91ms | |
| step:1639/3000 train_loss:3.5886 train_time:236064ms step_avg:144.91ms | |
| step:1640/3000 train_loss:3.7475 train_time:236212ms step_avg:144.92ms | |
| step:1641/3000 train_loss:3.5561 train_time:236356ms step_avg:144.91ms | |
| step:1642/3000 train_loss:3.4828 train_time:236501ms step_avg:144.91ms | |
| step:1643/3000 train_loss:3.5699 train_time:236644ms step_avg:144.91ms | |
| step:1644/3000 train_loss:3.4680 train_time:236788ms step_avg:144.91ms | |
| step:1645/3000 train_loss:3.4872 train_time:236932ms step_avg:144.91ms | |
| step:1646/3000 train_loss:3.4786 train_time:237075ms step_avg:144.91ms | |
| step:1647/3000 train_loss:3.2539 train_time:237218ms step_avg:144.91ms | |
| step:1648/3000 train_loss:3.5190 train_time:237361ms step_avg:144.91ms | |
| step:1649/3000 train_loss:3.3881 train_time:237506ms step_avg:144.91ms | |
| step:1650/3000 train_loss:3.4646 train_time:237651ms step_avg:144.91ms | |
| step:1651/3000 train_loss:3.4362 train_time:237795ms step_avg:144.91ms | |
| step:1652/3000 train_loss:3.5111 train_time:237938ms step_avg:144.91ms | |
| step:1653/3000 train_loss:3.4343 train_time:238080ms step_avg:144.91ms | |
| step:1654/3000 train_loss:3.5619 train_time:238224ms step_avg:144.90ms | |
| step:1655/3000 train_loss:3.5498 train_time:238369ms step_avg:144.91ms | |
| step:1656/3000 train_loss:3.3743 train_time:238515ms step_avg:144.91ms | |
| step:1657/3000 train_loss:3.5301 train_time:238658ms step_avg:144.90ms | |
| step:1658/3000 train_loss:3.4250 train_time:238802ms step_avg:144.90ms | |
| step:1659/3000 train_loss:3.4035 train_time:238946ms step_avg:144.90ms | |
| step:1660/3000 train_loss:3.4954 train_time:239089ms step_avg:144.90ms | |
| step:1661/3000 train_loss:3.5185 train_time:239233ms step_avg:144.90ms | |
| step:1662/3000 train_loss:3.4335 train_time:239378ms step_avg:144.90ms | |
| step:1663/3000 train_loss:3.5286 train_time:239521ms step_avg:144.90ms | |
| step:1664/3000 train_loss:3.5360 train_time:239666ms step_avg:144.90ms | |
| step:1665/3000 train_loss:3.5609 train_time:239811ms step_avg:144.90ms | |
| step:1666/3000 train_loss:3.5350 train_time:239955ms step_avg:144.90ms | |
| step:1667/3000 train_loss:3.6875 train_time:240099ms step_avg:144.90ms | |
| step:1668/3000 train_loss:3.3856 train_time:240241ms step_avg:144.90ms | |
| step:1669/3000 train_loss:3.4670 train_time:240385ms step_avg:144.90ms | |
| step:1670/3000 train_loss:3.3913 train_time:240529ms step_avg:144.90ms | |
| step:1671/3000 train_loss:3.3963 train_time:240674ms step_avg:144.90ms | |
| step:1672/3000 train_loss:3.5519 train_time:240818ms step_avg:144.90ms | |
| step:1673/3000 train_loss:3.7340 train_time:240960ms step_avg:144.89ms | |
| step:1674/3000 train_loss:3.4465 train_time:241103ms step_avg:144.89ms | |
| step:1675/3000 train_loss:3.4388 train_time:241247ms step_avg:144.89ms | |
| step:1676/3000 train_loss:3.3192 train_time:241393ms step_avg:144.89ms | |
| step:1677/3000 train_loss:3.5337 train_time:241536ms step_avg:144.89ms | |
| step:1678/3000 train_loss:3.4421 train_time:241679ms step_avg:144.89ms | |
| step:1679/3000 train_loss:3.4741 train_time:241822ms step_avg:144.89ms | |
| step:1680/3000 train_loss:3.4585 train_time:241965ms step_avg:144.89ms | |
| step:1681/3000 train_loss:3.2664 train_time:242110ms step_avg:144.89ms | |
| step:1682/3000 train_loss:3.4653 train_time:242255ms step_avg:144.89ms | |
| step:1683/3000 train_loss:3.4784 train_time:242398ms step_avg:144.89ms | |
| step:1684/3000 train_loss:3.5210 train_time:242541ms step_avg:144.89ms | |
| step:1685/3000 train_loss:3.5175 train_time:242684ms step_avg:144.89ms | |
| step:1686/3000 train_loss:3.4252 train_time:242828ms step_avg:144.89ms | |
| step:1687/3000 train_loss:3.5382 train_time:242974ms step_avg:144.89ms | |
| step:1688/3000 train_loss:3.4241 train_time:243118ms step_avg:144.89ms | |
| step:1689/3000 train_loss:3.4991 train_time:243260ms step_avg:144.88ms | |
| step:1690/3000 train_loss:3.4168 train_time:243404ms step_avg:144.88ms | |
| step:1691/3000 train_loss:3.3111 train_time:243548ms step_avg:144.88ms | |
| step:1692/3000 train_loss:3.4665 train_time:243693ms step_avg:144.88ms | |
| step:1693/3000 train_loss:3.4632 train_time:243837ms step_avg:144.88ms | |
| step:1694/3000 train_loss:3.3737 train_time:243980ms step_avg:144.88ms | |
| step:1695/3000 train_loss:3.8170 train_time:244124ms step_avg:144.88ms | |
| step:1696/3000 train_loss:3.5378 train_time:244268ms step_avg:144.88ms | |
| step:1697/3000 train_loss:3.5176 train_time:244413ms step_avg:144.88ms | |
| step:1698/3000 train_loss:3.4205 train_time:244556ms step_avg:144.88ms | |
| step:1699/3000 train_loss:3.3443 train_time:244700ms step_avg:144.88ms | |
| step:1700/3000 train_loss:3.4256 train_time:244842ms step_avg:144.88ms | |
| step:1701/3000 train_loss:3.4217 train_time:245100ms step_avg:144.94ms | |
| step:1702/3000 train_loss:3.4971 train_time:245252ms step_avg:144.95ms | |
| step:1703/3000 train_loss:3.4196 train_time:245395ms step_avg:144.95ms | |
| step:1704/3000 train_loss:3.6237 train_time:245537ms step_avg:144.95ms | |
| step:1705/3000 train_loss:3.3812 train_time:245678ms step_avg:144.94ms | |
| step:1706/3000 train_loss:3.6077 train_time:245819ms step_avg:144.94ms | |
| step:1707/3000 train_loss:3.4465 train_time:245963ms step_avg:144.94ms | |
| step:1708/3000 train_loss:3.2426 train_time:246115ms step_avg:144.94ms | |
| step:1709/3000 train_loss:3.5687 train_time:246259ms step_avg:144.94ms | |
| step:1710/3000 train_loss:3.4706 train_time:246532ms step_avg:145.02ms | |
| step:1711/3000 train_loss:3.4636 train_time:246672ms step_avg:145.02ms | |
| step:1712/3000 train_loss:3.4629 train_time:246815ms step_avg:145.01ms | |
| step:1713/3000 train_loss:3.4911 train_time:246957ms step_avg:145.01ms | |
| step:1714/3000 train_loss:3.5276 train_time:247098ms step_avg:145.01ms | |
| step:1715/3000 train_loss:3.4419 train_time:247239ms step_avg:145.01ms | |
| step:1716/3000 train_loss:3.4570 train_time:247384ms step_avg:145.01ms | |
| step:1717/3000 train_loss:3.2890 train_time:247533ms step_avg:145.01ms | |
| step:1718/3000 train_loss:3.4272 train_time:247678ms step_avg:145.01ms | |
| step:1719/3000 train_loss:3.4468 train_time:247819ms step_avg:145.01ms | |
| step:1720/3000 train_loss:3.3879 train_time:247961ms step_avg:145.01ms | |
| step:1721/3000 train_loss:3.5475 train_time:248104ms step_avg:145.01ms | |
| step:1722/3000 train_loss:3.3514 train_time:248246ms step_avg:145.00ms | |
| step:1723/3000 train_loss:3.4994 train_time:248391ms step_avg:145.00ms | |
| step:1724/3000 train_loss:3.5773 train_time:248536ms step_avg:145.00ms | |
| step:1725/3000 train_loss:3.4323 train_time:248679ms step_avg:145.00ms | |
| step:1726/3000 train_loss:3.6581 train_time:248821ms step_avg:145.00ms | |
| step:1727/3000 train_loss:3.4450 train_time:248964ms step_avg:145.00ms | |
| step:1728/3000 train_loss:3.5077 train_time:249108ms step_avg:145.00ms | |
| step:1729/3000 train_loss:3.4742 train_time:249252ms step_avg:145.00ms | |
| step:1730/3000 train_loss:3.4839 train_time:249396ms step_avg:145.00ms | |
| step:1731/3000 train_loss:3.8450 train_time:249539ms step_avg:145.00ms | |
| step:1732/3000 train_loss:3.4686 train_time:249683ms step_avg:145.00ms | |
| step:1733/3000 train_loss:3.5988 train_time:249827ms step_avg:145.00ms | |
| step:1734/3000 train_loss:3.3803 train_time:249973ms step_avg:145.00ms | |
| step:1735/3000 train_loss:3.4212 train_time:250116ms step_avg:145.00ms | |
| step:1736/3000 train_loss:3.4423 train_time:250259ms step_avg:144.99ms | |
| step:1737/3000 train_loss:3.4253 train_time:250403ms step_avg:144.99ms | |
| step:1738/3000 train_loss:3.5632 train_time:250546ms step_avg:144.99ms | |
| step:1739/3000 train_loss:3.4271 train_time:250691ms step_avg:144.99ms | |
| step:1740/3000 train_loss:3.4819 train_time:250834ms step_avg:144.99ms | |
| step:1741/3000 train_loss:3.5427 train_time:250977ms step_avg:144.99ms | |
| step:1742/3000 train_loss:3.3445 train_time:251120ms step_avg:144.99ms | |
| step:1743/3000 train_loss:3.2407 train_time:251264ms step_avg:144.99ms | |
| step:1744/3000 train_loss:3.1627 train_time:251408ms step_avg:144.99ms | |
| step:1745/3000 train_loss:3.4671 train_time:251553ms step_avg:144.99ms | |
| step:1746/3000 train_loss:3.4773 train_time:251698ms step_avg:144.99ms | |
| step:1747/3000 train_loss:3.4511 train_time:251840ms step_avg:144.99ms | |
| step:1748/3000 train_loss:3.4633 train_time:251983ms step_avg:144.98ms | |
| step:1749/3000 train_loss:3.7031 train_time:252127ms step_avg:144.98ms | |
| step:1750/3000 train_loss:3.4133 train_time:252273ms step_avg:144.98ms | |
| step:1750/3000 val_loss:3.4627 train_time:252310ms step_avg:145.01ms | |
| step:1751/3000 train_loss:3.4784 train_time:252427ms step_avg:144.99ms | |
| step:1752/3000 train_loss:3.4673 train_time:252571ms step_avg:144.99ms | |
| step:1753/3000 train_loss:3.1064 train_time:252713ms step_avg:144.99ms | |
| step:1754/3000 train_loss:3.2260 train_time:252855ms step_avg:144.99ms | |
| step:1755/3000 train_loss:3.3122 train_time:252998ms step_avg:144.98ms | |
| step:1756/3000 train_loss:3.2754 train_time:253141ms step_avg:144.98ms | |
| step:1757/3000 train_loss:3.4365 train_time:253284ms step_avg:144.98ms | |
| step:1758/3000 train_loss:3.3147 train_time:253430ms step_avg:144.98ms | |
| step:1759/3000 train_loss:3.3134 train_time:253574ms step_avg:144.98ms | |
| step:1760/3000 train_loss:4.3766 train_time:253719ms step_avg:144.98ms | |
| step:1761/3000 train_loss:3.4498 train_time:253862ms step_avg:144.98ms | |
| step:1762/3000 train_loss:3.4886 train_time:254006ms step_avg:144.98ms | |
| step:1763/3000 train_loss:3.4765 train_time:254148ms step_avg:144.98ms | |
| step:1764/3000 train_loss:3.4986 train_time:254291ms step_avg:144.98ms | |
| step:1765/3000 train_loss:3.4091 train_time:254435ms step_avg:144.98ms | |
| step:1766/3000 train_loss:3.4592 train_time:254583ms step_avg:144.98ms | |
| step:1767/3000 train_loss:3.4670 train_time:254727ms step_avg:144.98ms | |
| step:1768/3000 train_loss:3.7165 train_time:254869ms step_avg:144.98ms | |
| step:1769/3000 train_loss:3.4469 train_time:255012ms step_avg:144.98ms | |
| step:1770/3000 train_loss:3.5125 train_time:255157ms step_avg:144.98ms | |
| step:1771/3000 train_loss:3.8404 train_time:255301ms step_avg:144.97ms | |
| step:1772/3000 train_loss:3.4438 train_time:255445ms step_avg:144.97ms | |
| step:1773/3000 train_loss:3.3536 train_time:255589ms step_avg:144.97ms | |
| step:1774/3000 train_loss:3.6024 train_time:255733ms step_avg:144.97ms | |
| step:1775/3000 train_loss:3.3650 train_time:255878ms step_avg:144.97ms | |
| step:1776/3000 train_loss:3.5130 train_time:256025ms step_avg:144.97ms | |
| step:1777/3000 train_loss:3.5592 train_time:256167ms step_avg:144.97ms | |
| step:1778/3000 train_loss:3.6543 train_time:256310ms step_avg:144.97ms | |
| step:1779/3000 train_loss:3.4526 train_time:256453ms step_avg:144.97ms | |
| step:1780/3000 train_loss:3.7530 train_time:256598ms step_avg:144.97ms | |
| step:1781/3000 train_loss:3.5262 train_time:256743ms step_avg:144.97ms | |
| step:1782/3000 train_loss:3.5450 train_time:256887ms step_avg:144.97ms | |
| step:1783/3000 train_loss:3.3248 train_time:257030ms step_avg:144.97ms | |
| step:1784/3000 train_loss:3.4124 train_time:257173ms step_avg:144.97ms | |
| step:1785/3000 train_loss:3.5630 train_time:257317ms step_avg:144.97ms | |
| step:1786/3000 train_loss:3.4442 train_time:257462ms step_avg:144.97ms | |
| step:1787/3000 train_loss:3.6117 train_time:257606ms step_avg:144.97ms | |
| step:1788/3000 train_loss:3.4176 train_time:257748ms step_avg:144.97ms | |
| step:1789/3000 train_loss:3.4046 train_time:257892ms step_avg:144.96ms | |
| step:1790/3000 train_loss:3.5380 train_time:258036ms step_avg:144.96ms | |
| step:1791/3000 train_loss:3.4492 train_time:258182ms step_avg:144.96ms | |
| step:1792/3000 train_loss:3.4003 train_time:258327ms step_avg:144.96ms | |
| step:1793/3000 train_loss:3.5342 train_time:258469ms step_avg:144.96ms | |
| step:1794/3000 train_loss:3.4046 train_time:258613ms step_avg:144.96ms | |
| step:1795/3000 train_loss:3.3906 train_time:258757ms step_avg:144.96ms | |
| step:1796/3000 train_loss:3.4571 train_time:258903ms step_avg:144.96ms | |
| step:1797/3000 train_loss:3.4115 train_time:259046ms step_avg:144.96ms | |
| step:1798/3000 train_loss:3.5581 train_time:259189ms step_avg:144.96ms | |
| step:1799/3000 train_loss:3.4353 train_time:259331ms step_avg:144.96ms | |
| step:1800/3000 train_loss:3.5101 train_time:259476ms step_avg:144.96ms | |
| step:1801/3000 train_loss:3.4428 train_time:259621ms step_avg:144.96ms | |
| step:1802/3000 train_loss:3.4816 train_time:259765ms step_avg:144.96ms | |
| step:1803/3000 train_loss:3.3947 train_time:259907ms step_avg:144.96ms | |
| step:1804/3000 train_loss:3.3201 train_time:260050ms step_avg:144.96ms | |
| step:1805/3000 train_loss:3.5729 train_time:260194ms step_avg:144.95ms | |
| step:1806/3000 train_loss:3.4954 train_time:260338ms step_avg:144.95ms | |
| step:1807/3000 train_loss:3.5082 train_time:260483ms step_avg:144.95ms | |
| step:1808/3000 train_loss:3.6141 train_time:260627ms step_avg:144.95ms | |
| step:1809/3000 train_loss:3.4046 train_time:260769ms step_avg:144.95ms | |
| step:1810/3000 train_loss:3.5078 train_time:260912ms step_avg:144.95ms | |
| step:1811/3000 train_loss:3.6496 train_time:261056ms step_avg:144.95ms | |
| step:1812/3000 train_loss:3.4976 train_time:261201ms step_avg:144.95ms | |
| step:1813/3000 train_loss:3.5366 train_time:261345ms step_avg:144.95ms | |
| step:1814/3000 train_loss:3.5622 train_time:261488ms step_avg:144.95ms | |
| step:1815/3000 train_loss:3.5154 train_time:261631ms step_avg:144.95ms | |
| step:1816/3000 train_loss:3.5331 train_time:261775ms step_avg:144.95ms | |
| step:1817/3000 train_loss:3.5024 train_time:261922ms step_avg:144.95ms | |
| step:1818/3000 train_loss:3.5552 train_time:262065ms step_avg:144.95ms | |
| step:1819/3000 train_loss:3.4775 train_time:262209ms step_avg:144.95ms | |
| step:1820/3000 train_loss:3.4630 train_time:262352ms step_avg:144.95ms | |
| step:1821/3000 train_loss:3.4215 train_time:262496ms step_avg:144.95ms | |
| step:1822/3000 train_loss:3.3939 train_time:262641ms step_avg:144.95ms | |
| step:1823/3000 train_loss:3.3333 train_time:262785ms step_avg:144.94ms | |
| step:1824/3000 train_loss:3.4826 train_time:262928ms step_avg:144.94ms | |
| step:1825/3000 train_loss:3.5979 train_time:263072ms step_avg:144.94ms | |
| step:1826/3000 train_loss:3.5520 train_time:263217ms step_avg:144.94ms | |
| step:1827/3000 train_loss:3.5358 train_time:263362ms step_avg:144.94ms | |
| step:1828/3000 train_loss:3.4072 train_time:263505ms step_avg:144.94ms | |
| step:1829/3000 train_loss:3.4174 train_time:263649ms step_avg:144.94ms | |
| step:1830/3000 train_loss:3.5673 train_time:263791ms step_avg:144.94ms | |
| step:1831/3000 train_loss:3.3414 train_time:263935ms step_avg:144.94ms | |
| step:1832/3000 train_loss:3.4923 train_time:264081ms step_avg:144.94ms | |
| step:1833/3000 train_loss:3.3690 train_time:264226ms step_avg:144.94ms | |
| step:1834/3000 train_loss:3.6905 train_time:264368ms step_avg:144.94ms | |
| step:1835/3000 train_loss:3.5265 train_time:264511ms step_avg:144.94ms | |
| step:1836/3000 train_loss:3.5077 train_time:264655ms step_avg:144.94ms | |
| step:1837/3000 train_loss:3.6349 train_time:264801ms step_avg:144.94ms | |
| step:1838/3000 train_loss:3.4905 train_time:264945ms step_avg:144.94ms | |
| step:1839/3000 train_loss:3.3715 train_time:265088ms step_avg:144.94ms | |
| step:1840/3000 train_loss:3.4841 train_time:265233ms step_avg:144.94ms | |
| step:1841/3000 train_loss:3.3765 train_time:265377ms step_avg:144.94ms | |
| step:1842/3000 train_loss:3.4842 train_time:265522ms step_avg:144.94ms | |
| step:1843/3000 train_loss:3.5387 train_time:265664ms step_avg:144.93ms | |
| step:1844/3000 train_loss:3.2943 train_time:265808ms step_avg:144.93ms | |
| step:1845/3000 train_loss:3.4117 train_time:265950ms step_avg:144.93ms | |
| step:1846/3000 train_loss:3.4781 train_time:266094ms step_avg:144.93ms | |
| step:1847/3000 train_loss:3.4116 train_time:266240ms step_avg:144.93ms | |
| step:1848/3000 train_loss:3.3120 train_time:266384ms step_avg:144.93ms | |
| step:1849/3000 train_loss:3.5897 train_time:266527ms step_avg:144.93ms | |
| step:1850/3000 train_loss:3.3475 train_time:266670ms step_avg:144.93ms | |
| step:1851/3000 train_loss:3.4286 train_time:266814ms step_avg:144.93ms | |
| step:1852/3000 train_loss:3.3921 train_time:266958ms step_avg:144.93ms | |
| step:1853/3000 train_loss:3.5895 train_time:267102ms step_avg:144.93ms | |
| step:1854/3000 train_loss:3.5673 train_time:267246ms step_avg:144.93ms | |
| step:1855/3000 train_loss:3.4468 train_time:267389ms step_avg:144.93ms | |
| step:1856/3000 train_loss:3.3934 train_time:267533ms step_avg:144.93ms | |
| step:1857/3000 train_loss:3.4227 train_time:267678ms step_avg:144.93ms | |
| step:1858/3000 train_loss:3.6699 train_time:267823ms step_avg:144.93ms | |
| step:1859/3000 train_loss:3.5245 train_time:267966ms step_avg:144.92ms | |
| step:1860/3000 train_loss:3.4502 train_time:268109ms step_avg:144.92ms | |
| step:1861/3000 train_loss:3.4959 train_time:268251ms step_avg:144.92ms | |
| step:1862/3000 train_loss:3.3844 train_time:268395ms step_avg:144.92ms | |
| step:1863/3000 train_loss:3.3823 train_time:268541ms step_avg:144.92ms | |
| step:1864/3000 train_loss:3.4505 train_time:268686ms step_avg:144.92ms | |
| step:1865/3000 train_loss:3.4946 train_time:268828ms step_avg:144.92ms | |
| step:1866/3000 train_loss:3.2551 train_time:268972ms step_avg:144.92ms | |
| step:1867/3000 train_loss:3.3792 train_time:269116ms step_avg:144.92ms | |
| step:1868/3000 train_loss:3.3419 train_time:269262ms step_avg:144.92ms | |
| step:1869/3000 train_loss:3.3441 train_time:269405ms step_avg:144.92ms | |
| step:1870/3000 train_loss:3.5018 train_time:269548ms step_avg:144.92ms | |
| step:1871/3000 train_loss:3.4831 train_time:269691ms step_avg:144.92ms | |
| step:1872/3000 train_loss:3.4361 train_time:269834ms step_avg:144.92ms | |
| step:1873/3000 train_loss:3.4472 train_time:269980ms step_avg:144.92ms | |
| step:1874/3000 train_loss:3.3717 train_time:270123ms step_avg:144.92ms | |
| step:1875/3000 train_loss:3.4718 train_time:270266ms step_avg:144.91ms | |
| step:1875/3000 val_loss:3.4492 train_time:270304ms step_avg:144.94ms | |
| step:1876/3000 train_loss:3.4756 train_time:270423ms step_avg:144.92ms | |
| step:1877/3000 train_loss:3.4027 train_time:270566ms step_avg:144.92ms | |
| step:1878/3000 train_loss:3.4478 train_time:270708ms step_avg:144.92ms | |
| step:1879/3000 train_loss:3.5544 train_time:270849ms step_avg:144.92ms | |
| step:1880/3000 train_loss:3.4392 train_time:270990ms step_avg:144.91ms | |
| step:1881/3000 train_loss:3.4832 train_time:271134ms step_avg:144.91ms | |
| step:1882/3000 train_loss:3.4060 train_time:271279ms step_avg:144.91ms | |
| step:1883/3000 train_loss:3.4737 train_time:271425ms step_avg:144.91ms | |
| step:1884/3000 train_loss:3.4744 train_time:271569ms step_avg:144.91ms | |
| step:1885/3000 train_loss:3.2335 train_time:271713ms step_avg:144.91ms | |
| step:1886/3000 train_loss:3.6245 train_time:271857ms step_avg:144.91ms | |
| step:1887/3000 train_loss:3.3573 train_time:272000ms step_avg:144.91ms | |
| step:1888/3000 train_loss:3.3718 train_time:272142ms step_avg:144.91ms | |
| step:1889/3000 train_loss:3.4536 train_time:272285ms step_avg:144.91ms | |
| step:1890/3000 train_loss:3.4967 train_time:272538ms step_avg:144.97ms | |
| step:1891/3000 train_loss:3.3149 train_time:272692ms step_avg:144.97ms | |
| step:1892/3000 train_loss:3.5897 train_time:272834ms step_avg:144.97ms | |
| step:1893/3000 train_loss:3.3330 train_time:272977ms step_avg:144.97ms | |
| step:1894/3000 train_loss:3.4842 train_time:273120ms step_avg:144.97ms | |
| step:1895/3000 train_loss:3.5107 train_time:273261ms step_avg:144.97ms | |
| step:1896/3000 train_loss:3.3155 train_time:273406ms step_avg:144.97ms | |
| step:1897/3000 train_loss:3.4859 train_time:273553ms step_avg:144.97ms | |
| step:1898/3000 train_loss:3.4427 train_time:273701ms step_avg:144.97ms | |
| step:1899/3000 train_loss:3.5204 train_time:273843ms step_avg:144.97ms | |
| step:1900/3000 train_loss:3.2983 train_time:274115ms step_avg:145.03ms | |
| step:1901/3000 train_loss:3.5405 train_time:274255ms step_avg:145.03ms | |
| step:1902/3000 train_loss:3.4225 train_time:274398ms step_avg:145.03ms | |
| step:1903/3000 train_loss:3.5865 train_time:274541ms step_avg:145.03ms | |
| step:1904/3000 train_loss:3.3904 train_time:274682ms step_avg:145.03ms | |
| step:1905/3000 train_loss:3.6670 train_time:274824ms step_avg:145.03ms | |
| step:1906/3000 train_loss:3.4009 train_time:274969ms step_avg:145.03ms | |
| step:1907/3000 train_loss:3.3982 train_time:275118ms step_avg:145.03ms | |
| step:1908/3000 train_loss:3.4681 train_time:275263ms step_avg:145.03ms | |
| step:1909/3000 train_loss:3.3481 train_time:275405ms step_avg:145.03ms | |
| step:1910/3000 train_loss:3.4242 train_time:275547ms step_avg:145.02ms | |
| step:1911/3000 train_loss:3.5079 train_time:275689ms step_avg:145.02ms | |
| step:1912/3000 train_loss:3.4376 train_time:275832ms step_avg:145.02ms | |
| step:1913/3000 train_loss:3.3212 train_time:275976ms step_avg:145.02ms | |
| step:1914/3000 train_loss:3.1935 train_time:276122ms step_avg:145.02ms | |
| step:1915/3000 train_loss:3.3917 train_time:276264ms step_avg:145.02ms | |
| step:1916/3000 train_loss:3.6035 train_time:276408ms step_avg:145.02ms | |
| step:1917/3000 train_loss:3.6099 train_time:276552ms step_avg:145.02ms | |
| step:1918/3000 train_loss:3.5656 train_time:276697ms step_avg:145.02ms | |
| step:1919/3000 train_loss:3.3816 train_time:276840ms step_avg:145.02ms | |
| step:1920/3000 train_loss:3.6365 train_time:276983ms step_avg:145.02ms | |
| step:1921/3000 train_loss:3.4466 train_time:277126ms step_avg:145.02ms | |
| step:1922/3000 train_loss:3.3917 train_time:277270ms step_avg:145.02ms | |
| step:1923/3000 train_loss:3.5586 train_time:277415ms step_avg:145.02ms | |
| step:1924/3000 train_loss:3.5249 train_time:277559ms step_avg:145.02ms | |
| step:1925/3000 train_loss:3.3645 train_time:277702ms step_avg:145.01ms | |
| step:1926/3000 train_loss:3.3939 train_time:277844ms step_avg:145.01ms | |
| step:1927/3000 train_loss:3.3073 train_time:277987ms step_avg:145.01ms | |
| step:1928/3000 train_loss:3.4242 train_time:278131ms step_avg:145.01ms | |
| step:1929/3000 train_loss:3.2724 train_time:278277ms step_avg:145.01ms | |
| step:1930/3000 train_loss:3.3990 train_time:278421ms step_avg:145.01ms | |
| step:1931/3000 train_loss:3.5275 train_time:278563ms step_avg:145.01ms | |
| step:1932/3000 train_loss:3.3931 train_time:278706ms step_avg:145.01ms | |
| step:1933/3000 train_loss:3.5346 train_time:278849ms step_avg:145.01ms | |
| step:1934/3000 train_loss:3.3919 train_time:278993ms step_avg:145.01ms | |
| step:1935/3000 train_loss:3.4446 train_time:279138ms step_avg:145.01ms | |
| step:1936/3000 train_loss:3.4842 train_time:279281ms step_avg:145.01ms | |
| step:1937/3000 train_loss:3.4444 train_time:279425ms step_avg:145.01ms | |
| step:1938/3000 train_loss:3.4746 train_time:279567ms step_avg:145.00ms | |
| step:1939/3000 train_loss:3.3973 train_time:279710ms step_avg:145.00ms | |
| step:1940/3000 train_loss:3.4981 train_time:279853ms step_avg:145.00ms | |
| step:1941/3000 train_loss:3.5259 train_time:279998ms step_avg:145.00ms | |
| step:1942/3000 train_loss:3.3656 train_time:280142ms step_avg:145.00ms | |
| step:1943/3000 train_loss:3.4009 train_time:280284ms step_avg:145.00ms | |
| step:1944/3000 train_loss:3.4677 train_time:280428ms step_avg:145.00ms | |
| step:1945/3000 train_loss:3.3124 train_time:280571ms step_avg:145.00ms | |
| step:1946/3000 train_loss:3.5851 train_time:280716ms step_avg:145.00ms | |
| step:1947/3000 train_loss:3.4548 train_time:280860ms step_avg:145.00ms | |
| step:1948/3000 train_loss:3.4337 train_time:281003ms step_avg:145.00ms | |
| step:1949/3000 train_loss:3.4283 train_time:281146ms step_avg:145.00ms | |
| step:1950/3000 train_loss:3.3168 train_time:281290ms step_avg:144.99ms | |
| step:1951/3000 train_loss:3.4375 train_time:281435ms step_avg:144.99ms | |
| step:1952/3000 train_loss:3.2862 train_time:281578ms step_avg:144.99ms | |
| step:1953/3000 train_loss:3.4942 train_time:281722ms step_avg:144.99ms | |
| step:1954/3000 train_loss:3.4823 train_time:281865ms step_avg:144.99ms | |
| step:1955/3000 train_loss:3.4451 train_time:282007ms step_avg:144.99ms | |
| step:1956/3000 train_loss:3.3319 train_time:282151ms step_avg:144.99ms | |
| step:1957/3000 train_loss:3.4230 train_time:282297ms step_avg:144.99ms | |
| step:1958/3000 train_loss:3.6012 train_time:282441ms step_avg:144.99ms | |
| step:1959/3000 train_loss:3.5240 train_time:282583ms step_avg:144.99ms | |
| step:1960/3000 train_loss:3.5494 train_time:282727ms step_avg:144.99ms | |
| step:1961/3000 train_loss:3.3433 train_time:282870ms step_avg:144.99ms | |
| step:1962/3000 train_loss:3.4672 train_time:283014ms step_avg:144.99ms | |
| step:1963/3000 train_loss:3.5088 train_time:283158ms step_avg:144.99ms | |
| step:1964/3000 train_loss:3.4540 train_time:283302ms step_avg:144.99ms | |
| step:1965/3000 train_loss:3.3682 train_time:283444ms step_avg:144.98ms | |
| step:1966/3000 train_loss:3.7794 train_time:283587ms step_avg:144.98ms | |
| step:1967/3000 train_loss:3.3771 train_time:283731ms step_avg:144.98ms | |
| step:1968/3000 train_loss:3.4311 train_time:283876ms step_avg:144.98ms | |
| step:1969/3000 train_loss:3.4782 train_time:284020ms step_avg:144.98ms | |
| step:1970/3000 train_loss:3.4332 train_time:284163ms step_avg:144.98ms | |
| step:1971/3000 train_loss:3.3202 train_time:284305ms step_avg:144.98ms | |
| step:1972/3000 train_loss:3.3044 train_time:284448ms step_avg:144.98ms | |
| step:1973/3000 train_loss:3.4265 train_time:284593ms step_avg:144.98ms | |
| step:1974/3000 train_loss:3.3885 train_time:284738ms step_avg:144.98ms | |
| step:1975/3000 train_loss:3.3709 train_time:284881ms step_avg:144.98ms | |
| step:1976/3000 train_loss:3.5342 train_time:285024ms step_avg:144.98ms | |
| step:1977/3000 train_loss:3.4038 train_time:285166ms step_avg:144.98ms | |
| step:1978/3000 train_loss:3.7666 train_time:285309ms step_avg:144.97ms | |
| step:1979/3000 train_loss:3.4379 train_time:285452ms step_avg:144.97ms | |
| step:1980/3000 train_loss:3.4436 train_time:285599ms step_avg:144.97ms | |
| step:1981/3000 train_loss:3.4546 train_time:285742ms step_avg:144.97ms | |
| step:1982/3000 train_loss:3.4732 train_time:285885ms step_avg:144.97ms | |
| step:1983/3000 train_loss:3.4056 train_time:286028ms step_avg:144.97ms | |
| step:1984/3000 train_loss:3.3621 train_time:286172ms step_avg:144.97ms | |
| step:1985/3000 train_loss:3.4303 train_time:286317ms step_avg:144.97ms | |
| step:1986/3000 train_loss:3.4879 train_time:286461ms step_avg:144.97ms | |
| step:1987/3000 train_loss:3.4569 train_time:286604ms step_avg:144.97ms | |
| step:1988/3000 train_loss:3.4314 train_time:286746ms step_avg:144.97ms | |
| step:1989/3000 train_loss:3.5189 train_time:286890ms step_avg:144.97ms | |
| step:1990/3000 train_loss:3.5494 train_time:287035ms step_avg:144.97ms | |
| step:1991/3000 train_loss:3.3310 train_time:287179ms step_avg:144.97ms | |
| step:1992/3000 train_loss:3.3199 train_time:287323ms step_avg:144.97ms | |
| step:1993/3000 train_loss:3.5036 train_time:287465ms step_avg:144.96ms | |
| step:1994/3000 train_loss:3.3369 train_time:287607ms step_avg:144.96ms | |
| step:1995/3000 train_loss:3.4090 train_time:287751ms step_avg:144.96ms | |
| step:1996/3000 train_loss:3.4948 train_time:287898ms step_avg:144.96ms | |
| step:1997/3000 train_loss:3.3566 train_time:288041ms step_avg:144.96ms | |
| step:1998/3000 train_loss:3.4620 train_time:288183ms step_avg:144.96ms | |
| step:1999/3000 train_loss:3.4585 train_time:288326ms step_avg:144.96ms | |
| step:2000/3000 train_loss:3.3780 train_time:288469ms step_avg:144.96ms | |
| step:2000/3000 val_loss:3.4357 train_time:288507ms step_avg:144.98ms | |
| step:2001/3000 train_loss:3.5274 train_time:288621ms step_avg:144.96ms | |
| step:2002/3000 train_loss:3.4670 train_time:288768ms step_avg:144.96ms | |
| step:2003/3000 train_loss:3.5592 train_time:288911ms step_avg:144.96ms | |
| step:2004/3000 train_loss:3.4767 train_time:289053ms step_avg:144.96ms | |
| step:2005/3000 train_loss:3.4841 train_time:289194ms step_avg:144.96ms | |
| step:2006/3000 train_loss:3.3745 train_time:289336ms step_avg:144.96ms | |
| step:2007/3000 train_loss:3.4027 train_time:289479ms step_avg:144.96ms | |
| step:2008/3000 train_loss:3.4537 train_time:289627ms step_avg:144.96ms | |
| step:2009/3000 train_loss:3.4915 train_time:289773ms step_avg:144.96ms | |
| step:2010/3000 train_loss:3.3885 train_time:289916ms step_avg:144.96ms | |
| step:2011/3000 train_loss:3.4717 train_time:290058ms step_avg:144.96ms | |
| step:2012/3000 train_loss:3.4516 train_time:290200ms step_avg:144.95ms | |
| step:2013/3000 train_loss:3.4512 train_time:290342ms step_avg:144.95ms | |
| step:2014/3000 train_loss:3.3616 train_time:290485ms step_avg:144.95ms | |
| step:2015/3000 train_loss:3.4063 train_time:290633ms step_avg:144.95ms | |
| step:2016/3000 train_loss:3.4313 train_time:290777ms step_avg:144.95ms | |
| step:2017/3000 train_loss:3.5590 train_time:290919ms step_avg:144.95ms | |
| step:2018/3000 train_loss:3.4139 train_time:291063ms step_avg:144.95ms | |
| step:2019/3000 train_loss:3.5641 train_time:291207ms step_avg:144.95ms | |
| step:2020/3000 train_loss:3.5780 train_time:291351ms step_avg:144.95ms | |
| step:2021/3000 train_loss:3.2828 train_time:291494ms step_avg:144.95ms | |
| step:2022/3000 train_loss:3.5096 train_time:291638ms step_avg:144.95ms | |
| step:2023/3000 train_loss:3.4338 train_time:291786ms step_avg:144.95ms | |
| step:2024/3000 train_loss:3.5336 train_time:291931ms step_avg:144.95ms | |
| step:2025/3000 train_loss:3.5737 train_time:292074ms step_avg:144.95ms | |
| step:2026/3000 train_loss:3.3599 train_time:292218ms step_avg:144.95ms | |
| step:2027/3000 train_loss:3.3907 train_time:292360ms step_avg:144.95ms | |
| step:2028/3000 train_loss:3.3072 train_time:292504ms step_avg:144.95ms | |
| step:2029/3000 train_loss:3.4139 train_time:292650ms step_avg:144.95ms | |
| step:2030/3000 train_loss:3.3367 train_time:292795ms step_avg:144.95ms | |
| step:2031/3000 train_loss:3.4277 train_time:292937ms step_avg:144.95ms | |
| step:2032/3000 train_loss:3.4227 train_time:293079ms step_avg:144.94ms | |
| step:2033/3000 train_loss:3.4442 train_time:293222ms step_avg:144.94ms | |
| step:2034/3000 train_loss:3.3339 train_time:293366ms step_avg:144.94ms | |
| step:2035/3000 train_loss:3.5020 train_time:293510ms step_avg:144.94ms | |
| step:2036/3000 train_loss:3.4944 train_time:293654ms step_avg:144.94ms | |
| step:2037/3000 train_loss:3.4833 train_time:293797ms step_avg:144.94ms | |
| step:2038/3000 train_loss:3.3535 train_time:293942ms step_avg:144.94ms | |
| step:2039/3000 train_loss:3.6082 train_time:294085ms step_avg:144.94ms | |
| step:2040/3000 train_loss:3.4531 train_time:294231ms step_avg:144.94ms | |
| step:2041/3000 train_loss:3.4703 train_time:294375ms step_avg:144.94ms | |
| step:2042/3000 train_loss:3.4199 train_time:294518ms step_avg:144.94ms | |
| step:2043/3000 train_loss:3.3133 train_time:294660ms step_avg:144.94ms | |
| step:2044/3000 train_loss:3.4410 train_time:294805ms step_avg:144.94ms | |
| step:2045/3000 train_loss:3.4323 train_time:294950ms step_avg:144.94ms | |
| step:2046/3000 train_loss:3.3002 train_time:295093ms step_avg:144.94ms | |
| step:2047/3000 train_loss:3.3718 train_time:295237ms step_avg:144.94ms | |
| step:2048/3000 train_loss:3.4549 train_time:295379ms step_avg:144.94ms | |
| step:2049/3000 train_loss:3.4042 train_time:295522ms step_avg:144.94ms | |
| step:2050/3000 train_loss:3.4558 train_time:295666ms step_avg:144.93ms | |
| step:2051/3000 train_loss:3.6007 train_time:295811ms step_avg:144.93ms | |
| step:2052/3000 train_loss:3.4640 train_time:295955ms step_avg:144.93ms | |
| step:2053/3000 train_loss:3.4146 train_time:296097ms step_avg:144.93ms | |
| step:2054/3000 train_loss:3.3962 train_time:296241ms step_avg:144.93ms | |
| step:2055/3000 train_loss:3.2614 train_time:296385ms step_avg:144.93ms | |
| step:2056/3000 train_loss:3.3802 train_time:296531ms step_avg:144.93ms | |
| step:2057/3000 train_loss:3.5479 train_time:296675ms step_avg:144.93ms | |
| step:2058/3000 train_loss:3.5779 train_time:296818ms step_avg:144.93ms | |
| step:2059/3000 train_loss:3.4303 train_time:296961ms step_avg:144.93ms | |
| step:2060/3000 train_loss:3.4697 train_time:297106ms step_avg:144.93ms | |
| step:2061/3000 train_loss:3.4589 train_time:297251ms step_avg:144.93ms | |
| step:2062/3000 train_loss:3.4178 train_time:297394ms step_avg:144.93ms | |
| step:2063/3000 train_loss:3.3254 train_time:297537ms step_avg:144.93ms | |
| step:2064/3000 train_loss:3.6347 train_time:297680ms step_avg:144.93ms | |
| step:2065/3000 train_loss:3.4904 train_time:297824ms step_avg:144.93ms | |
| step:2066/3000 train_loss:3.4421 train_time:297969ms step_avg:144.93ms | |
| step:2067/3000 train_loss:3.4892 train_time:298113ms step_avg:144.93ms | |
| step:2068/3000 train_loss:3.3897 train_time:298256ms step_avg:144.93ms | |
| step:2069/3000 train_loss:3.4480 train_time:298399ms step_avg:144.92ms | |
| step:2070/3000 train_loss:3.5815 train_time:298543ms step_avg:144.92ms | |
| step:2071/3000 train_loss:3.5843 train_time:298688ms step_avg:144.92ms | |
| step:2072/3000 train_loss:3.4283 train_time:298832ms step_avg:144.92ms | |
| step:2073/3000 train_loss:3.4638 train_time:298975ms step_avg:144.92ms | |
| step:2074/3000 train_loss:3.3556 train_time:299118ms step_avg:144.92ms | |
| step:2075/3000 train_loss:3.8868 train_time:299262ms step_avg:144.92ms | |
| step:2076/3000 train_loss:3.3118 train_time:299406ms step_avg:144.92ms | |
| step:2077/3000 train_loss:3.4808 train_time:299552ms step_avg:144.92ms | |
| step:2078/3000 train_loss:3.3632 train_time:299695ms step_avg:144.92ms | |
| step:2079/3000 train_loss:3.3479 train_time:299949ms step_avg:144.97ms | |
| step:2080/3000 train_loss:3.4330 train_time:300103ms step_avg:144.98ms | |
| step:2081/3000 train_loss:3.6794 train_time:300246ms step_avg:144.98ms | |
| step:2082/3000 train_loss:3.3105 train_time:300388ms step_avg:144.98ms | |
| step:2083/3000 train_loss:3.6570 train_time:300531ms step_avg:144.97ms | |
| step:2084/3000 train_loss:3.3600 train_time:300674ms step_avg:144.97ms | |
| step:2085/3000 train_loss:3.3425 train_time:300817ms step_avg:144.97ms | |
| step:2086/3000 train_loss:3.5881 train_time:300963ms step_avg:144.97ms | |
| step:2087/3000 train_loss:3.5109 train_time:301110ms step_avg:144.97ms | |
| step:2088/3000 train_loss:3.4965 train_time:301254ms step_avg:144.97ms | |
| step:2089/3000 train_loss:3.5549 train_time:301395ms step_avg:144.97ms | |
| step:2090/3000 train_loss:3.4872 train_time:301666ms step_avg:145.03ms | |
| step:2091/3000 train_loss:3.4738 train_time:301807ms step_avg:145.03ms | |
| step:2092/3000 train_loss:3.4228 train_time:301952ms step_avg:145.03ms | |
| step:2093/3000 train_loss:3.4934 train_time:302095ms step_avg:145.03ms | |
| step:2094/3000 train_loss:3.4073 train_time:302236ms step_avg:145.03ms | |
| step:2095/3000 train_loss:3.1906 train_time:302377ms step_avg:145.03ms | |
| step:2096/3000 train_loss:3.4162 train_time:302523ms step_avg:145.03ms | |
| step:2097/3000 train_loss:3.5883 train_time:302670ms step_avg:145.03ms | |
| step:2098/3000 train_loss:3.4052 train_time:302815ms step_avg:145.03ms | |
| step:2099/3000 train_loss:3.3109 train_time:302958ms step_avg:145.03ms | |
| step:2100/3000 train_loss:3.4073 train_time:303101ms step_avg:145.02ms | |
| step:2101/3000 train_loss:3.3640 train_time:303244ms step_avg:145.02ms | |
| step:2102/3000 train_loss:3.5075 train_time:303387ms step_avg:145.02ms | |
| step:2103/3000 train_loss:3.3406 train_time:303533ms step_avg:145.02ms | |
| step:2104/3000 train_loss:3.3120 train_time:303678ms step_avg:145.02ms | |
| step:2105/3000 train_loss:3.5671 train_time:303822ms step_avg:145.02ms | |
| step:2106/3000 train_loss:3.2931 train_time:303965ms step_avg:145.02ms | |
| step:2107/3000 train_loss:3.7018 train_time:304110ms step_avg:145.02ms | |
| step:2108/3000 train_loss:3.5287 train_time:304255ms step_avg:145.02ms | |
| step:2109/3000 train_loss:3.4299 train_time:304396ms step_avg:145.02ms | |
| step:2110/3000 train_loss:3.4550 train_time:304540ms step_avg:145.02ms | |
| step:2111/3000 train_loss:3.2731 train_time:304683ms step_avg:145.02ms | |
| step:2112/3000 train_loss:3.7578 train_time:304829ms step_avg:145.02ms | |
| step:2113/3000 train_loss:3.4545 train_time:304973ms step_avg:145.02ms | |
| step:2114/3000 train_loss:3.3810 train_time:305116ms step_avg:145.02ms | |
| step:2115/3000 train_loss:3.4968 train_time:305258ms step_avg:145.02ms | |
| step:2116/3000 train_loss:3.4463 train_time:305401ms step_avg:145.01ms | |
| step:2117/3000 train_loss:3.4400 train_time:305545ms step_avg:145.01ms | |
| step:2118/3000 train_loss:3.4967 train_time:305690ms step_avg:145.01ms | |
| step:2119/3000 train_loss:3.3502 train_time:305834ms step_avg:145.01ms | |
| step:2120/3000 train_loss:3.4214 train_time:305977ms step_avg:145.01ms | |
| step:2121/3000 train_loss:3.1165 train_time:306120ms step_avg:145.01ms | |
| step:2122/3000 train_loss:3.3139 train_time:306263ms step_avg:145.01ms | |
| step:2123/3000 train_loss:3.4857 train_time:306408ms step_avg:145.01ms | |
| step:2124/3000 train_loss:3.3928 train_time:306553ms step_avg:145.01ms | |
| step:2125/3000 train_loss:3.5513 train_time:306696ms step_avg:145.01ms | |
| step:2125/3000 val_loss:3.4242 train_time:306734ms step_avg:145.03ms | |
| step:2126/3000 train_loss:3.4111 train_time:306848ms step_avg:145.01ms | |
| step:2127/3000 train_loss:3.5235 train_time:306994ms step_avg:145.01ms | |
| step:2128/3000 train_loss:3.5054 train_time:307138ms step_avg:145.01ms | |
| step:2129/3000 train_loss:3.3548 train_time:307281ms step_avg:145.01ms | |
| step:2130/3000 train_loss:3.3516 train_time:307422ms step_avg:145.01ms | |
| step:2131/3000 train_loss:3.3866 train_time:307563ms step_avg:145.01ms | |
| step:2132/3000 train_loss:3.5267 train_time:307706ms step_avg:145.01ms | |
| step:2133/3000 train_loss:3.4108 train_time:307854ms step_avg:145.01ms | |
| step:2134/3000 train_loss:3.3178 train_time:308002ms step_avg:145.01ms | |
| step:2135/3000 train_loss:3.3721 train_time:308146ms step_avg:145.01ms | |
| step:2136/3000 train_loss:3.5070 train_time:308287ms step_avg:145.01ms | |
| step:2137/3000 train_loss:3.5226 train_time:308430ms step_avg:145.01ms | |
| step:2138/3000 train_loss:3.4590 train_time:308572ms step_avg:145.01ms | |
| step:2139/3000 train_loss:3.4512 train_time:308717ms step_avg:145.01ms | |
| step:2140/3000 train_loss:3.4310 train_time:308862ms step_avg:145.01ms | |
| step:2141/3000 train_loss:3.5180 train_time:309006ms step_avg:145.00ms | |
| step:2142/3000 train_loss:3.8171 train_time:309150ms step_avg:145.00ms | |
| step:2143/3000 train_loss:3.3417 train_time:309293ms step_avg:145.00ms | |
| step:2144/3000 train_loss:3.3847 train_time:309438ms step_avg:145.00ms | |
| step:2145/3000 train_loss:3.4260 train_time:309581ms step_avg:145.00ms | |
| step:2146/3000 train_loss:3.5513 train_time:309724ms step_avg:145.00ms | |
| step:2147/3000 train_loss:3.4772 train_time:309867ms step_avg:145.00ms | |
| step:2148/3000 train_loss:3.8905 train_time:310012ms step_avg:145.00ms | |
| step:2149/3000 train_loss:3.4117 train_time:310158ms step_avg:145.00ms | |
| step:2150/3000 train_loss:3.3719 train_time:310302ms step_avg:145.00ms | |
| step:2151/3000 train_loss:3.4532 train_time:310445ms step_avg:145.00ms | |
| step:2152/3000 train_loss:3.4784 train_time:310588ms step_avg:145.00ms | |
| step:2153/3000 train_loss:3.4243 train_time:310732ms step_avg:145.00ms | |
| step:2154/3000 train_loss:3.3627 train_time:310877ms step_avg:145.00ms | |
| step:2155/3000 train_loss:3.5742 train_time:311022ms step_avg:145.00ms | |
| step:2156/3000 train_loss:3.2036 train_time:311165ms step_avg:145.00ms | |
| step:2157/3000 train_loss:3.3582 train_time:311307ms step_avg:145.00ms | |
| step:2158/3000 train_loss:3.4898 train_time:311451ms step_avg:145.00ms | |
| step:2159/3000 train_loss:3.4346 train_time:311595ms step_avg:145.00ms | |
| step:2160/3000 train_loss:3.5920 train_time:311740ms step_avg:145.00ms | |
| step:2161/3000 train_loss:3.4963 train_time:311882ms step_avg:144.99ms | |
| step:2162/3000 train_loss:3.4299 train_time:312025ms step_avg:144.99ms | |
| step:2163/3000 train_loss:3.4029 train_time:312170ms step_avg:144.99ms | |
| step:2164/3000 train_loss:3.3914 train_time:312314ms step_avg:144.99ms | |
| step:2165/3000 train_loss:3.4821 train_time:312459ms step_avg:144.99ms | |
| step:2166/3000 train_loss:3.5009 train_time:312602ms step_avg:144.99ms | |
| step:2167/3000 train_loss:3.4315 train_time:312745ms step_avg:144.99ms | |
| step:2168/3000 train_loss:3.3287 train_time:312888ms step_avg:144.99ms | |
| step:2169/3000 train_loss:3.4174 train_time:313032ms step_avg:144.99ms | |
| step:2170/3000 train_loss:3.4499 train_time:313177ms step_avg:144.99ms | |
| step:2171/3000 train_loss:3.5810 train_time:313322ms step_avg:144.99ms | |
| step:2172/3000 train_loss:3.3708 train_time:313464ms step_avg:144.99ms | |
| step:2173/3000 train_loss:3.3576 train_time:313607ms step_avg:144.99ms | |
| step:2174/3000 train_loss:3.3705 train_time:313751ms step_avg:144.99ms | |
| step:2175/3000 train_loss:3.4191 train_time:313896ms step_avg:144.99ms | |
| step:2176/3000 train_loss:3.3836 train_time:314041ms step_avg:144.99ms | |
| step:2177/3000 train_loss:3.3575 train_time:314184ms step_avg:144.99ms | |
| step:2178/3000 train_loss:3.5707 train_time:314326ms step_avg:144.98ms | |
| step:2179/3000 train_loss:3.3983 train_time:314471ms step_avg:144.98ms | |
| step:2180/3000 train_loss:3.4134 train_time:314615ms step_avg:144.98ms | |
| step:2181/3000 train_loss:3.4607 train_time:314760ms step_avg:144.98ms | |
| step:2182/3000 train_loss:3.4431 train_time:314903ms step_avg:144.98ms | |
| step:2183/3000 train_loss:3.4009 train_time:315045ms step_avg:144.98ms | |
| step:2184/3000 train_loss:3.3064 train_time:315188ms step_avg:144.98ms | |
| step:2185/3000 train_loss:3.4857 train_time:315333ms step_avg:144.98ms | |
| step:2186/3000 train_loss:3.6508 train_time:315479ms step_avg:144.98ms | |
| step:2187/3000 train_loss:3.2987 train_time:315622ms step_avg:144.98ms | |
| step:2188/3000 train_loss:3.3358 train_time:315765ms step_avg:144.98ms | |
| step:2189/3000 train_loss:3.1833 train_time:315908ms step_avg:144.98ms | |
| step:2190/3000 train_loss:3.3381 train_time:316052ms step_avg:144.98ms | |
| step:2191/3000 train_loss:3.4833 train_time:316195ms step_avg:144.98ms | |
| step:2192/3000 train_loss:3.4076 train_time:316340ms step_avg:144.98ms | |
| step:2193/3000 train_loss:3.6491 train_time:316483ms step_avg:144.98ms | |
| step:2194/3000 train_loss:3.4116 train_time:316626ms step_avg:144.98ms | |
| step:2195/3000 train_loss:3.4752 train_time:316770ms step_avg:144.97ms | |
| step:2196/3000 train_loss:3.4221 train_time:316914ms step_avg:144.97ms | |
| step:2197/3000 train_loss:3.3490 train_time:317059ms step_avg:144.97ms | |
| step:2198/3000 train_loss:3.4194 train_time:317201ms step_avg:144.97ms | |
| step:2199/3000 train_loss:3.3648 train_time:317347ms step_avg:144.97ms | |
| step:2200/3000 train_loss:3.3642 train_time:317489ms step_avg:144.97ms | |
| step:2201/3000 train_loss:3.4154 train_time:317633ms step_avg:144.97ms | |
| step:2202/3000 train_loss:3.4038 train_time:317778ms step_avg:144.97ms | |
| step:2203/3000 train_loss:3.3819 train_time:317922ms step_avg:144.97ms | |
| step:2204/3000 train_loss:3.8838 train_time:318065ms step_avg:144.97ms | |
| step:2205/3000 train_loss:3.2963 train_time:318207ms step_avg:144.97ms | |
| step:2206/3000 train_loss:3.4157 train_time:318351ms step_avg:144.97ms | |
| step:2207/3000 train_loss:3.4390 train_time:318495ms step_avg:144.97ms | |
| step:2208/3000 train_loss:3.4492 train_time:318640ms step_avg:144.97ms | |
| step:2209/3000 train_loss:3.3479 train_time:318783ms step_avg:144.97ms | |
| step:2210/3000 train_loss:3.4241 train_time:318926ms step_avg:144.97ms | |
| step:2211/3000 train_loss:3.4277 train_time:319069ms step_avg:144.97ms | |
| step:2212/3000 train_loss:3.4240 train_time:319214ms step_avg:144.97ms | |
| step:2213/3000 train_loss:3.4435 train_time:319359ms step_avg:144.97ms | |
| step:2214/3000 train_loss:3.3085 train_time:319502ms step_avg:144.96ms | |
| step:2215/3000 train_loss:3.3776 train_time:319645ms step_avg:144.96ms | |
| step:2216/3000 train_loss:3.5109 train_time:319787ms step_avg:144.96ms | |
| step:2217/3000 train_loss:3.4621 train_time:319932ms step_avg:144.96ms | |
| step:2218/3000 train_loss:3.4254 train_time:320077ms step_avg:144.96ms | |
| step:2219/3000 train_loss:3.4384 train_time:320221ms step_avg:144.96ms | |
| step:2220/3000 train_loss:3.3400 train_time:320364ms step_avg:144.96ms | |
| step:2221/3000 train_loss:3.5986 train_time:320506ms step_avg:144.96ms | |
| step:2222/3000 train_loss:3.4848 train_time:320650ms step_avg:144.96ms | |
| step:2223/3000 train_loss:3.5055 train_time:320794ms step_avg:144.96ms | |
| step:2224/3000 train_loss:3.3942 train_time:320939ms step_avg:144.96ms | |
| step:2225/3000 train_loss:3.5234 train_time:321082ms step_avg:144.96ms | |
| step:2226/3000 train_loss:3.2665 train_time:321225ms step_avg:144.96ms | |
| step:2227/3000 train_loss:3.5467 train_time:321368ms step_avg:144.96ms | |
| step:2228/3000 train_loss:3.4824 train_time:321512ms step_avg:144.96ms | |
| step:2229/3000 train_loss:3.2852 train_time:321657ms step_avg:144.96ms | |
| step:2230/3000 train_loss:3.6277 train_time:321800ms step_avg:144.95ms | |
| step:2231/3000 train_loss:3.3133 train_time:321943ms step_avg:144.95ms | |
| step:2232/3000 train_loss:3.7837 train_time:322086ms step_avg:144.95ms | |
| step:2233/3000 train_loss:3.4737 train_time:322230ms step_avg:144.95ms | |
| step:2234/3000 train_loss:3.4170 train_time:322375ms step_avg:144.95ms | |
| step:2235/3000 train_loss:3.4354 train_time:322519ms step_avg:144.95ms | |
| step:2236/3000 train_loss:3.2294 train_time:322663ms step_avg:144.95ms | |
| step:2237/3000 train_loss:3.2287 train_time:322805ms step_avg:144.95ms | |
| step:2238/3000 train_loss:3.4541 train_time:322950ms step_avg:144.95ms | |
| step:2239/3000 train_loss:3.5528 train_time:323093ms step_avg:144.95ms | |
| step:2240/3000 train_loss:3.2709 train_time:323239ms step_avg:144.95ms | |
| step:2241/3000 train_loss:3.3338 train_time:323383ms step_avg:144.95ms | |
| step:2242/3000 train_loss:3.5288 train_time:323525ms step_avg:144.95ms | |
| step:2243/3000 train_loss:3.4879 train_time:323668ms step_avg:144.95ms | |
| step:2244/3000 train_loss:3.3417 train_time:323813ms step_avg:144.95ms | |
| step:2245/3000 train_loss:3.4191 train_time:323958ms step_avg:144.95ms | |
| step:2246/3000 train_loss:3.4356 train_time:324101ms step_avg:144.95ms | |
| step:2247/3000 train_loss:3.2708 train_time:324245ms step_avg:144.95ms | |
| step:2248/3000 train_loss:3.2869 train_time:324387ms step_avg:144.95ms | |
| step:2249/3000 train_loss:3.5596 train_time:324531ms step_avg:144.94ms | |
| step:2250/3000 train_loss:3.2657 train_time:324675ms step_avg:144.94ms | |
| step:2250/3000 val_loss:3.3985 train_time:324714ms step_avg:144.96ms | |
| step:2251/3000 train_loss:3.2765 train_time:324831ms step_avg:144.95ms | |
| step:2252/3000 train_loss:3.3502 train_time:324978ms step_avg:144.95ms | |
| step:2253/3000 train_loss:3.3174 train_time:325120ms step_avg:144.95ms | |
| step:2254/3000 train_loss:3.3787 train_time:325261ms step_avg:144.95ms | |
| step:2255/3000 train_loss:3.4303 train_time:325402ms step_avg:144.95ms | |
| step:2256/3000 train_loss:3.2980 train_time:325545ms step_avg:144.94ms | |
| step:2257/3000 train_loss:3.5950 train_time:325687ms step_avg:144.94ms | |
| step:2258/3000 train_loss:3.4672 train_time:325836ms step_avg:144.94ms | |
| step:2259/3000 train_loss:3.7760 train_time:325981ms step_avg:144.94ms | |
| step:2260/3000 train_loss:3.4668 train_time:326123ms step_avg:144.94ms | |
| step:2261/3000 train_loss:3.5163 train_time:326266ms step_avg:144.94ms | |
| step:2262/3000 train_loss:3.4251 train_time:326408ms step_avg:144.94ms | |
| step:2263/3000 train_loss:3.4302 train_time:326552ms step_avg:144.94ms | |
| step:2264/3000 train_loss:3.1927 train_time:326696ms step_avg:144.94ms | |
| step:2265/3000 train_loss:3.3091 train_time:326840ms step_avg:144.94ms | |
| step:2266/3000 train_loss:3.5296 train_time:326983ms step_avg:144.94ms | |
| step:2267/3000 train_loss:3.2590 train_time:327126ms step_avg:144.94ms | |
| step:2268/3000 train_loss:3.3362 train_time:327380ms step_avg:144.99ms | |
| step:2269/3000 train_loss:3.3130 train_time:327534ms step_avg:144.99ms | |
| step:2270/3000 train_loss:3.2760 train_time:327678ms step_avg:144.99ms | |
| step:2271/3000 train_loss:3.6755 train_time:327819ms step_avg:144.99ms | |
| step:2272/3000 train_loss:3.3319 train_time:327961ms step_avg:144.99ms | |
| step:2273/3000 train_loss:3.3376 train_time:328102ms step_avg:144.99ms | |
| step:2274/3000 train_loss:3.4195 train_time:328246ms step_avg:144.98ms | |
| step:2275/3000 train_loss:3.3689 train_time:328397ms step_avg:144.99ms | |
| step:2276/3000 train_loss:3.3842 train_time:328542ms step_avg:144.99ms | |
| step:2277/3000 train_loss:3.2641 train_time:328685ms step_avg:144.99ms | |
| step:2278/3000 train_loss:3.3681 train_time:328827ms step_avg:144.99ms | |
| step:2279/3000 train_loss:3.4960 train_time:328970ms step_avg:144.98ms | |
| step:2280/3000 train_loss:3.2968 train_time:329241ms step_avg:145.04ms | |
| step:2281/3000 train_loss:3.3578 train_time:329381ms step_avg:145.04ms | |
| step:2282/3000 train_loss:3.3701 train_time:329523ms step_avg:145.04ms | |
| step:2283/3000 train_loss:3.5034 train_time:329666ms step_avg:145.04ms | |
| step:2284/3000 train_loss:3.3864 train_time:329807ms step_avg:145.03ms | |
| step:2285/3000 train_loss:3.4099 train_time:329950ms step_avg:145.03ms | |
| step:2286/3000 train_loss:3.4053 train_time:330095ms step_avg:145.03ms | |
| step:2287/3000 train_loss:3.4070 train_time:330241ms step_avg:145.03ms | |
| step:2288/3000 train_loss:3.3606 train_time:330384ms step_avg:145.03ms | |
| step:2289/3000 train_loss:3.4956 train_time:330528ms step_avg:145.03ms | |
| step:2290/3000 train_loss:3.4577 train_time:330672ms step_avg:145.03ms | |
| step:2291/3000 train_loss:3.3488 train_time:330816ms step_avg:145.03ms | |
| step:2292/3000 train_loss:3.6806 train_time:330960ms step_avg:145.03ms | |
| step:2293/3000 train_loss:3.3447 train_time:331103ms step_avg:145.03ms | |
| step:2294/3000 train_loss:3.2874 train_time:331248ms step_avg:145.03ms | |
| step:2295/3000 train_loss:3.4792 train_time:331391ms step_avg:145.03ms | |
| step:2296/3000 train_loss:3.4213 train_time:331536ms step_avg:145.03ms | |
| step:2297/3000 train_loss:3.4057 train_time:331679ms step_avg:145.03ms | |
| step:2298/3000 train_loss:3.7761 train_time:331821ms step_avg:145.03ms | |
| step:2299/3000 train_loss:3.2917 train_time:331963ms step_avg:145.03ms | |
| step:2300/3000 train_loss:3.2929 train_time:332108ms step_avg:145.03ms | |
| step:2301/3000 train_loss:3.6257 train_time:332254ms step_avg:145.03ms | |
| step:2302/3000 train_loss:3.3510 train_time:332399ms step_avg:145.03ms | |
| step:2303/3000 train_loss:3.3740 train_time:332541ms step_avg:145.02ms | |
| step:2304/3000 train_loss:3.3515 train_time:332683ms step_avg:145.02ms | |
| step:2305/3000 train_loss:3.2924 train_time:332827ms step_avg:145.02ms | |
| step:2306/3000 train_loss:3.4490 train_time:332971ms step_avg:145.02ms | |
| step:2307/3000 train_loss:3.3167 train_time:333116ms step_avg:145.02ms | |
| step:2308/3000 train_loss:3.3312 train_time:333260ms step_avg:145.02ms | |
| step:2309/3000 train_loss:3.4535 train_time:333403ms step_avg:145.02ms | |
| step:2310/3000 train_loss:3.4266 train_time:333546ms step_avg:145.02ms | |
| step:2311/3000 train_loss:3.2968 train_time:333690ms step_avg:145.02ms | |
| step:2312/3000 train_loss:3.4091 train_time:333835ms step_avg:145.02ms | |
| step:2313/3000 train_loss:3.5359 train_time:333979ms step_avg:145.02ms | |
| step:2314/3000 train_loss:3.3398 train_time:334121ms step_avg:145.02ms | |
| step:2315/3000 train_loss:3.2758 train_time:334266ms step_avg:145.02ms | |
| step:2316/3000 train_loss:3.3653 train_time:334409ms step_avg:145.02ms | |
| step:2317/3000 train_loss:3.2529 train_time:334554ms step_avg:145.02ms | |
| step:2318/3000 train_loss:3.3527 train_time:334699ms step_avg:145.02ms | |
| step:2319/3000 train_loss:3.3745 train_time:334841ms step_avg:145.02ms | |
| step:2320/3000 train_loss:3.2303 train_time:334983ms step_avg:145.01ms | |
| step:2321/3000 train_loss:3.3638 train_time:335127ms step_avg:145.01ms | |
| step:2322/3000 train_loss:3.4047 train_time:335272ms step_avg:145.01ms | |
| step:2323/3000 train_loss:3.3275 train_time:335416ms step_avg:145.01ms | |
| step:2324/3000 train_loss:3.3719 train_time:335561ms step_avg:145.01ms | |
| step:2325/3000 train_loss:3.2852 train_time:335704ms step_avg:145.01ms | |
| step:2326/3000 train_loss:3.4325 train_time:335846ms step_avg:145.01ms | |
| step:2327/3000 train_loss:3.4429 train_time:335990ms step_avg:145.01ms | |
| step:2328/3000 train_loss:3.2160 train_time:336136ms step_avg:145.01ms | |
| step:2329/3000 train_loss:3.3252 train_time:336280ms step_avg:145.01ms | |
| step:2330/3000 train_loss:3.3501 train_time:336422ms step_avg:145.01ms | |
| step:2331/3000 train_loss:3.3288 train_time:336566ms step_avg:145.01ms | |
| step:2332/3000 train_loss:3.5095 train_time:336709ms step_avg:145.01ms | |
| step:2333/3000 train_loss:3.3797 train_time:336854ms step_avg:145.01ms | |
| step:2334/3000 train_loss:3.3624 train_time:336998ms step_avg:145.01ms | |
| step:2335/3000 train_loss:3.4502 train_time:337141ms step_avg:145.01ms | |
| step:2336/3000 train_loss:3.2810 train_time:337283ms step_avg:145.01ms | |
| step:2337/3000 train_loss:3.4396 train_time:337428ms step_avg:145.01ms | |
| step:2338/3000 train_loss:3.3937 train_time:337573ms step_avg:145.01ms | |
| step:2339/3000 train_loss:3.3319 train_time:337716ms step_avg:145.00ms | |
| step:2340/3000 train_loss:3.4160 train_time:337860ms step_avg:145.00ms | |
| step:2341/3000 train_loss:3.4661 train_time:338003ms step_avg:145.00ms | |
| step:2342/3000 train_loss:3.3338 train_time:338146ms step_avg:145.00ms | |
| step:2343/3000 train_loss:3.3509 train_time:338289ms step_avg:145.00ms | |
| step:2344/3000 train_loss:3.4201 train_time:338436ms step_avg:145.00ms | |
| step:2345/3000 train_loss:3.3525 train_time:338580ms step_avg:145.00ms | |
| step:2346/3000 train_loss:3.4712 train_time:338722ms step_avg:145.00ms | |
| step:2347/3000 train_loss:3.3732 train_time:338866ms step_avg:145.00ms | |
| step:2348/3000 train_loss:3.4876 train_time:339010ms step_avg:145.00ms | |
| step:2349/3000 train_loss:3.4436 train_time:339155ms step_avg:145.00ms | |
| step:2350/3000 train_loss:3.4856 train_time:339299ms step_avg:145.00ms | |
| step:2351/3000 train_loss:3.1825 train_time:339442ms step_avg:145.00ms | |
| step:2352/3000 train_loss:3.2985 train_time:339584ms step_avg:145.00ms | |
| step:2353/3000 train_loss:3.2983 train_time:339728ms step_avg:145.00ms | |
| step:2354/3000 train_loss:3.5210 train_time:339872ms step_avg:145.00ms | |
| step:2355/3000 train_loss:3.3075 train_time:340016ms step_avg:145.00ms | |
| step:2356/3000 train_loss:3.3025 train_time:340160ms step_avg:145.00ms | |
| step:2357/3000 train_loss:3.4552 train_time:340302ms step_avg:144.99ms | |
| step:2358/3000 train_loss:3.2971 train_time:340445ms step_avg:144.99ms | |
| step:2359/3000 train_loss:3.4132 train_time:340589ms step_avg:144.99ms | |
| step:2360/3000 train_loss:3.3127 train_time:340736ms step_avg:144.99ms | |
| step:2361/3000 train_loss:3.3230 train_time:340880ms step_avg:144.99ms | |
| step:2362/3000 train_loss:3.3440 train_time:341022ms step_avg:144.99ms | |
| step:2363/3000 train_loss:3.4193 train_time:341165ms step_avg:144.99ms | |
| step:2364/3000 train_loss:3.3670 train_time:341309ms step_avg:144.99ms | |
| step:2365/3000 train_loss:3.8017 train_time:341454ms step_avg:144.99ms | |
| step:2366/3000 train_loss:3.4324 train_time:341598ms step_avg:144.99ms | |
| step:2367/3000 train_loss:3.5658 train_time:341742ms step_avg:144.99ms | |
| step:2368/3000 train_loss:3.3868 train_time:341884ms step_avg:144.99ms | |
| step:2369/3000 train_loss:3.3911 train_time:342028ms step_avg:144.99ms | |
| step:2370/3000 train_loss:3.4271 train_time:342172ms step_avg:144.99ms | |
| step:2371/3000 train_loss:3.3127 train_time:342316ms step_avg:144.99ms | |
| step:2372/3000 train_loss:3.5463 train_time:342460ms step_avg:144.99ms | |
| step:2373/3000 train_loss:3.3873 train_time:342602ms step_avg:144.99ms | |
| step:2374/3000 train_loss:3.9448 train_time:342745ms step_avg:144.99ms | |
| step:2375/3000 train_loss:3.3720 train_time:342890ms step_avg:144.99ms | |
| step:2375/3000 val_loss:3.3728 train_time:342929ms step_avg:145.00ms | |
| step:2376/3000 train_loss:3.2714 train_time:343046ms step_avg:144.99ms | |
| step:2377/3000 train_loss:3.4333 train_time:343191ms step_avg:144.99ms | |
| step:2378/3000 train_loss:3.4073 train_time:343333ms step_avg:144.99ms | |
| step:2379/3000 train_loss:3.4166 train_time:343476ms step_avg:144.99ms | |
| step:2380/3000 train_loss:3.3960 train_time:343619ms step_avg:144.99ms | |
| step:2381/3000 train_loss:3.3011 train_time:343763ms step_avg:144.99ms | |
| step:2382/3000 train_loss:3.3968 train_time:343906ms step_avg:144.99ms | |
| step:2383/3000 train_loss:3.4130 train_time:344052ms step_avg:144.99ms | |
| step:2384/3000 train_loss:3.3604 train_time:344197ms step_avg:144.99ms | |
| step:2385/3000 train_loss:3.2941 train_time:344342ms step_avg:144.99ms | |
| step:2386/3000 train_loss:3.4032 train_time:344486ms step_avg:144.99ms | |
| step:2387/3000 train_loss:3.3558 train_time:344628ms step_avg:144.98ms | |
| step:2388/3000 train_loss:3.3595 train_time:344770ms step_avg:144.98ms | |
| step:2389/3000 train_loss:3.3958 train_time:344912ms step_avg:144.98ms | |
| step:2390/3000 train_loss:3.3752 train_time:345058ms step_avg:144.98ms | |
| step:2391/3000 train_loss:3.3774 train_time:345204ms step_avg:144.98ms | |
| step:2392/3000 train_loss:3.2611 train_time:345348ms step_avg:144.98ms | |
| step:2393/3000 train_loss:3.4852 train_time:345492ms step_avg:144.98ms | |
| step:2394/3000 train_loss:3.3071 train_time:345635ms step_avg:144.98ms | |
| step:2395/3000 train_loss:3.4209 train_time:345778ms step_avg:144.98ms | |
| step:2396/3000 train_loss:3.5286 train_time:345922ms step_avg:144.98ms | |
| step:2397/3000 train_loss:3.5325 train_time:346068ms step_avg:144.98ms | |
| step:2398/3000 train_loss:3.5006 train_time:346211ms step_avg:144.98ms | |
| step:2399/3000 train_loss:3.4554 train_time:346354ms step_avg:144.98ms | |
| step:2400/3000 train_loss:3.3279 train_time:346498ms step_avg:144.98ms | |
| step:2401/3000 train_loss:3.3394 train_time:346643ms step_avg:144.98ms | |
| step:2402/3000 train_loss:3.4370 train_time:346786ms step_avg:144.98ms | |
| step:2403/3000 train_loss:3.2781 train_time:346929ms step_avg:144.98ms | |
| step:2404/3000 train_loss:3.4038 train_time:347073ms step_avg:144.98ms | |
| step:2405/3000 train_loss:3.6194 train_time:347216ms step_avg:144.98ms | |
| step:2406/3000 train_loss:3.3378 train_time:347361ms step_avg:144.98ms | |
| step:2407/3000 train_loss:3.4969 train_time:347506ms step_avg:144.98ms | |
| step:2408/3000 train_loss:3.3587 train_time:347649ms step_avg:144.97ms | |
| step:2409/3000 train_loss:3.2812 train_time:347791ms step_avg:144.97ms | |
| step:2410/3000 train_loss:3.4273 train_time:347935ms step_avg:144.97ms | |
| step:2411/3000 train_loss:3.2035 train_time:348079ms step_avg:144.97ms | |
| step:2412/3000 train_loss:3.6430 train_time:348225ms step_avg:144.97ms | |
| step:2413/3000 train_loss:3.3362 train_time:348369ms step_avg:144.97ms | |
| step:2414/3000 train_loss:3.4185 train_time:348512ms step_avg:144.97ms | |
| step:2415/3000 train_loss:3.3334 train_time:348655ms step_avg:144.97ms | |
| step:2416/3000 train_loss:3.4043 train_time:348798ms step_avg:144.97ms | |
| step:2417/3000 train_loss:3.2138 train_time:348944ms step_avg:144.97ms | |
| step:2418/3000 train_loss:3.1395 train_time:349088ms step_avg:144.97ms | |
| step:2419/3000 train_loss:3.4427 train_time:349230ms step_avg:144.97ms | |
| step:2420/3000 train_loss:3.3220 train_time:349374ms step_avg:144.97ms | |
| step:2421/3000 train_loss:3.3480 train_time:349517ms step_avg:144.97ms | |
| step:2422/3000 train_loss:3.4599 train_time:349662ms step_avg:144.97ms | |
| step:2423/3000 train_loss:3.4949 train_time:349806ms step_avg:144.97ms | |
| step:2424/3000 train_loss:3.3185 train_time:349948ms step_avg:144.97ms | |
| step:2425/3000 train_loss:3.4128 train_time:350092ms step_avg:144.97ms | |
| step:2426/3000 train_loss:3.4021 train_time:350235ms step_avg:144.96ms | |
| step:2427/3000 train_loss:3.3312 train_time:350381ms step_avg:144.97ms | |
| step:2428/3000 train_loss:3.2733 train_time:350525ms step_avg:144.96ms | |
| step:2429/3000 train_loss:3.4136 train_time:350668ms step_avg:144.96ms | |
| step:2430/3000 train_loss:3.2999 train_time:350811ms step_avg:144.96ms | |
| step:2431/3000 train_loss:3.3670 train_time:350953ms step_avg:144.96ms | |
| step:2432/3000 train_loss:3.4261 train_time:351097ms step_avg:144.96ms | |
| step:2433/3000 train_loss:3.3914 train_time:351242ms step_avg:144.96ms | |
| step:2434/3000 train_loss:3.2516 train_time:351387ms step_avg:144.96ms | |
| step:2435/3000 train_loss:3.2213 train_time:351530ms step_avg:144.96ms | |
| step:2436/3000 train_loss:3.3920 train_time:351673ms step_avg:144.96ms | |
| step:2437/3000 train_loss:3.2437 train_time:351816ms step_avg:144.96ms | |
| step:2438/3000 train_loss:3.3223 train_time:351960ms step_avg:144.96ms | |
| step:2439/3000 train_loss:3.4237 train_time:352104ms step_avg:144.96ms | |
| step:2440/3000 train_loss:3.3328 train_time:352248ms step_avg:144.96ms | |
| step:2441/3000 train_loss:3.4218 train_time:352391ms step_avg:144.96ms | |
| step:2442/3000 train_loss:3.3084 train_time:352534ms step_avg:144.96ms | |
| step:2443/3000 train_loss:3.3579 train_time:352678ms step_avg:144.96ms | |
| step:2444/3000 train_loss:3.2533 train_time:352823ms step_avg:144.96ms | |
| step:2445/3000 train_loss:3.2576 train_time:352967ms step_avg:144.96ms | |
| step:2446/3000 train_loss:3.4197 train_time:353110ms step_avg:144.95ms | |
| step:2447/3000 train_loss:3.2929 train_time:353252ms step_avg:144.95ms | |
| step:2448/3000 train_loss:3.3531 train_time:353396ms step_avg:144.95ms | |
| step:2449/3000 train_loss:3.5151 train_time:353541ms step_avg:144.95ms | |
| step:2450/3000 train_loss:3.3543 train_time:353686ms step_avg:144.95ms | |
| step:2451/3000 train_loss:3.4200 train_time:353829ms step_avg:144.95ms | |
| step:2452/3000 train_loss:3.3225 train_time:353973ms step_avg:144.95ms | |
| step:2453/3000 train_loss:3.4258 train_time:354116ms step_avg:144.95ms | |
| step:2454/3000 train_loss:3.3243 train_time:354260ms step_avg:144.95ms | |
| step:2455/3000 train_loss:3.4441 train_time:354404ms step_avg:144.95ms | |
| step:2456/3000 train_loss:3.3839 train_time:354548ms step_avg:144.95ms | |
| step:2457/3000 train_loss:3.3062 train_time:354803ms step_avg:145.00ms | |
| step:2458/3000 train_loss:3.2172 train_time:354956ms step_avg:145.00ms | |
| step:2459/3000 train_loss:3.3607 train_time:355099ms step_avg:145.00ms | |
| step:2460/3000 train_loss:3.9580 train_time:355243ms step_avg:145.00ms | |
| step:2461/3000 train_loss:3.4136 train_time:355385ms step_avg:145.00ms | |
| step:2462/3000 train_loss:3.2338 train_time:355526ms step_avg:144.99ms | |
| step:2463/3000 train_loss:3.4313 train_time:355671ms step_avg:144.99ms | |
| step:2464/3000 train_loss:3.3444 train_time:355819ms step_avg:145.00ms | |
| step:2465/3000 train_loss:3.5512 train_time:355965ms step_avg:145.00ms | |
| step:2466/3000 train_loss:3.7138 train_time:356108ms step_avg:145.00ms | |
| step:2467/3000 train_loss:3.4610 train_time:356251ms step_avg:144.99ms | |
| step:2468/3000 train_loss:3.3359 train_time:356393ms step_avg:144.99ms | |
| step:2469/3000 train_loss:3.4512 train_time:356535ms step_avg:144.99ms | |
| step:2470/3000 train_loss:3.4629 train_time:356810ms step_avg:145.04ms | |
| step:2471/3000 train_loss:3.2600 train_time:356951ms step_avg:145.04ms | |
| step:2472/3000 train_loss:3.3558 train_time:357092ms step_avg:145.04ms | |
| step:2473/3000 train_loss:3.3546 train_time:357233ms step_avg:145.04ms | |
| step:2474/3000 train_loss:3.4903 train_time:357375ms step_avg:145.04ms | |
| step:2475/3000 train_loss:3.6282 train_time:357517ms step_avg:145.04ms | |
| step:2476/3000 train_loss:3.2117 train_time:357664ms step_avg:145.04ms | |
| step:2477/3000 train_loss:3.4282 train_time:357811ms step_avg:145.04ms | |
| step:2478/3000 train_loss:3.3908 train_time:357954ms step_avg:145.04ms | |
| step:2479/3000 train_loss:3.2275 train_time:358098ms step_avg:145.04ms | |
| step:2480/3000 train_loss:3.2240 train_time:358241ms step_avg:145.04ms | |
| step:2481/3000 train_loss:3.3622 train_time:358386ms step_avg:145.04ms | |
| step:2482/3000 train_loss:3.3744 train_time:358527ms step_avg:145.04ms | |
| step:2483/3000 train_loss:3.4042 train_time:358671ms step_avg:145.03ms | |
| step:2484/3000 train_loss:3.3559 train_time:358815ms step_avg:145.03ms | |
| step:2485/3000 train_loss:3.3616 train_time:358959ms step_avg:145.03ms | |
| step:2486/3000 train_loss:3.2537 train_time:359103ms step_avg:145.03ms | |
| step:2487/3000 train_loss:3.4501 train_time:359247ms step_avg:145.03ms | |
| step:2488/3000 train_loss:3.3997 train_time:359389ms step_avg:145.03ms | |
| step:2489/3000 train_loss:3.3045 train_time:359531ms step_avg:145.03ms | |
| step:2490/3000 train_loss:3.4180 train_time:359676ms step_avg:145.03ms | |
| step:2491/3000 train_loss:3.4630 train_time:359821ms step_avg:145.03ms | |
| step:2492/3000 train_loss:3.5469 train_time:359966ms step_avg:145.03ms | |
| step:2493/3000 train_loss:3.3956 train_time:360109ms step_avg:145.03ms | |
| step:2494/3000 train_loss:3.3215 train_time:360251ms step_avg:145.03ms | |
| step:2495/3000 train_loss:3.4458 train_time:360394ms step_avg:145.03ms | |
| step:2496/3000 train_loss:3.3957 train_time:360538ms step_avg:145.03ms | |
| step:2497/3000 train_loss:3.2997 train_time:360683ms step_avg:145.03ms | |
| step:2498/3000 train_loss:3.4012 train_time:360827ms step_avg:145.03ms | |
| step:2499/3000 train_loss:3.4588 train_time:360970ms step_avg:145.03ms | |
| step:2500/3000 train_loss:3.4818 train_time:361113ms step_avg:145.03ms | |
| step:2500/3000 val_loss:3.3475 train_time:361151ms step_avg:145.04ms | |
| step:2501/3000 train_loss:3.4145 train_time:361267ms step_avg:145.03ms | |
| step:2502/3000 train_loss:3.3767 train_time:361414ms step_avg:145.03ms | |
| step:2503/3000 train_loss:3.3877 train_time:361559ms step_avg:145.03ms | |
| step:2504/3000 train_loss:3.2502 train_time:361702ms step_avg:145.03ms | |
| step:2505/3000 train_loss:3.4511 train_time:361843ms step_avg:145.03ms | |
| step:2506/3000 train_loss:3.4049 train_time:361984ms step_avg:145.03ms | |
| step:2507/3000 train_loss:3.3428 train_time:362126ms step_avg:145.02ms | |
| step:2508/3000 train_loss:3.3470 train_time:362273ms step_avg:145.03ms | |
| step:2509/3000 train_loss:3.3055 train_time:362420ms step_avg:145.03ms | |
| step:2510/3000 train_loss:3.4912 train_time:362563ms step_avg:145.03ms | |
| step:2511/3000 train_loss:3.3187 train_time:362705ms step_avg:145.02ms | |
| step:2512/3000 train_loss:3.3026 train_time:362847ms step_avg:145.02ms | |
| step:2513/3000 train_loss:3.3898 train_time:362990ms step_avg:145.02ms | |
| step:2514/3000 train_loss:3.4054 train_time:363133ms step_avg:145.02ms | |
| step:2515/3000 train_loss:3.3081 train_time:363279ms step_avg:145.02ms | |
| step:2516/3000 train_loss:3.3965 train_time:363425ms step_avg:145.02ms | |
| step:2517/3000 train_loss:3.3799 train_time:363568ms step_avg:145.02ms | |
| step:2518/3000 train_loss:3.2704 train_time:363711ms step_avg:145.02ms | |
| step:2519/3000 train_loss:3.2954 train_time:363855ms step_avg:145.02ms | |
| step:2520/3000 train_loss:3.4153 train_time:363998ms step_avg:145.02ms | |
| step:2521/3000 train_loss:3.4021 train_time:364142ms step_avg:145.02ms | |
| step:2522/3000 train_loss:3.2938 train_time:364286ms step_avg:145.02ms | |
| step:2523/3000 train_loss:3.2662 train_time:364429ms step_avg:145.02ms | |
| step:2524/3000 train_loss:3.3625 train_time:364574ms step_avg:145.02ms | |
| step:2525/3000 train_loss:3.2169 train_time:364719ms step_avg:145.02ms | |
| step:2526/3000 train_loss:3.4319 train_time:364862ms step_avg:145.02ms | |
| step:2527/3000 train_loss:3.3338 train_time:365005ms step_avg:145.02ms | |
| step:2528/3000 train_loss:3.3470 train_time:365149ms step_avg:145.02ms | |
| step:2529/3000 train_loss:3.3376 train_time:365294ms step_avg:145.02ms | |
| step:2530/3000 train_loss:3.3459 train_time:365439ms step_avg:145.02ms | |
| step:2531/3000 train_loss:3.3821 train_time:365583ms step_avg:145.02ms | |
| step:2532/3000 train_loss:3.2158 train_time:365726ms step_avg:145.01ms | |
| step:2533/3000 train_loss:3.3729 train_time:365868ms step_avg:145.01ms | |
| step:2534/3000 train_loss:3.2653 train_time:366012ms step_avg:145.01ms | |
| step:2535/3000 train_loss:3.3028 train_time:366157ms step_avg:145.01ms | |
| step:2536/3000 train_loss:3.3624 train_time:366301ms step_avg:145.01ms | |
| step:2537/3000 train_loss:3.3654 train_time:366444ms step_avg:145.01ms | |
| step:2538/3000 train_loss:3.2020 train_time:366588ms step_avg:145.01ms | |
| step:2539/3000 train_loss:3.5014 train_time:366732ms step_avg:145.01ms | |
| step:2540/3000 train_loss:3.1877 train_time:366876ms step_avg:145.01ms | |
| step:2541/3000 train_loss:3.3663 train_time:367020ms step_avg:145.01ms | |
| step:2542/3000 train_loss:3.1219 train_time:367163ms step_avg:145.01ms | |
| step:2543/3000 train_loss:3.5718 train_time:367306ms step_avg:145.01ms | |
| step:2544/3000 train_loss:3.3434 train_time:367450ms step_avg:145.01ms | |
| step:2545/3000 train_loss:3.5035 train_time:367594ms step_avg:145.01ms | |
| step:2546/3000 train_loss:3.3376 train_time:367739ms step_avg:145.01ms | |
| step:2547/3000 train_loss:3.3085 train_time:367883ms step_avg:145.01ms | |
| step:2548/3000 train_loss:3.3229 train_time:368025ms step_avg:145.01ms | |
| step:2549/3000 train_loss:3.4764 train_time:368168ms step_avg:145.01ms | |
| step:2550/3000 train_loss:3.3344 train_time:368311ms step_avg:145.00ms | |
| step:2551/3000 train_loss:3.3385 train_time:368458ms step_avg:145.01ms | |
| step:2552/3000 train_loss:3.3689 train_time:368604ms step_avg:145.01ms | |
| step:2553/3000 train_loss:3.3911 train_time:368745ms step_avg:145.00ms | |
| step:2554/3000 train_loss:3.2904 train_time:368889ms step_avg:145.00ms | |
| step:2555/3000 train_loss:3.3973 train_time:369032ms step_avg:145.00ms | |
| step:2556/3000 train_loss:3.4526 train_time:369177ms step_avg:145.00ms | |
| step:2557/3000 train_loss:3.4438 train_time:369320ms step_avg:145.00ms | |
| step:2558/3000 train_loss:3.2910 train_time:369464ms step_avg:145.00ms | |
| step:2559/3000 train_loss:3.2959 train_time:369607ms step_avg:145.00ms | |
| step:2560/3000 train_loss:3.2998 train_time:369751ms step_avg:145.00ms | |
| step:2561/3000 train_loss:3.4223 train_time:369896ms step_avg:145.00ms | |
| step:2562/3000 train_loss:3.4555 train_time:370040ms step_avg:145.00ms | |
| step:2563/3000 train_loss:3.3304 train_time:370184ms step_avg:145.00ms | |
| step:2564/3000 train_loss:3.3677 train_time:370326ms step_avg:145.00ms | |
| step:2565/3000 train_loss:3.2834 train_time:370469ms step_avg:145.00ms | |
| step:2566/3000 train_loss:3.2957 train_time:370613ms step_avg:145.00ms | |
| step:2567/3000 train_loss:3.2908 train_time:370759ms step_avg:145.00ms | |
| step:2568/3000 train_loss:3.3415 train_time:370903ms step_avg:145.00ms | |
| step:2569/3000 train_loss:3.4821 train_time:371045ms step_avg:145.00ms | |
| step:2570/3000 train_loss:3.3878 train_time:371188ms step_avg:145.00ms | |
| step:2571/3000 train_loss:3.4602 train_time:371331ms step_avg:144.99ms | |
| step:2572/3000 train_loss:3.2261 train_time:371475ms step_avg:144.99ms | |
| step:2573/3000 train_loss:3.3295 train_time:371620ms step_avg:144.99ms | |
| step:2574/3000 train_loss:2.9815 train_time:371764ms step_avg:144.99ms | |
| step:2575/3000 train_loss:3.2359 train_time:371906ms step_avg:144.99ms | |
| step:2576/3000 train_loss:3.1755 train_time:372050ms step_avg:144.99ms | |
| step:2577/3000 train_loss:3.2867 train_time:372194ms step_avg:144.99ms | |
| step:2578/3000 train_loss:3.3379 train_time:372339ms step_avg:144.99ms | |
| step:2579/3000 train_loss:3.2512 train_time:372484ms step_avg:144.99ms | |
| step:2580/3000 train_loss:3.3088 train_time:372626ms step_avg:144.99ms | |
| step:2581/3000 train_loss:3.2510 train_time:372769ms step_avg:144.99ms | |
| step:2582/3000 train_loss:3.3640 train_time:372913ms step_avg:144.99ms | |
| step:2583/3000 train_loss:3.2422 train_time:373059ms step_avg:144.99ms | |
| step:2584/3000 train_loss:3.4272 train_time:373202ms step_avg:144.99ms | |
| step:2585/3000 train_loss:3.3353 train_time:373345ms step_avg:144.99ms | |
| step:2586/3000 train_loss:3.3585 train_time:373489ms step_avg:144.99ms | |
| step:2587/3000 train_loss:3.4781 train_time:373633ms step_avg:144.99ms | |
| step:2588/3000 train_loss:3.3738 train_time:373779ms step_avg:144.99ms | |
| step:2589/3000 train_loss:3.2271 train_time:373923ms step_avg:144.99ms | |
| step:2590/3000 train_loss:3.3912 train_time:374066ms step_avg:144.99ms | |
| step:2591/3000 train_loss:3.2985 train_time:374209ms step_avg:144.99ms | |
| step:2592/3000 train_loss:3.5203 train_time:374353ms step_avg:144.99ms | |
| step:2593/3000 train_loss:3.3751 train_time:374498ms step_avg:144.99ms | |
| step:2594/3000 train_loss:3.1890 train_time:374642ms step_avg:144.99ms | |
| step:2595/3000 train_loss:3.2634 train_time:374785ms step_avg:144.98ms | |
| step:2596/3000 train_loss:3.6734 train_time:374928ms step_avg:144.98ms | |
| step:2597/3000 train_loss:3.3580 train_time:375072ms step_avg:144.98ms | |
| step:2598/3000 train_loss:3.3491 train_time:375218ms step_avg:144.98ms | |
| step:2599/3000 train_loss:3.2030 train_time:375362ms step_avg:144.98ms | |
| step:2600/3000 train_loss:3.4462 train_time:375505ms step_avg:144.98ms | |
| step:2601/3000 train_loss:3.6098 train_time:375647ms step_avg:144.98ms | |
| step:2602/3000 train_loss:3.1952 train_time:375792ms step_avg:144.98ms | |
| step:2603/3000 train_loss:3.3332 train_time:375936ms step_avg:144.98ms | |
| step:2604/3000 train_loss:3.1820 train_time:376080ms step_avg:144.98ms | |
| step:2605/3000 train_loss:3.4670 train_time:376224ms step_avg:144.98ms | |
| step:2606/3000 train_loss:3.3326 train_time:376367ms step_avg:144.98ms | |
| step:2607/3000 train_loss:3.2205 train_time:376510ms step_avg:144.98ms | |
| step:2608/3000 train_loss:3.1889 train_time:376657ms step_avg:144.98ms | |
| step:2609/3000 train_loss:3.3018 train_time:376802ms step_avg:144.98ms | |
| step:2610/3000 train_loss:3.4801 train_time:376944ms step_avg:144.98ms | |
| step:2611/3000 train_loss:3.3423 train_time:377087ms step_avg:144.98ms | |
| step:2612/3000 train_loss:3.1929 train_time:377230ms step_avg:144.98ms | |
| step:2613/3000 train_loss:3.2693 train_time:377376ms step_avg:144.98ms | |
| step:2614/3000 train_loss:3.3867 train_time:377520ms step_avg:144.98ms | |
| step:2615/3000 train_loss:3.3243 train_time:377664ms step_avg:144.98ms | |
| step:2616/3000 train_loss:3.3201 train_time:377807ms step_avg:144.98ms | |
| step:2617/3000 train_loss:3.3584 train_time:377951ms step_avg:144.98ms | |
| step:2618/3000 train_loss:3.3861 train_time:378095ms step_avg:144.98ms | |
| step:2619/3000 train_loss:3.2444 train_time:378239ms step_avg:144.97ms | |
| step:2620/3000 train_loss:3.4166 train_time:378383ms step_avg:144.97ms | |
| step:2621/3000 train_loss:3.3766 train_time:378526ms step_avg:144.97ms | |
| step:2622/3000 train_loss:3.5105 train_time:378668ms step_avg:144.97ms | |
| step:2623/3000 train_loss:3.4219 train_time:378812ms step_avg:144.97ms | |
| step:2624/3000 train_loss:3.3364 train_time:378958ms step_avg:144.97ms | |
| step:2625/3000 train_loss:3.2968 train_time:379103ms step_avg:144.97ms | |
| step:2625/3000 val_loss:3.3246 train_time:379139ms step_avg:144.99ms | |
| step:2626/3000 train_loss:3.3220 train_time:379257ms step_avg:144.98ms | |
| step:2627/3000 train_loss:3.3814 train_time:379402ms step_avg:144.98ms | |
| step:2628/3000 train_loss:3.1866 train_time:379545ms step_avg:144.98ms | |
| step:2629/3000 train_loss:3.4670 train_time:379689ms step_avg:144.97ms | |
| step:2630/3000 train_loss:3.3472 train_time:379831ms step_avg:144.97ms | |
| step:2631/3000 train_loss:3.3971 train_time:379973ms step_avg:144.97ms | |
| step:2632/3000 train_loss:3.6256 train_time:380116ms step_avg:144.97ms | |
| step:2633/3000 train_loss:3.3755 train_time:380261ms step_avg:144.97ms | |
| step:2634/3000 train_loss:3.2906 train_time:380408ms step_avg:144.97ms | |
| step:2635/3000 train_loss:3.2623 train_time:380552ms step_avg:144.97ms | |
| step:2636/3000 train_loss:3.3089 train_time:380696ms step_avg:144.97ms | |
| step:2637/3000 train_loss:3.0906 train_time:380837ms step_avg:144.97ms | |
| step:2638/3000 train_loss:3.4079 train_time:380979ms step_avg:144.97ms | |
| step:2639/3000 train_loss:3.3765 train_time:381122ms step_avg:144.97ms | |
| step:2640/3000 train_loss:3.2681 train_time:381269ms step_avg:144.97ms | |
| step:2641/3000 train_loss:3.3488 train_time:381414ms step_avg:144.97ms | |
| step:2642/3000 train_loss:3.3790 train_time:381557ms step_avg:144.97ms | |
| step:2643/3000 train_loss:3.1799 train_time:381701ms step_avg:144.97ms | |
| step:2644/3000 train_loss:3.3037 train_time:381843ms step_avg:144.97ms | |
| step:2645/3000 train_loss:3.3721 train_time:381988ms step_avg:144.97ms | |
| step:2646/3000 train_loss:3.3261 train_time:382241ms step_avg:145.01ms | |
| step:2647/3000 train_loss:3.2273 train_time:382393ms step_avg:145.01ms | |
| step:2648/3000 train_loss:3.4401 train_time:382535ms step_avg:145.01ms | |
| step:2649/3000 train_loss:3.7054 train_time:382676ms step_avg:145.01ms | |
| step:2650/3000 train_loss:3.3407 train_time:382818ms step_avg:145.01ms | |
| step:2651/3000 train_loss:3.3095 train_time:382961ms step_avg:145.01ms | |
| step:2652/3000 train_loss:3.4376 train_time:383106ms step_avg:145.01ms | |
| step:2653/3000 train_loss:3.2723 train_time:383255ms step_avg:145.01ms | |
| step:2654/3000 train_loss:3.2610 train_time:383398ms step_avg:145.01ms | |
| step:2655/3000 train_loss:3.3384 train_time:383540ms step_avg:145.01ms | |
| step:2656/3000 train_loss:3.2541 train_time:383683ms step_avg:145.00ms | |
| step:2657/3000 train_loss:3.2896 train_time:383827ms step_avg:145.00ms | |
| step:2658/3000 train_loss:3.2614 train_time:383971ms step_avg:145.00ms | |
| step:2659/3000 train_loss:3.3430 train_time:384116ms step_avg:145.00ms | |
| step:2660/3000 train_loss:3.4797 train_time:384393ms step_avg:145.05ms | |
| step:2661/3000 train_loss:3.2764 train_time:384534ms step_avg:145.05ms | |
| step:2662/3000 train_loss:3.4273 train_time:384676ms step_avg:145.05ms | |
| step:2663/3000 train_loss:3.2918 train_time:384818ms step_avg:145.05ms | |
| step:2664/3000 train_loss:3.2911 train_time:384959ms step_avg:145.05ms | |
| step:2665/3000 train_loss:3.2247 train_time:385101ms step_avg:145.05ms | |
| step:2666/3000 train_loss:3.2649 train_time:385245ms step_avg:145.05ms | |
| step:2667/3000 train_loss:3.3133 train_time:385394ms step_avg:145.05ms | |
| step:2668/3000 train_loss:3.3494 train_time:385538ms step_avg:145.05ms | |
| step:2669/3000 train_loss:3.2595 train_time:385680ms step_avg:145.05ms | |
| step:2670/3000 train_loss:3.3126 train_time:385823ms step_avg:145.05ms | |
| step:2671/3000 train_loss:3.2145 train_time:385966ms step_avg:145.05ms | |
| step:2672/3000 train_loss:3.2798 train_time:386110ms step_avg:145.04ms | |
| step:2673/3000 train_loss:3.2607 train_time:386254ms step_avg:145.04ms | |
| step:2674/3000 train_loss:3.3314 train_time:386398ms step_avg:145.04ms | |
| step:2675/3000 train_loss:3.3576 train_time:386542ms step_avg:145.04ms | |
| step:2676/3000 train_loss:3.3189 train_time:386686ms step_avg:145.04ms | |
| step:2677/3000 train_loss:3.3129 train_time:386829ms step_avg:145.04ms | |
| step:2678/3000 train_loss:3.3406 train_time:386973ms step_avg:145.04ms | |
| step:2679/3000 train_loss:3.3897 train_time:387115ms step_avg:145.04ms | |
| step:2680/3000 train_loss:3.2932 train_time:387258ms step_avg:145.04ms | |
| step:2681/3000 train_loss:3.2178 train_time:387403ms step_avg:145.04ms | |
| step:2682/3000 train_loss:3.2606 train_time:387548ms step_avg:145.04ms | |
| step:2683/3000 train_loss:3.7342 train_time:387692ms step_avg:145.04ms | |
| step:2684/3000 train_loss:3.3193 train_time:387835ms step_avg:145.04ms | |
| step:2685/3000 train_loss:3.3501 train_time:387977ms step_avg:145.04ms | |
| step:2686/3000 train_loss:3.3929 train_time:388119ms step_avg:145.04ms | |
| step:2687/3000 train_loss:3.3161 train_time:388263ms step_avg:145.04ms | |
| step:2688/3000 train_loss:3.3966 train_time:388407ms step_avg:145.04ms | |
| step:2689/3000 train_loss:3.3246 train_time:388552ms step_avg:145.04ms | |
| step:2690/3000 train_loss:3.3094 train_time:388696ms step_avg:145.04ms | |
| step:2691/3000 train_loss:3.3411 train_time:388838ms step_avg:145.03ms | |
| step:2692/3000 train_loss:3.4098 train_time:388981ms step_avg:145.03ms | |
| step:2693/3000 train_loss:3.1996 train_time:389124ms step_avg:145.03ms | |
| step:2694/3000 train_loss:3.5996 train_time:389270ms step_avg:145.03ms | |
| step:2695/3000 train_loss:3.3835 train_time:389414ms step_avg:145.03ms | |
| step:2696/3000 train_loss:3.1745 train_time:389557ms step_avg:145.03ms | |
| step:2697/3000 train_loss:3.3738 train_time:389700ms step_avg:145.03ms | |
| step:2698/3000 train_loss:3.3382 train_time:389843ms step_avg:145.03ms | |
| step:2699/3000 train_loss:3.2970 train_time:389987ms step_avg:145.03ms | |
| step:2700/3000 train_loss:3.3911 train_time:390131ms step_avg:145.03ms | |
| step:2701/3000 train_loss:3.3704 train_time:390274ms step_avg:145.03ms | |
| step:2702/3000 train_loss:3.2646 train_time:390417ms step_avg:145.03ms | |
| step:2703/3000 train_loss:3.2895 train_time:390560ms step_avg:145.03ms | |
| step:2704/3000 train_loss:3.3013 train_time:390704ms step_avg:145.03ms | |
| step:2705/3000 train_loss:3.2755 train_time:390848ms step_avg:145.03ms | |
| step:2706/3000 train_loss:3.4343 train_time:390992ms step_avg:145.03ms | |
| step:2707/3000 train_loss:3.4048 train_time:391135ms step_avg:145.03ms | |
| step:2708/3000 train_loss:3.3060 train_time:391277ms step_avg:145.02ms | |
| step:2709/3000 train_loss:3.3086 train_time:391420ms step_avg:145.02ms | |
| step:2710/3000 train_loss:3.4084 train_time:391566ms step_avg:145.02ms | |
| step:2711/3000 train_loss:3.2849 train_time:391711ms step_avg:145.02ms | |
| step:2712/3000 train_loss:3.4057 train_time:391854ms step_avg:145.02ms | |
| step:2713/3000 train_loss:3.1453 train_time:391997ms step_avg:145.02ms | |
| step:2714/3000 train_loss:3.3352 train_time:392139ms step_avg:145.02ms | |
| step:2715/3000 train_loss:3.2153 train_time:392283ms step_avg:145.02ms | |
| step:2716/3000 train_loss:3.2423 train_time:392429ms step_avg:145.02ms | |
| step:2717/3000 train_loss:3.4264 train_time:392573ms step_avg:145.02ms | |
| step:2718/3000 train_loss:3.3266 train_time:392716ms step_avg:145.02ms | |
| step:2719/3000 train_loss:3.5513 train_time:392859ms step_avg:145.02ms | |
| step:2720/3000 train_loss:3.2956 train_time:393003ms step_avg:145.02ms | |
| step:2721/3000 train_loss:3.2982 train_time:393147ms step_avg:145.02ms | |
| step:2722/3000 train_loss:3.5187 train_time:393292ms step_avg:145.02ms | |
| step:2723/3000 train_loss:3.2884 train_time:393435ms step_avg:145.02ms | |
| step:2724/3000 train_loss:3.4645 train_time:393578ms step_avg:145.02ms | |
| step:2725/3000 train_loss:3.3458 train_time:393722ms step_avg:145.02ms | |
| step:2726/3000 train_loss:3.3123 train_time:393867ms step_avg:145.02ms | |
| step:2727/3000 train_loss:3.3140 train_time:394012ms step_avg:145.02ms | |
| step:2728/3000 train_loss:3.6495 train_time:394154ms step_avg:145.02ms | |
| step:2729/3000 train_loss:3.3832 train_time:394298ms step_avg:145.02ms | |
| step:2730/3000 train_loss:3.2399 train_time:394440ms step_avg:145.01ms | |
| step:2731/3000 train_loss:3.3567 train_time:394584ms step_avg:145.01ms | |
| step:2732/3000 train_loss:3.2627 train_time:394730ms step_avg:145.01ms | |
| step:2733/3000 train_loss:3.1502 train_time:394874ms step_avg:145.01ms | |
| step:2734/3000 train_loss:3.2604 train_time:395016ms step_avg:145.01ms | |
| step:2735/3000 train_loss:3.3341 train_time:395160ms step_avg:145.01ms | |
| step:2736/3000 train_loss:3.2241 train_time:395304ms step_avg:145.01ms | |
| step:2737/3000 train_loss:3.6273 train_time:395448ms step_avg:145.01ms | |
| step:2738/3000 train_loss:3.3719 train_time:395593ms step_avg:145.01ms | |
| step:2739/3000 train_loss:3.5603 train_time:395735ms step_avg:145.01ms | |
| step:2740/3000 train_loss:3.3192 train_time:395878ms step_avg:145.01ms | |
| step:2741/3000 train_loss:3.3131 train_time:396022ms step_avg:145.01ms | |
| step:2742/3000 train_loss:3.2548 train_time:396167ms step_avg:145.01ms | |
| step:2743/3000 train_loss:3.3258 train_time:396312ms step_avg:145.01ms | |
| step:2744/3000 train_loss:3.3369 train_time:396455ms step_avg:145.01ms | |
| step:2745/3000 train_loss:3.4304 train_time:396598ms step_avg:145.01ms | |
| step:2746/3000 train_loss:3.2082 train_time:396742ms step_avg:145.01ms | |
| step:2747/3000 train_loss:3.3014 train_time:396886ms step_avg:145.01ms | |
| step:2748/3000 train_loss:3.3392 train_time:397031ms step_avg:145.01ms | |
| step:2749/3000 train_loss:3.4434 train_time:397175ms step_avg:145.01ms | |
| step:2750/3000 train_loss:3.2941 train_time:397317ms step_avg:145.01ms | |
| step:2750/3000 val_loss:3.3033 train_time:397355ms step_avg:145.02ms | |
| step:2751/3000 train_loss:3.3724 train_time:397469ms step_avg:145.01ms | |
| step:2752/3000 train_loss:3.4215 train_time:397617ms step_avg:145.01ms | |
| step:2753/3000 train_loss:3.3235 train_time:397760ms step_avg:145.01ms | |
| step:2754/3000 train_loss:3.2568 train_time:397903ms step_avg:145.01ms | |
| step:2755/3000 train_loss:3.2641 train_time:398044ms step_avg:145.01ms | |
| step:2756/3000 train_loss:3.3420 train_time:398186ms step_avg:145.01ms | |
| step:2757/3000 train_loss:3.2790 train_time:398330ms step_avg:145.01ms | |
| step:2758/3000 train_loss:3.1518 train_time:398477ms step_avg:145.01ms | |
| step:2759/3000 train_loss:3.5491 train_time:398624ms step_avg:145.01ms | |
| step:2760/3000 train_loss:3.3570 train_time:398767ms step_avg:145.01ms | |
| step:2761/3000 train_loss:3.3281 train_time:398909ms step_avg:145.01ms | |
| step:2762/3000 train_loss:3.2850 train_time:399051ms step_avg:145.00ms | |
| step:2763/3000 train_loss:3.2003 train_time:399196ms step_avg:145.00ms | |
| step:2764/3000 train_loss:3.3676 train_time:399339ms step_avg:145.00ms | |
| step:2765/3000 train_loss:3.2979 train_time:399483ms step_avg:145.00ms | |
| step:2766/3000 train_loss:3.1916 train_time:399627ms step_avg:145.00ms | |
| step:2767/3000 train_loss:3.2846 train_time:399771ms step_avg:145.00ms | |
| step:2768/3000 train_loss:3.3560 train_time:399916ms step_avg:145.00ms | |
| step:2769/3000 train_loss:3.2387 train_time:400059ms step_avg:145.00ms | |
| step:2770/3000 train_loss:3.3222 train_time:400203ms step_avg:145.00ms | |
| step:2771/3000 train_loss:3.2933 train_time:400345ms step_avg:145.00ms | |
| step:2772/3000 train_loss:3.7319 train_time:400488ms step_avg:145.00ms | |
| step:2773/3000 train_loss:3.2110 train_time:400633ms step_avg:145.00ms | |
| step:2774/3000 train_loss:3.3466 train_time:400778ms step_avg:145.00ms | |
| step:2775/3000 train_loss:3.4029 train_time:400923ms step_avg:145.00ms | |
| step:2776/3000 train_loss:3.3587 train_time:401066ms step_avg:145.00ms | |
| step:2777/3000 train_loss:3.4343 train_time:401209ms step_avg:145.00ms | |
| step:2778/3000 train_loss:3.4451 train_time:401352ms step_avg:145.00ms | |
| step:2779/3000 train_loss:3.3159 train_time:401498ms step_avg:145.00ms | |
| step:2780/3000 train_loss:3.1730 train_time:401642ms step_avg:145.00ms | |
| step:2781/3000 train_loss:3.3270 train_time:401785ms step_avg:145.00ms | |
| step:2782/3000 train_loss:3.3484 train_time:401928ms step_avg:145.00ms | |
| step:2783/3000 train_loss:3.2232 train_time:402073ms step_avg:145.00ms | |
| step:2784/3000 train_loss:3.3040 train_time:402217ms step_avg:145.00ms | |
| step:2785/3000 train_loss:3.3755 train_time:402360ms step_avg:144.99ms | |
| step:2786/3000 train_loss:3.2630 train_time:402504ms step_avg:144.99ms | |
| step:2787/3000 train_loss:3.3672 train_time:402647ms step_avg:144.99ms | |
| step:2788/3000 train_loss:3.3351 train_time:402791ms step_avg:144.99ms | |
| step:2789/3000 train_loss:3.2623 train_time:402936ms step_avg:144.99ms | |
| step:2790/3000 train_loss:3.3554 train_time:403080ms step_avg:144.99ms | |
| step:2791/3000 train_loss:3.2829 train_time:403223ms step_avg:144.99ms | |
| step:2792/3000 train_loss:3.1901 train_time:403366ms step_avg:144.99ms | |
| step:2793/3000 train_loss:3.2800 train_time:403510ms step_avg:144.99ms | |
| step:2794/3000 train_loss:3.3326 train_time:403655ms step_avg:144.99ms | |
| step:2795/3000 train_loss:3.2457 train_time:403799ms step_avg:144.99ms | |
| step:2796/3000 train_loss:3.2839 train_time:403943ms step_avg:144.99ms | |
| step:2797/3000 train_loss:3.1913 train_time:404086ms step_avg:144.99ms | |
| step:2798/3000 train_loss:3.3051 train_time:404229ms step_avg:144.99ms | |
| step:2799/3000 train_loss:3.2620 train_time:404374ms step_avg:144.99ms | |
| step:2800/3000 train_loss:3.4278 train_time:404519ms step_avg:144.99ms | |
| step:2801/3000 train_loss:3.3728 train_time:404662ms step_avg:144.99ms | |
| step:2802/3000 train_loss:3.3533 train_time:404805ms step_avg:144.99ms | |
| step:2803/3000 train_loss:3.2910 train_time:404947ms step_avg:144.99ms | |
| step:2804/3000 train_loss:3.4619 train_time:405091ms step_avg:144.99ms | |
| step:2805/3000 train_loss:3.4470 train_time:405237ms step_avg:144.99ms | |
| step:2806/3000 train_loss:3.1590 train_time:405381ms step_avg:144.99ms | |
| step:2807/3000 train_loss:3.5689 train_time:405524ms step_avg:144.99ms | |
| step:2808/3000 train_loss:3.3075 train_time:405667ms step_avg:144.98ms | |
| step:2809/3000 train_loss:3.2491 train_time:405811ms step_avg:144.98ms | |
| step:2810/3000 train_loss:3.2612 train_time:405956ms step_avg:144.98ms | |
| step:2811/3000 train_loss:3.4228 train_time:406100ms step_avg:144.98ms | |
| step:2812/3000 train_loss:3.4019 train_time:406244ms step_avg:144.98ms | |
| step:2813/3000 train_loss:3.1626 train_time:406387ms step_avg:144.98ms | |
| step:2814/3000 train_loss:3.3830 train_time:406531ms step_avg:144.98ms | |
| step:2815/3000 train_loss:3.4522 train_time:406676ms step_avg:144.98ms | |
| step:2816/3000 train_loss:3.2584 train_time:406820ms step_avg:144.98ms | |
| step:2817/3000 train_loss:2.9186 train_time:406963ms step_avg:144.98ms | |
| step:2818/3000 train_loss:3.2767 train_time:407105ms step_avg:144.98ms | |
| step:2819/3000 train_loss:3.2448 train_time:407249ms step_avg:144.98ms | |
| step:2820/3000 train_loss:3.4424 train_time:407393ms step_avg:144.98ms | |
| step:2821/3000 train_loss:3.2865 train_time:407538ms step_avg:144.98ms | |
| step:2822/3000 train_loss:3.3615 train_time:407683ms step_avg:144.98ms | |
| step:2823/3000 train_loss:3.2928 train_time:407825ms step_avg:144.98ms | |
| step:2824/3000 train_loss:3.2771 train_time:407969ms step_avg:144.98ms | |
| step:2825/3000 train_loss:3.1717 train_time:408114ms step_avg:144.98ms | |
| step:2826/3000 train_loss:3.4264 train_time:408258ms step_avg:144.98ms | |
| step:2827/3000 train_loss:3.3164 train_time:408403ms step_avg:144.98ms | |
| step:2828/3000 train_loss:3.2134 train_time:408545ms step_avg:144.98ms | |
| step:2829/3000 train_loss:3.3437 train_time:408689ms step_avg:144.98ms | |
| step:2830/3000 train_loss:3.3368 train_time:408833ms step_avg:144.98ms | |
| step:2831/3000 train_loss:3.2734 train_time:408978ms step_avg:144.98ms | |
| step:2832/3000 train_loss:3.4184 train_time:409121ms step_avg:144.98ms | |
| step:2833/3000 train_loss:3.3321 train_time:409264ms step_avg:144.97ms | |
| step:2834/3000 train_loss:3.3233 train_time:409407ms step_avg:144.97ms | |
| step:2835/3000 train_loss:3.1279 train_time:409662ms step_avg:145.01ms | |
| step:2836/3000 train_loss:3.3615 train_time:409815ms step_avg:145.02ms | |
| step:2837/3000 train_loss:3.2864 train_time:409957ms step_avg:145.01ms | |
| step:2838/3000 train_loss:3.5830 train_time:410100ms step_avg:145.01ms | |
| step:2839/3000 train_loss:3.2466 train_time:410241ms step_avg:145.01ms | |
| step:2840/3000 train_loss:3.2584 train_time:410383ms step_avg:145.01ms | |
| step:2841/3000 train_loss:3.3036 train_time:410526ms step_avg:145.01ms | |
| step:2842/3000 train_loss:3.2412 train_time:410678ms step_avg:145.01ms | |
| step:2843/3000 train_loss:3.2458 train_time:410823ms step_avg:145.01ms | |
| step:2844/3000 train_loss:3.4157 train_time:410965ms step_avg:145.01ms | |
| step:2845/3000 train_loss:3.2869 train_time:411107ms step_avg:145.01ms | |
| step:2846/3000 train_loss:3.3219 train_time:411249ms step_avg:145.01ms | |
| step:2847/3000 train_loss:3.2820 train_time:411392ms step_avg:145.01ms | |
| step:2848/3000 train_loss:3.5410 train_time:411537ms step_avg:145.01ms | |
| step:2849/3000 train_loss:3.2120 train_time:411684ms step_avg:145.01ms | |
| step:2850/3000 train_loss:3.2540 train_time:411956ms step_avg:145.05ms | |
| step:2851/3000 train_loss:3.3507 train_time:412098ms step_avg:145.05ms | |
| step:2852/3000 train_loss:3.3299 train_time:412240ms step_avg:145.05ms | |
| step:2853/3000 train_loss:3.2827 train_time:412383ms step_avg:145.05ms | |
| step:2854/3000 train_loss:3.3540 train_time:412523ms step_avg:145.05ms | |
| step:2855/3000 train_loss:3.1792 train_time:412665ms step_avg:145.05ms | |
| step:2856/3000 train_loss:3.2077 train_time:412811ms step_avg:145.05ms | |
| step:2857/3000 train_loss:3.3023 train_time:412960ms step_avg:145.05ms | |
| step:2858/3000 train_loss:3.2944 train_time:413104ms step_avg:145.05ms | |
| step:2859/3000 train_loss:3.2035 train_time:413245ms step_avg:145.05ms | |
| step:2860/3000 train_loss:3.2848 train_time:413388ms step_avg:145.05ms | |
| step:2861/3000 train_loss:3.2498 train_time:413531ms step_avg:145.05ms | |
| step:2862/3000 train_loss:3.2806 train_time:413675ms step_avg:145.05ms | |
| step:2863/3000 train_loss:3.3298 train_time:413820ms step_avg:145.05ms | |
| step:2864/3000 train_loss:3.5957 train_time:413964ms step_avg:145.05ms | |
| step:2865/3000 train_loss:3.3925 train_time:414109ms step_avg:145.05ms | |
| step:2866/3000 train_loss:3.2897 train_time:414252ms step_avg:145.05ms | |
| step:2867/3000 train_loss:3.1565 train_time:414397ms step_avg:145.05ms | |
| step:2868/3000 train_loss:3.3789 train_time:414540ms step_avg:145.05ms | |
| step:2869/3000 train_loss:3.3321 train_time:414683ms step_avg:145.04ms | |
| step:2870/3000 train_loss:3.2890 train_time:414826ms step_avg:145.04ms | |
| step:2871/3000 train_loss:3.4274 train_time:414970ms step_avg:145.04ms | |
| step:2872/3000 train_loss:3.1914 train_time:415117ms step_avg:145.04ms | |
| step:2873/3000 train_loss:3.2756 train_time:415261ms step_avg:145.04ms | |
| step:2874/3000 train_loss:3.1411 train_time:415404ms step_avg:145.04ms | |
| step:2875/3000 train_loss:3.2896 train_time:415546ms step_avg:145.04ms | |
| step:2875/3000 val_loss:3.2858 train_time:415584ms step_avg:145.06ms | |
| step:2876/3000 train_loss:3.2015 train_time:415701ms step_avg:145.05ms | |
| step:2877/3000 train_loss:3.1950 train_time:415849ms step_avg:145.05ms | |
| step:2878/3000 train_loss:3.2756 train_time:415993ms step_avg:145.05ms | |
| step:2879/3000 train_loss:3.3946 train_time:416134ms step_avg:145.04ms | |
| step:2880/3000 train_loss:3.3428 train_time:416275ms step_avg:145.04ms | |
| step:2881/3000 train_loss:3.3043 train_time:416417ms step_avg:145.04ms | |
| step:2882/3000 train_loss:3.2845 train_time:416561ms step_avg:145.04ms | |
| step:2883/3000 train_loss:3.3963 train_time:416709ms step_avg:145.04ms | |
| step:2884/3000 train_loss:3.1759 train_time:416854ms step_avg:145.04ms | |
| step:2885/3000 train_loss:3.1967 train_time:416997ms step_avg:145.04ms | |
| step:2886/3000 train_loss:3.2535 train_time:417140ms step_avg:145.04ms | |
| step:2887/3000 train_loss:3.2475 train_time:417283ms step_avg:145.04ms | |
| step:2888/3000 train_loss:3.2465 train_time:417426ms step_avg:145.04ms | |
| step:2889/3000 train_loss:3.2840 train_time:417571ms step_avg:145.04ms | |
| step:2890/3000 train_loss:3.4702 train_time:417715ms step_avg:145.04ms | |
| step:2891/3000 train_loss:3.3027 train_time:417859ms step_avg:145.04ms | |
| step:2892/3000 train_loss:3.1378 train_time:418003ms step_avg:145.04ms | |
| step:2893/3000 train_loss:3.0727 train_time:418148ms step_avg:145.04ms | |
| step:2894/3000 train_loss:3.2204 train_time:418291ms step_avg:145.04ms | |
| step:2895/3000 train_loss:3.0997 train_time:418434ms step_avg:145.04ms | |
| step:2896/3000 train_loss:3.2741 train_time:418576ms step_avg:145.04ms | |
| step:2897/3000 train_loss:3.4125 train_time:418721ms step_avg:145.04ms | |
| step:2898/3000 train_loss:3.2321 train_time:418866ms step_avg:145.04ms | |
| step:2899/3000 train_loss:3.3255 train_time:419010ms step_avg:145.04ms | |
| step:2900/3000 train_loss:3.2069 train_time:419153ms step_avg:145.04ms | |
| step:2901/3000 train_loss:3.3986 train_time:419297ms step_avg:145.04ms | |
| step:2902/3000 train_loss:3.3855 train_time:419439ms step_avg:145.03ms | |
| step:2903/3000 train_loss:3.4081 train_time:419582ms step_avg:145.03ms | |
| step:2904/3000 train_loss:3.1462 train_time:419728ms step_avg:145.03ms | |
| step:2905/3000 train_loss:3.2847 train_time:419873ms step_avg:145.03ms | |
| step:2906/3000 train_loss:3.2612 train_time:420016ms step_avg:145.03ms | |
| step:2907/3000 train_loss:3.3183 train_time:420159ms step_avg:145.03ms | |
| step:2908/3000 train_loss:3.2680 train_time:420303ms step_avg:145.03ms | |
| step:2909/3000 train_loss:3.2470 train_time:420449ms step_avg:145.03ms | |
| step:2910/3000 train_loss:3.5752 train_time:420592ms step_avg:145.03ms | |
| step:2911/3000 train_loss:3.2875 train_time:420736ms step_avg:145.03ms | |
| step:2912/3000 train_loss:3.2033 train_time:420879ms step_avg:145.03ms | |
| step:2913/3000 train_loss:3.1877 train_time:421023ms step_avg:145.03ms | |
| step:2914/3000 train_loss:3.6557 train_time:421169ms step_avg:145.03ms | |
| step:2915/3000 train_loss:3.2487 train_time:421314ms step_avg:145.03ms | |
| step:2916/3000 train_loss:3.2071 train_time:421455ms step_avg:145.03ms | |
| step:2917/3000 train_loss:3.1962 train_time:421598ms step_avg:145.03ms | |
| step:2918/3000 train_loss:3.4695 train_time:421741ms step_avg:145.03ms | |
| step:2919/3000 train_loss:2.9766 train_time:421886ms step_avg:145.03ms | |
| step:2920/3000 train_loss:3.1643 train_time:422032ms step_avg:145.03ms | |
| step:2921/3000 train_loss:3.1997 train_time:422175ms step_avg:145.03ms | |
| step:2922/3000 train_loss:3.2965 train_time:422319ms step_avg:145.03ms | |
| step:2923/3000 train_loss:3.3328 train_time:422462ms step_avg:145.03ms | |
| step:2924/3000 train_loss:3.3675 train_time:422607ms step_avg:145.03ms | |
| step:2925/3000 train_loss:3.3841 train_time:422750ms step_avg:145.03ms | |
| step:2926/3000 train_loss:3.2685 train_time:422894ms step_avg:145.03ms | |
| step:2927/3000 train_loss:3.2698 train_time:423036ms step_avg:145.02ms | |
| step:2928/3000 train_loss:3.2608 train_time:423181ms step_avg:145.02ms | |
| step:2929/3000 train_loss:3.2642 train_time:423325ms step_avg:145.02ms | |
| step:2930/3000 train_loss:3.2181 train_time:423469ms step_avg:145.02ms | |
| step:2931/3000 train_loss:3.2569 train_time:423613ms step_avg:145.02ms | |
| step:2932/3000 train_loss:3.3805 train_time:423756ms step_avg:145.02ms | |
| step:2933/3000 train_loss:3.4271 train_time:423899ms step_avg:145.02ms | |
| step:2934/3000 train_loss:3.3942 train_time:424042ms step_avg:145.02ms | |
| step:2935/3000 train_loss:3.2339 train_time:424188ms step_avg:145.02ms | |
| step:2936/3000 train_loss:3.2789 train_time:424332ms step_avg:145.02ms | |
| step:2937/3000 train_loss:3.2415 train_time:424474ms step_avg:145.02ms | |
| step:2938/3000 train_loss:3.2580 train_time:424617ms step_avg:145.02ms | |
| step:2939/3000 train_loss:3.2777 train_time:424761ms step_avg:145.02ms | |
| step:2940/3000 train_loss:3.3193 train_time:424905ms step_avg:145.02ms | |
| step:2941/3000 train_loss:3.3666 train_time:425050ms step_avg:145.02ms | |
| step:2942/3000 train_loss:3.3583 train_time:425193ms step_avg:145.02ms | |
| step:2943/3000 train_loss:3.2857 train_time:425336ms step_avg:145.02ms | |
| step:2944/3000 train_loss:3.1594 train_time:425478ms step_avg:145.02ms | |
| step:2945/3000 train_loss:3.1096 train_time:425622ms step_avg:145.02ms | |
| step:2946/3000 train_loss:3.3099 train_time:425766ms step_avg:145.02ms | |
| step:2947/3000 train_loss:3.3793 train_time:425911ms step_avg:145.02ms | |
| step:2948/3000 train_loss:3.3115 train_time:426055ms step_avg:145.02ms | |
| step:2949/3000 train_loss:3.5030 train_time:426197ms step_avg:145.01ms | |
| step:2950/3000 train_loss:3.2997 train_time:426339ms step_avg:145.01ms | |
| step:2951/3000 train_loss:3.3122 train_time:426484ms step_avg:145.01ms | |
| step:2952/3000 train_loss:3.7064 train_time:426630ms step_avg:145.01ms | |
| step:2953/3000 train_loss:3.3881 train_time:426773ms step_avg:145.01ms | |
| step:2954/3000 train_loss:3.3350 train_time:426916ms step_avg:145.01ms | |
| step:2955/3000 train_loss:3.3511 train_time:427059ms step_avg:145.01ms | |
| step:2956/3000 train_loss:3.2778 train_time:427203ms step_avg:145.01ms | |
| step:2957/3000 train_loss:3.2943 train_time:427348ms step_avg:145.01ms | |
| step:2958/3000 train_loss:3.1789 train_time:427491ms step_avg:145.01ms | |
| step:2959/3000 train_loss:3.2637 train_time:427634ms step_avg:145.01ms | |
| step:2960/3000 train_loss:3.4035 train_time:427777ms step_avg:145.01ms | |
| step:2961/3000 train_loss:3.2044 train_time:427921ms step_avg:145.01ms | |
| step:2962/3000 train_loss:3.3344 train_time:428065ms step_avg:145.01ms | |
| step:2963/3000 train_loss:3.2031 train_time:428210ms step_avg:145.01ms | |
| step:2964/3000 train_loss:3.2611 train_time:428353ms step_avg:145.01ms | |
| step:2965/3000 train_loss:3.2419 train_time:428496ms step_avg:145.01ms | |
| step:2966/3000 train_loss:3.3441 train_time:428639ms step_avg:145.01ms | |
| step:2967/3000 train_loss:3.2321 train_time:428784ms step_avg:145.01ms | |
| step:2968/3000 train_loss:3.4667 train_time:428930ms step_avg:145.01ms | |
| step:2969/3000 train_loss:3.3197 train_time:429073ms step_avg:145.01ms | |
| step:2970/3000 train_loss:3.3417 train_time:429216ms step_avg:145.01ms | |
| step:2971/3000 train_loss:3.3097 train_time:429359ms step_avg:145.00ms | |
| step:2972/3000 train_loss:3.3913 train_time:429503ms step_avg:145.00ms | |
| step:2973/3000 train_loss:3.2204 train_time:429647ms step_avg:145.00ms | |
| step:2974/3000 train_loss:3.2191 train_time:429792ms step_avg:145.00ms | |
| step:2975/3000 train_loss:3.1540 train_time:429935ms step_avg:145.00ms | |
| step:2976/3000 train_loss:3.2206 train_time:430077ms step_avg:145.00ms | |
| step:2977/3000 train_loss:3.2056 train_time:430221ms step_avg:145.00ms | |
| step:2978/3000 train_loss:3.2332 train_time:430367ms step_avg:145.00ms | |
| step:2979/3000 train_loss:3.5206 train_time:430512ms step_avg:145.00ms | |
| step:2980/3000 train_loss:3.3308 train_time:430655ms step_avg:145.00ms | |
| step:2981/3000 train_loss:3.3607 train_time:430798ms step_avg:145.00ms | |
| step:2982/3000 train_loss:3.3853 train_time:430941ms step_avg:145.00ms | |
| step:2983/3000 train_loss:3.4433 train_time:431086ms step_avg:145.00ms | |
| step:2984/3000 train_loss:3.2712 train_time:431230ms step_avg:145.00ms | |
| step:2985/3000 train_loss:3.3537 train_time:431374ms step_avg:145.00ms | |
| step:2986/3000 train_loss:3.3549 train_time:431517ms step_avg:145.00ms | |
| step:2987/3000 train_loss:3.3071 train_time:431660ms step_avg:145.00ms | |
| step:2988/3000 train_loss:3.4243 train_time:431805ms step_avg:145.00ms | |
| step:2989/3000 train_loss:3.0164 train_time:431949ms step_avg:145.00ms | |
| step:2990/3000 train_loss:3.3740 train_time:432093ms step_avg:145.00ms | |
| step:2991/3000 train_loss:3.3292 train_time:432236ms step_avg:145.00ms | |
| step:2992/3000 train_loss:3.2758 train_time:432380ms step_avg:145.00ms | |
| step:2993/3000 train_loss:3.2259 train_time:432524ms step_avg:145.00ms | |
| step:2994/3000 train_loss:3.3643 train_time:432670ms step_avg:145.00ms | |
| step:2995/3000 train_loss:3.1901 train_time:432814ms step_avg:145.00ms | |
| step:2996/3000 train_loss:3.1984 train_time:432957ms step_avg:145.00ms | |
| step:2997/3000 train_loss:3.2768 train_time:433101ms step_avg:145.00ms | |
| step:2998/3000 train_loss:3.2176 train_time:433245ms step_avg:144.99ms | |
| step:2999/3000 train_loss:3.3402 train_time:433390ms step_avg:144.99ms | |
| step:3000/3000 train_loss:3.2425 train_time:433533ms step_avg:144.99ms | |
| step:3000/3000 val_loss:3.2753 train_time:433570ms step_avg:145.01ms | |