手机版
你好,游客 登录 注册
背景:
阅读新闻

Pytorch使用多GPU

[日期:2017-09-29] 来源:Linux社区  作者:yongjieShi [字体: ]

在caffe中训练的时候如果使用多GPU则直接在运行程序的时候指定GPU的index即可,但是在Pytorch中则需要在声明模型之后,对声明的模型进行初始化,如:

cnn = DataParallel(AlexNet())

之后直接运行Pytorch则默认使用所有的GPU,为了说明上述初始化的作用,我用了一组畸变图像的数据集,写了一个ResNet的模块,过了50个epoch,对比一下实验耗时的差别,代码如下:

# -*- coding: utf-8 -*-
# Implementation of https://arxiv.org/pdf/1512.03385.pdf/
# See section 4.2 for model architecture on CIFAR-10.
# Some part of the code was referenced below.
# https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py

import os
from PIL import Image
import time

import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.autograd import Variable
import torch.utils.data as data
from torch.nn import DataParallel


# DataLoader options: one worker process; pinned (page-locked) host memory
# speeds up host-to-GPU copies of each batch.
kwargs = {'num_workers': 1, 'pin_memory': True}
# Custom dataset/loader below: returns an (image, label) pair per sample.


def default_loader(path):
    """Load the image at *path* and return it as an RGB PIL image.

    Uses a ``with`` block so the underlying file handle is closed as soon
    as the pixel data has been decoded; the original version relied on
    garbage collection to close it, which can exhaust file descriptors
    when a DataLoader opens many images.
    """
    with Image.open(path) as img:
        # convert() forces a full decode, so the data survives the close
        return img.convert('RGB')


class myImageFloder(data.Dataset):  # name (incl. "Floder" typo) kept for backward compatibility
    """Dataset backed by a plain-text label file.

    The label file's first line holds class names separated by four
    spaces; every following line is ``<filename> <float> <float> ...``.
    Entries whose image file is missing under *root* are silently skipped.
    """

    def __init__(self, root, label, transform=None, target_transform=None, loader=default_loader):
        imgs = []
        class_names = []
        # Context manager guarantees the label file is closed (the original
        # left the handle open); enumerate() replaces the manual counter.
        with open(label) as fh:
            for line_no, line in enumerate(fh):
                if line_no == 0:
                    # header line: class names separated by 4 spaces
                    class_names = [n.strip() for n in line.rstrip().split('    ')]
                else:
                    fields = line.split()
                    fn = fields.pop(0)
                    # keep only samples whose image actually exists on disk
                    if os.path.isfile(os.path.join(root, fn)):
                        imgs.append((fn, tuple(float(v) for v in fields)))
        self.root = root
        self.imgs = imgs  # list of (filename, label-tuple)
        self.classes = class_names
        self.transform = transform
        self.target_transform = target_transform
        self.loader = loader

    def __getitem__(self, index):
        """Return ``(image, label_tensor)`` for the sample at *index*."""
        fn, label = self.imgs[index]
        img = self.loader(os.path.join(self.root, fn))
        if self.transform is not None:
            img = self.transform(img)
        return img, torch.Tensor(label)

    def __len__(self):
        return len(self.imgs)

    def getName(self):
        """Return the class names parsed from the label-file header."""
        return self.classes

mytransform = transforms.Compose([transforms.ToTensor()])  # only PIL->tensor conversion, no augmentation
# Hard-coded dataset locations for the distorted-image experiment.
train_data_root = "/home/ying/shiyongjie/rjp/generate_distortion_image_2016_03_15/0_Distorted_Image/Training"
test_data_root = "/home/ying/shiyongjie/rjp/generate_distortion_image_2016_03_15/0_Distorted_Image/Testing"
train_label = "/home/ying/shiyongjie/rjp/generate_distortion_image_2016_03_15/0_Distorted_Image/NameList_train.txt"
test_label = "/home/ying/shiyongjie/rjp/generate_distortion_image_2016_03_15/0_Distorted_Image/NameList_test.txt"

# Batched, shuffled loaders over the custom dataset (kwargs: workers + pinned memory).
train_loader = torch.utils.data.DataLoader(
    myImageFloder(root=train_data_root, label=train_label, transform=mytransform),
    batch_size=64, shuffle=True, **kwargs)

test_loader = torch.utils.data.DataLoader(
    myImageFloder(root=test_data_root, label=test_label, transform=mytransform),
    batch_size=64, shuffle=True, **kwargs)


# 3x3 Convolution
def conv3x3(in_channels, out_channels, stride=1):
    """Return a bias-free 3x3 convolution with padding 1 (shape-preserving at stride 1)."""
    return nn.Conv2d(
        in_channels,
        out_channels,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias=False,
    )


# Residual Block
class ResidualBlock(nn.Module):
    """Basic residual block: two 3x3 convs with batch norm and a skip connection.

    When *downsample* is given it is applied to the input so the shortcut
    matches the main path's resolution/channel count.
    """

    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(ResidualBlock, self).__init__()
        # first conv may change resolution/width; second preserves both
        self.conv1 = conv3x3(in_channels, out_channels, stride)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(out_channels, out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample

    def forward(self, x):
        shortcut = self.downsample(x) if self.downsample else x
        y = self.relu(self.bn1(self.conv1(x)))
        y = self.bn2(self.conv2(y))
        y += shortcut
        return self.relu(y)


# ResNet Module
class ResNet(nn.Module):
    """Five-stage ResNet regressor (num_classes defaults to 1 output).

    Stem: 3->16 conv + BN + ReLU. Stages 2-5 each halve the spatial size
    while doubling the width (16->32->64->128->256), then an 8x8 average
    pool and a fully-connected head on the flattened 256*2*2 features.
    """

    def __init__(self, block, layers, num_classes=1):
        super(ResNet, self).__init__()
        self.in_channels = 16
        self.conv = conv3x3(3, 16)
        self.bn = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)
        # NOTE(review): only layers[0] and layers[1] are consulted for the
        # five stages (layers[2:] is ignored) — preserved from the original;
        # confirm this block-count reuse is intentional.
        self.layer1 = self.make_layer(block, 16, layers[0])
        self.layer2 = self.make_layer(block, 32, layers[0], 2)
        self.layer3 = self.make_layer(block, 64, layers[1], 2)
        self.layer4 = self.make_layer(block, 128, layers[1], 2)
        self.layer5 = self.make_layer(block, 256, layers[1], 2)
        self.avg_pool = nn.AvgPool2d(kernel_size=8, stride=8)  # -> 2x2 spatial
        self.fc = nn.Linear(256 * 2 * 2, num_classes)

    def make_layer(self, block, out_channels, blocks, stride=1):
        """Stack *blocks* residual blocks; only the first may downsample."""
        downsample = None
        if stride != 1 or self.in_channels != out_channels:
            # project the shortcut to the new resolution/width
            downsample = nn.Sequential(
                conv3x3(self.in_channels, out_channels, stride=stride),
                nn.BatchNorm2d(out_channels),
            )
        stage = [block(self.in_channels, out_channels, stride, downsample)]
        self.in_channels = out_channels  # subsequent blocks keep this width
        stage.extend(block(out_channels, out_channels) for _ in range(1, blocks))
        return nn.Sequential(*stage)

    def forward(self, x):
        out = self.relu(self.bn(self.conv(x)))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4, self.layer5):
            out = stage(out)
        out = self.avg_pool(out)
        return self.fc(out.view(out.size(0), -1))


# Wrap the model in DataParallel so batches are split across all visible GPUs.
resnet = DataParallel(ResNet(ResidualBlock, [3, 3, 3]))
resnet.cuda()

# Loss and Optimizer
criterion = nn.MSELoss()
lr = 0.001
optimizer = torch.optim.Adam(resnet.parameters(), lr=lr)

# Training
num_epochs = 50
# time.clock() was deprecated in 3.3 and removed in Python 3.8;
# perf_counter() is the portable wall-clock replacement.
start = time.perf_counter()
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = Variable(images.cuda())
        labels = Variable(labels.cuda())

        # Forward + Backward + Optimize
        optimizer.zero_grad()
        outputs = resnet(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        if (i + 1) % 100 == 0:
            # loss.item() replaces loss.data[0] (removed in PyTorch >= 0.5);
            # report the actual epoch/iteration totals instead of the
            # hard-coded 80/500 the original printed.
            print("Epoch [%d/%d], Iter [%d/%d] Loss: %.4f"
                  % (epoch + 1, num_epochs, i + 1, len(train_loader), loss.item()))

    # Decay the learning rate by 3x every 20 epochs.
    if (epoch + 1) % 20 == 0:
        lr /= 3
        optimizer = torch.optim.Adam(resnet.parameters(), lr=lr)
elapsed = time.perf_counter() - start
print("time used:", elapsed)

# Save the Model
torch.save(resnet.state_dict(), 'resnet.pkl')

作为对比实验,我们同时把ResNet的声明方式修改为

resnet = ResNet(ResidualBlock, [3, 3, 3])

其余不变,再运行程序的时候不指定GPU,直接 python resnet.py。在声明 DataParallel 时,运行耗时结果如下:

('time used:', 17124.861335999998),watch -n 1 nvidia-smi确实显示占用两块GPU

在不声明DataParallel时,实验运行结果耗时如下:

('time used:', 30318.149681000003),watch -n 1 nvidia-smi确实显示占用一块GPU

可以看出,在声明DataParallel时,时间压缩了近一半,所以声明DataParallel是使用多GPU运行PyTorch的一种方法。

官方的doc也给出了多GPU使用的例子以及部分数据在GPU与部分数据在CPU上运行的例子

以下是两组实验结果的输出:

DataParallel初始化

Epoch [1/80], Iter [100/500] Loss: 916.5578

Epoch [1/80], Iter [200/500] Loss: 172.2591

Epoch [1/80], Iter [300/500] Loss: 179.8360

Epoch [1/80], Iter [400/500] Loss: 259.6867

Epoch [1/80], Iter [500/500] Loss: 244.0616

Epoch [1/80], Iter [600/500] Loss: 74.7015

Epoch [1/80], Iter [700/500] Loss: 63.1657

Epoch [1/80], Iter [800/500] Loss: 90.3517

Epoch [1/80], Iter [900/500] Loss: 70.4562

Epoch [2/80], Iter [100/500] Loss: 52.3249

Epoch [2/80], Iter [200/500] Loss: 129.1855

Epoch [2/80], Iter [300/500] Loss: 110.0157

Epoch [2/80], Iter [400/500] Loss: 64.9313

Epoch [2/80], Iter [500/500] Loss: 87.8385

Epoch [2/80], Iter [600/500] Loss: 118.5828

Epoch [2/80], Iter [700/500] Loss: 123.9575

Epoch [2/80], Iter [800/500] Loss: 79.1908

Epoch [2/80], Iter [900/500] Loss: 61.8099

Epoch [3/80], Iter [100/500] Loss: 50.4294

Epoch [3/80], Iter [200/500] Loss: 106.8135

Epoch [3/80], Iter [300/500] Loss: 83.2198

Epoch [3/80], Iter [400/500] Loss: 60.7116

Epoch [3/80], Iter [500/500] Loss: 101.9553

Epoch [3/80], Iter [600/500] Loss: 64.6967

Epoch [3/80], Iter [700/500] Loss: 66.2446

Epoch [3/80], Iter [800/500] Loss: 81.1825

Epoch [3/80], Iter [900/500] Loss: 53.9905

Epoch [4/80], Iter [100/500] Loss: 76.2977

Epoch [4/80], Iter [200/500] Loss: 18.4255

Epoch [4/80], Iter [300/500] Loss: 57.6188

Epoch [4/80], Iter [400/500] Loss: 45.6235

Epoch [4/80], Iter [500/500] Loss: 82.9265

Epoch [4/80], Iter [600/500] Loss: 119.6085

Epoch [4/80], Iter [700/500] Loss: 53.1355

Epoch [4/80], Iter [800/500] Loss: 29.5248

Epoch [4/80], Iter [900/500] Loss: 57.0401

Epoch [5/80], Iter [100/500] Loss: 47.2671

Epoch [5/80], Iter [200/500] Loss: 31.6928

Epoch [5/80], Iter [300/500] Loss: 38.0040

Epoch [5/80], Iter [400/500] Loss: 24.5184

Epoch [5/80], Iter [500/500] Loss: 33.8515

Epoch [5/80], Iter [600/500] Loss: 43.6560

Epoch [5/80], Iter [700/500] Loss: 68.2500

Epoch [5/80], Iter [800/500] Loss: 30.8259

Epoch [5/80], Iter [900/500] Loss: 43.9696

Epoch [6/80], Iter [100/500] Loss: 22.4120

Epoch [6/80], Iter [200/500] Loss: 45.5722

Epoch [6/80], Iter [300/500] Loss: 26.8331

Epoch [6/80], Iter [400/500] Loss: 58.1139

Epoch [6/80], Iter [500/500] Loss: 12.8767

Epoch [6/80], Iter [600/500] Loss: 26.6725

Epoch [6/80], Iter [700/500] Loss: 31.9800

Epoch [6/80], Iter [800/500] Loss: 91.2332

Epoch [6/80], Iter [900/500] Loss: 44.1361

Epoch [7/80], Iter [100/500] Loss: 13.1401

Epoch [7/80], Iter [200/500] Loss: 20.9435

Epoch [7/80], Iter [300/500] Loss: 28.0944

Epoch [7/80], Iter [400/500] Loss: 24.0240

Epoch [7/80], Iter [500/500] Loss: 43.3279

Epoch [7/80], Iter [600/500] Loss: 23.3077

Epoch [7/80], Iter [700/500] Loss: 32.9658

Epoch [7/80], Iter [800/500] Loss: 27.2044

Epoch [7/80], Iter [900/500] Loss: 25.5850

Epoch [8/80], Iter [100/500] Loss: 39.7642

Epoch [8/80], Iter [200/500] Loss: 17.7421

Epoch [8/80], Iter [300/500] Loss: 29.8965

Epoch [8/80], Iter [400/500] Loss: 20.6153

Epoch [8/80], Iter [500/500] Loss: 43.0224

Epoch [8/80], Iter [600/500] Loss: 58.1552

Epoch [8/80], Iter [700/500] Loss: 19.1967

Epoch [8/80], Iter [800/500] Loss: 34.9122

Epoch [8/80], Iter [900/500] Loss: 15.0651

Epoch [9/80], Iter [100/500] Loss: 18.5950

Epoch [9/80], Iter [200/500] Loss: 36.1891

Epoch [9/80], Iter [300/500] Loss: 22.4936

Epoch [9/80], Iter [400/500] Loss: 14.8044

Epoch [9/80], Iter [500/500] Loss: 16.6958

Epoch [9/80], Iter [600/500] Loss: 24.8461

Epoch [9/80], Iter [700/500] Loss: 13.7112

Epoch [9/80], Iter [800/500] Loss: 21.2906

Epoch [9/80], Iter [900/500] Loss: 31.6950

Epoch [10/80], Iter [100/500] Loss: 20.7707

Epoch [10/80], Iter [200/500] Loss: 15.6260

Epoch [10/80], Iter [300/500] Loss: 28.5737

Epoch [10/80], Iter [400/500] Loss: 36.6791

Epoch [10/80], Iter [500/500] Loss: 38.9839

Epoch [10/80], Iter [600/500] Loss: 14.4459

Epoch [10/80], Iter [700/500] Loss: 10.0907

Epoch [10/80], Iter [800/500] Loss: 17.9035

Epoch [10/80], Iter [900/500] Loss: 24.5759

Epoch [11/80], Iter [100/500] Loss: 19.8531

Epoch [11/80], Iter [200/500] Loss: 15.7126

Epoch [11/80], Iter [300/500] Loss: 18.0198

Epoch [11/80], Iter [400/500] Loss: 19.3038

Epoch [11/80], Iter [500/500] Loss: 27.4435

Epoch [11/80], Iter [600/500] Loss: 18.1086

Epoch [11/80], Iter [700/500] Loss: 10.8124

Epoch [11/80], Iter [800/500] Loss: 31.2389

Epoch [11/80], Iter [900/500] Loss: 14.4881

Epoch [12/80], Iter [100/500] Loss: 10.6320

Epoch [12/80], Iter [200/500] Loss: 26.8394

Epoch [12/80], Iter [300/500] Loss: 16.0246

Epoch [12/80], Iter [400/500] Loss: 16.3263

Epoch [12/80], Iter [500/500] Loss: 24.5880

Epoch [12/80], Iter [600/500] Loss: 15.7498

Epoch [12/80], Iter [700/500] Loss: 11.4933

Epoch [12/80], Iter [800/500] Loss: 9.7252

Epoch [12/80], Iter [900/500] Loss: 31.6774

Epoch [13/80], Iter [100/500] Loss: 21.1929

Epoch [13/80], Iter [200/500] Loss: 17.0953

Epoch [13/80], Iter [300/500] Loss: 21.1883

Epoch [13/80], Iter [400/500] Loss: 15.9005

Epoch [13/80], Iter [500/500] Loss: 14.7924

Epoch [13/80], Iter [600/500] Loss: 12.4324

Epoch [13/80], Iter [700/500] Loss: 12.0840

Epoch [13/80], Iter [800/500] Loss: 30.9664

Epoch [13/80], Iter [900/500] Loss: 14.9601

Epoch [14/80], Iter [100/500] Loss: 6.5126

Epoch [14/80], Iter [200/500] Loss: 11.3227

Epoch [14/80], Iter [300/500] Loss: 12.9980

Epoch [14/80], Iter [400/500] Loss: 13.8523

Epoch [14/80], Iter [500/500] Loss: 10.6771

Epoch [14/80], Iter [600/500] Loss: 7.3953

Epoch [14/80], Iter [700/500] Loss: 14.6829

Epoch [14/80], Iter [800/500] Loss: 15.6956

Epoch [14/80], Iter [900/500] Loss: 21.8876

Epoch [15/80], Iter [100/500] Loss: 5.1943

Epoch [15/80], Iter [200/500] Loss: 13.0731

Epoch [15/80], Iter [300/500] Loss: 6.8931

Epoch [15/80], Iter [400/500] Loss: 15.3212

Epoch [15/80], Iter [500/500] Loss: 8.1775

Epoch [15/80], Iter [600/500] Loss: 11.5664

Epoch [15/80], Iter [700/500] Loss: 5.5951

Epoch [15/80], Iter [800/500] Loss: 10.9075

Epoch [15/80], Iter [900/500] Loss: 14.8503

Epoch [16/80], Iter [100/500] Loss: 19.5184

Epoch [16/80], Iter [200/500] Loss: 10.3570

Epoch [16/80], Iter [300/500] Loss: 10.0997

Epoch [16/80], Iter [400/500] Loss: 9.7350

Epoch [16/80], Iter [500/500] Loss: 11.3000

Epoch [16/80], Iter [600/500] Loss: 21.6213

Epoch [16/80], Iter [700/500] Loss: 9.7907

Epoch [16/80], Iter [800/500] Loss: 10.0128

Epoch [16/80], Iter [900/500] Loss: 10.7869

Epoch [17/80], Iter [100/500] Loss: 9.2015

Epoch [17/80], Iter [200/500] Loss: 7.3021

Epoch [17/80], Iter [300/500] Loss: 5.9662

Epoch [17/80], Iter [400/500] Loss: 17.5215

Epoch [17/80], Iter [500/500] Loss: 7.3349

Epoch [17/80], Iter [600/500] Loss: 8.5626

Epoch [17/80], Iter [700/500] Loss: 12.7575

Epoch [17/80], Iter [800/500] Loss: 10.7792

Epoch [17/80], Iter [900/500] Loss: 7.0889

Epoch [18/80], Iter [100/500] Loss: 10.5613

Epoch [18/80], Iter [200/500] Loss: 3.0777

Epoch [18/80], Iter [300/500] Loss: 6.3598

Epoch [18/80], Iter [400/500] Loss: 7.9515

Epoch [18/80], Iter [500/500] Loss: 10.8023

Epoch [18/80], Iter [600/500] Loss: 7.3443

Epoch [18/80], Iter [700/500] Loss: 8.0862

Epoch [18/80], Iter [800/500] Loss: 15.2795

Epoch [18/80], Iter [900/500] Loss: 10.2788

Epoch [19/80], Iter [100/500] Loss: 5.0786

Epoch [19/80], Iter [200/500] Loss: 8.8248

Epoch [19/80], Iter [300/500] Loss: 4.9262

Epoch [19/80], Iter [400/500] Loss: 7.8992

Epoch [19/80], Iter [500/500] Loss: 13.1279

Epoch [19/80], Iter [600/500] Loss: 8.2703

Epoch [19/80], Iter [700/500] Loss: 4.1547

Epoch [19/80], Iter [800/500] Loss: 9.0542

Epoch [19/80], Iter [900/500] Loss: 6.7904

Epoch [20/80], Iter [100/500] Loss: 8.6150

Epoch [20/80], Iter [200/500] Loss: 3.7212

Epoch [20/80], Iter [300/500] Loss: 6.2832

Epoch [20/80], Iter [400/500] Loss: 10.1591

Epoch [20/80], Iter [500/500] Loss: 9.7668

Epoch [20/80], Iter [600/500] Loss: 4.7498

Epoch [20/80], Iter [700/500] Loss: 4.8831

Epoch [20/80], Iter [800/500] Loss: 7.7877

Epoch [20/80], Iter [900/500] Loss: 8.5114

Epoch [21/80], Iter [100/500] Loss: 2.1853

Epoch [21/80], Iter [200/500] Loss: 5.8741

Epoch [21/80], Iter [300/500] Loss: 5.3676

Epoch [21/80], Iter [400/500] Loss: 3.1155

Epoch [21/80], Iter [500/500] Loss: 4.2433

Epoch [21/80], Iter [600/500] Loss: 1.9783

Epoch [21/80], Iter [700/500] Loss: 2.7622

Epoch [21/80], Iter [800/500] Loss: 2.0112

Epoch [21/80], Iter [900/500] Loss: 2.2692

Epoch [22/80], Iter [100/500] Loss: 2.1882

Epoch [22/80], Iter [200/500] Loss: 4.2540

Epoch [22/80], Iter [300/500] Loss: 4.0126

Epoch [22/80], Iter [400/500] Loss: 2.2220

Epoch [22/80], Iter [500/500] Loss: 2.4755

Epoch [22/80], Iter [600/500] Loss: 3.0793

Epoch [22/80], Iter [700/500] Loss: 1.9128

Epoch [22/80], Iter [800/500] Loss: 4.8721

Epoch [22/80], Iter [900/500] Loss: 2.1349

Epoch [23/80], Iter [100/500] Loss: 1.8705

Epoch [23/80], Iter [200/500] Loss: 2.4326

Epoch [23/80], Iter [300/500] Loss: 1.5636

Epoch [23/80], Iter [400/500] Loss: 2.0465

Epoch [23/80], Iter [500/500] Loss: 1.5183

Epoch [23/80], Iter [600/500] Loss: 2.2711

Epoch [23/80], Iter [700/500] Loss: 2.8997

Epoch [23/80], Iter [800/500] Loss: 2.6150

Epoch [23/80], Iter [900/500] Loss: 2.8083

Epoch [24/80], Iter [100/500] Loss: 2.7177

Epoch [24/80], Iter [200/500] Loss: 3.2044

Epoch [24/80], Iter [300/500] Loss: 3.8137

Epoch [24/80], Iter [400/500] Loss: 1.9400

Epoch [24/80], Iter [500/500] Loss: 2.3550

Epoch [24/80], Iter [600/500] Loss: 1.6304

Epoch [24/80], Iter [700/500] Loss: 1.1287

Epoch [24/80], Iter [800/500] Loss: 2.1436

Epoch [24/80], Iter [900/500] Loss: 1.3761

Epoch [25/80], Iter [100/500] Loss: 1.9115

Epoch [25/80], Iter [200/500] Loss: 0.9423

Epoch [25/80], Iter [300/500] Loss: 1.1732

Epoch [25/80], Iter [400/500] Loss: 1.8946

Epoch [25/80], Iter [500/500] Loss: 1.4359

Epoch [25/80], Iter [600/500] Loss: 2.7499

Epoch [25/80], Iter [700/500] Loss: 3.2734

Epoch [25/80], Iter [800/500] Loss: 1.5863

Epoch [25/80], Iter [900/500] Loss: 2.8276

Epoch [26/80], Iter [100/500] Loss: 3.3783

Epoch [26/80], Iter [200/500] Loss: 1.6336

Epoch [26/80], Iter [300/500] Loss: 1.8298

Epoch [26/80], Iter [400/500] Loss: 1.1775

Epoch [26/80], Iter [500/500] Loss: 2.5811

Epoch [26/80], Iter [600/500] Loss: 1.2587

Epoch [26/80], Iter [700/500] Loss: 2.3547

Epoch [26/80], Iter [800/500] Loss: 3.2238

Epoch [26/80], Iter [900/500] Loss: 1.8571

Epoch [27/80], Iter [100/500] Loss: 1.9582

Epoch [27/80], Iter [200/500] Loss: 0.8752

Epoch [27/80], Iter [300/500] Loss: 1.5140

Epoch [27/80], Iter [400/500] Loss: 1.4624

Epoch [27/80], Iter [500/500] Loss: 3.6735

Epoch [27/80], Iter [600/500] Loss: 2.5618

Epoch [27/80], Iter [700/500] Loss: 1.3707

Epoch [27/80], Iter [800/500] Loss: 1.2286

Epoch [27/80], Iter [900/500] Loss: 2.4623

Epoch [28/80], Iter [100/500] Loss: 0.8966

Epoch [28/80], Iter [200/500] Loss: 1.4363

Epoch [28/80], Iter [300/500] Loss: 1.3229

Epoch [28/80], Iter [400/500] Loss: 1.4402

Epoch [28/80], Iter [500/500] Loss: 1.4920

Epoch [28/80], Iter [600/500] Loss: 1.9604

Epoch [28/80], Iter [700/500] Loss: 3.1165

Epoch [28/80], Iter [800/500] Loss: 1.0391

Epoch [28/80], Iter [900/500] Loss: 2.5201

Epoch [29/80], Iter [100/500] Loss: 1.8787

Epoch [29/80], Iter [200/500] Loss: 0.9840

Epoch [29/80], Iter [300/500] Loss: 1.4460

Epoch [29/80], Iter [400/500] Loss: 2.2886

Epoch [29/80], Iter [500/500] Loss: 1.4231

Epoch [29/80], Iter [600/500] Loss: 1.4980

Epoch [29/80], Iter [700/500] Loss: 2.3995

Epoch [29/80], Iter [800/500] Loss: 1.7662

Epoch [29/80], Iter [900/500] Loss: 2.3659

Epoch [30/80], Iter [100/500] Loss: 1.9505

Epoch [30/80], Iter [200/500] Loss: 1.1663

Epoch [30/80], Iter [300/500] Loss: 0.9471

Epoch [30/80], Iter [400/500] Loss: 0.9364

Epoch [30/80], Iter [500/500] Loss: 1.0124

Epoch [30/80], Iter [600/500] Loss: 1.2437

Epoch [30/80], Iter [700/500] Loss: 0.8796

Epoch [30/80], Iter [800/500] Loss: 1.2183

Epoch [30/80], Iter [900/500] Loss: 2.3959

Epoch [31/80], Iter [100/500] Loss: 1.4337

Epoch [31/80], Iter [200/500] Loss: 1.1861

Epoch [31/80], Iter [300/500] Loss: 1.2915

Epoch [31/80], Iter [400/500] Loss: 1.0188

Epoch [31/80], Iter [500/500] Loss: 2.2067

Epoch [31/80], Iter [600/500] Loss: 2.6476

Epoch [31/80], Iter [700/500] Loss: 1.1402

Epoch [31/80], Iter [800/500] Loss: 1.4248

Epoch [31/80], Iter [900/500] Loss: 1.0669

Epoch [32/80], Iter [100/500] Loss: 1.5955

Epoch [32/80], Iter [200/500] Loss: 1.7216

Epoch [32/80], Iter [300/500] Loss: 1.2304

Epoch [32/80], Iter [400/500] Loss: 1.7058

Epoch [32/80], Iter [500/500] Loss: 1.2115

Epoch [32/80], Iter [600/500] Loss: 1.6176

Epoch [32/80], Iter [700/500] Loss: 1.3043

Epoch [32/80], Iter [800/500] Loss: 1.9501

Epoch [32/80], Iter [900/500] Loss: 1.9035

Epoch [33/80], Iter [100/500] Loss: 1.9505

Epoch [33/80], Iter [200/500] Loss: 1.5603

Epoch [33/80], Iter [300/500] Loss: 1.5528

Epoch [33/80], Iter [400/500] Loss: 1.4192

Epoch [33/80], Iter [500/500] Loss: 1.2211

Epoch [33/80], Iter [600/500] Loss: 1.3927

Epoch [33/80], Iter [700/500] Loss: 2.3885

Epoch [33/80], Iter [800/500] Loss: 1.0948

Epoch [33/80], Iter [900/500] Loss: 1.6951

Epoch [34/80], Iter [100/500] Loss: 0.9534

Epoch [34/80], Iter [200/500] Loss: 0.7364

Epoch [34/80], Iter [300/500] Loss: 1.2372

Epoch [34/80], Iter [400/500] Loss: 1.6718

Epoch [34/80], Iter [500/500] Loss: 0.7804

Epoch [34/80], Iter [600/500] Loss: 2.1848

Epoch [34/80], Iter [700/500] Loss: 0.6333

Epoch [34/80], Iter [800/500] Loss: 1.6399

Epoch [34/80], Iter [900/500] Loss: 0.9555

Epoch [35/80], Iter [100/500] Loss: 1.5851

Epoch [35/80], Iter [200/500] Loss: 3.7824

Epoch [35/80], Iter [300/500] Loss: 2.5642

Epoch [35/80], Iter [400/500] Loss: 0.8965

Epoch [35/80], Iter [500/500] Loss: 1.9092

Epoch [35/80], Iter [600/500] Loss: 1.3729

Epoch [35/80], Iter [700/500] Loss: 2.2079

Epoch [35/80], Iter [800/500] Loss: 0.9051

Epoch [35/80], Iter [900/500] Loss: 1.1845

Epoch [36/80], Iter [100/500] Loss: 0.8240

Epoch [36/80], Iter [200/500] Loss: 1.1929

Epoch [36/80], Iter [300/500] Loss: 1.7051

Epoch [36/80], Iter [400/500] Loss: 0.7341

Epoch [36/80], Iter [500/500] Loss: 0.8078

Epoch [36/80], Iter [600/500] Loss: 0.7525

Epoch [36/80], Iter [700/500] Loss: 1.5739

Epoch [36/80], Iter [800/500] Loss: 1.3938

Epoch [36/80], Iter [900/500] Loss: 0.7145

Epoch [37/80], Iter [100/500] Loss: 0.9577

Epoch [37/80], Iter [200/500] Loss: 0.9464

Epoch [37/80], Iter [300/500] Loss: 1.0931

Epoch [37/80], Iter [400/500] Loss: 1.0390

Epoch [37/80], Iter [500/500] Loss: 1.3472

Epoch [37/80], Iter [600/500] Loss: 0.6312

Epoch [37/80], Iter [700/500] Loss: 0.6754

Epoch [37/80], Iter [800/500] Loss: 0.5888

Epoch [37/80], Iter [900/500] Loss: 3.1377

Epoch [38/80], Iter [100/500] Loss: 0.8339

Epoch [38/80], Iter [200/500] Loss: 0.9345

Epoch [38/80], Iter [300/500] Loss: 0.6615

Epoch [38/80], Iter [400/500] Loss: 1.6327

Epoch [38/80], Iter [500/500] Loss: 0.4701

Epoch [38/80], Iter [600/500] Loss: 1.1513

Epoch [38/80], Iter [700/500] Loss: 0.9013

Epoch [38/80], Iter [800/500] Loss: 2.7680

Epoch [38/80], Iter [900/500] Loss: 1.2733

Epoch [39/80], Iter [100/500] Loss: 3.0368

Epoch [39/80], Iter [200/500] Loss: 1.5569

Epoch [39/80], Iter [300/500] Loss: 0.5049

Epoch [39/80], Iter [400/500] Loss: 0.4075

Epoch [39/80], Iter [500/500] Loss: 0.9771

Epoch [39/80], Iter [600/500] Loss: 0.9003

Epoch [39/80], Iter [700/500] Loss: 1.6323

Epoch [39/80], Iter [800/500] Loss: 0.4881

Epoch [39/80], Iter [900/500] Loss: 2.1344

Epoch [40/80], Iter [100/500] Loss: 1.2439

Epoch [40/80], Iter [200/500] Loss: 1.3419

Epoch [40/80], Iter [300/500] Loss: 0.9575

Epoch [40/80], Iter [400/500] Loss: 1.4438

Epoch [40/80], Iter [500/500] Loss: 0.8559

Epoch [40/80], Iter [600/500] Loss: 1.0400

Epoch [40/80], Iter [700/500] Loss: 0.9063

Epoch [40/80], Iter [800/500] Loss: 1.0714

Epoch [40/80], Iter [900/500] Loss: 0.5098

Epoch [41/80], Iter [100/500] Loss: 0.5906

Epoch [41/80], Iter [200/500] Loss: 0.6610

Epoch [41/80], Iter [300/500] Loss: 0.4230

Epoch [41/80], Iter [400/500] Loss: 0.6014

Epoch [41/80], Iter [500/500] Loss: 0.3004

Epoch [41/80], Iter [600/500] Loss: 0.5606

Epoch [41/80], Iter [700/500] Loss: 0.4994

Epoch [41/80], Iter [800/500] Loss: 0.8664

Epoch [41/80], Iter [900/500] Loss: 0.5302

Epoch [42/80], Iter [100/500] Loss: 0.2961

Epoch [42/80], Iter [200/500] Loss: 0.2826

Epoch [42/80], Iter [300/500] Loss: 0.3575

Epoch [42/80], Iter [400/500] Loss: 0.3224

Epoch [42/80], Iter [500/500] Loss: 0.6851

Epoch [42/80], Iter [600/500] Loss: 0.2997

Epoch [42/80], Iter [700/500] Loss: 0.3907

Epoch [42/80], Iter [800/500] Loss: 0.4437

Epoch [42/80], Iter [900/500] Loss: 0.4847

Epoch [43/80], Iter [100/500] Loss: 0.5418

Epoch [43/80], Iter [200/500] Loss: 0.4099

Epoch [43/80], Iter [300/500] Loss: 0.3339

Epoch [43/80], Iter [400/500] Loss: 0.5546

Epoch [43/80], Iter [500/500] Loss: 0.5867

Epoch [43/80], Iter [600/500] Loss: 0.3540

Epoch [43/80], Iter [700/500] Loss: 0.4656

Epoch [43/80], Iter [800/500] Loss: 0.2922

Epoch [43/80], Iter [900/500] Loss: 0.3042

Epoch [44/80], Iter [100/500] Loss: 0.6309

Epoch [44/80], Iter [200/500] Loss: 0.2412

Epoch [44/80], Iter [300/500] Loss: 0.5505

Epoch [44/80], Iter [400/500] Loss: 0.4133

Epoch [44/80], Iter [500/500] Loss: 0.4317

Epoch [44/80], Iter [600/500] Loss: 0.4152

Epoch [44/80], Iter [700/500] Loss: 0.6375

Epoch [44/80], Iter [800/500] Loss: 0.3283

Epoch [44/80], Iter [900/500] Loss: 0.4399

Epoch [45/80], Iter [100/500] Loss: 0.2777

Epoch [45/80], Iter [200/500] Loss: 0.3131

Epoch [45/80], Iter [300/500] Loss: 0.2451

Epoch [45/80], Iter [400/500] Loss: 0.5350

Epoch [45/80], Iter [500/500] Loss: 0.2501

Epoch [45/80], Iter [600/500] Loss: 0.2076

Epoch [45/80], Iter [700/500] Loss: 0.2317

Epoch [45/80], Iter [800/500] Loss: 0.8772

Epoch [45/80], Iter [900/500] Loss: 0.4162

Epoch [46/80], Iter [100/500] Loss: 0.3190

Epoch [46/80], Iter [200/500] Loss: 0.2458

Epoch [46/80], Iter [300/500] Loss: 0.2976

Epoch [46/80], Iter [400/500] Loss: 0.3712

Epoch [46/80], Iter [500/500] Loss: 0.4305

Epoch [46/80], Iter [600/500] Loss: 0.5143

Epoch [46/80], Iter [700/500] Loss: 0.2622

Epoch [46/80], Iter [800/500] Loss: 0.5331

Epoch [46/80], Iter [900/500] Loss: 0.3598

Epoch [47/80], Iter [100/500] Loss: 0.2180

Epoch [47/80], Iter [200/500] Loss: 0.2275

Epoch [47/80], Iter [300/500] Loss: 0.5302

Epoch [47/80], Iter [400/500] Loss: 0.3535

Epoch [47/80], Iter [500/500] Loss: 0.5790

Epoch [47/80], Iter [600/500] Loss: 0.3741

Epoch [47/80], Iter [700/500] Loss: 0.5120

Epoch [47/80], Iter [800/500] Loss: 0.6204

Epoch [47/80], Iter [900/500] Loss: 0.4902

Epoch [48/80], Iter [100/500] Loss: 0.2668

Epoch [48/80], Iter [200/500] Loss: 0.5693

Epoch [48/80], Iter [300/500] Loss: 0.3328

Epoch [48/80], Iter [400/500] Loss: 0.2399

Epoch [48/80], Iter [500/500] Loss: 0.3160

Epoch [48/80], Iter [600/500] Loss: 0.2944

Epoch [48/80], Iter [700/500] Loss: 0.2742

Epoch [48/80], Iter [800/500] Loss: 0.5297

Epoch [48/80], Iter [900/500] Loss: 0.3755

Epoch [49/80], Iter [100/500] Loss: 0.2658

Epoch [49/80], Iter [200/500] Loss: 0.2223

Epoch [49/80], Iter [300/500] Loss: 0.4348

Epoch [49/80], Iter [400/500] Loss: 0.2313

Epoch [49/80], Iter [500/500] Loss: 0.2838

Epoch [49/80], Iter [600/500] Loss: 0.3415

Epoch [49/80], Iter [700/500] Loss: 0.3633

Epoch [49/80], Iter [800/500] Loss: 0.3768

Epoch [49/80], Iter [900/500] Loss: 0.5177

Epoch [50/80], Iter [100/500] Loss: 0.3538

Epoch [50/80], Iter [200/500] Loss: 0.2759

Epoch [50/80], Iter [300/500] Loss: 0.2255

Epoch [50/80], Iter [400/500] Loss: 0.3148

Epoch [50/80], Iter [500/500] Loss: 0.4502

Epoch [50/80], Iter [600/500] Loss: 0.3382

Epoch [50/80], Iter [700/500] Loss: 0.8207

Epoch [50/80], Iter [800/500] Loss: 0.3541

Epoch [50/80], Iter [900/500] Loss: 0.4090

('time used:', 17124.861335999998)

未被DataParallel初始化

Epoch [1/80], Iter [100/500] Loss: 635.6779

Epoch [1/80], Iter [200/500] Loss: 247.5514

Epoch [1/80], Iter [300/500] Loss: 231.7609

Epoch [1/80], Iter [400/500] Loss: 198.7304

Epoch [1/80], Iter [500/500] Loss: 207.1028

Epoch [1/80], Iter [600/500] Loss: 114.7708

Epoch [1/80], Iter [700/500] Loss: 126.9886

Epoch [1/80], Iter [800/500] Loss: 160.8622

Epoch [1/80], Iter [900/500] Loss: 153.8121

Epoch [2/80], Iter [100/500] Loss: 106.6578

Epoch [2/80], Iter [200/500] Loss: 91.5044

Epoch [2/80], Iter [300/500] Loss: 111.4231

Epoch [2/80], Iter [400/500] Loss: 50.7004

Epoch [2/80], Iter [500/500] Loss: 58.9242

Epoch [2/80], Iter [600/500] Loss: 55.2035

Epoch [2/80], Iter [700/500] Loss: 26.7637

Epoch [2/80], Iter [800/500] Loss: 52.5472

Epoch [2/80], Iter [900/500] Loss: 51.7907

Epoch [3/80], Iter [100/500] Loss: 35.7970

Epoch [3/80], Iter [200/500] Loss: 59.1204

Epoch [3/80], Iter [300/500] Loss: 70.5727

Epoch [3/80], Iter [400/500] Loss: 50.1149

Epoch [3/80], Iter [500/500] Loss: 26.3628

Epoch [3/80], Iter [600/500] Loss: 67.3355

Epoch [3/80], Iter [700/500] Loss: 56.8271

Epoch [3/80], Iter [800/500] Loss: 46.5803

Epoch [3/80], Iter [900/500] Loss: 34.9568

Epoch [4/80], Iter [100/500] Loss: 67.0837

Epoch [4/80], Iter [200/500] Loss: 36.8596

Epoch [4/80], Iter [300/500] Loss: 37.6830

Epoch [4/80], Iter [400/500] Loss: 52.1378

Epoch [4/80], Iter [500/500] Loss: 104.5909

Epoch [4/80], Iter [600/500] Loss: 71.3509

Epoch [4/80], Iter [700/500] Loss: 28.4496

Epoch [4/80], Iter [800/500] Loss: 56.1399

Epoch [4/80], Iter [900/500] Loss: 58.7510

Epoch [5/80], Iter [100/500] Loss: 42.5710

Epoch [5/80], Iter [200/500] Loss: 25.5430

Epoch [5/80], Iter [300/500] Loss: 25.9271

Epoch [5/80], Iter [400/500] Loss: 75.8942

Epoch [5/80], Iter [500/500] Loss: 70.6782

Epoch [5/80], Iter [600/500] Loss: 10.7801

Epoch [5/80], Iter [700/500] Loss: 29.9416

Epoch [5/80], Iter [800/500] Loss: 47.0781

Epoch [5/80], Iter [900/500] Loss: 45.4692

Epoch [6/80], Iter [100/500] Loss: 51.3811

Epoch [6/80], Iter [200/500] Loss: 30.6207

Epoch [6/80], Iter [300/500] Loss: 35.4928

Epoch [6/80], Iter [400/500] Loss: 37.9467

Epoch [6/80], Iter [500/500] Loss: 36.7505

Epoch [6/80], Iter [600/500] Loss: 64.3528

Epoch [6/80], Iter [700/500] Loss: 73.6308

Epoch [6/80], Iter [800/500] Loss: 33.1290

Epoch [6/80], Iter [900/500] Loss: 34.2442

Epoch [7/80], Iter [100/500] Loss: 34.9157

Epoch [7/80], Iter [200/500] Loss: 26.8041

Epoch [7/80], Iter [300/500] Loss: 43.5796

Epoch [7/80], Iter [400/500] Loss: 31.5104

Epoch [7/80], Iter [500/500] Loss: 41.2132

Epoch [7/80], Iter [600/500] Loss: 23.1634

Epoch [7/80], Iter [700/500] Loss: 26.7399

Epoch [7/80], Iter [800/500] Loss: 60.4979

Epoch [7/80], Iter [900/500] Loss: 32.8528

Epoch [8/80], Iter [100/500] Loss: 36.6079

Epoch [8/80], Iter [200/500] Loss: 49.1552

Epoch [8/80], Iter [300/500] Loss: 21.2926

Epoch [8/80], Iter [400/500] Loss: 33.5335

Epoch [8/80], Iter [500/500] Loss: 50.1770

Epoch [8/80], Iter [600/500] Loss: 21.9908

Epoch [8/80], Iter [700/500] Loss: 40.2040

Epoch [8/80], Iter [800/500] Loss: 22.5460

Epoch [8/80], Iter [900/500] Loss: 43.9564

Epoch [9/80], Iter [100/500] Loss: 19.8116

Epoch [9/80], Iter [200/500] Loss: 8.5169

Epoch [9/80], Iter [300/500] Loss: 37.0475

Epoch [9/80], Iter [400/500] Loss: 74.2606

Epoch [9/80], Iter [500/500] Loss: 16.3256

Epoch [9/80], Iter [600/500] Loss: 26.0609

Epoch [9/80], Iter [700/500] Loss: 24.3721

Epoch [9/80], Iter [800/500] Loss: 37.5132

Epoch [9/80], Iter [900/500] Loss: 27.4818

Epoch [10/80], Iter [100/500] Loss: 11.7654

Epoch [10/80], Iter [200/500] Loss: 9.3536

Epoch [10/80], Iter [300/500] Loss: 11.6718

Epoch [10/80], Iter [400/500] Loss: 24.4423

Epoch [10/80], Iter [500/500] Loss: 25.6966

Epoch [10/80], Iter [600/500] Loss: 35.2358

Epoch [10/80], Iter [700/500] Loss: 17.2685

Epoch [10/80], Iter [800/500] Loss: 22.3965

Epoch [10/80], Iter [900/500] Loss: 42.6901

Epoch [11/80], Iter [100/500] Loss: 17.9832

Epoch [11/80], Iter [200/500] Loss: 18.8705

Epoch [11/80], Iter [300/500] Loss: 25.3700

Epoch [11/80], Iter [400/500] Loss: 10.8511

Epoch [11/80], Iter [500/500] Loss: 18.3028

Epoch [11/80], Iter [600/500] Loss: 23.2316

Epoch [11/80], Iter [700/500] Loss: 10.2498

Epoch [11/80], Iter [800/500] Loss: 14.7609

Epoch [11/80], Iter [900/500] Loss: 20.1801

Epoch [12/80], Iter [100/500] Loss: 23.8675

Epoch [12/80], Iter [200/500] Loss: 15.7924

Epoch [12/80], Iter [300/500] Loss: 13.7092

Epoch [12/80], Iter [400/500] Loss: 12.0196

Epoch [12/80], Iter [500/500] Loss: 7.2408

Epoch [12/80], Iter [600/500] Loss: 10.7912

Epoch [12/80], Iter [700/500] Loss: 11.9665

Epoch [12/80], Iter [800/500] Loss: 13.7599

Epoch [12/80], Iter [900/500] Loss: 18.3869

Epoch [13/80], Iter [100/500] Loss: 11.1715

Epoch [13/80], Iter [200/500] Loss: 17.6397

Epoch [13/80], Iter [300/500] Loss: 9.3256

Epoch [13/80], Iter [400/500] Loss: 12.7995

Epoch [13/80], Iter [500/500] Loss: 7.8598

Epoch [13/80], Iter [600/500] Loss: 10.7001

Epoch [13/80], Iter [700/500] Loss: 26.3672

Epoch [13/80], Iter [800/500] Loss: 15.4815

Epoch [13/80], Iter [900/500] Loss: 14.0478

Epoch [14/80], Iter [100/500] Loss: 16.0473

Epoch [14/80], Iter [200/500] Loss: 4.7192

Epoch [14/80], Iter [300/500] Loss: 10.7586

Epoch [14/80], Iter [400/500] Loss: 13.6734

Epoch [14/80], Iter [500/500] Loss: 9.3228

Epoch [14/80], Iter [600/500] Loss: 5.5830

Epoch [14/80], Iter [700/500] Loss: 7.5252

Epoch [14/80], Iter [800/500] Loss: 7.6239

Epoch [14/80], Iter [900/500] Loss: 7.1024

Epoch [15/80], Iter [100/500] Loss: 17.5188

Epoch [15/80], Iter [200/500] Loss: 11.8842

Epoch [15/80], Iter [300/500] Loss: 9.0330

Epoch [15/80], Iter [400/500] Loss: 11.7120

Epoch [15/80], Iter [500/500] Loss: 17.0862

Epoch [15/80], Iter [600/500] Loss: 11.4103

Epoch [15/80], Iter [700/500] Loss: 12.2746

Epoch [15/80], Iter [800/500] Loss: 13.6224

Epoch [15/80], Iter [900/500] Loss: 12.7686

Epoch [16/80], Iter [100/500] Loss: 5.5978

Epoch [16/80], Iter [200/500] Loss: 12.2122

Epoch [16/80], Iter [300/500] Loss: 5.1189

Epoch [16/80], Iter [400/500] Loss: 14.1793

Epoch [16/80], Iter [500/500] Loss: 10.3744

Epoch [16/80], Iter [600/500] Loss: 5.2099

Epoch [16/80], Iter [700/500] Loss: 6.7522

Epoch [16/80], Iter [800/500] Loss: 13.2532

Epoch [16/80], Iter [900/500] Loss: 6.7040

Epoch [17/80], Iter [100/500] Loss: 10.7390

Epoch [17/80], Iter [200/500] Loss: 8.1525

Epoch [17/80], Iter [300/500] Loss: 14.2229

Epoch [17/80], Iter [400/500] Loss: 7.6302

Epoch [17/80], Iter [500/500] Loss: 6.4554

Epoch [17/80], Iter [600/500] Loss: 8.2380

Epoch [17/80], Iter [700/500] Loss: 6.4445

Epoch [17/80], Iter [800/500] Loss: 8.4644

Epoch [17/80], Iter [900/500] Loss: 9.0200

Epoch [18/80], Iter [100/500] Loss: 9.5088

Epoch [18/80], Iter [200/500] Loss: 3.8648

Epoch [18/80], Iter [300/500] Loss: 8.8408

Epoch [18/80], Iter [400/500] Loss: 7.4195

Epoch [18/80], Iter [500/500] Loss: 15.0480

Epoch [18/80], Iter [600/500] Loss: 5.6232

Epoch [18/80], Iter [700/500] Loss: 5.2233

Epoch [18/80], Iter [800/500] Loss: 6.5702

Epoch [18/80], Iter [900/500] Loss: 13.7427

Epoch [19/80], Iter [100/500] Loss: 3.5658

Epoch [19/80], Iter [200/500] Loss: 4.7062

Epoch [19/80], Iter [300/500] Loss: 10.7831

Epoch [19/80], Iter [400/500] Loss: 13.1375

Epoch [19/80], Iter [500/500] Loss: 22.2764

Epoch [19/80], Iter [600/500] Loss: 10.3463

Epoch [19/80], Iter [700/500] Loss: 7.2373

Epoch [19/80], Iter [800/500] Loss: 5.5266

Epoch [19/80], Iter [900/500] Loss: 9.2434

Epoch [20/80], Iter [100/500] Loss: 7.8164

Epoch [20/80], Iter [200/500] Loss: 9.6628

Epoch [20/80], Iter [300/500] Loss: 4.1032

Epoch [20/80], Iter [400/500] Loss: 16.5922

Epoch [20/80], Iter [500/500] Loss: 6.9907

Epoch [20/80], Iter [600/500] Loss: 10.9906

Epoch [20/80], Iter [700/500] Loss: 8.5092

Epoch [20/80], Iter [800/500] Loss: 7.1332

Epoch [20/80], Iter [900/500] Loss: 6.1639

Epoch [21/80], Iter [100/500] Loss: 6.3100

Epoch [21/80], Iter [200/500] Loss: 4.5190

Epoch [21/80], Iter [300/500] Loss: 4.3493

Epoch [21/80], Iter [400/500] Loss: 7.9860

Epoch [21/80], Iter [500/500] Loss: 8.8312

Epoch [21/80], Iter [600/500] Loss: 10.7502

Epoch [21/80], Iter [700/500] Loss: 3.2116

Epoch [21/80], Iter [800/500] Loss: 4.0126

Epoch [21/80], Iter [900/500] Loss: 5.3675

Epoch [22/80], Iter [100/500] Loss: 1.4893

Epoch [22/80], Iter [200/500] Loss: 1.6984

Epoch [22/80], Iter [300/500] Loss: 2.6195

Epoch [22/80], Iter [400/500] Loss: 2.1465

Epoch [22/80], Iter [500/500] Loss: 2.9847

Epoch [22/80], Iter [600/500] Loss: 4.9699

Epoch [22/80], Iter [700/500] Loss: 1.6728

Epoch [22/80], Iter [800/500] Loss: 1.3381

Epoch [22/80], Iter [900/500] Loss: 2.0680

Epoch [23/80], Iter [100/500] Loss: 1.9145

Epoch [23/80], Iter [200/500] Loss: 0.9280

Epoch [23/80], Iter [300/500] Loss: 2.9585

Epoch [23/80], Iter [400/500] Loss: 1.0787

Epoch [23/80], Iter [500/500] Loss: 3.1779

Epoch [23/80], Iter [600/500] Loss: 2.4411

Epoch [23/80], Iter [700/500] Loss: 2.0049

Epoch [23/80], Iter [800/500] Loss: 2.2844

Epoch [23/80], Iter [900/500] Loss: 2.2328

Epoch [24/80], Iter [100/500] Loss: 1.5221

Epoch [24/80], Iter [200/500] Loss: 2.0100

Epoch [24/80], Iter [300/500] Loss: 1.8868

Epoch [24/80], Iter [400/500] Loss: 1.4898

Epoch [24/80], Iter [500/500] Loss: 1.1626

Epoch [24/80], Iter [600/500] Loss: 1.2527

Epoch [24/80], Iter [700/500] Loss: 1.3430

Epoch [24/80], Iter [800/500] Loss: 1.3355

Epoch [24/80], Iter [900/500] Loss: 1.8292

Epoch [25/80], Iter [100/500] Loss: 2.2471

Epoch [25/80], Iter [200/500] Loss: 2.8727

Epoch [25/80], Iter [300/500] Loss: 1.3531

Epoch [25/80], Iter [400/500] Loss: 1.1110

Epoch [25/80], Iter [500/500] Loss: 2.7648

Epoch [25/80], Iter [600/500] Loss: 1.8364

Epoch [25/80], Iter [700/500] Loss: 1.4299

Epoch [25/80], Iter [800/500] Loss: 1.5985

Epoch [25/80], Iter [900/500] Loss: 2.5364

Epoch [26/80], Iter [100/500] Loss: 2.6469

Epoch [26/80], Iter [200/500] Loss: 3.1215

Epoch [26/80], Iter [300/500] Loss: 1.4029

Epoch [26/80], Iter [400/500] Loss: 1.2688

Epoch [26/80], Iter [500/500] Loss: 2.4794

Epoch [26/80], Iter [600/500] Loss: 1.1937

Epoch [26/80], Iter [700/500] Loss: 1.0709

Epoch [26/80], Iter [800/500] Loss: 1.4961

Epoch [26/80], Iter [900/500] Loss: 1.4560

Epoch [27/80], Iter [100/500] Loss: 2.0633

Epoch [27/80], Iter [200/500] Loss: 2.6687

Epoch [27/80], Iter [300/500] Loss: 5.2073

Epoch [27/80], Iter [400/500] Loss: 2.2762

Epoch [27/80], Iter [500/500] Loss: 1.6105

Epoch [27/80], Iter [600/500] Loss: 1.6631

Epoch [27/80], Iter [700/500] Loss: 1.0523

Epoch [27/80], Iter [800/500] Loss: 2.8945

Epoch [27/80], Iter [900/500] Loss: 1.5388

Epoch [28/80], Iter [100/500] Loss: 1.6230

Epoch [28/80], Iter [200/500] Loss: 1.8003

Epoch [28/80], Iter [300/500] Loss: 1.4840

Epoch [28/80], Iter [400/500] Loss: 0.9465

Epoch [28/80], Iter [500/500] Loss: 1.6054

Epoch [28/80], Iter [600/500] Loss: 3.3669

Epoch [28/80], Iter [700/500] Loss: 1.4555

Epoch [28/80], Iter [800/500] Loss: 2.2903

Epoch [28/80], Iter [900/500] Loss: 1.2850

Epoch [29/80], Iter [100/500] Loss: 1.7152

Epoch [29/80], Iter [200/500] Loss: 1.2824

Epoch [29/80], Iter [300/500] Loss: 1.5778

Epoch [29/80], Iter [400/500] Loss: 3.1152

Epoch [29/80], Iter [500/500] Loss: 1.2492

Epoch [29/80], Iter [600/500] Loss: 0.9721

Epoch [29/80], Iter [700/500] Loss: 1.4465

Epoch [29/80], Iter [800/500] Loss: 0.9678

Epoch [29/80], Iter [900/500] Loss: 1.5000

Epoch [30/80], Iter [100/500] Loss: 1.5524

Epoch [30/80], Iter [200/500] Loss: 1.5233

Epoch [30/80], Iter [300/500] Loss: 1.4226

Epoch [30/80], Iter [400/500] Loss: 0.9432

Epoch [30/80], Iter [500/500] Loss: 1.4623

Epoch [30/80], Iter [600/500] Loss: 1.3845

Epoch [30/80], Iter [700/500] Loss: 1.3301

Epoch [30/80], Iter [800/500] Loss: 1.0105

Epoch [30/80], Iter [900/500] Loss: 1.8372

Epoch [31/80], Iter [100/500] Loss: 1.3019

Epoch [31/80], Iter [200/500] Loss: 1.1216

Epoch [31/80], Iter [300/500] Loss: 0.8553

Epoch [31/80], Iter [400/500] Loss: 1.6882

Epoch [31/80], Iter [500/500] Loss: 1.7691

Epoch [31/80], Iter [600/500] Loss: 1.7412

Epoch [31/80], Iter [700/500] Loss: 2.2204

Epoch [31/80], Iter [800/500] Loss: 0.6559

Epoch [31/80], Iter [900/500] Loss: 1.4613

Epoch [32/80], Iter [100/500] Loss: 1.1408

Epoch [32/80], Iter [200/500] Loss: 3.6378

Epoch [32/80], Iter [300/500] Loss: 1.5543

Epoch [32/80], Iter [400/500] Loss: 2.1538

Epoch [32/80], Iter [500/500] Loss: 1.1102

Epoch [32/80], Iter [600/500] Loss: 1.3187

Epoch [32/80], Iter [700/500] Loss: 0.7230

Epoch [32/80], Iter [800/500] Loss: 1.6149

Epoch [32/80], Iter [900/500] Loss: 1.0926

Epoch [33/80], Iter [100/500] Loss: 1.9460

Epoch [33/80], Iter [200/500] Loss: 0.9948

Epoch [33/80], Iter [300/500] Loss: 1.4460

Epoch [33/80], Iter [400/500] Loss: 1.5855

Epoch [33/80], Iter [500/500] Loss: 1.5834

Epoch [33/80], Iter [600/500] Loss: 0.8896

Epoch [33/80], Iter [700/500] Loss: 1.1927

Epoch [33/80], Iter [800/500] Loss: 1.5707

Epoch [33/80], Iter [900/500] Loss: 0.7817

Epoch [34/80], Iter [100/500] Loss: 0.9155

Epoch [34/80], Iter [200/500] Loss: 0.7930

Epoch [34/80], Iter [300/500] Loss: 1.2760

Epoch [34/80], Iter [400/500] Loss: 0.7170

Epoch [34/80], Iter [500/500] Loss: 1.9962

Epoch [34/80], Iter [600/500] Loss: 1.2418

Epoch [34/80], Iter [700/500] Loss: 1.4847

Epoch [34/80], Iter [800/500] Loss: 0.8495

Epoch [34/80], Iter [900/500] Loss: 1.3709

Epoch [35/80], Iter [100/500] Loss: 1.8495

Epoch [35/80], Iter [200/500] Loss: 0.9494

Epoch [35/80], Iter [300/500] Loss: 0.6224

Epoch [35/80], Iter [400/500] Loss: 0.5101

Epoch [35/80], Iter [500/500] Loss: 0.9373

Epoch [35/80], Iter [600/500] Loss: 1.5811

Epoch [35/80], Iter [700/500] Loss: 1.5295

Epoch [35/80], Iter [800/500] Loss: 0.7787

Epoch [35/80], Iter [900/500] Loss: 1.0337

Epoch [36/80], Iter [100/500] Loss: 0.6236

Epoch [36/80], Iter [200/500] Loss: 1.8516

Epoch [36/80], Iter [300/500] Loss: 1.5021

Epoch [36/80], Iter [400/500] Loss: 1.0459

Epoch [36/80], Iter [500/500] Loss: 1.4737

Epoch [36/80], Iter [600/500] Loss: 0.7842

Epoch [36/80], Iter [700/500] Loss: 1.6798

Epoch [36/80], Iter [800/500] Loss: 1.7413

Epoch [36/80], Iter [900/500] Loss: 0.6222

Epoch [37/80], Iter [100/500] Loss: 0.5713

Epoch [37/80], Iter [200/500] Loss: 1.3030

Epoch [37/80], Iter [300/500] Loss: 1.6937

Epoch [37/80], Iter [400/500] Loss: 0.8656

Epoch [37/80], Iter [500/500] Loss: 1.3340

Epoch [37/80], Iter [600/500] Loss: 0.6310

Epoch [37/80], Iter [700/500] Loss: 1.1445

Epoch [37/80], Iter [800/500] Loss: 0.6099

Epoch [37/80], Iter [900/500] Loss: 1.3679

Epoch [38/80], Iter [100/500] Loss: 0.9127

Epoch [38/80], Iter [200/500] Loss: 1.9450

Epoch [38/80], Iter [300/500] Loss: 1.2240

Epoch [38/80], Iter [400/500] Loss: 1.4049

Epoch [38/80], Iter [500/500] Loss: 0.9247

Epoch [38/80], Iter [600/500] Loss: 1.5308

Epoch [38/80], Iter [700/500] Loss: 1.9777

Epoch [38/80], Iter [800/500] Loss: 1.2109

Epoch [38/80], Iter [900/500] Loss: 0.8337

Epoch [39/80], Iter [100/500] Loss: 0.7904

Epoch [39/80], Iter [200/500] Loss: 0.8451

Epoch [39/80], Iter [300/500] Loss: 1.6993

Epoch [39/80], Iter [400/500] Loss: 1.2196

Epoch [39/80], Iter [500/500] Loss: 1.0665

Epoch [39/80], Iter [600/500] Loss: 0.7412

Epoch [39/80], Iter [700/500] Loss: 0.6486

Epoch [39/80], Iter [800/500] Loss: 1.5608

Epoch [39/80], Iter [900/500] Loss: 1.9978

Epoch [40/80], Iter [100/500] Loss: 1.7101

Epoch [40/80], Iter [200/500] Loss: 1.4484

Epoch [40/80], Iter [300/500] Loss: 1.5894

Epoch [40/80], Iter [400/500] Loss: 1.3371

Epoch [40/80], Iter [500/500] Loss: 0.9766

Epoch [40/80], Iter [600/500] Loss: 1.9935

Epoch [40/80], Iter [700/500] Loss: 2.0719

Epoch [40/80], Iter [800/500] Loss: 0.9455

Epoch [40/80], Iter [900/500] Loss: 0.8072

Epoch [41/80], Iter [100/500] Loss: 1.3899

Epoch [41/80], Iter [200/500] Loss: 0.9863

Epoch [41/80], Iter [300/500] Loss: 1.3738

Epoch [41/80], Iter [400/500] Loss: 0.6883

Epoch [41/80], Iter [500/500] Loss: 0.8442

Epoch [41/80], Iter [600/500] Loss: 2.0286

Epoch [41/80], Iter [700/500] Loss: 1.1960

Epoch [41/80], Iter [800/500] Loss: 1.2499

Epoch [41/80], Iter [900/500] Loss: 0.6043

Epoch [42/80], Iter [100/500] Loss: 0.3437

Epoch [42/80], Iter [200/500] Loss: 0.6596

Epoch [42/80], Iter [300/500] Loss: 0.4450

Epoch [42/80], Iter [400/500] Loss: 0.7189

Epoch [42/80], Iter [500/500] Loss: 0.5022

Epoch [42/80], Iter [600/500] Loss: 0.4597

Epoch [42/80], Iter [700/500] Loss: 0.7743

Epoch [42/80], Iter [800/500] Loss: 0.3344

Epoch [42/80], Iter [900/500] Loss: 0.7295

Epoch [43/80], Iter [100/500] Loss: 0.5074

Epoch [43/80], Iter [200/500] Loss: 0.3128

Epoch [43/80], Iter [300/500] Loss: 0.2800

Epoch [43/80], Iter [400/500] Loss: 0.3059

Epoch [43/80], Iter [500/500] Loss: 0.3486

Epoch [43/80], Iter [600/500] Loss: 0.7222

Epoch [43/80], Iter [700/500] Loss: 0.7349

Epoch [43/80], Iter [800/500] Loss: 0.8455

Epoch [43/80], Iter [900/500] Loss: 0.7261

Epoch [44/80], Iter [100/500] Loss: 0.5404

Epoch [44/80], Iter [200/500] Loss: 0.5428

Epoch [44/80], Iter [300/500] Loss: 0.5385

Epoch [44/80], Iter [400/500] Loss: 0.4106

Epoch [44/80], Iter [500/500] Loss: 0.5296

Epoch [44/80], Iter [600/500] Loss: 0.6045

Epoch [44/80], Iter [700/500] Loss: 0.3837

Epoch [44/80], Iter [800/500] Loss: 0.7552

Epoch [44/80], Iter [900/500] Loss: 0.4996

Epoch [45/80], Iter [100/500] Loss: 0.3381

Epoch [45/80], Iter [200/500] Loss: 0.3910

Epoch [45/80], Iter [300/500] Loss: 0.3790

Epoch [45/80], Iter [400/500] Loss: 0.2718

Epoch [45/80], Iter [500/500] Loss: 0.3572

Epoch [45/80], Iter [600/500] Loss: 0.2913

Epoch [45/80], Iter [700/500] Loss: 0.5244

Epoch [45/80], Iter [800/500] Loss: 0.3647

Epoch [45/80], Iter [900/500] Loss: 0.3161

Epoch [46/80], Iter [100/500] Loss: 0.4728

Epoch [46/80], Iter [200/500] Loss: 0.4386

Epoch [46/80], Iter [300/500] Loss: 0.2861

Epoch [46/80], Iter [400/500] Loss: 0.2460

Epoch [46/80], Iter [500/500] Loss: 0.3490

Epoch [46/80], Iter [600/500] Loss: 0.5804

Epoch [46/80], Iter [700/500] Loss: 0.4951

Epoch [46/80], Iter [800/500] Loss: 0.4600

Epoch [46/80], Iter [900/500] Loss: 0.5658

Epoch [47/80], Iter [100/500] Loss: 0.2479

Epoch [47/80], Iter [200/500] Loss: 0.2688

Epoch [47/80], Iter [300/500] Loss: 0.3082

Epoch [47/80], Iter [400/500] Loss: 0.3929

Epoch [47/80], Iter [500/500] Loss: 0.3126

Epoch [47/80], Iter [600/500] Loss: 0.5041

Epoch [47/80], Iter [700/500] Loss: 0.5848

Epoch [47/80], Iter [800/500] Loss: 0.4968

Epoch [47/80], Iter [900/500] Loss: 0.3496

Epoch [48/80], Iter [100/500] Loss: 0.2753

Epoch [48/80], Iter [200/500] Loss: 0.3885

Epoch [48/80], Iter [300/500] Loss: 0.3743

Epoch [48/80], Iter [400/500] Loss: 0.2425

Epoch [48/80], Iter [500/500] Loss: 0.2472

Epoch [48/80], Iter [600/500] Loss: 0.3003

Epoch [48/80], Iter [700/500] Loss: 0.4936

Epoch [48/80], Iter [800/500] Loss: 0.3169

Epoch [48/80], Iter [900/500] Loss: 0.2543

Epoch [49/80], Iter [100/500] Loss: 0.4262

Epoch [49/80], Iter [200/500] Loss: 0.3396

Epoch [49/80], Iter [300/500] Loss: 0.4670

Epoch [49/80], Iter [400/500] Loss: 0.2543

Epoch [49/80], Iter [500/500] Loss: 0.3146

Epoch [49/80], Iter [600/500] Loss: 1.3187

Epoch [49/80], Iter [700/500] Loss: 0.2993

Epoch [49/80], Iter [800/500] Loss: 0.3053

Epoch [49/80], Iter [900/500] Loss: 0.3343

Epoch [50/80], Iter [100/500] Loss: 0.2081

Epoch [50/80], Iter [200/500] Loss: 0.5631

Epoch [50/80], Iter [300/500] Loss: 0.4358

Epoch [50/80], Iter [400/500] Loss: 0.4028

Epoch [50/80], Iter [500/500] Loss: 0.2510

Epoch [50/80], Iter [600/500] Loss: 0.5876

Epoch [50/80], Iter [700/500] Loss: 0.3692

Epoch [50/80], Iter [800/500] Loss: 0.4500

Epoch [50/80], Iter [900/500] Loss: 0.1850

('time used:', 30318.149681000003)

本文永久更新链接地址:http://www.linuxidc.com/Linux/2017-09/147181.htm

linux
本文评论   查看全部评论 (0)
表情: 表情 姓名: 字数

       

评论声明
  • 尊重网上道德,遵守中华人民共和国的各项有关法律法规
  • 承担一切因您的行为而直接或间接导致的民事或刑事法律责任
  • 本站管理人员有权保留或删除其管辖留言中的任意内容
  • 本站有权在网站内转载或引用您的评论
  • 参与本评论即表明您已经阅读并接受上述条款