初步接触深度学习,使用 pytorch 框架,关键代码如下:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 50, 5, stride=1, padding=1, bias=False)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(50, 100, 7, stride=1, padding=1, bias=False)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(1600 , 400)
        self.fc2 = nn.Linear(400, 100)
        self.fc3 = nn.Linear(100, 2)
    def forward(self, x):
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features
net = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
for epoch in range(5):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(data_loader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs = data['image']
        labels = data['label']
        # zero the parameter gradients
        optimizer.zero_grad()
        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0
print('Finished Training')
inputs(输入值):
tensor([[[[-0.3725, -0.3412, -0.3098,  ...,  0.5451,  0.5686,  0.6314],
          [-0.3020, -0.3098, -0.3333,  ...,  0.4039,  0.5451,  0.5294],
          [-0.2078, -0.2784, -0.3255,  ..., -0.0118,  0.2471,  0.5216],
          ...,
          [-0.4431, -0.5294, -0.4902,  ..., -1.0000, -0.1216,  0.4588],
          [-0.4196, -0.4431, -0.5451,  ..., -0.1843,  0.5451,  0.5294],
          [-0.2549, -0.4039, -0.5686,  ...,  0.4824,  0.5294,  0.5137]]],
        [[[ 0.4588,  0.3882,  0.3490,  ...,  0.0745,  0.3725,  0.5451],
          [ 0.5451,  0.4902,  0.4745,  ...,  0.0902,  0.4588,  0.5922],
          [ 0.5608,  0.5373,  0.5373,  ...,  0.2784,  0.5216,  0.6314],
          ...,
label(标签):
tensor([3, 4, 1, 5, 6, 3, 4, 0, 4, 3, 6, 3, 3, 3, 3, 3, 6, 3, 0, 5, 3, 3, 6, 0,
        5, 3, 0, 0, 2, 4, 3, 3, 4, 5, 4, 2, 2, 6, 3, 3, 3, 3, 3, 0, 3, 5, 3, 3,
        6, 5])
output(网络输出值?)
tensor([[ 0.0508, -0.0647],
        [ 0.0380, -0.0434],
        [ 0.0412, -0.0596],
        [ 0.0499, -0.0559],
        [ 0.0579, -0.0619],
        [ 0.0455, -0.0614],
        [ 0.0494, -0.0628],
        [ 0.0494, -0.0491],
        [ 0.0439, -0.0690],
        [ 0.0512, -0.0562],
        [ 0.0491, -0.0516],
        [ 0.0493, -0.0699],
        [ 0.0468, -0.0654],
        [ 0.0606, -0.0682],
        [ 0.0603, -0.0597],
        [ 0.0522, -0.0604],
        [ 0.0422, -0.0535],
        [ 0.0616, -0.0678],
        [ 0.0366, -0.0472],
        [ 0.0388, -0.0439],
        [ 0.0575, -0.0728],
        [ 0.0408, -0.0586],
        [ 0.0426, -0.0641],
        [ 0.0467, -0.0601],
        [ 0.0389, -0.0470],
        [ 0.0481, -0.0654],
        [ 0.0575, -0.0677],
        [ 0.0484, -0.0633],
        [ 0.0398, -0.0525],
        [ 0.0490, -0.0641],
        [ 0.0438, -0.0625],
        [ 0.0429, -0.0583],
        [ 0.0426, -0.0521],
        [ 0.0692, -0.0447],
        [ 0.0541, -0.0779],
        [ 0.0330, -0.0317],
        [ 0.0473, -0.0488],
        [ 0.0484, -0.0528],
        [ 0.0438, -0.0542],
        [ 0.0414, -0.0508],
        [ 0.0443, -0.0413],
        [ 0.0483, -0.0577],
        [ 0.0426, -0.0607],
        [ 0.0430, -0.0570],
        [ 0.0472, -0.0560],
        [ 0.0452, -0.0608],
        [ 0.0381, -0.0407],
        [ 0.0396, -0.0379],
        [ 0.0444, -0.0607],
        [ 0.0564, -0.0578]], grad_fn=<AddmmBackward>)
CrossEntropyLoss就会报错:indexError: Target 2 is out of bounds.
     1 
                    
                    heart4lor      2020-03-13 23:41:42 +08:00 
                    
                    看起来 output 和 label 的 shape 不一样啊? 
                 | 
            
     2 
                    
                    heart4lor      2020-03-13 23:47:47 +08:00 
                    
                    把最后一个全连接层 fc3 改成(100, 1)试试 
                 | 
            
     3 
                    
                    huntzhan      2020-03-14 00:08:06 +08:00     | 
            
     4 
                    
                    ipwx      2020-03-14 00:11:47 +08:00    self.fc3 = nn.Linear(100, 7) 
                 | 
            
     5 
                    
                    neosfung      2020-03-14 00:31:02 +08:00    self.fc3 = nn.Linear(100, 2) 输出是两个类别的 probability 
                但是你的 label 是有 7 种  | 
            
     6 
                    
                    longbye0      2020-03-14 01:38:54 +08:00    @heart4lor 别误导人。pytorch 的 CE 会自动做 softmax 和 nll 的,所以 label 的 shape 是(N,)。 
                如果你真要做 7 分类,也就是 label 是[0,1,2,3,4,5,6] 中的之一,#4#5 就是对的。 to 楼主,至少学个 ufldl 或者 ng 的机器学习再来调包吧,科班一点看个几章 prml 或者花书总要吧。  | 
            
     7 
                    
                    SlipStupig   OP  | 
            
     8 
                    
                    ipwx      2020-03-14 14:33:32 +08:00 
                    
                    顺便提一句,如果是二分类,self.fc3 = nn.Linear(100, 1) 就行了,然后用 nn. BCEWithLogitsLoss 
                 | 
            
     9 
                    
                    chizuo      2020-03-16 13:54:05 +08:00 
                    
                    @ipwx 二分类 loutdim=2 也可以的,配 crossentropy 和 outdim=1 与 BCEWithLogitsLoss 效果一样的。 
                 | 
            
     10 
                    
                    ipwx      2020-03-16 13:59:19 +08:00 
                    
                    @chizuo PyTorch 有特殊处理嘛? softmax 和 sigmoid 的自由度还是不一样的。二分类的 softmax 自由度是 2,sigmoid 自由度是 1。这种场景下我觉得自由度少 1 更好。 
                 | 
            
     11 
                    
                    chizuo      2020-03-16 14:17:02 +08:00 
                    
                    @ipwx 二分类这跟自由度有什么关系呢?? softmax 在二分类问题中与 sigmoid 效果“一模一样”。具体你把 p1, p2 分别用 sigmoid 和 softmax 写出来就明白了 
                 | 
            
     12 
                    
                    ipwx      2020-03-16 14:58:01 +08:00 
                    
                    
                 | 
            
     13 
                    
                    ipwx      2020-03-16 14:59:13 +08:00 
                    
                    第二种自由度大了以后,x_1 和 x_2 也许会发生 covariance shift,导致训练一直在向没有意义的方向走。虽然 momemtum 方法以及后续的 adaxxx 方法都在这方面有长足改进,还有 batch norm 之类的抵抗 covariance shift。但是能去掉一点风险就是一点风险,不是么? 
                 | 
            
     14 
                    
                    chizuo      2020-03-16 15:44:58 +08:00 
                    
                    
                 |