将棋でディープラーニングするその19(報酬に応じた勾配その2)

※この記事の内容には誤りがありますので、こちらの日記を参照してください。

前回の日記では、Chainerでミニバッチの要素を1件ずつ処理することで報酬に応じた勾配の計算を実装した。しかし、softmax_cross_entropyのbackwardの処理で、誤差逆伝播の後続に伝えるデルタの値に重みを掛けることでも実装できることがわかった。

Chainerのリポジトリからsoftmax_cross_entropy.pyをコピーして、softmax_cross_entropy_with_weight.pyとしてプロジェクトに追加し、以下の通り編集する。

--- chainer/chainer/functions/loss/softmax_cross_entropy.py
+++ softmax_cross_entropy_with_weight.py
@@ -15,14 +15,15 @@
     return numpy.broadcast_arrays(array, dummy)[0]
 
 
-class SoftmaxCrossEntropy(function.Function):
+class SoftmaxCrossEntropyWithWeight(function.Function):
 
     """Softmax activation followed by a cross entropy loss."""
 
     normalize = True
 
-    def __init__(self, use_cudnn=True, normalize=True, cache_score=True,
+    def __init__(self, weight, use_cudnn=True, normalize=True, cache_score=True,
                  class_weight=None, ignore_label=-1, reduce='mean'):
+        self.weight = weight
         self.use_cudnn = use_cudnn
         self.normalize = normalize
         self.cache_score = cache_score
@@ -172,10 +173,13 @@
                 gx *= _broadcast_to(c, gx.shape)
             gx *= (t != self.ignore_label).reshape((len(t), 1, -1))
             gx = gx.reshape(y.shape)
+
         if self.reduce == 'mean':
             gx *= gloss * self._coeff
         else:
             gx *= gloss[:, None]
+        # weight
+        gx *= self.weight.reshape((len(y), 1))
         return gx, None
 
     def backward_gpu(self, inputs, grad_outputs):
@@ -195,33 +199,33 @@
 
         if self.class_weight is None:
             gx = cuda.elementwise(
-                'T y, S t, T coeff, S n_channel, S n_unit, S ignore_label',
+                'T y, T weight, S t, T coeff, S n_channel, S n_unit, S ignore_label',
                 'T gx',
                 '''
                     const int c = (i / n_unit % n_channel);
-                    gx = t == ignore_label ? 0 : coeff * (y - (c == t));
+                    gx = t == ignore_label ? 0 : coeff * (y - (c == t)) * weight;
                 ''',
                 'softmax_crossent_bwd')(
-                    y, cupy.expand_dims(t, 1), coeff, x.shape[1],
+                    y, self.weight.reshape((len(y), 1)), cupy.expand_dims(t, 1), coeff, x.shape[1],
                     n_unit, self.ignore_label)
         else:
             gx = cuda.elementwise(
-                'T y, raw T w, S t, T coeff, S n_channel, S n_unit, '
+                'T y, T weight, raw T w, S t, T coeff, S n_channel, S n_unit, '
                 'S ignore_label',
                 'T gx',
                 '''
                     const int c = (i / n_unit % n_channel);
-                    gx = t == ignore_label ? 0 : coeff * (y - (c == t)) * w[t];
+                    gx = t == ignore_label ? 0 : coeff * (y - (c == t)) * w[t] * weight;
                 ''',
                 'softmax_crossent_weight_bwd')(
-                    y, self.class_weight, cupy.expand_dims(t, 1), coeff,
+                    y, self.weight.reshape((len(y), 1)), self.class_weight, cupy.expand_dims(t, 1), coeff,
                     x.shape[1], n_unit, self.ignore_label)
 
         return gx, None
 
 
-def softmax_cross_entropy(
-        x, t, use_cudnn=True, normalize=True, cache_score=True,
+def softmax_cross_entropy_with_weight(
+        x, t, weight, use_cudnn=True, normalize=True, cache_score=True,
         class_weight=None, ignore_label=-1, reduce='mean'):
     """Computes cross entropy loss for pre-softmax activations.
 
@@ -271,6 +275,6 @@
 
     """
 
-    return SoftmaxCrossEntropy(
-        use_cudnn, normalize, cache_score, class_weight, ignore_label, reduce)(
+    return SoftmaxCrossEntropyWithWeight(
+        weight, use_cudnn, normalize, cache_score, class_weight, ignore_label, reduce)(
             x, t)

softmax_cross_entropyの引数にweightを追加して、backward_cpuおよびbackward_gpuのgxの計算でweightの値をミニバッチの各要素に掛けるようにしている。
こうすることで逆伝播の後続処理でも勾配に重みが掛けられる。

3層パーセプトロンに適用すると以下のようになる。

import numpy as np
import chainer.functions as F
import chainer.links as L
from chainer import Chain, Function, Variable
from chainer import cuda
from chainer import optimizers

from softmax_cross_entropy_with_weight import *

# Hard-coded initial weight matrices for the example network.
# W1 (3 rows x 2 columns) is passed as initialW to L.Linear(2, 3);
# W2 (2 rows x 3 columns) is passed as initialW to L.Linear(3, 2).
W1 = [
    [1.21082544, -0.42751756],
    [1.35623264, -0.1971387],
    [-0.77883673, 0.28367677],
]
W2 = [
    [0.08621028, -0.19540818, 0.78203094],
    [0.30133799, 1.3698988, -0.01031571],
]

class MyChain(Chain):
    """Small fully connected network: Linear(2, 3) -> ReLU -> Linear(3, 2).

    Both linear links are initialised from the module-level weight
    matrices W1 and W2 instead of random values.
    """

    def __init__(self):
        hidden_link = L.Linear(2, 3, initialW=np.array(W1))
        output_link = L.Linear(3, 2, initialW=np.array(W2))
        super(MyChain, self).__init__(l1=hidden_link, l2=output_link)

    def __call__(self, x):
        """Return the output of the second linear link for input ``x``.

        No softmax is applied here; the result is handed to the loss
        function as-is.
        """
        pre_activation = self.l1(x)
        hidden = F.relu(pre_activation)
        return self.l2(hidden)

# Build the network and move its parameters to the GPU.
model = MyChain()
model.to_gpu()

# Plain SGD with the optimizer's default arguments.
optimizer = optimizers.SGD()
optimizer.setup(model)

# Print the initial parameters. The arrays are copied back to host memory
# first, the same way the post-update dump at the end of the script does.
for path, param in model.namedparams():
    print(path)
    print(cuda.to_cpu(param.data))
print()

# One mini-batch of three samples:
#   x_data - input features (two per sample)
#   t_data - target class label per sample
#   z_data - per-sample weight handed to the loss function, which multiplies
#            each sample's gradient by it in the backward pass
x_data = [[0.1, 0.4],
          [0.2, 0.5],
          [0.3, 0.6]]
t_data = [1, 0, 0]
z_data = [1.0, 0.5, 0.5]

# Inputs and labels are wrapped in Variables; the weight stays a raw GPU
# array because the loss function calls .reshape() on it directly.
x = Variable(cuda.to_gpu(np.array(x_data, dtype=np.float32)))
t = Variable(cuda.to_gpu(np.array(t_data, dtype=np.int32)))
z = cuda.to_gpu(np.array(z_data, dtype=np.float32))

# Forward pass.
y = model(x)

# Clear any previous gradients, then backpropagate the weighted loss.
model.cleargrads()
loss = softmax_cross_entropy_with_weight(y, t, z)
loss.backward()

# Apply one SGD step using the gradients computed above.
optimizer.update()

# Print the updated parameters together with the gradients that were used.
for path, param in model.namedparams():
    print(path)
    print(cuda.to_cpu(param.data))
    print(cuda.to_cpu(param.grad))