7.8
Added a process that automatically detects the machine with gethostname() and uses the CPU in the iMac environment and the GPU in the Google Colab environment.
import socket
host = socket.gethostname()
#Get the hostname
# iMac          : xxxxxxxx
# Lenovo        : yyyyyyyy
# Google Colab  : random
if host == 'xxxxxxxx':
    gpu_en = 0
elif host == 'yyyyyyyy':
    gpu_en = 0
else:
    gpu_en = 1
#Import CUDA-related modules only when using the GPU
if gpu_en == 1:
    from chainer import cuda, Variable
#Move the model to the GPU only when using the GPU
if gpu_en == 1:
    model.to_gpu()
#In mini_batch() / mini_batch_for_test(): return GPU Variables when using the GPU, NumPy arrays otherwise
    if gpu_en == 1:
        return (Variable(cuda.to_gpu(np.array(mini_batch_data, dtype=np.float32))),
                Variable(cuda.to_gpu(np.array(mini_batch_move, dtype=np.int32))))
    elif gpu_en == 0:
        return np.array(mini_batch_data, dtype=np.float32), np.array(mini_batch_move, dtype=np.int32)
Since pickle creation on Google Colab is slow, I created the pickle file on the iMac instead. When I then tried to load that pickle file in Colab and train, I got an error saying the protocol is not supported. The cause is as follows. pickle supports protocol 5 only on Python 3.8 and above; the iMac runs Python 3.8.2 while Colab runs Python 3.6.9. pickle.dump(positions_train, f, pickle.HIGHEST_PROTOCOL) on the iMac therefore creates a protocol 5 pickle file, and when Colab tries to read it for training it fails with the unsupported-protocol error. If you remove pickle.HIGHEST_PROTOCOL, the file is created with the default protocol 4, which Colab can load.
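As a minimal sketch of the workaround (data and example.pickle are stand-ins for positions_train and the actual pickle path), the protocol can also be pinned explicitly instead of relying on each interpreter's default:

import pickle

print(pickle.HIGHEST_PROTOCOL)  # 5 on Python 3.8, 4 on Python 3.6
print(pickle.DEFAULT_PROTOCOL)  # 4 on Python 3.8, 3 on Python 3.6

data = list(range(10))  # stand-in for positions_train
with open('example.pickle', 'wb') as f:
    pickle.dump(data, f, protocol=4)  # protocol 4 is readable from Python 3.4+, so Colab (3.6.9) can load it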
train_policy.py
python-dlshogi\train_policy.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#Environmental setting
#-----------------------------
import socket
host = socket.gethostname()
#Get the hostname
# iMac          : xxxxxxxx
# Lenovo        : yyyyyyyy
# Google Colab  : random
if host == 'xxxxxxxx':
    gpu_en = 0
elif host == 'yyyyyyyy':
    gpu_en = 0
else:
    gpu_en = 1
#Other notes
#If pickle creation in Google Colab is slow and you want to create the pickle locally and load it in Colab,
#remove the pickle protocol option (i.e. use the default protocol) from the
#  pickle.dump(positions_train, f, pickle.HIGHEST_PROTOCOL) line,
#because Colab's Python is not 3.8 and does not support the highest protocol (5).
#-----------------------------
import numpy as np
import chainer
if gpu_en == 1:
    from chainer import cuda, Variable
from chainer import optimizers, serializers
import chainer.functions as F
from pydlshogi.common import *
from pydlshogi.network.policy import PolicyNetwork
from pydlshogi.features import *
from pydlshogi.read_kifu import *
import argparse
import random
import pickle
import os
import re
import logging
parser = argparse.ArgumentParser()
parser.add_argument('kifulist_train', type=str, help='train kifu list')
parser.add_argument('kifulist_test', type=str, help='test kifu list')
parser.add_argument('--batchsize', '-b', type=int, default=32, help='Number of positions in each mini-batch')
parser.add_argument('--test_batchsize', type=int, default=512, help='Number of positions in each test mini-batch')
parser.add_argument('--epoch', '-e', type=int, default=1, help='Number of epoch times')
parser.add_argument('--model', type=str, default='model/model_policy', help='model file name')
parser.add_argument('--state', type=str, default='model/state_policy', help='state file name')
parser.add_argument('--initmodel', '-m', default='', help='Initialize the model from given file')
parser.add_argument('--resume', '-r', default='', help='Resume the optimization from snapshot')
parser.add_argument('--log', default=None, help='log file path')
parser.add_argument('--lr', default=0.01, type=float, help='learning rate')
parser.add_argument('--eval_interval', '-i', default=1000, type=int, help='eval interval')
args = parser.parse_args()
logging.basicConfig(format='%(asctime)s\t%(levelname)s\t%(message)s',
                    datefmt='%Y/%m/%d %H:%M:%S', filename=args.log, level=logging.DEBUG)
model = PolicyNetwork() #Custom policy network model defined in an external module (pydlshogi.network.policy)
if gpu_en == 1:
    model.to_gpu()
optimizer = optimizers.SGD(lr=args.lr) #Instantiate an SGD class
# optimizer = optimizers.MomentumSGD(lr=args.lr, momentum=0.9) #Instantiate the MomentumSGD class
optimizer.setup(model)
# Init/Resume
if args.initmodel:
    logging.info('Load model from {}'.format(args.initmodel))
    serializers.load_npz(args.initmodel, model)
if args.resume:
    logging.info('Load optimizer state from {}'.format(args.resume))
    serializers.load_npz(args.resume, optimizer)
logging.info('read kifu start')
#If there is a saved pickle file, load the pickle file
# train data
#Strip the extension from the training kifu list file name (the list file itself, not its contents), replace it with .pickle, and store the result in a variable.
train_pickle_filename = re.sub(r'\..*?$', '', args.kifulist_train) + '.pickle'
if os.path.exists(train_pickle_filename):
    with open(train_pickle_filename, 'rb') as f: # the pickle file holds the output of read_kifu: [([piece_bb x15], [occupied x2], [pieces_in_hand x2], move_label, win), (same tuple), ... for every position of every game]
        positions_train = pickle.load(f)
    logging.info('load train pickle')
else:
    positions_train = read_kifu(args.kifulist_train)
# test data
test_pickle_filename = re.sub(r'\..*?$', '', args.kifulist_test) + '.pickle'
if os.path.exists(test_pickle_filename):
    with open(test_pickle_filename, 'rb') as f:
        positions_test = pickle.load(f)
    logging.info('load test pickle')
else:
    positions_test = read_kifu(args.kifulist_test)
#If no saved pickle exists yet, dump the data read by read_kifu above into a pickle file and save it.
if not os.path.exists(train_pickle_filename):
    with open(train_pickle_filename, 'wb') as f: #Open a new pickle file for writing
        pickle.dump(positions_train, f, pickle.HIGHEST_PROTOCOL) #Write the data into the opened pickle file
    logging.info('save train pickle')
if not os.path.exists(test_pickle_filename):
    with open(test_pickle_filename, 'wb') as f:
        pickle.dump(positions_test, f, pickle.HIGHEST_PROTOCOL)
    logging.info('save test pickle')
logging.info('read kifu end')
logging.info('train position num = {}'.format(len(positions_train))) #Outputs the number of outermost elements of positions = the number of positions
logging.info('test position num = {}'.format(len(positions_test)))
# mini batch
def mini_batch(positions, i, batchsize):
    mini_batch_data = []
    mini_batch_move = []
    for b in range(batchsize):
        features, move, win = make_features(positions[i + b]) #Loop over the outermost elements of positions, i.e. over board positions
        mini_batch_data.append(features) #Append the features of each position = policy network input data
        mini_batch_move.append(move)    #Append the move of each position = policy network teacher data (labels)
    if gpu_en == 1:
        return (Variable(cuda.to_gpu(np.array(mini_batch_data, dtype=np.float32))),
                Variable(cuda.to_gpu(np.array(mini_batch_move, dtype=np.int32))))
    elif gpu_en == 0:
        return np.array(mini_batch_data, dtype=np.float32), np.array(mini_batch_move, dtype=np.int32)
def mini_batch_for_test(positions, batchsize):
    mini_batch_data = []
    mini_batch_move = []
    for b in range(batchsize):
        features, move, win = make_features(random.choice(positions)) #Randomly choose from positions
        mini_batch_data.append(features)
        mini_batch_move.append(move)
    if gpu_en == 1:
        return (Variable(cuda.to_gpu(np.array(mini_batch_data, dtype=np.float32))),
                Variable(cuda.to_gpu(np.array(mini_batch_move, dtype=np.int32))))
    elif gpu_en == 0:
        return np.array(mini_batch_data, dtype=np.float32), np.array(mini_batch_move, dtype=np.int32)
#↑ Preparation for learning
#↓ Learning loop
logging.info('start training')
itr = 0
sum_loss = 0
for e in range(args.epoch):
    positions_train_shuffled = random.sample(positions_train, len(positions_train))
    # positions_train is [([piece_bb x15], [occupied x2], [pieces_in_hand x2], move_label, win), (same tuple), ... for every position of every game]
    # random.sample(a, b) returns b elements chosen at random from a
    itr_epoch = 0
    sum_loss_epoch = 0
    for i in range(0, len(positions_train_shuffled) - args.batchsize, args.batchsize):
        #Forward propagation
        x, t = mini_batch(positions_train_shuffled, i, args.batchsize) #x: board positions = input, t: moves = teacher data (labels)
        y = model(x) #model is the custom PolicyNetwork defined in an external module
        model.cleargrads() #Gradient initialization
        #Loss calculation
        loss = F.softmax_cross_entropy(y, t)
        #Inverse error propagation
        loss.backward() #Calculate the gradient
        #Update parameters using gradient
        optimizer.update()
        itr += 1
        sum_loss += loss.data
        itr_epoch += 1
        sum_loss_epoch += loss.data
        #Evaluate at regular intervals (every eval_interval iterations)
        # print train loss and test accuracy
        if optimizer.t % args.eval_interval == 0: # a % b returns the remainder of a divided by b. optimizer.t is the update step count, incremented by update().
            x, t = mini_batch_for_test(positions_test, args.test_batchsize) # x = board positions, t = moves
            y = model(x)
            logging.info('epoch = {}, iteration = {}, loss = {}, accuracy = {}'.format(
                        optimizer.epoch + 1, optimizer.t, sum_loss / itr, F.accuracy(y,t).data))
            itr = 0
            sum_loss = 0
    # validate test data
    logging.info('validate test data')
    itr_test = 0
    sum_test_accuracy = 0
    for i in range(0, len(positions_test) - args.batchsize, args.batchsize): # positions_test holds the result of read_kifu on kifulist_test
        x, t = mini_batch(positions_test, i, args.batchsize) #Exactly the same as during training
        y = model(x)
        itr_test += 1
        sum_test_accuracy += F.accuracy(y, t).data
    logging.info('epoch = {}, iteration = {}, train loss avr = {}, test accuracy = {}'.format(
                  optimizer.epoch + 1, optimizer.t, sum_loss_epoch / itr_epoch, sum_test_accuracy / itr_test))
    #When one epoch finishes, notify the optimizer so it can process the next epoch
    optimizer.new_epoch()
#Save model and optimizer state after learning all epochs
logging.info('save the model')
serializers.save_npz(args.model, model)
logging.info('save the optimizer')
serializers.save_npz(args.state, optimizer)
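For reference, a hypothetical invocation of the script (the kifu list file names are examples; the lists and the model/ directory used by the default --model/--state paths must exist beforehand):

python train_policy.py kifulist_train.txt kifulist_test.txt --batchsize 32 --epoch 1 --lr 0.01 --eval_interval 1000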