--A few years ago, I did a tutorial on MNIST CNN for TensorFlow and left it alone. This is a common story.
--This time, I modified that tutorial to build a model that learns face images.
--The number of classes (10) and the image size (28x28) are the same as in MNIST. The accuracy is around 80%. Well, this is just for fun, so I'm satisfied.
--The complete source is here.
--For the model itself, I referred to the following.
--Reference: TensorFlow mnist_deep.py
--`num_classes`, `img_rows`, and `img_cols` take their values from the configuration file, which adds support for changing the number of classes and the image size.
def model():
    """Build the MNIST-tutorial CNN graph, sized from the configuration.

    The number of output classes comes from ``len(CLASSES)`` and the input
    image size from ``IMG_ROWS`` / ``IMG_COLS``.

    Returns:
        A tuple ``(x, y_conv, keep_prob)``: the input placeholder holding
        flattened images, the unnormalized class logits, and the dropout
        keep-probability placeholder.
    """
    num_classes = len(CLASSES)
    img_rows, img_cols = IMG_ROWS, IMG_COLS
    # Each input row is one flattened image of img_rows * img_cols values.
    x = tf.compat.v1.placeholder(tf.float32, [None, img_rows * img_cols])
    with tf.name_scope('reshape'):
        # Restore the 2-D layout with a single channel for the conv layers.
        x_image = tf.reshape(x, [-1, img_rows, img_cols, 1])
    with tf.name_scope('conv1'):
        W_conv1 = weight_variable([5, 5, 1, 32])
        b_conv1 = bias_variable([32])
        h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
    with tf.name_scope('pool1'):
        h_pool1 = max_pool_2x2(h_conv1)
    with tf.name_scope('conv2'):
        W_conv2 = weight_variable([5, 5, 32, 64])
        b_conv2 = bias_variable([64])
        h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    with tf.name_scope('pool2'):
        h_pool2 = max_pool_2x2(h_conv2)
    with tf.name_scope('fc1'):
        # Derive the flattened size from the pooled tensor's static shape so
        # it follows whatever image size is configured (64 = conv2 channels).
        flat_size = int(h_pool2.shape[1]) * int(h_pool2.shape[2]) * 64
        W_fc1 = weight_variable([flat_size, 1024])
        b_fc1 = bias_variable([1024])
        h_pool2_flat = tf.reshape(h_pool2, [-1, flat_size])
        h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
    with tf.name_scope('dropout'):
        keep_prob = tf.compat.v1.placeholder(tf.float32)
        # tf.nn.dropout takes a drop rate, so convert from keep probability.
        h_fc1_drop = tf.nn.dropout(h_fc1, rate=1 - keep_prob)
    with tf.name_scope('fc2'):
        W_fc2 = weight_variable([1024, num_classes])
        b_fc2 = bias_variable([num_classes])
        y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
    # Fixed: the original returned the undefined name `keep_pro`.
    return x, y_conv, keep_prob
--The following helpers are also taken from the tutorial.
def conv2d(x, W):
    """Convolve `x` with filter `W` using stride 1 and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')
def max_pool_2x2(x):
    """Max-pool `x` with a 2x2 window and stride 2, using 'SAME' padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool2d(x, ksize=window, strides=step, padding='SAME')
def weight_variable(shape):
    """Create a variable of `shape` initialized from a truncated normal (stddev 0.1)."""
    return tf.Variable(tf.random.truncated_normal(shape, stddev=0.1))
def bias_variable(shape):
    """Create a variable of `shape` with every element initialized to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))
--The dataset created previously can be loaded. --The batch size and the number of epochs can also be changed.
def train(datasets, batch_size=128, epochs=12):
    """Train the CNN on `datasets`, reporting accuracy and saving each epoch.

    Args:
        datasets: Object with ``train`` and ``test`` members. The code reads
            their ``images`` / ``labels`` and uses ``train.next_batch`` and
            ``train.epochs_completed``.
        batch_size: Number of samples requested per training step.
        epochs: Training stops once this many epochs have completed.
    """
    x, y_conv, keep_prob = model()
    # The label width must match the logits from model(), which sizes its
    # output layer from len(CLASSES); a hard-coded 10 breaks other configs.
    y_ = tf.compat.v1.placeholder(tf.float32, [None, len(CLASSES)])
    with tf.name_scope('loss'):
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_, logits=y_conv)
    # Average the per-sample loss over the batch.
    cross_entropy = tf.reduce_mean(cross_entropy)
    with tf.name_scope('adam_optimizer'):
        train_step = tf.compat.v1.train.AdamOptimizer(1e-4).minimize(cross_entropy)
    with tf.name_scope('accuracy'):
        # A sample counts as correct when the predicted and labeled argmax agree.
        correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
        correct_prediction = tf.cast(correct_prediction, tf.float32)
    accuracy = tf.reduce_mean(correct_prediction)
--This is almost the same as the tutorial, but saving of the model was added. --The save location is specified in the configuration file.
    # Make sure the directory that will hold MODEL_FILE exists before saving.
    model_dir = os.path.dirname(os.path.abspath(MODEL_FILE))
    os.makedirs(model_dir, exist_ok=True)
    saver = tf.compat.v1.train.Saver()
--I modified the tutorial so that the accuracy is displayed and the model is saved after every epoch.
    # Run the training loop; report accuracy and save once per completed epoch.
    with tf.compat.v1.Session() as sess:
        sess.run(tf.compat.v1.global_variables_initializer())
        print('epoch, train accuracy, test accuracy')
        report_at = 1
        while datasets.train.epochs_completed < epochs:
            batch_images, batch_labels = datasets.train.next_batch(batch_size)
            # Train with dropout active (keep probability 0.5).
            sess.run(
                train_step,
                feed_dict={x: batch_images, y_: batch_labels, keep_prob: 0.5},
            )
            finished = datasets.train.epochs_completed
            if finished != report_at:
                continue
            # Evaluate with dropout disabled (keep probability 1.0).
            train_feed = {x: datasets.train.images, y_: datasets.train.labels, keep_prob: 1.0}
            test_feed = {x: datasets.test.images, y_: datasets.test.labels, keep_prob: 1.0}
            train_accuracy = sess.run(accuracy, feed_dict=train_feed)
            test_accuracy = sess.run(accuracy, feed_dict=test_feed)
            print('{:d}, {:.4f}, {:.4f}'.format(finished, train_accuracy, test_accuracy))
            saver.save(sess, MODEL_FILE)
            report_at = finished + 1
--Training is performed by specifying the --train option.
--The batch size is 128 and the number of epochs is 120.
$ python face_deep.py --train
epoch, train accuracy, test accuracy
1, 0.4580, 0.4090
2, 0.5593, 0.4880
(omitted)
119, 1.0000, 0.8110
120, 1.0000, 0.792

--The images must be passed as a NumPy array.
--The type of the result can be switched with `dtype`.
def predict(images, dtype=None):
    """Run the saved model on `images` and return per-class percentages.

    The softmax output is multiplied by 100 and converted to ``uint8``.
    The form of the result depends on `dtype`: by default a NumPy array,
    ``'int'`` gives nested lists of Python ints, and ``'argmax'`` gives a
    list holding the index of the highest score for each row.
    """
    tf.compat.v1.reset_default_graph()
    x, y_conv, keep_prob = model()
    with tf.compat.v1.Session() as sess:
        sess.run(tf.compat.v1.global_variables_initializer())
        # Load the trained weights from MODEL_FILE over the fresh initial values.
        restorer = tf.compat.v1.train.Saver()
        restorer.restore(sess, MODEL_FILE)
        probabilities = tf.nn.softmax(y_conv)
        # Inference runs with dropout disabled (keep probability 1.0).
        scores = sess.run(probabilities, feed_dict={x: images, keep_prob: 1.0})
    percents = (scores * 100).astype(np.uint8)
    if dtype == 'int':
        return [[int(value) for value in row] for row in percents]
    if dtype == 'argmax':
        return [np.argmax(row) for row in percents]
    return percents
--Inference is performed with no options.
--This is only at the level of an operation check. I plan to use it separately from a web application.
--The following is the inference result for the first 10 entries of the dataset's test images. The first few hundred are labeled 0, so they seem to match.
$ python face_deep.py
(omitted)
[[100   0   0   0   0   0   0   0   0   0]
 [ 99   0   0   0   0   0   0   0   0   0]
 [ 99   0   0   0   0   0   0   0   0   0]
 [  0  99   0   0   0   0   0   0   0   0]
 [ 99   0   0   0   0   0   0   0   0   0]
 [ 97   0   0   0   0   0   0   0   0   1]
 [ 99   0   0   0   0   0   0   0   0   0]
 [  0  99   0   0   0   0   0   0   0   0]
 [ 99   0   0   0   0   0   0   0   0   0]
 [ 36  63   0   0   0   0   0   0   0   0]]
--Modified the tutorial of MNIST CNN of TensorFlow to learn and infer facial images.
--Since this is just for study, it was enough to be able to run training and inference.
--Next time, let's try inference from the Flask web application.
Recommended Posts