# A feedforward neural network with a single hidden layer
#
# Tested on python3, tensorflow 0.10.0rc0 on OS X 10.11
# 09/2016 Yuttapong Thawornwattana

import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Download MNIST dataset
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)


# helper functions
def weight_variable(shape):
    """Create a weight variable with appropriate initialization."""
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)


def bias_variable(shape):
    """Create a bias variable with appropriate initialization."""
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)


# define the model
input_dim = 784
output_dim = 10
num_hidden = 500

# inputs
x = tf.placeholder(tf.float32, shape=[None, input_dim])
y_true = tf.placeholder(tf.float32, shape=[None, output_dim])

# hidden layer
w1 = weight_variable([input_dim, num_hidden])
b1 = bias_variable([num_hidden])
h = tf.nn.relu(tf.matmul(x, w1) + b1)

# output layer
w2 = weight_variable([num_hidden, output_dim])
b2 = bias_variable([output_dim])
logits = tf.matmul(h, w2) + b2

# loss function, averaged over all examples in the batch
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits, y_true))

# optimize using gradient descent, with a learning rate of 0.5
optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

# calculate accuracy (as a percentage)
y = tf.nn.softmax(logits)
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_true, 1))
accuracy = tf.mul(tf.reduce_mean(tf.cast(correct_prediction, tf.float32)),
                  100.0)

# training
num_steps = 1000
batch_size = 100
loss = np.zeros(num_steps)

with tf.Session() as sess:
    # initialize variables
    sess.run(tf.initialize_all_variables())

    for i in range(num_steps):
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        _, loss[i] = sess.run([optimizer, cross_entropy],
                              feed_dict={x: batch_x, y_true: batch_y})

    # accuracy on test data (accuracy.eval requires the default session,
    # so this stays inside the `with` block)
    print("accuracy on test data: ", accuracy.eval(
        feed_dict={x: mnist.test.images, y_true: mnist.test.labels}))

# plot
import os
import matplotlib
matplotlib.use('Agg')  # tell matplotlib not to use the default display (X windows backend)
import matplotlib.pyplot as plt

# training loss over iterations
os.makedirs('fig', exist_ok=True)  # ensure the output directory exists before saving
plt.figure(figsize=(5, 4))
plt.plot(np.arange(num_steps), loss, 'k-')
plt.xlabel('Training step (batch of ' + str(batch_size) + ')')
plt.ylabel('Loss')
plt.tight_layout()
plt.savefig('fig/mnist-2-train-error.pdf')
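
# Note: the script above targets tensorflow 0.10.0rc0 as stated in the header.
# If you run it on TensorFlow >= 1.0, a few of the APIs used here were renamed
# or changed; the equivalents below are the standard replacements (untested
# against this exact script):
#
#   tf.mul(a, b)
#       -> tf.multiply(a, b)
#   tf.initialize_all_variables()
#       -> tf.global_variables_initializer()
#   tf.nn.softmax_cross_entropy_with_logits(logits, y_true)
#       -> tf.nn.softmax_cross_entropy_with_logits(labels=y_true, logits=logits)
#          (TF 1.0 made the labels/logits arguments keyword-only)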