from preprocessed_mnist import load_dataset
X_train, y_train, X_val, y_val, X_test, y_test = load_dataset()
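# Optional sanity check. The exact preprocessing done by preprocessed_mnist is
# an assumption here; typically the images come back as float32 values in [0, 1].
print(X_train.dtype, X_train.min(), X_train.max())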
print(X_train.shape, y_train.shape)
import matplotlib.pyplot as plt
%matplotlib inline
plt.imshow(X_train[0], cmap="Greys");
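# Print the label that goes with the digit displayed above.
print("label:", y_train[0])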
print(X_test.shape, y_test.shape)
X_train_flatten = X_train.reshape(X_train.shape[0], -1)
X_test_flatten = X_test.reshape(X_test.shape[0], -1)
print(X_train_flatten.shape, X_test_flatten.shape)
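# If load_dataset returns raw 0-255 pixel values (an assumption; it may already
# scale them), normalize so the network sees inputs in [0, 1]:
if X_train_flatten.max() > 1.0:
    X_train_flatten = X_train_flatten / 255.0
    X_test_flatten = X_test_flatten / 255.0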
import tensorflow as tf
(m, n_x) = X_train_flatten.shape
n_y = 10 # 10 classes
# Create placeholders
X = tf.placeholder(tf.float32, shape=(None, n_x))
Y = tf.placeholder(tf.float32, shape=(None, n_y))
# Initialize parameters
W1 = tf.get_variable("W1", [n_x, 128], initializer=tf.contrib.layers.xavier_initializer())
b1 = tf.get_variable("b1", [128], initializer=tf.zeros_initializer())
W2 = tf.get_variable("W2", [128, 128], initializer=tf.contrib.layers.xavier_initializer())
b2 = tf.get_variable("b2", [128], initializer=tf.zeros_initializer())
W3 = tf.get_variable("W3", [128, n_y], initializer=tf.contrib.layers.xavier_initializer())
b3 = tf.get_variable("b3", [n_y], initializer=tf.zeros_initializer())
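# Optional sanity check: count the trainable parameters of the 784-128-128-10 network.
total_params = sum(v.shape.num_elements() for v in tf.trainable_variables())
print("trainable parameters:", total_params)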
Z1 = tf.matmul(X, W1) + b1
A1 = tf.nn.relu(Z1)
Z2 = tf.matmul(A1, W2) + b2
A2 = tf.nn.relu(Z2)
Z3 = tf.matmul(A2, W3) + b3
# Z3 is kept as raw logits: softmax_cross_entropy_with_logits applies the softmax
# internally, so no sigmoid/softmax activation is applied to the output layer here.
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=Z3))
learning_rate = 0.0001
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
init = tf.global_variables_initializer()
# convert labels to one-hot encoding
import numpy as np
y_train = np.eye(n_y)[y_train]
y_test = np.eye(n_y)[y_test]
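# Sanity check: every one-hot row should sum to exactly 1.
# (tf.keras.utils.to_categorical would be an equivalent, if tf.keras is available in this TF 1.x install.)
print(y_train.shape, y_train.sum(axis=1).min(), y_train.sum(axis=1).max())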
num_epochs = 150
batch_size = 500
with tf.Session() as sess:
    sess.run(init)
    train_costs = []
    test_costs = []
    for epoch in range(num_epochs):
        for i in range(0, m, batch_size):
            sess.run(optimizer, feed_dict={X: X_train_flatten[i:i+batch_size], Y: y_train[i:i+batch_size]})
        train_costs.append(sess.run(cost, feed_dict={X: X_train_flatten, Y: y_train}))
        test_costs.append(sess.run(cost, feed_dict={X: X_test_flatten, Y: y_test}))
        if epoch % 10 == 9:
            print("Cost after " + str(epoch+1) + " epochs - train: " + str(train_costs[-1]) + ", test: " + str(test_costs[-1]))
    epochs = list(range(1, num_epochs + 1))
    plt.plot(epochs, train_costs, label='Train')
    plt.plot(epochs, test_costs, label='Test')
    plt.ylabel('cost')
    plt.xlabel('epoch')
    plt.legend()
    plt.show()
    # Calculate the correct predictions
    predict_op = tf.argmax(Z3, 1)
    correct_prediction = tf.equal(predict_op, tf.argmax(Y, 1))
    # Calculate accuracy on the train and test sets (still inside the session,
    # so the default session is available for eval)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    print("Train Accuracy:", accuracy.eval({X: X_train_flatten, Y: y_train}))
    print("Test Accuracy:", accuracy.eval({X: X_test_flatten, Y: y_test}))