### Reminder: The general methodology to build a Neural Network is to:

1. Define the neural network structure (# of input units, # of hidden units, etc.).
2. Initialize the model's parameters
3. Loop:
• Implement forward propagation
• Compute loss
• Implement backward propagation to get the gradients

You often build helper functions to compute steps 1-3 and then merge them into one function we call nn_model().

### A Single Hidden Layer Neural Network Source Code

Environment

• Language: Python 3+
• Extra Library: Anaconda
• IDE: PyCharm 2017.2
• OS: Mac OS 10.13
• Reference: Deeplearning.ai Courses at (study.163.com)
• Notes: There are some very small errors (less than 0.0001) when running the compute_cost function, and the console emits three warnings (I have marked them in the source code). They are caused by predicted values that approach zero very closely; the solution is to use a non-standard normal distribution when initializing the weights in your code.

Directory Structure

deeplearning_ai_week3
|- Main.py
|- planar_utils.py
|- testCases.py


Source Code

# Main.py source code

import numpy as np
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
import sklearn
import sklearn.datasets
import sklearn.linear_model

from deeplearning_ai_week3.planar_utils import plot_decision_boundary, sigmoid, load_planar_dataset
from deeplearning_ai_week3.testCases import *

def layer_sizes(X, Y):
    """
    Derive the network's layer sizes from the data matrices.

    Arguments:
    X -- input dataset of shape (input size, number of examples)
    Y -- labels of shape (output size, number of examples)

    Returns:
    n_x -- size of the input layer
    n_h -- size of the hidden layer (hard-coded to 4 for this exercise)
    n_y -- size of the output layer
    """
    # Bug fix: X.shape / Y.shape are tuples; the layer size is the row
    # count (number of features / outputs), i.e. shape[0].
    n_x = X.shape[0]
    n_h = 4
    n_y = Y.shape[0]

    return n_x, n_h, n_y

# Initialize the model's parameters
def initialize_parameters(n_x, n_h, n_y):
    """
    Initialize the model's parameters: small random weights, zero biases.

    Arguments:
    n_x -- size of the input layer
    n_h -- size of the hidden layer
    n_y -- size of the output layer

    Returns:
    parameters -- dict with keys "W1" (n_h, n_x), "b1" (n_h, 1),
                  "W2" (n_y, n_h), "b2" (n_y, 1)
    """
    # Fixed seed so the course's reference results are reproducible.
    np.random.seed(2)

    # Weights are scaled by 0.01 to keep tanh/sigmoid in their linear regime
    # at the start of training; biases start at zero.
    parameters = {
        "W1": np.random.randn(n_h, n_x) * 0.01,
        "b1": np.zeros((n_h, 1)),
        "W2": np.random.randn(n_y, n_h) * 0.01,
        "b2": np.zeros((n_y, 1)),
    }

    # Sanity-check every shape in one pass.
    expected_shapes = {
        "W1": (n_h, n_x),
        "b1": (n_h, 1),
        "W2": (n_y, n_h),
        "b2": (n_y, 1),
    }
    for name, shape in expected_shapes.items():
        assert parameters[name].shape == shape

    return parameters

def compute_cost(A2, Y, parameters):
    """
    Compute the cross-entropy cost J = -(1/m) * sum(Y*log(A2) + (1-Y)*log(1-A2)).

    Arguments:
    A2 -- sigmoid output of the second activation, shape (1, number of examples)
    Y -- "true" labels, shape (1, number of examples)
    parameters -- unused; kept to preserve the original call signature

    Returns:
    cost -- cross-entropy cost as a plain Python float
    """
    # Bug fix: Y.shape is a tuple; the number of examples is Y.shape[1].
    m = Y.shape[1]

    # NOTE(review): "RuntimeWarning: divide by zero encountered in log" fires
    # when A2 saturates at exactly 0 or 1. Clipping A2 into
    # (1e-15, 1 - 1e-15) before the logs would silence it; left as-is to
    # match the course's reference behavior.
    logprobs = np.multiply(np.log(A2), Y) + np.multiply((1 - Y), np.log(1 - A2))
    cost = - np.sum(logprobs) / m

    # Squeeze away singleton dimensions and force a builtin float
    # (np.squeeze alone returns a 0-d numpy scalar).
    cost = float(np.squeeze(cost))

    assert isinstance(cost, float)

    return cost

def forward_propagation(X, parameters):
    """
    Run one forward pass: Z1 = W1 X + b1, A1 = tanh(Z1),
    Z2 = W2 A1 + b2, A2 = sigmoid(Z2).

    Arguments:
    X -- input data of shape (n_x, number of examples)
    parameters -- dict with "W1", "b1", "W2", "b2" (see initialize_parameters)

    Returns:
    A2 -- output-layer activation, shape (1, number of examples)
    cache -- dict of intermediates {"Z1", "A1", "Z2", "A2"} for backprop
    """
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]

    Z1 = np.dot(W1, X) + b1
    A1 = np.tanh(Z1)
    Z2 = np.dot(W2, A1) + b2
    # sigmoid comes from planar_utils; it can overflow in exp for large |Z2|
    # (one of the warnings noted above).
    A2 = sigmoid(Z2)

    # Bug fix: compare against (1, number of examples), not the whole
    # X.shape tuple.
    assert (A2.shape == (1, X.shape[1]))

    cache = {"Z1": Z1,
             "A1": A1,
             "Z2": Z2,
             "A2": A2}

    return A2, cache

def backward_propagation(parameters, cache, X, Y):
    """
    Compute the gradients of the cost with respect to all parameters.

    Arguments:
    parameters -- dict with "W1", "b1", "W2", "b2"
    cache -- dict with "Z1", "A1", "Z2", "A2" from forward_propagation
    X -- input data of shape (n_x, number of examples)
    Y -- labels of shape (1, number of examples)

    Returns:
    grads -- dict with gradients "dW1", "db1", "dW2", "db2"
    """
    # Bug fix: number of examples is X.shape[1], not the shape tuple.
    m = X.shape[1]

    W2 = parameters["W2"]
    A1 = cache["A1"]
    A2 = cache["A2"]

    # dZ2 = A2 - Y is already the *per-example* derivative of the cost w.r.t.
    # Z2; the 1/m average is applied once, when the per-example contributions
    # are summed into dW/db (that's why dZx is not divided by m but dWx/dbx
    # are).
    dZ2 = A2 - Y
    dW2 = (1 / m) * np.dot(dZ2, A1.T)
    db2 = (1 / m) * np.sum(dZ2, axis=1, keepdims=True)

    # Elementwise multiply (not np.dot): tanh'(Z1) = 1 - A1**2 applies
    # independently to each hidden unit of each example, so the (4, m)
    # back-propagated error np.dot(W2.T, dZ2) is scaled entry-by-entry.
    dZ1 = np.dot(W2.T, dZ2) * (1 - np.power(A1, 2))
    dW1 = (1 / m) * np.dot(dZ1, X.T)
    db1 = (1 / m) * np.sum(dZ1, axis=1, keepdims=True)

    # Bug fix: the dict header line was lost in the original paste, leaving
    # an orphaned literal and no return of the gradients.
    grads = {"dW1": dW1,
             "db1": db1,
             "dW2": dW2,
             "db2": db2}

    return grads


def update_parameters(parameters, grads, learning_rate=1.2):
    """
    Apply one gradient-descent step: theta = theta - learning_rate * d_theta.

    (Restored as its own function: in the original paste this code was fused
    into backward_propagation, where learning_rate was undefined.)

    Arguments:
    parameters -- dict with "W1", "b1", "W2", "b2"
    grads -- dict with "dW1", "db1", "dW2", "db2"
    learning_rate -- gradient-descent step size

    Returns:
    parameters -- dict with the updated "W1", "b1", "W2", "b2"
    """
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]

    W1 = W1 - learning_rate * grads["dW1"]
    b1 = b1 - learning_rate * grads["db1"]
    W2 = W2 - learning_rate * grads["dW2"]
    b2 = b2 - learning_rate * grads["db2"]

    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2}

    return parameters

def nn_model(X, Y, n_h, num_iterations=10000, print_cost=False):
    """
    Train the single-hidden-layer network with full-batch gradient descent.

    Arguments:
    X -- input data of shape (n_x, number of examples)
    Y -- labels of shape (1, number of examples)
    n_h -- size of the hidden layer
    num_iterations -- number of gradient-descent iterations
    print_cost -- if True, print the cost every 1000 iterations

    Returns:
    parameters -- trained parameters, usable with predict()
    """
    np.random.seed(3)
    # Bug fix: unpack the layer sizes instead of binding the whole tuple
    # to n_x and n_y (the tuple's n_h is ignored; the caller's n_h wins).
    n_x, _, n_y = layer_sizes(X, Y)

    parameters = initialize_parameters(n_x, n_h, n_y)

    learning_rate = 1.2
    for i in range(num_iterations):
        A2, cache = forward_propagation(X, parameters)
        cost = compute_cost(A2, Y, parameters)
        grads = backward_propagation(parameters, cache, X, Y)

        # Bug fix: the original loop computed gradients but never applied
        # them, so the model could not learn. One gradient-descent step:
        parameters = {
            "W1": parameters["W1"] - learning_rate * grads["dW1"],
            "b1": parameters["b1"] - learning_rate * grads["db1"],
            "W2": parameters["W2"] - learning_rate * grads["dW2"],
            "b2": parameters["b2"] - learning_rate * grads["db2"],
        }

        if print_cost and i % 1000 == 0:
            print("Cost after iteration %i: %f" % (i, cost))

    return parameters

def predict(parameters, X):
    """
    Predict a 0/1 class for every example by thresholding A2 at 0.5.

    Arguments:
    parameters -- trained parameters dict ("W1", "b1", "W2", "b2")
    X -- input data of shape (n_x, number of examples)

    Returns:
    predictions -- array of 0/1 class predictions, shape (1, number of examples)
    """
    # np.round maps sigmoid outputs > 0.5 to class 1, otherwise class 0.
    A2, _ = forward_propagation(X, parameters)
    return np.round(A2)

if __name__ == '__main__':
    np.random.seed(1)

    # Bug fix: X and Y were used without ever being assigned. Load the
    # "flower" 2-class data set (helper lives in planar_utils.py).
    X, Y = load_planar_dataset()

    # Build a model with a n_h-dimensional hidden layer
    parameters = nn_model(X, Y, n_h=4, num_iterations=10000, print_cost=True)

    # Accuracy = fraction of examples where prediction agrees with the label
    # (counts correct 1s plus correct 0s).
    predictions = predict(parameters, X)
    print('Accuracy: %d' % float(
        (np.dot(Y, predictions.T) + np.dot(1 - Y, 1 - predictions.T)) / float(Y.size) * 100) + '%')

    # Plot the decision boundary (plot_decision_boundary comes from
    # planar_utils.py; note the transpose so predict sees (n_x, m) input).
    plot_decision_boundary(lambda x: predict(parameters, x.T), X, Y)
    plt.title("Decision Boundary for hidden layer size " + str(4))
    plt.show()


Result

Cost after iteration 0: 0.693048
Cost after iteration 1000: 0.288083
Cost after iteration 2000: 0.254385
Cost after iteration 3000: 0.233864
Cost after iteration 4000: 0.226792
Cost after iteration 5000: 0.222644
Cost after iteration 6000: 0.219731
Cost after iteration 7000: 0.217504
Cost after iteration 8000: 0.219506
Cost after iteration 9000: 0.218621
Accuracy: 90%