Reminder: the general methodology for building a neural network is:

  1. Define the neural network structure (# of input units, # of hidden units, etc.).
  2. Initialize the model's parameters.
  3. Loop:
    • Implement forward propagation
    • Compute loss
    • Implement backward propagation to get the gradients
    • Update parameters (gradient descent)

You often build helper functions for steps 1-3 and then merge them into a single function called nn_model(); a minimal skeleton of that composition is sketched below.
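
As an illustration only, here is a minimal sketch of how those helpers compose (it assumes the helper functions defined in the full source code further down):

def nn_model_skeleton(X, Y, n_h, num_iterations=10000):
    # Step 1: input/output layer sizes come from the data (n_h is passed in)
    n_x, _, n_y = layer_sizes(X, Y)
    # Step 2: small random weights, zero biases
    parameters = initialize_parameters(n_x, n_h, n_y)
    # Step 3: the training loop
    for i in range(num_iterations):
        A2, cache = forward_propagation(X, parameters)
        cost = compute_cost(A2, Y, parameters)   # optionally print cost here
        grads = backward_propagation(parameters, cache, X, Y)
        parameters = update_parameters(parameters, grads)
    return parameters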

A Single Hidden Layer Neural Network Source Code

Environment

  • Language: Python 3+
  • Extra Library: Anaconda
  • IDE: PyCharm 2017.2
  • OS: Mac OS 10.13
  • Reference: Deeplearning.ai Courses at (study.163.com)
  • Notes: There are some very small errors (less than 0.0001) when running the compute_cost function, and three RuntimeWarnings appear in the console (marked in the source code). They are caused by predicted values that approach zero very closely, so one workaround is not to initialize the weights from a plain standard normal distribution. A clipping-based alternative is sketched right below this list.
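
As a side note (not part of the original assignment), those log warnings can also be avoided by clipping the predictions away from exactly 0 and 1 before taking the log; the epsilon value below is an arbitrary choice:

import numpy as np

def safe_cross_entropy(A2, Y, eps=1e-12):
    # Keep predictions strictly inside (0, 1) so np.log never receives 0
    A2 = np.clip(A2, eps, 1 - eps)
    m = Y.shape[1]
    logprobs = np.multiply(np.log(A2), Y) + np.multiply(1 - Y, np.log(1 - A2))
    return float(-np.sum(logprobs) / m)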

Directory Structure

deeplearning_ai_week3
|- Main.py
|- planar_utils.py
|- testCases.py

planar_utils.py download link
testCases.py download link

Source Code

# Main.py source code

import numpy as np
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
from deeplearning_ai_week3.testCases import *
import sklearn
import sklearn.datasets
import sklearn.linear_model
from deeplearning_ai_week3.planar_utils import plot_decision_boundary, sigmoid, load_extra_datasets, load_planar_dataset


def layer_sizes(X, Y):
    n_x = X.shape[0]  # size of input layer
    n_h = 4
    n_y = Y.shape[0]  # size of output layer

    return n_x, n_h, n_y


# Initialize the model's parameters
def initialize_parameters(n_x, n_h, n_y):
    np.random.seed(2)

    W1 = np.random.randn(n_h, n_x) * 0.01
    b1 = np.zeros((n_h, 1))
    W2 = np.random.randn(n_y, n_h) * 0.01
    b2 = np.zeros((n_y, 1))

    assert (W1.shape == (n_h, n_x))
    assert (b1.shape == (n_h, 1))
    assert (W2.shape == (n_y, n_h))
    assert (b2.shape == (n_y, 1))

    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2}

    return parameters


def compute_cost(A2, Y, parameters):
    m = Y.shape[1]

    """
    There are errors below:
    1. RuntimeWarning: divide by zero encountered in log 
    logprobs = np.multiply(Y, np.log(A2)) + np.multiply((1. - Y), np.log(1. - A2))
    2. RuntimeWarning: overflow encountered in exp
    s = 1/(1+np.exp(-x))
    """
    logprobs = np.multiply(np.log(A2), Y) + np.multiply((1 - Y), np.log(1 - A2))
    cost = - np.sum(logprobs) / m

    cost = np.squeeze(cost)

    assert isinstance(cost, float)

    return cost


def forward_propagation(X, parameters):
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]

    Z1 = np.dot(W1, X) + b1
    A1 = np.tanh(Z1)
    Z2 = np.dot(W2, A1) + b2
    A2 = sigmoid(Z2)

    assert (A2.shape == (1, X.shape[1]))

    cache = {"Z1": Z1,
             "A1": A1,
             "Z2": Z2,
             "A2": A2}

    return A2, cache


def backward_propagation(parameters, cache, X, Y):
    m = X.shape[1]

    W1 = parameters["W1"]
    W2 = parameters["W2"]

    A1 = cache["A1"]
    A2 = cache["A2"]

    '''
    Why do dW and db get divided by m while dZ does not?
    dZ2 holds one error column per training example; the 1/m average only
    enters when those columns are summed to form dW2 and db2.
    '''
    dZ2 = A2 - Y
    dW2 = (1 / m) * np.dot(dZ2, A1.T)
    db2 = (1 / m) * np.sum(dZ2, axis=1, keepdims=True)
    '''
    I don't know why use np.multiply(np.dot(W2.T, dZ2), (1 - np.power(A1, 2))) instead of np.dot()
    What is the different between them?

    The shapes of each element are:
    W2.T.shape == (4, 1)
    dZ2.shape == (1, 400)
    np.dot(W2.T, dZ2).shape == (4, 400)
    A1.shape = (4, 400)
    '''
    dZ1 = np.dot(W2.T, dZ2) * (1 - np.power(A1, 2))
    dW1 = (1 / m) * np.dot(dZ1, X.T)
    db1 = (1 / m) * np.sum(dZ1, axis=1, keepdims=True)

    grads = {"dW1": dW1,
             "db1": db1,
             "dW2": dW2,
             "db2": db2}

    return grads


def update_parameters(parameters, grads, learning_rate=1.2):
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]

    dW1 = grads["dW1"]
    db1 = grads["db1"]
    dW2 = grads["dW2"]
    db2 = grads["db2"]

    W1 -= learning_rate * dW1
    b1 -= learning_rate * db1
    W2 -= learning_rate * dW2
    b2 -= learning_rate * db2

    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2}

    return parameters


def nn_model(X, Y, n_h, num_iterations=10000, print_cost=False):
    np.random.seed(3)
    n_x, _, n_y = layer_sizes(X, Y)

    parameters = initialize_parameters(n_x, n_h, n_y)

    for i in range(0, num_iterations):
        A2, cache = forward_propagation(X, parameters)
        cost = compute_cost(A2, Y, parameters)
        grads = backward_propagation(parameters, cache, X, Y)
        parameters = update_parameters(parameters, grads)

        if print_cost and i % 1000 == 0:
            print("Cost after iteration %i: %f" % (i, cost))

    return parameters


def predict(parameters, X):
    A2, cache = forward_propagation(X, parameters)
    predictions = np.round(A2)

    return predictions


if __name__ == '__main__':
    np.random.seed(1)

    # Load a "flower" 2-class data set into variable X and Y
    X, Y = load_planar_dataset()

    # Build a model with an n_h-dimensional hidden layer
    parameters = nn_model(X, Y, n_h=4, num_iterations=10000, print_cost=True)

    predictions = predict(parameters, X)
    print('Accuracy: %d' % float(
        (np.dot(Y, predictions.T) + np.dot(1 - Y, 1 - predictions.T)) / float(Y.size) * 100) + '%')

    # Plot the decision boundary
    plot_decision_boundary(lambda x: predict(parameters, x.T), X, Y)
    plt.title("Decision Boundary for hidden layer size " + str(4))
    plt.show()
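
To make the np.multiply vs. np.dot question in backward_propagation concrete, here is a small standalone demo (the shapes mirror the network, but with 5 examples instead of 400; the values are random and purely illustrative):

import numpy as np

np.random.seed(0)
W2_T = np.random.randn(4, 1)            # plays the role of W2.T
dZ2 = np.random.randn(1, 5)             # error term, one column per example
A1 = np.tanh(np.random.randn(4, 5))     # hidden activations

back = np.dot(W2_T, dZ2)                # matrix product: (4, 1) x (1, 5) -> (4, 5)
dZ1 = np.multiply(back, 1 - A1 ** 2)    # element-wise tanh derivative, shape stays (4, 5)

print(back.shape, dZ1.shape)            # (4, 5) (4, 5)
# A second np.dot(back, 1 - A1 ** 2) would attempt (4, 5) x (4, 5) and raise a ValueError.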

Result

Cost after iteration 0: 0.693048
Cost after iteration 1000: 0.288083
Cost after iteration 2000: 0.254385
Cost after iteration 3000: 0.233864
Cost after iteration 4000: 0.226792
Cost after iteration 5000: 0.222644
Cost after iteration 6000: 0.219731
Cost after iteration 7000: 0.217504
Cost after iteration 8000: 0.219506
Cost after iteration 9000: 0.218621
Accuracy: 90%
Result Figure

[Decision boundary plot for hidden layer size 4]