In [1]:
import numpy as np

def myRelu(z):
    """Element-wise ReLU activation.

    Every entry of ``z`` is clipped to the interval [0, +inf): negative
    entries become 0 and non-negative entries pass through unchanged.
    """
    return np.clip(z, a_min=0, a_max=np.inf)
def dMyRelu(z):
    """Jacobian of the element-wise ReLU evaluated at ``z``.

    Parameters
    ----------
    z : array-like, 1-D
        Pre-activation vector. Lists and tuples are accepted and converted
        to an ndarray first.

    Returns
    -------
    numpy.ndarray
        Square diagonal matrix with one row/column per entry of ``z``. The
        diagonal entry is 1.0 where ``z > 0`` and 0.0 elsewhere, so the
        derivative at exactly 0 is taken to be 0.

    Raises
    ------
    ValueError
        If ``z`` is not one-dimensional.
    """
    z = np.asarray(z)
    # np.diag *extracts* the diagonal when given a 2-D array instead of
    # building a matrix, which would silently yield a wrong "Jacobian".
    if z.ndim != 1:
        raise ValueError(
            "dMyRelu expects a 1-D vector, got an array with {} dimension(s)".format(z.ndim)
        )
    active = (z > 0).astype(float)
    return np.diag(active)

In [2]:
# Sanity check on a random 5-vector: x should keep the positive entries of z
# and zero the rest; J should be diagonal with ones exactly where z > 0.
z = np.random.normal(loc=0, scale=1, size=5)
x, J = myRelu(z), dMyRelu(z)
print("z = {},\nx = {},\nJ = {}".format(z, x, J))

z = [ 0.04424291 -0.66054354  1.58899743 -0.30882183 -0.06852131],
x = [0.04424291 0.         1.58899743 0.         0.        ],
J = [[1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]


In [3]:
# Layer widths, from the input dimension h[0] to the output dimension h[-1].
h = [10, 256, 512, 1]
# Number of weight matrices (one per pair of consecutive layers).
depth = len(h) - 1
# W[d] has shape (h[d+1], h[d]); its entries are drawn from N(0, 1).
W = [np.random.normal(0, 1, (n_out, n_in)) for n_in, n_out in zip(h[:-1], h[1:])]

def f(x):
    """Forward pass through the network defined by the global weights ``W``.

    The first ``depth - 1`` weight matrices are each followed by ``myRelu``;
    the last matrix ``W[-1]`` is applied without an activation.

    Returns
    -------
    tuple
        ``(output, z_hist, x_hist)`` where ``z_hist[d]`` is the
        pre-activation ``W[d] @ input_d`` and ``x_hist[d]`` is its ReLU,
        one entry per activated layer.
    """
    pre_acts, post_acts = [], []
    act = x
    for d in range(depth - 1):
        pre = W[d] @ act
        act = myRelu(pre)
        pre_acts.append(pre)
        post_acts.append(act)
    out = W[-1] @ act
    return out, pre_acts, post_acts

# Evaluate the network at a random input of dimension h[0].
x0 = np.random.normal(loc=0, scale=1, size=h[0])
print(f(x0)[0])
# For reference, f unrolled by hand for this three-matrix network:
#     z1 = W[0] @ x0
#     x1 = myRelu(z1)
#     z2 = W[1] @ x1
#     x2 = myRelu(z2)
#     f  = W[2] @ x2

[-144.7493557]


In [4]:
def df_forward(x):
    """Jacobian of the network output with respect to ``x``, built input-first.

    Starts from ``W[0]`` and left-multiplies, layer by layer, by the ReLU
    Jacobian at that layer's pre-activation and then by the next weight
    matrix. The running product keeps ``h[0]`` columns throughout; its shape
    is printed after every layer.
    """
    _, z_hist, _ = f(x)
    J = W[0]
    print("forward, J.shape = {}".format(J.shape))
    # z_hist holds one pre-activation per activated layer; layer k's ReLU
    # Jacobian is followed by the weight matrix W[k + 1].
    for nxt, z in enumerate(z_hist, start=1):
        J = W[nxt] @ (dMyRelu(z) @ J)
        print("forward, J.shape = {}".format(J.shape))
    return J

def df_backward(x):
    """Jacobian of the network output with respect to ``x``, built output-first.

    Starts from ``W[-1]`` and right-multiplies, from the last activated layer
    down to the first, by the ReLU Jacobian at that layer's pre-activation
    and then by that layer's weight matrix. The running product keeps
    ``h[-1]`` rows throughout; its shape is printed after every layer.
    """
    _, z_hist, _ = f(x)
    J = W[-1]
    print("backward, J.shape = {}".format(J.shape))
    # Walk the activated layers in reverse: depth-2, ..., 1, 0.
    for d in range(depth - 2, -1, -1):
        J = (J @ dMyRelu(z_hist[d])) @ W[d]
        print("backward, J.shape = {}".format(J.shape))
    return J


# Both accumulation orders should print the same Jacobian at x0.
for jacobian_fn in (df_forward, df_backward):
    print(jacobian_fn(x0))
# Optional numeric comparison of the two results (disabled):
# print(np.linalg.norm(df_forward(x0)-df_backward(x0), np.inf))

forward, J.shape = (256, 10)
forward, J.shape = (512, 10)
forward, J.shape = (1, 10)
[[-258.36554866  -97.6021517  -204.43489232  -57.40188988   57.19934187
    41.10554691  142.12464324 -144.20916934  -62.9182526   246.171591  ]]
backward, J.shape = (1, 512)
backward, J.shape = (1, 256)
backward, J.shape = (1, 10)
[[-258.36554866  -97.6021517  -204.43489232  -57.40188988   57.19934187
    41.10554691  142.12464324 -144.20916934  -62.9182526   246.171591  ]]


In [6]:
def gradientChecking(x, f, df, eps=1e-6, tol=1e-4):
    """Compare an analytic Jacobian against central finite differences.

    Parameters
    ----------
    x : numpy.ndarray, 1-D
        Point at which the Jacobian is checked.
    f : callable
        ``f(x)`` must return a sequence whose first element is the 1-D
        output vector; any further elements are ignored.
    df : callable
        ``df(x)`` returns the analytic Jacobian, expected to have one row
        per output component and one column per entry of ``x``.
    eps : float, optional
        Half-width of the central-difference step (default 1e-6).
    tol : float, optional
        Absolute tolerance on the infinity norm of the difference between
        the numeric and analytic Jacobians (default 1e-4).

    Returns
    -------
    bool-like
        True when ``norm(J_numeric - df(x), inf) < tol``.
    """
    n = x.shape[0]
    d = f(x)[0].shape[0]
    J = np.zeros((d, n))
    # Reuse one perturbation vector instead of building a full n-by-n
    # identity matrix on every iteration just to read a single column.
    step = np.zeros(n)
    for i in range(n):
        step[i] = eps
        J[:, i] = (f(x + step)[0] - f(x - step)[0]) / (2 * eps)
        step[i] = 0.0  # restore so the next iteration perturbs only entry i+1
    return np.linalg.norm(J - df(x), np.inf) < tol


# Draw a fresh input point and validate each analytic Jacobian against the
# finite-difference estimate.
x0 = np.random.normal(loc=0, scale=1, size=h[0])
for analytic_jacobian in (df_forward, df_backward):
    print("Result of gradient checking:", gradientChecking(x0, f, analytic_jacobian))

forward, J.shape = (256, 10)
forward, J.shape = (512, 10)
forward, J.shape = (1, 10)
Result of gradient checking: True
backward, J.shape = (1, 512)
backward, J.shape = (1, 256)
backward, J.shape = (1, 10)
Result of gradient checking: True
