In [None]:
#PCA

import numpy as np
from numpy import random
from numpy import linalg 
from matplotlib import pyplot as plt

# generate data: n noisy 2-D samples scattered around a line through the origin
n = 1000 # number of data points
x = 2*random.rand(n) # uniform samples in [0, 2)
y = random.rand()*x + 0.2*random.rand(n) # one random slope for all points, plus uniform noise

# PCA
x = x - np.mean(x) # zero-mean
y = y - np.mean(y) # zero-mean
D = np.concatenate(([x], [y]), axis=0) # data matrix (2 x n)
C = D@D.T / (n - 1) # sample co-variance matrix (2 x 2); scaling does not change the eigenvectors
# C is symmetric, so use eigh: it returns real eigenvalues and orthonormal
# eigenvectors. eigh lists eigenvalues in ascending order (and eig promises no
# order at all), so reorder explicitly to put the largest variance first.
val,vec = linalg.eigh(C) # PCA: eigen-decomposition
order = np.argsort(val)[::-1] # indices of eigenvalues, largest first
val = val[order]
vec = vec[:,order]
p1 = vec[:,0] # first PC (direction of largest variance)
p2 = vec[:,1] # second PC

# display data and PCA axes
fig, ax = plt.subplots() # one figure holding a single axes
ax.plot(x, y, '.') # data
for pc in (p1, p2): # each principal component is a red segment starting at the origin
    ax.plot([0, pc[0]], [0, pc[1]], 'r')
ax.set_xlim(-1, 1)
ax.set_ylim(-1, 1)
ax.set_aspect('equal', 'box') # same data scale on both axes
plt.show()

In [None]:
# eigenfaces 

%reset -f

# load images
import numpy as np
from numpy import linalg
import numpy.matlib 
import os
import matplotlib.pyplot as plt
from IPython.display import clear_output
from time import sleep
 
xdim = 256
ydim = 256
show_images = False # set to True to flash each face while it is loaded (debug aid)

# Collect the PNG names first so the data matrix is sized from the actual number
# of images instead of a hard-coded count. sorted() pins the image order,
# because os.listdir returns directory entries in arbitrary order.
png_names = sorted(f for f in os.listdir('./faces') if f.endswith(".png"))
N = len(png_names) # number of images
D = np.zeros((N,xdim*ydim))
for row,filename in enumerate(png_names):
    img = plt.imread("./faces/" + filename)
    # flatten image and pack into data matrix rows
    # (assumes every image holds exactly xdim*ydim values, i.e. one channel -- TODO confirm)
    D[row,:] = img.flatten()

    if( show_images ):
        plt.figure(figsize=(xdim/72,ydim/72))
        plt.imshow(img, cmap='gray') # display the image
        plt.axis('off')
        plt.show()
        sleep(0.01)
        clear_output(wait=True)

D = D.T # transpose so that each column is one image: (pixels x num_images)
print( "loaded " + str(N) + " images" )

In [None]:
# eigenfaces (dimensionality reduction)

# NOTE: this cell overwrites D with its centred version. Re-running it on
# already-centred data recomputes mu as (approximately) zero.
mean_face = np.mean(D, axis = 1) # per-pixel mean over all images
# mu keeps the (N x pixels) shape so that mu.T lines up with D (pixels x N),
# but it is a read-only broadcast view rather than N physical copies of the mean.
mu = np.broadcast_to(mean_face, (N, mean_face.size))
D  = D - mu.T # zero mean data
# (N x N) matrix D.T@D: much smaller than the (pixels x pixels) co-variance D@D.T,
# and if v is an eigenvector of D.T@D then D@v is an eigenvector of D@D.T.
C2 = D.T@D
# C2 is symmetric, so use eigh: real eigenpairs, returned in ascending order.
# Reorder to descending so that column k of vec is the k-th largest component,
# which is what the cells that index vec[:,k] rely on.
val,vec = linalg.eigh(C2) # compute eigenvectors
order = np.argsort(val)[::-1]
val = val[order]
vec = vec[:,order]

In [None]:
# eigenfaces (visualize top m eigenfaces)

# Draw nine eigenfaces on a 3 x 3 grid, taking the columns of vec in the order
# they are stored (assumed to be sorted by decreasing eigenvalue).
for k in range(9):
    e = D @ vec[:, k] # map the k-th eigenvector of the small matrix back to pixel space
    face = e.reshape(xdim, ydim)
    plt.subplot(3, 3, k + 1)
    plt.imshow(face, cmap='gray')
    plt.axis('off')

In [None]:
# eigenfaces (project each image onto new PCA basis of dimension d)

d = 3 # number of eigenvectors to project onto
P = np.zeros((d,N)) # column j will hold the d coordinates of image j
for k in range(d): # assume eigenvalues are in sorted order
    e = D@vec[:,k] # compute k^th eigenvector of larger co-variance matrix
    # For a unit-length vec[:,k], D@vec[:,k] has length sqrt(eigenvalue), not 1.
    # Rescale to unit length so that e.T@D is a true projection; otherwise each
    # coordinate is inflated by that length and the leading components dominate
    # any distance computed in this space.
    e_norm = linalg.norm(e)
    if( e_norm > 0 ): # a zero vector carries no direction; leave it (and its row of P) at zero
        e = e / e_norm
    P[k,:] = e.T@D # project each image onto this eigenvector

In [None]:
# eigenfaces (find nearest neighbor in eigenface space)

j = 1 # index into face image in data matrix D

# Distance from face j to every face in eigenface space, computed in one shot.
# P[:,[j]] keeps the column 2-D so it broadcasts against all columns of P.
dists = linalg.norm(P - P[:,[j]], axis=0)
dists[j] = np.inf # a face must not be matched with itself
minind = int(np.argmin(dists)) # first index attaining the smallest distance
mindist = dists[minind]

# Add the mean back (for visualization) only to the two faces that are shown.
# D itself stays zero-mean, so this cell can be re-run without adding the mean
# a second time and without invalidating the projection cells.
face_query = D[:,j] + mu.T[:,j]
face_match = D[:,minind] + mu.T[:,minind]

plt.subplot(121)
plt.imshow( face_query.reshape(xdim,ydim), cmap='gray'); plt.axis('off');
plt.subplot(122)
plt.imshow( face_match.reshape(xdim,ydim), cmap='gray'); plt.axis('off');

In [None]:
# TSNE

%reset -f

import matplotlib.pyplot as plt 
from matplotlib import cm
import numpy as np
from sklearn import datasets
from sklearn.manifold import TSNE

# load data
digits = datasets.load_digits()
N, d = digits.data.shape # rows and columns of the flattened digit matrix

# preview the first 25 rows, each reshaped to an 8 x 8 image, on a 5 x 5 grid
plt.figure(figsize=(10, 10))
for i, sample in enumerate(digits.data[:25]):
    plt.subplot(5, 5, i + 1)
    plt.imshow(sample.reshape(8, 8))
    plt.axis('off')

In [None]:
# TSNE (dimensionality reduction)

# Fit t-SNE with a fixed random_state and keep the two-column embedding.
tsne = TSNE(random_state=0, n_components=2)
X = tsne.fit_transform(digits.data) # 2D representation
y = range(digits.target_names.shape[0]) # labels for visual: one integer per class name

In [None]:
# TSNE (visualize)
plt.figure(figsize=(10, 10))

# matplotlib.cm.get_cmap was deprecated in matplotlib 3.7 and removed in 3.9;
# pyplot.get_cmap performs the same lookup and is still supported.
viridis = plt.get_cmap('viridis')
colors  = np.linspace(0,1,len(digits.target_names)) # one evenly spaced colour per class
for i,c,l in zip(y, colors, digits.target_names):
    mask = digits.target==i # rows of the embedding that belong to class i
    plt.scatter( X[mask,0],
                 X[mask,1],
                 color=viridis(c), label=l )
plt.legend()
plt.show()