{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Dolly Zoom\n",
    "\n",
    "Renders a checkerboard room (ground, left, right and back planes) with a cube in it and animates a dolly zoom: ",
    "the scene is moved from `Z_start` to `Z_end` while the focal length is scaled as `f = f_start * Tz / Z_start`, ",
    "so the cube keeps roughly the same image size while the perspective of the walls changes. ",
    "The frames are written to `output.gif`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import copy\n",
    "import imageio\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import os\n",
    "from math import sin, cos"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Scene parameters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# scene parameters\n",
    "f_start = 0.3    # the focal length f to start\n",
    "grid_count = 20  # number of grid points along one edge of each plane (grid_count - 1 squares)\n",
    "Z_start = 100    # the starting distance between the camera and the cube\n",
    "Z_end = 40       # the last distance between camera and the cube\n",
    "steps = 30       # number of dolly zoom steps to take between Z_start and Z_end"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Camera model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_projective_matrix(in_trns, f):\n",
    "    \"\"\"Return the 3 x 4 perspective projection matrix K @ M.\n",
    "\n",
    "    in_trns: 1D array (Rx, Ry, Rz, Tx, Ty, Tz); rotations about the X, Y and Z\n",
    "             axes in degrees, followed by the translation.\n",
    "    f:       camera focal length.\n",
    "\n",
    "    M = [R | t] is the extrinsic matrix with R = Rz @ Ry @ Rx (the rotation\n",
    "    about X is applied first), and K = diag(f, f, 1) is the intrinsic matrix.\n",
    "    The result is a plain numpy array of shape (3, 4).\n",
    "    \"\"\"\n",
    "    trns = np.asarray(in_trns, dtype=float)\n",
    "    ax, ay, az = np.radians(trns[:3])\n",
    "    t = trns[3:6]\n",
    "\n",
    "    Rx = np.array([[1, 0, 0],\n",
    "                   [0, cos(ax), -sin(ax)],\n",
    "                   [0, sin(ax), cos(ax)]])\n",
    "    Ry = np.array([[cos(ay), 0, sin(ay)],\n",
    "                   [0, 1, 0],\n",
    "                   [-sin(ay), 0, cos(ay)]])\n",
    "    Rz = np.array([[cos(az), -sin(az), 0],\n",
    "                   [sin(az), cos(az), 0],\n",
    "                   [0, 0, 1]])\n",
    "    R = Rz @ Ry @ Rx\n",
    "\n",
    "    # extrinsic matrix [R | t], 3 x 4\n",
    "    M = np.hstack((R, t[:, np.newaxis]))\n",
    "\n",
    "    # intrinsic matrix: focal length on the diagonal, principal point at the origin\n",
    "    K = np.array([[f, 0, 0],\n",
    "                  [0, f, 0],\n",
    "                  [0, 0, 1]], dtype=float)\n",
    "\n",
    "    return K @ M"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Geometry helpers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_plane_mesh(in_plane, grid_n):\n",
    "    \"\"\"Sample a regular grid of homogeneous 3D points on a plane lying in z = 0.\n",
    "\n",
    "    in_plane: array with one homogeneous corner (x, y, z, 1) per row; the X range\n",
    "              is read from rows 0 and 1, the Y range from rows 1 and 2.\n",
    "    grid_n:   number of sample points along each edge.\n",
    "\n",
    "    Returns an array of shape (grid_n, grid_n, 4) holding (x, y, 0, 1) per grid point.\n",
    "    \"\"\"\n",
    "    X = np.linspace(in_plane[0, 0], in_plane[1, 0], grid_n)\n",
    "    Y = np.linspace(in_plane[1, 1], in_plane[2, 1], grid_n)\n",
    "    U, V = np.meshgrid(X, Y)\n",
    "\n",
    "    return np.stack((U, V, np.zeros_like(U), np.ones_like(U)), axis=2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def transform_points(in_obj, f):\n",
    "    \"\"\"Apply an object's transform and project its points onto the sensor.\n",
    "\n",
    "    in_obj: dict with 'points' (N x 4 homogeneous 3D points) and\n",
    "            'transform' (Rx, Ry, Rz, Tx, Ty, Tz).\n",
    "    f:      focal length.\n",
    "\n",
    "    Returns a shallow copy of in_obj whose 'points' entry is replaced by the\n",
    "    N x 2 array of image coordinates; in_obj itself is left untouched.\n",
    "    \"\"\"\n",
    "    out_obj = dict(in_obj)\n",
    "\n",
    "    # perspective projection (homogeneous image coordinates, 3 x N)\n",
    "    M = get_projective_matrix(in_obj['transform'], f)\n",
    "    p = M @ np.transpose(in_obj['points'])\n",
    "\n",
    "    # divide by the third coordinate to get non-homogeneous sensor points, N x 2\n",
    "    out_obj['points'] = np.transpose(p[:2, :] / p[2, :])\n",
    "\n",
    "    return out_obj"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Rendering"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# planes of the room, in drawing order\n",
    "PLANE_NAMES = ['ground_plane', 'right_plane', 'left_plane', 'back_plane']\n",
    "\n",
    "# corner indices of the six cube faces and the twelve cube edges\n",
    "# (lists, not tuples, so they can be used directly for numpy fancy indexing)\n",
    "CUBE_FACES = [[0, 1, 2, 3], [4, 5, 6, 7], [0, 1, 5, 4],\n",
    "              [1, 2, 6, 5], [2, 3, 7, 6], [3, 0, 4, 7]]\n",
    "CUBE_EDGES = [[0, 1], [1, 2], [2, 3], [3, 0],\n",
    "              [4, 5], [5, 6], [6, 7], [7, 4],\n",
    "              [0, 4], [1, 5], [2, 6], [3, 7]]\n",
    "\n",
    "\n",
    "def draw_plane(in_ax, in_pts):\n",
    "    \"\"\"Draw a projected plane grid as a black and white checkerboard.\n",
    "\n",
    "    in_ax:  matplotlib axes to draw on.\n",
    "    in_pts: array of shape (rows, cols, 2) with the image coordinates of the grid points.\n",
    "    \"\"\"\n",
    "    n_rows, n_cols = in_pts.shape[:2]\n",
    "    for i in range(n_rows - 1):\n",
    "        for j in range(n_cols - 1):\n",
    "            # the four corners of square (i, j), walked around its outline\n",
    "            quad = in_pts[[i, i + 1, i + 1, i], [j, j, j + 1, j + 1]]\n",
    "            # colour by the parity of (i + j) so the pattern alternates along both\n",
    "            # rows and columns whatever the grid size is\n",
    "            in_ax.fill(quad[:, 0], quad[:, 1], 'k-' if (i + j) % 2 == 0 else 'w-')\n",
    "\n",
    "\n",
    "def visualize_scene(in_scene):\n",
    "    \"\"\"Render one projected scene and return the frame as an image array.\n",
    "\n",
    "    in_scene: dict of projected objects (see transform_points) with the keys\n",
    "              'ground_plane', 'right_plane', 'left_plane', 'back_plane' and 'cube'.\n",
    "              Plane points are expected to come from a grid_count x grid_count mesh.\n",
    "\n",
    "    The figure is saved to 'temp.png', read back with imageio and the file is\n",
    "    removed again; the figure is closed before returning.\n",
    "    \"\"\"\n",
    "    my_dpi = 96\n",
    "    fig = plt.figure(figsize=(800/my_dpi, 400/my_dpi), dpi=my_dpi)\n",
    "    try:\n",
    "        ax = fig.add_subplot()\n",
    "\n",
    "        # plot the planes\n",
    "        for name in PLANE_NAMES:\n",
    "            xy = in_scene[name]['points']\n",
    "            draw_plane(ax, np.reshape(xy, (grid_count, grid_count, 2)))\n",
    "\n",
    "        # plot the cube: six translucent faces, then the twelve edges\n",
    "        xy = in_scene['cube']['points']\n",
    "        x = xy[:, 0]\n",
    "        y = xy[:, 1]\n",
    "        for face in CUBE_FACES:\n",
    "            ax.fill(x[face], y[face], color=[0.7, 0.7, 0.7, 0.5])\n",
    "        for edge in CUBE_EDGES:\n",
    "            ax.plot(x[edge], y[edge], 'ro-')\n",
    "\n",
    "        ax.set_aspect('equal', adjustable='box')\n",
    "        ax.set_xlim([-0.2, 0.2])\n",
    "        ax.set_ylim([-0.1, 0.1])\n",
    "        ax.set_xticks([])\n",
    "        ax.set_yticks([])\n",
    "        fig.tight_layout()\n",
    "\n",
    "        # save the current plot and read it back as an image\n",
    "        fig.savefig('temp.png', dpi=my_dpi)\n",
    "        try:\n",
    "            image = imageio.imread('temp.png')\n",
    "        finally:\n",
    "            os.remove('temp.png')\n",
    "    finally:\n",
    "        # one figure is created per frame; close it so figures do not pile up\n",
    "        plt.close(fig)\n",
    "\n",
    "    return image"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Build the scene"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# scene dictionary has keys as object names, each object key will store object transformation and object points\n",
    "\n",
    "# 3-D, unit-cube (homogeneous coordinates), one corner per row\n",
    "cube = np.array([[0,0,0,1],[1,0,0,1],[1,1,0,1],[0,1,0,1],\n",
    "                 [0,0,1,1],[1,0,1,1],[1,1,1,1],[0,1,1,1]], dtype=float)\n",
    "\n",
    "# make length of the cube 12 meters and centre it on the origin\n",
    "cube[:, :3] = 12*cube[:, :3] - 6\n",
    "\n",
    "height = 400  # height of the walls\n",
    "plane = np.array([[0,0,0,1],[height,0,0,1],[height,height,0,1],[0,height,0,1]])\n",
    "plane_mesh = np.reshape(get_plane_mesh(plane, grid_count), (-1, 4))\n",
    "\n",
    "# transformation (Rx, Ry, Rz, Tx, Ty, Tz) of every plane\n",
    "plane_transforms = {\n",
    "    'ground_plane': [90, 0, 0, -height/2, -30, Z_start],\n",
    "    'right_plane': [0, -90, 0, height/2, -30, Z_start],\n",
    "    'left_plane': [0, -90, 0, -height/2, -30, Z_start],\n",
    "    'back_plane': [0, 0, 0, -height/2, -30, height+Z_start],\n",
    "}\n",
    "\n",
    "# put the points and transformation of every object in the scene.\n",
    "# transforms are stored as floats: the dolly loop writes fractional Tz values into\n",
    "# them, and an integer array would silently truncate those.\n",
    "scene = {}\n",
    "for name, trns in plane_transforms.items():\n",
    "    scene[name] = {'points': plane_mesh.copy(),\n",
    "                   'transform': np.array(trns, dtype=float)}\n",
    "\n",
    "scene['cube'] = {'points': cube,\n",
    "                 'transform': np.array([30, 30, 0, 0, 0, Z_start], dtype=float)}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Render the dolly zoom"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# perform dolly zoom of the entire scene and save as a gif\n",
    "\n",
    "# these are the z-movement of the scene for dolly, and the focal length for each step\n",
    "Tzs = np.linspace(Z_start, Z_end, steps)\n",
    "fs = f_start*Tzs/Z_start\n",
    "\n",
    "# the writer is used as a context manager so the gif is closed even if a frame fails\n",
    "with imageio.get_writer('output.gif', mode='I') as gif_writer:\n",
    "\n",
    "    # update the scene and generate images\n",
    "    for Tz, f in zip(Tzs, fs):\n",
    "\n",
    "        projected_scene = {}\n",
    "        for name, obj in scene.items():  # iterate over the objects\n",
    "\n",
    "            # work on a copy so the original scene keeps its starting pose\n",
    "            cur_obj = copy.deepcopy(obj)\n",
    "\n",
    "            # shift the Z-translation of the current object (kept as float so the\n",
    "            # fractional offset is not truncated)\n",
    "            cur_obj['transform'] = cur_obj['transform'].astype(float)\n",
    "            cur_obj['transform'][-1] += Tz - Z_start\n",
    "\n",
    "            # project this object on the sensor\n",
    "            projected_scene[name] = transform_points(cur_obj, f)\n",
    "\n",
    "        # visualize the scene and write the image in the gif\n",
    "        gif_writer.append_data(visualize_scene(projected_scene))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}