PyTorch Get Started
========================

:date: 19 oct 2019

Credit
---------

PyTorch Official Tutorial

WHAT IS PYTORCH?
----------------

.. code:: python

    # Tour of tensor creation, indexing, arithmetic, reshaping, NumPy interop
    # and device placement. The printed results are recorded in the literal
    # block that follows this snippet.
    import torch;import numpy as np
    # x is rebound on each of the next lines; only the randn_like result
    # is the tensor used from the first print onwards.
    x=torch.empty([2,3]) # shape=[2,3]
    x=torch.rand([2,3]) # shape=[2,3]
    x=torch.zeros([2,3],dtype=torch.float) # shape=[2,3]
    x=torch.tensor([1.,6]) # built from a Python list of two numbers
    x=x.new_ones([2,3],dtype=torch.double) # shape=[2,3]
    x=torch.randn_like(x,dtype=torch.float) # takes its shape from x, dtype overridden
    print(x);print(x.shape)
    print(x[0,:]);print(x[:,0]) # first row, then first column
    y=torch.rand([2,3]) # shape=[2,3]
    # operator form and function form print the same sum
    print(x+y);print(torch.add(x,y));
    print(y.add_(x)) # y is changed
    # -1 asks view() to infer that dimension: b prints as [3, 2]
    a=x.view([6]);b=x.view([-1,2])
    print(x.shape,a.shape,b.shape)
    a=x.numpy();print(a)
    x.add_(1);print(a) # a changes with x !
    a=np.ones([3]) # shape=[3]
    b=torch.from_numpy(a)
    np.add(a,1,out=a) # out=a makes this an in-place update of a
    print(a);print(b) # b changes with a !
    # fall back to the CPU when no CUDA device is available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    y=torch.ones_like(x,device=device) # create on "device" directly
    x=x.to(device) # alternative
    z=x+y
    # .to() can move back to the CPU and, optionally, change dtype as well
    # (the last print shows dtype=torch.float64)
    print(z);print(z.to("cpu"));print(z.to("cpu",torch.double))

.. parsed-literal::

    tensor([[ 1.3245, -0.1591,  0.5772],
            [-0.3548,  2.1270, -2.4076]])
    torch.Size([2, 3])
    tensor([ 1.3245, -0.1591,  0.5772])
    tensor([ 1.3245, -0.3548])
    tensor([[ 1.4469,  0.7045,  1.0438],
            [ 0.0810,  2.9837, -1.5898]])
    tensor([[ 1.4469,  0.7045,  1.0438],
            [ 0.0810,  2.9837, -1.5898]])
    tensor([[ 1.4469,  0.7045,  1.0438],
            [ 0.0810,  2.9837, -1.5898]])
    torch.Size([2, 3]) torch.Size([6]) torch.Size([3, 2])
    [[ 1.3244952  -0.15909314  0.57717997]
     [-0.35482323  2.126999   -2.4075844 ]]
    [[ 2.3244953   0.84090686  1.5771799 ]
     [ 0.64517677  3.126999   -1.4075844 ]]
    [2. 2. 2.]
    tensor([2., 2., 2.], dtype=torch.float64)
    tensor([[ 3.3245,  1.8409,  2.5772],
            [ 1.6452,  4.1270, -0.4076]])
    tensor([[ 3.3245,  1.8409,  2.5772],
            [ 1.6452,  4.1270, -0.4076]])
    tensor([[ 3.3245,  1.8409,  2.5772],
            [ 1.6452,  4.1270, -0.4076]], dtype=torch.float64)

AUTOGRAD
--------

..
.. code:: python

    # Gradients accumulate in x.grad across backward() calls. When the same
    # leaf tensor feeds two results that are back-propagated one after the
    # other, the second printout holds the sum of both gradients.
    print("\nWrong usage: Used by a AND b")
    x = torch.ones([2, 2], requires_grad=True)
    a = torch.sum(x)
    b = torch.sum(x * 2)
    a.backward()
    print(a)
    print(x.grad)  # da/dx
    b.backward()
    print(b)
    print(x.grad)  # da/dx + db/dx: added on top of the previous gradient

    # A fresh leaf tensor per result keeps each gradient separate.
    print("\nRight usage: Used by a OR b")
    x = torch.ones([2, 2], requires_grad=True)
    a = torch.sum(x)
    a.backward()
    print(a)
    print(x.grad)  # da/dx
    x = torch.ones([2, 2], requires_grad=True)
    b = torch.sum(x * 2)
    b.backward()
    print(b)
    print(x.grad)  # db/dx

.. parsed-literal::

    Wrong usage: Used by a AND b
    tensor(4., grad_fn=<SumBackward0>)
    tensor([[1., 1.],
            [1., 1.]])
    tensor(8., grad_fn=<SumBackward0>)
    tensor([[3., 3.],
            [3., 3.]])

    Right usage: Used by a OR b
    tensor(4., grad_fn=<SumBackward0>)
    tensor([[1., 1.],
            [1., 1.]])
    tensor(8., grad_fn=<SumBackward0>)
    tensor([[2., 2.],
            [2., 2.]])

.. code:: python

    x = torch.randn([3], requires_grad=True)
    y = x * 2
    # y is not a scalar, so backward() is given a vector v to weight it with.
    # v is only an input to backward(); it does not need a gradient itself.
    v = torch.randn([3])
    y.backward(v)
    print(x.grad)  # 2 * v, because y = 2 * x
    with torch.no_grad():
        # nothing computed inside this block is tracked, hence False
        print((x ** 2).requires_grad)

.. parsed-literal::

    tensor([-0.9035, -0.0756,  1.8223])
    False

NEURAL NETWORKS
---------------

.. NOTE(review): the three figure directives in this section have no image
   target in the source; the paths need to be restored before they render.

.. figure::
   :alt: image.webp

   image.webp

Kernel with padding
~~~~~~~~~~~~~~~~~~~

.. figure::
   :alt: image.webp

   image.webp

Stride (3,2) (height,width)
~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. figure::
   :alt: image.webp

   image.webp

..
.. code:: python

    import torch
    import torch.nn as nn
    import torch.nn.functional as F
    import torch.optim as optim


    class Net(nn.Module):
        """Two 3x3 convolutions with 2x2 max pooling, then two linear layers.

        The script below feeds it input of shape [batch, 1, 32, 32] and gets
        output of shape [batch, 6]; linear1 expects 16*6*6 flattened features.
        """

        def __init__(self):
            super().__init__()
            # 1 input image channel, 6 output channels, 3x3 square conv
            self.conv1 = nn.Conv2d(1, 6, 3)
            self.conv2 = nn.Conv2d(6, 16, 3)
            self.linear1 = nn.Linear(16 * 6 * 6, 120)
            self.linear2 = nn.Linear(120, 6)

        def forward(self, x):
            """Run the conv/pool stack, flatten per sample, apply both linears."""
            # max pooling over (2,2) window
            x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
            # 1 number if square shape
            x = F.max_pool2d(F.relu(self.conv2(x)), 2)
            x = x.view(-1, self.numFlatFeatures(x))
            x = F.relu(self.linear1(x))
            return self.linear2(x)

        def numFlatFeatures(self, x):
            """Return the number of elements per sample of x."""
            # all but Batch dimension(0)
            return x.shape[1:].numel()


    net = Net()
    print(net)
    params = list(net.parameters())
    for i, parameter in enumerate(params):
        print(i, parameter.shape)
    batchSize = 2
    input = torch.randn([batchSize, 1, 32, 32])
    output = net(input)
    print(output)
    print(output.shape)
    #==================================
    # Begin Custom optimizer
    net.zero_grad()  # Zero the gradient buffers of all parameters
    #output.backward(torch.randn(output.shape))
    target = torch.randn(output.shape)
    criterion = nn.MSELoss()  # This is a class
    loss = criterion(output, target)
    print(loss)
    # Walk back through the graph along the first input of every node.
    grad_fn = loss.grad_fn
    while grad_fn is not None:
        print(grad_fn)
        try:
            grad_fn = grad_fn.next_functions[0][0]
        except IndexError:
            # a node with no inputs has an empty next_functions tuple
            break
    # Clear existing gradients
    # else gradients will be accumulated to existing gradients
    net.zero_grad()
    print('net.conv1.bias.grad before backward:', net.conv1.bias.grad)
    loss.backward()
    print('net.conv1.bias.grad after backward:', net.conv1.bias.grad)
    learnRate = 0.01
    # Plain SGD step; no_grad() keeps the in-place update out of the graph.
    with torch.no_grad():
        for parameter in net.parameters():
            parameter.sub_(learnRate * parameter.grad)
    # End Custom optimizer
    #==================================
    #==================================
    # Begin Torch optimizer
    optimizer = optim.SGD(net.parameters(), lr=learnRate)
    optimizer.zero_grad()
    output = net(input)
    loss = criterion(output, target)
    loss.backward()
    optimizer.step()
    # End Torch optimizer
    #==================================

.. parsed-literal::

    Net(
      (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
      (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
      (linear1): Linear(in_features=576, out_features=120, bias=True)
      (linear2): Linear(in_features=120, out_features=6, bias=True)
    )
    0 torch.Size([6, 1, 3, 3])
    1 torch.Size([6])
    2 torch.Size([16, 6, 3, 3])
    3 torch.Size([16])
    4 torch.Size([120, 576])
    5 torch.Size([120])
    6 torch.Size([6, 120])
    7 torch.Size([6])
    tensor([[-0.0427,  0.2950, -0.0963, -0.0848,  0.0039, -0.1088],
            [-0.0443,  0.3183, -0.0833, -0.0584, -0.1068, -0.0729]],
           grad_fn=<AddmmBackward>)
    torch.Size([2, 6])
    tensor(1.1637, grad_fn=<MseLossBackward>)
    net.conv1.bias.grad before backward: None
    net.conv1.bias.grad after backward: tensor([-0.0616,  0.0531,  0.0373,  0.0389,  0.0173, -0.0261])

TRAINING A CLASSIFIER
---------------------

.. code:: python

    # Save only the parameters, then load them into a fresh Net instance.
    path = './net.pth'
    torch.save(net.state_dict(), path)
    net2 = Net()
    net2.load_state_dict(torch.load(path))

.. parsed-literal::