PyTorch Get Started

Date: 19 Oct 2019

Credit: PyTorch Official Tutorial

WHAT IS PYTORCH?

import torch;import numpy as np

x=torch.empty([2,3])                     # uninitialized, shape=[2,3]
x=torch.rand([2,3])                      # uniform on [0,1), shape=[2,3]
x=torch.zeros([2,3],dtype=torch.float)   # zeros, shape=[2,3]
x=torch.tensor([1.,6])                   # construct directly from data
x=x.new_ones([2,3],dtype=torch.double)   # reuses properties of x, shape=[2,3]
x=torch.randn_like(x,dtype=torch.float)  # same shape as x, new dtype
print(x);print(x.shape)
print(x[0,:]);print(x[:,0])              # first row; first column

y=torch.rand([2,3]) # shape=[2,3]
print(x+y);print(torch.add(x,y))
print(y.add_(x)) # trailing underscore means in-place: y is changed

a=x.view([6]);b=x.view([-1,2]) # -1 is inferred from the other dimensions
print(x.shape,a.shape,b.shape)

a=x.numpy();print(a)
x.add_(1);print(a) # a changes with x !

a=np.ones([3]) # shape=[3]
b=torch.from_numpy(a)
np.add(a,1,out=a)
print(a);print(b) # b changes with a !

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
y=torch.ones_like(x,device=device) # create on "device" directly
x=x.to(device) # alternative
z=x+y
print(z);print(z.to("cpu"));print(z.to("cpu",torch.double))
tensor([[ 1.3245, -0.1591,  0.5772],
        [-0.3548,  2.1270, -2.4076]])
torch.Size([2, 3])
tensor([ 1.3245, -0.1591,  0.5772])
tensor([ 1.3245, -0.3548])
tensor([[ 1.4469,  0.7045,  1.0438],
        [ 0.0810,  2.9837, -1.5898]])
tensor([[ 1.4469,  0.7045,  1.0438],
        [ 0.0810,  2.9837, -1.5898]])
tensor([[ 1.4469,  0.7045,  1.0438],
        [ 0.0810,  2.9837, -1.5898]])
torch.Size([2, 3]) torch.Size([6]) torch.Size([3, 2])
[[ 1.3244952  -0.15909314  0.57717997]
 [-0.35482323  2.126999   -2.4075844 ]]
[[ 2.3244953   0.84090686  1.5771799 ]
 [ 0.64517677  3.126999   -1.4075844 ]]
[2. 2. 2.]
tensor([2., 2., 2.], dtype=torch.float64)
tensor([[ 3.3245,  1.8409,  2.5772],
        [ 1.6452,  4.1270, -0.4076]])
tensor([[ 3.3245,  1.8409,  2.5772],
        [ 1.6452,  4.1270, -0.4076]])
tensor([[ 3.3245,  1.8409,  2.5772],
        [ 1.6452,  4.1270, -0.4076]], dtype=torch.float64)
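
One caveat worth noting (my addition, not from the tutorial): view requires a contiguous tensor, so after a transpose use contiguous() or reshape:

t=torch.rand([2,3]).t()                # transpose: shape=[3,2], non-contiguous
# t.view([6]) would raise a RuntimeError here
print(t.contiguous().view([6]).shape)  # torch.Size([6])
print(t.reshape([6]).shape)            # torch.Size([6])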

AUTOGRAD

print("\nWrong usage: Used by a AND b")
x=torch.ones([2,2],requires_grad=True)
a=torch.sum(x);b=torch.sum(x*2)
a.backward();print(a);print(x.grad) # da/dx
b.backward();print(b);print(x.grad) # db/dx

print("\nRight usage: Used by a OR b")
x=torch.ones([2,2],requires_grad=True)
a=torch.sum(x)
a.backward();print(a);print(x.grad) # da/dx

x=torch.ones([2,2],requires_grad=True)
b=torch.sum(x*2)
b.backward();print(b);print(x.grad) # db/dx
Wrong usage: x used by a AND b
tensor(4., grad_fn=<SumBackward0>)
tensor([[1., 1.],
        [1., 1.]])
tensor(8., grad_fn=<SumBackward0>)
tensor([[3., 3.],
        [3., 3.]])

Right usage: x used by a OR b
tensor(4., grad_fn=<SumBackward0>)
tensor([[1., 1.],
        [1., 1.]])
tensor(8., grad_fn=<SumBackward0>)
tensor([[2., 2.],
        [2., 2.]])
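
A third option (my addition, not in the tutorial): keep a single x and clear its accumulated gradient in place between backward passes:

x=torch.ones([2,2],requires_grad=True)
a=torch.sum(x)
a.backward();print(x.grad) # da/dx = 1
x.grad.zero_()             # reset the accumulated gradient in place
b=torch.sum(x*2)
b.backward();print(x.grad) # db/dx = 2
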
x=torch.randn([3], requires_grad=True)
y=x*2
v=torch.randn([3]) # weighting vector; no requires_grad needed
y.backward(v)      # vector-Jacobian product: x.grad = J^T v
print(x.grad)      # here J = 2*I, so x.grad = 2*v

with torch.no_grad():
  print((x**2).requires_grad)
tensor([-0.9035, -0.0756,  1.8223])
False
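
Besides torch.no_grad(), detach() cuts a single tensor out of the graph while sharing its data (continuing with x from above):

y=x.detach()
print(y.requires_grad) # False
print(x.eq(y).all())   # tensor(True): same underlying values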

NEURAL NETWORKS

[Figure: convolution kernel with padding]

[Figure: convolution with stride (3,2) (height, width)]
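
To make the padding and stride figures concrete, a quick shape check (a sketch of my own; the 7x7 input and the kernel/stride values are arbitrary):

import torch;import torch.nn as nn
x=torch.randn([1,1,7,7])                       # batch=1, channel=1, 7x7 input
conv=nn.Conv2d(1,1,kernel_size=3)              # no padding, stride 1
print(conv(x).shape)                           # torch.Size([1, 1, 5, 5])
conv=nn.Conv2d(1,1,kernel_size=3,padding=1)    # padding=1 keeps 7x7 with a 3x3 kernel
print(conv(x).shape)                           # torch.Size([1, 1, 7, 7])
conv=nn.Conv2d(1,1,kernel_size=3,stride=(3,2)) # stride given as (height,width)
print(conv(x).shape)                           # torch.Size([1, 1, 2, 3])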

import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
  def __init__(self):
    super(Net,self).__init__()
    # 1 input image channel, 6 output channels, 3x3 square conv
    self.conv1=nn.Conv2d(1,6,3)
    self.conv2=nn.Conv2d(6,16,3)
    # 32x32 input -> conv1 -> 30x30 -> pool -> 15x15 -> conv2 -> 13x13 -> pool -> 6x6
    self.linear1=nn.Linear(16*6*6,120)
    self.linear2=nn.Linear(120,6)
  def forward(self,x):
    # max pooling over (2,2) window
    x=F.max_pool2d(F.relu(self.conv1(x)),(2,2))
    # a single number suffices when the window is square
    x=F.max_pool2d(F.relu(self.conv2(x)),2)
    x=x.view(-1,self.numFlatFeatures(x))
    x=F.relu(self.linear1(x))
    return self.linear2(x)
  def numFlatFeatures(self,x):
    shapes=x.shape[1:] # all dimensions except the batch dimension (0)
    numFeatures=1
    for s in shapes:
      numFeatures*=s
    return numFeatures

net=Net();print(net)
params=list(net.parameters())
for i,p in enumerate(params):
  print(i,p.shape)

batchSize=2
input=torch.randn([batchSize,1,32,32])
output=net(input)
print(output);print(output.shape)

#==================================
# Begin Custom optimizer
net.zero_grad() # Zero the gradient buffers of all parameters
#output.backward(torch.randn(output.shape))
target=torch.randn(output.shape)
criterion=nn.MSELoss() # a class; criterion is a callable instance
loss=criterion(output,target)
print(loss)
# walk the autograd graph backwards from the loss
grad_fn=loss.grad_fn
while grad_fn:
  print(grad_fn)
  if not grad_fn.next_functions: # leaf reached (AccumulateGrad)
    break
  grad_fn=grad_fn.next_functions[0][0]
# Clear existing gradients
# else gradients will be accumulated to existing gradients
net.zero_grad()
print('net.conv1.bias.grad before backward:',net.conv1.bias.grad)
loss.backward()
print('net.conv1.bias.grad after backward:',net.conv1.bias.grad)

learnRate=0.01
for parameter in net.parameters():
  parameter.data.sub_(learnRate*parameter.grad.data) # SGD update: p -= lr*grad
# End Custom optimizer
#==================================

#==================================
# Begin Torch optimizer
import torch.optim as optim
optimizer=optim.SGD(net.parameters(),lr=learnRate)
optimizer.zero_grad()
output=net(input)
loss=criterion(output,target)
loss.backward()
optimizer.step()
# End Torch optimizer
#==================================
Net(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (linear1): Linear(in_features=576, out_features=120, bias=True)
  (linear2): Linear(in_features=120, out_features=6, bias=True)
)
0 torch.Size([6, 1, 3, 3])
1 torch.Size([6])
2 torch.Size([16, 6, 3, 3])
3 torch.Size([16])
4 torch.Size([120, 576])
5 torch.Size([120])
6 torch.Size([6, 120])
7 torch.Size([6])
tensor([[-0.0427,  0.2950, -0.0963, -0.0848,  0.0039, -0.1088],
        [-0.0443,  0.3183, -0.0833, -0.0584, -0.1068, -0.0729]],
       grad_fn=<AddmmBackward>)
torch.Size([2, 6])
tensor(1.1637, grad_fn=<MseLossBackward>)
<MseLossBackward object at 0x7fe6a874c978>
<AddmmBackward object at 0x7fe6a874c8d0>
<AccumulateGrad object at 0x7fe6a874c978>
net.conv1.bias.grad before backward: None
net.conv1.bias.grad after backward: tensor([-0.0616,  0.0531,  0.0373,  0.0389,  0.0173, -0.0261])

TRAINING A CLASSIFIER

path='./net.pth'
torch.save(net.state_dict(),path) # save only the parameters, not the module
net2=Net()
net2.load_state_dict(torch.load(path))
<All keys matched successfully>