#%% [markdown]
# in the name of God the most compassionate the most merciful
# Pytorch basics: an introduction to tensors
#%%
import torch
import numpy as np


# Here we are going to see what torch is and how it is similar to numpy!
# Basically, torch is a framework used for training and working with deep neural networks.
# What is Pytorch then?
# PyTorch is a Python package that provides two high-level features:
# 1. Tensor computation (like NumPy) with strong GPU acceleration
# 2. Deep neural networks built on a tape-based autograd system
# Basically, PyTorch is the Python wrapper around torch!
# You can reuse your favorite Python packages such as NumPy, SciPy and Cython to extend PyTorch when needed.
# In this section we are going to have an introduction to torch as a NumPy replacement for working with tensors.
# Let's see how we can use PyTorch in this sense!
#%%
# What is a tensor?
# A tensor is a general name given to arrays/vectors/matrices.
# A vector is simply a 1-dimensional tensor.
# A matrix is simply a 2-dimensional tensor.
# And so on.
# So if we have a matrix that has 3 channels (RGB for example), that is a 3d tensor.
# I guess you get the idea: whenever we talk about a tensor, remember it's just another name
# for an array (we call a 1d array a vector and a 2d array a matrix, so you can see a tensor as an n-dimensional array!).
# Tensors are the fundamental data structure used for neural networks, so in Pytorch we will be dealing with
# them a lot! (nearly all of the time!)

# creating new tensors
# create tensors of various sizes filled with zeros, ones, random values, and uninitialized ('empty') values
t = torch.Tensor(1)  # legacy constructor: an uninitialized tensor with 1 element
t1_zeros = torch.zeros(size=(5,))
t1_ones = torch.ones(size=(2, 2))
t1_rand = torch.rand(size=(2, 2, 2))
t1_empt = torch.empty(size=(2, 2, 2))


print(f'zeros: {t1_zeros}')
print(f'ones: {t1_ones}')
print(f'rand: {t1_rand}')
print(f'empt: {t1_empt}')
print(t)
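
# To make the "tensor = n-dimensional array" idea concrete, here is a small illustrative check
# (not part of the original walkthrough) of the number of dimensions and the shape of the
# tensors we just created:
print(f'zeros: dim={t1_zeros.dim()}, shape={tuple(t1_zeros.shape)}')   # 1d tensor (a vector)
print(f'ones:  dim={t1_ones.dim()}, shape={tuple(t1_ones.shape)}')     # 2d tensor (a matrix)
print(f'rand:  dim={t1_rand.dim()}, shape={tuple(t1_rand.shape)}')     # 3d tensor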

# What if we want our tensors to hold specific data? Easy, we can pass our data as a list,
# or as a numpy array!
# Here we are creating a tensor from a list of numbers (1, 2, 3, 4):
data_1 = torch.tensor([1, 2, 3, 4])
print(f'{data_1}')

# using a numpy array
array_np = np.random.rand(4)
data_2 = torch.tensor(array_np)
print(f'np: {array_np}')
print(f'{data_2}')


# There is a difference in the number of printed decimals; we can use print options to get what we want!
torch.set_printoptions(precision=8)
print(f'np: {array_np}')
print(f'{data_2}')

# Let's go back to the default! We can use the 'default' profile for that.
# As it turns out, we can use other profiles ('short', 'full') as well.
torch.set_printoptions(profile='default')
print(f'np: {array_np}')
print(f'{data_2}')

# By the way, we can also directly create a new tensor from a numpy array, like this:
tensor_from_numpy = torch.from_numpy(array_np)
print(f'tensor_from_numpy: {tensor_from_numpy}')


# Interestingly, we can get the underlying numpy array from a tensor using the .numpy() method!
print(f'data_2(torch tensor): {data_2}')
print(f'data_2.numpy()(converted to numpy!): {data_2.numpy()}')
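
# A small illustrative check (not in the original walkthrough): torch.from_numpy() shares
# memory with the source array, while torch.tensor() makes its own copy. The same sharing
# applies to .numpy() on a CPU tensor.
array_np[0] = -1.0                                                             # modify the numpy array in place
print(f'tensor_from_numpy sees the change : {tensor_from_numpy[0].item()}')    # -1.0
print(f'data_2 (a copy) does not          : {data_2[0].item()}')               # keeps the old value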
#%%

# Ok, so we just learnt how to create tensors. In the beginning we said we can leverage the GPU,
# so let's see how we can do that! But before that, we need to check if GPU support is available to us!
print(f'is GPU enabled? : {torch.cuda.is_available()}')
# So, as it turns out, every tensor lives on one of two kinds of device: it is either on the CPU or on a GPU.
# The tensors we created so far live on the CPU. To see on which device a tensor was created,
# we can simply use the device property!
print(f'data_2 is created on : {data_2.device.type}')

# So how do we move a new or an existing tensor from one device to another?
# We can easily do that using the to(), cpu() or cuda() methods.
# Here is how we do it.
# puts the tensor on the gpu! (this requires a CUDA-capable GPU)
data_2 = data_2.cuda()
print(f'data_2 device : {data_2.device.type}')

# puts the tensor back on the cpu!
data_2 = data_2.cpu()
print(f'data_2 device : {data_2.device.type}')

# We can do better and, based on the system, decide whether a tensor should use the gpu or not!
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data_2 = data_2.to(device)
print(f'data_2 device : {data_2.device.type}')

# Now, if we want to create a tensor on a specific device at definition time, we do the following.
# The 0 denotes the specific gpu on the system. If you have only 1 gpu, you can simply
# use 'cuda'; if you want to use a specific gpu, you use its respective index!
data_3 = torch.rand(size=(2, 2), device='cuda:0')  # or device=0
print(f'data_3 device: {data_3.device}')
# How do we find out how many gpus are available on the system? Easy, we do:
from torch import cuda
gpu_count = cuda.device_count()
print(f'all gpus available : {gpu_count}')
print(f'gpu name : {cuda.get_device_name(0)}')
print(f'gpu capability : {cuda.get_device_capability(0)}')
# you can find all the interesting methods for your use here: https://pytorch.org/docs/stable/cuda.html

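# An illustrative aside (not in the original walkthrough): operations require their operands
# to live on the same device; mixing a cpu tensor with a cuda tensor raises a RuntimeError.
if torch.cuda.is_available():
    cpu_tensor = torch.ones(2)                 # lives on the cpu
    gpu_tensor = torch.ones(2, device='cuda')  # lives on the gpu
    try:
        cpu_tensor + gpu_tensor
    except RuntimeError as e:
        print(f'mixing devices failed: {e}')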

#%%

# Ok, now what if we have a tensor that is already on a specific device (it can be the cpu or a gpu)
# and also has a specific datatype? (All tensors have a dtype; the default is float32!)
# In such cases, we can simply use the tensor.new_* methods. Let's see:
tensor_special = torch.rand(size=(2, 2), device='cuda', dtype=torch.float16)
print(tensor_special)

# Now let's create a new tensor from this one that is both on cuda and uses float16!
new_tensor_ones = tensor_special.new_ones(size=(2, 2))
print(new_tensor_ones)
# We have other options such as new_tensor, new_empty, new_full, new_zeros.
new_tensor_full = tensor_special.new_full(size=(2, 2), fill_value=.3)
print(new_tensor_full)
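# A quick illustrative check (not in the original walkthrough) that the new_* factory methods
# inherit the dtype and the device of the source tensor:
print(f'new_tensor_full dtype: {new_tensor_full.dtype}, device: {new_tensor_full.device}')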


#%%
# Now that we've learnt how to create a new tensor, initialize it, and specify different dtypes, devices, etc,
# let's work on addition, subtraction, multiplication, negation, transpose, and the like.
# For adding two tensors, their shapes must be broadcastable:
# either one of them is effectively a scalar, or their trailing dimensions must match (or be 1).
t1 = torch.tensor([1., 2., 3., 4.])
t2 = torch.tensor([[10., 10., 10., 10.], [10., 10., 10., 10.]])
t3 = torch.tensor([0.5])
print(f't1 = {t1}')
print(f't2 = {t2}')
print(f't3 = {t3}')
print(f't1 + t2 =\n {t1 + t2}')
print(f't1 + t3 =\n {t1 + t3}')
#%%
# Adding and subtracting is really obvious, but when it comes to multiplication we have several options:
# mm, matmul, bmm.
# Basically mm and matmul are kind of the same, they both do matrix multiplication; the difference is
# that matmul does broadcasting as well, while mm doesn't.
# It is recommended to use mm, because if the dimensions don't match you'll face an error and know where to fix it!
# However, with matmul, when the dimensions don't match it may broadcast and thus not give you an error, while
# the result may very well be wrong! So to be on the safe side, always try to use mm!
# bmm is mm with batches. Basically, if you want to multiply batches of two tensors,
# you can use bmm. We will see how this works later on, so don't worry about it!
# (A quick preview of bmm is sketched right below.)
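
# A minimal preview sketch of bmm (not part of the original walkthrough; the names batch_a
# and batch_b are just for this example). Both inputs must be 3d, shaped (batch, n, m) and
# (batch, m, p); the result has shape (batch, n, p).
batch_a = torch.rand(size=(4, 2, 3))      # 4 samples of 2x3 matrices
batch_b = torch.rand(size=(4, 3, 5))      # 4 samples of 3x5 matrices
print(f'bmm output shape: {torch.bmm(batch_a, batch_b).shape}')   # torch.Size([4, 2, 5])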

# torch.matmul(tensor1, tensor2, out=None) -> Tensor
# Matrix product of two tensors.
# The behavior depends on the dimensionality of the tensors as follows:
# If both tensors are 1-dimensional, the dot product (scalar) is returned.
# If both arguments are 2-dimensional, the matrix-matrix product is returned.
# If the first argument is 1-dimensional and the second argument is 2-dimensional,
# a 1 is prepended to its dimension for the purpose of the matrix multiply.
# After the matrix multiply, the prepended dimension is removed.
# If the first argument is 2-dimensional and the second argument is 1-dimensional,
# the matrix-vector product is returned.
# If both arguments are at least 1-dimensional and at least one argument is N-dimensional
# (where N > 2), then a batched matrix multiply is returned. If the first argument is
# 1-dimensional, a 1 is prepended to its dimension for the purpose of the batched matrix
# multiply and removed after. If the second argument is 1-dimensional, a 1 is appended to
# its dimension for the purpose of the batched matrix multiply and removed after.
# The non-matrix (i.e. batch) dimensions are broadcast (and thus must be broadcastable).
# For example, if tensor1 is a (j x 1 x n x m) tensor and tensor2 is a (k x m x p) tensor,
# out will be a (j x k x n x p) tensor.
# Note:
# The 1-dimensional dot product version of this function does not support an out parameter.

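# A small sketch of the broadcasting rule quoted above, using concrete shapes (mat_a and
# mat_b are illustrative names): (j x 1 x n x m) matmul (k x m x p) -> (j x k x n x p).
mat_a = torch.rand(size=(2, 1, 3, 4))     # j=2, n=3, m=4
mat_b = torch.rand(size=(5, 4, 6))        # k=5, p=6
print(f'broadcast matmul shape: {torch.matmul(mat_a, mat_b).shape}')   # torch.Size([2, 5, 3, 6])
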
data_1 = torch.rand(size=(2, 3))
data_2 = torch.rand(size=(2,))
# pay careful attention to the dimensions and how the multiplication is carried out!
# data_2 * data_1
data_3 = torch.matmul(data_2, data_1)
print(f'data_2(2,) * data_1(2x3): {data_3}')
# As you just saw, data_2 was broadcast so it could be multiplied by data_1:
# data_2 was 1d, so a 1 was prepended to treat it as (1,2); the dimensions between the two
# tensors then line up, and after the multiply the prepended dimension is removed,
# giving an output of shape (3,).
# Note the .t() here, which takes the transpose of data_1:
data_4 = torch.matmul(data_1.t(), data_2)
print(f'data_1.t()(3x2) * data_2(2,): {data_4}')
# In this example data_2 (1d) is consumed as the vector of a matrix-vector product,
# as if it were a (2x1) column, so the dimensions between the tensors are valid.
# As you can see, the output is again a tensor of shape (3,).

# we can do all of these using mm!
print('using torch.mm:')
# mm is short for matrix multiply, so all dimensions must be specified!
# Unlike matmul, there is no broadcasting going on here! We must specify all dimensions ourselves;
# that's why we use .view() to reshape our tensor into the form it needs for a proper multiplication!
data_3_2 = torch.mm(data_2.view(1, 2), data_1)
print(f'data_2(1x2) * data_1(2x3): {data_3_2}')
# again using .t() for the transpose:
data_4_2 = torch.mm(data_1.t(), data_2.view(2, 1))
print(f'data_1.t()(3x2) * data_2(2x1): {data_4_2}')


# If you want to know more about broadcasting in Pytorch, read more here:
# https://pytorch.org/docs/stable/notes/broadcasting.html#broadcasting-semantics

# In short, if a PyTorch operation supports broadcast, then its Tensor arguments
# can be automatically expanded to be of equal sizes (without making copies of the data).

# Two tensors are "broadcastable" if the following rules hold:
# Each tensor has at least one dimension (like what we just saw in our example above!)
# When iterating over the dimension sizes, starting at the trailing dimension,
# the dimension sizes must either be equal, one of them is 1, or one of them does not exist.
x = torch.empty(5, 7, 3)
y = torch.empty(5, 7, 3)
# same shapes are always broadcastable (i.e. the above rules always hold)

x = torch.empty((0,))
y = torch.empty(2, 2)
# x and y are not broadcastable, because x does not have at least 1 dimension

# can line up trailing dimensions
x = torch.empty(5, 3, 4, 1)
y = torch.empty(   3, 1, 1)
# x and y are broadcastable:
# 1st trailing dimension: both have size 1
# 2nd trailing dimension: y has size 1
# 3rd trailing dimension: x size == y size
# 4th trailing dimension: y dimension doesn't exist

# but:
x = torch.empty(5, 2, 4, 1)
y = torch.empty(   3, 1, 1)
# x and y are not broadcastable, because in the 3rd trailing dimension 2 != 3

# Backwards compatibility
# Prior versions of PyTorch allowed certain pointwise functions to execute on
# tensors with different shapes, as long as the number of elements in each tensor was equal.
# The pointwise operation would then be carried out by viewing each tensor as 1-dimensional.
# PyTorch now supports broadcasting and the "1-dimensional" pointwise behavior is considered
# deprecated and will generate a Python warning in cases where tensors are not broadcastable,
# but have the same number of elements.
# Note that the introduction of broadcasting can cause backwards incompatible changes in the
# case where two tensors do not have the same shape, but are broadcastable and have the same
# number of elements. For example:
# torch.add(torch.ones(4, 1), torch.randn(4))
# would previously produce a Tensor with size torch.Size([4, 1]), but now produces a Tensor
# with size torch.Size([4, 4]). In order to help identify cases in your code where backwards
# incompatibilities introduced by broadcasting may exist, you may set
# torch.utils.backcompat.broadcast_warning.enabled to True, which will generate a Python
# warning in such cases. For example:
# torch.utils.backcompat.broadcast_warning.enabled = True
# torch.add(torch.ones(4, 1), torch.ones(4))
# __main__:1: UserWarning: self and other do not have the same shape, but are broadcastable,
# and have the same number of elements.
# Changing behavior in a backwards incompatible manner to broadcasting rather than viewing as
# 1-dimensional.

# Now that we have multiplication covered, let's talk about how to change the shape of our tensors.
# For this we have several options.
# x.reshape(): this is like what we have in numpy, but there is a catch here:
# sometimes reshape just changes the shape and returns the very same data (x),
# but sometimes it returns a 'clone' of the data because of some internal operations
# (it copies the data to some other memory location and thus returns a clone!).
# As it is explained in the docs:
# Returns a tensor with the same data and number of elements as input,
# but with the specified shape. When possible, the returned tensor will
# be a view of input. Otherwise, it will be a 'copy'. Contiguous inputs and
# inputs with compatible strides can be reshaped without copying, but you
# should not depend on the copying vs. viewing behavior.
#
# view(): This is what we should be using nearly 100% of the time! view always returns the same
# data (x). It works just like reshape, but with the benefit of returning the very same data!
# (there is a note that we will get to later when we deal with rnns and lstms!)
# As we see in the docs (https://pytorch.org/docs/stable/tensors.html#torch.Tensor.view):
# Returns a new tensor with the same data as the self tensor but of a different shape.
# The returned tensor shares the same data and must have the same number of elements,
# but may have a different size.
# For a tensor to be 'viewed', the new view size must be 'compatible' with its
# original size and stride, i.e. each new view dimension must either:
# i.  be a subspace of an original dimension, or
# ii. only span across original dimensions d, d+1, ..., d+k that satisfy the following
#     contiguity-like condition: for all i = 0, ..., k-1,
#     stride[i] = stride[i+1] * size[i+1]
# Otherwise, contiguous() needs to be called before the tensor can be viewed.
# See also: reshape(), which returns a view if the shapes are compatible, and copies
# (equivalent to calling contiguous()) otherwise.

# resize_(): as the name implies, it 'physically' resizes the tensor 'inplace' (note the '_',
# which denotes an inplace operation). There is a catch here as well:
# if the new specified dimensions result in a larger tensor, new uninitialized elements will
# appear; similarly, if the new dimensions are smaller than the actual dimensions, data will be
# lost!
# So the best option, as you can see, is to use view, unless you specifically intend on using the
# other two and know their pitfalls, in which case it's fine!
# In our introductory tutorial, we will always be using view!
# A short sketch of the difference between view and reshape follows below.
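
# A minimal sketch (assuming a fresh, contiguous tensor; the names base/viewed/transposed are
# just for this example) of how view, reshape and contiguous interact. Transposing a 2d tensor
# makes it non-contiguous, so view() fails on it while reshape() silently copies instead.
base = torch.arange(6)             # contiguous: view works and shares memory
viewed = base.view(2, 3)
viewed[0, 0] = 100
print(f'base[0] after editing the view: {base[0]}')   # 100, same underlying storage

transposed = viewed.t()            # non-contiguous
try:
    transposed.view(6)             # raises: the strides are not view-compatible
except RuntimeError as e:
    print(f'view failed: {e}')
print(transposed.contiguous().view(6))   # works after an explicit copy
print(transposed.reshape(6))             # reshape does the copy for us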
#%%
# inplace operations
# Tensors also provide inplace versions of some operations such as mul, add, abs, cos, etc.
# These inplace operations are denoted by an underscore '_' at the end:
# add_
# mul_
a = torch.tensor([1.])
print(f'a = {a}')
a.mul_(2)
print(f'a*2 = {a}')
a.div_(2)
print(f'a/2 = {a}')
a.add_(2)
print(f'a+2 = {a}')
a.sub_(2)
print(f'a-2 = {a}')
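
# For comparison (an illustrative aside, not in the original walkthrough): the non-inplace
# counterparts return a new tensor and leave the original untouched.
a_doubled = a.mul(2)
print(f'a = {a}, a.mul(2) = {a_doubled}')   # a is unchanged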
#%%
# Now let's create a simple hidden layer with a weight, a bias, and an input.
# Let's implement a simple 1-layer and then 2-layer neural network!
# Don't worry, here we will keep it simple!
# Our network has 5 neurons in its hidden layer, gets an input with 7 features
# (here a batch of 2 samples), and produces 1 output per sample.
# Let's write the calculation for 1 step only (forward propagation only):
inputs = torch.randn(2, 7)
W = torch.rand(size=(7, 5))
b = torch.rand(size=(5,))
W_output = torch.rand(size=(5, 1))
b_output = torch.rand(size=(1,))

def sigmoid(x):
    return 1 / (1 + torch.exp(-x))

output = sigmoid(torch.mm(inputs, W) + b)
output = sigmoid(torch.mm(output, W_output) + b_output)
print(output)
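
# torch.sigmoid is the built-in equivalent of our hand-written sigmoid; as an illustrative
# check, redoing the same forward pass with it gives the same result.
hidden = torch.sigmoid(torch.mm(inputs, W) + b)
output_builtin = torch.sigmoid(torch.mm(hidden, W_output) + b_output)
print(f'same result with torch.sigmoid: {torch.allclose(output, output_builtin)}')   # True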

#%%