@@ -219,7 +219,9 @@ def forward(self, x):
219
219
# The simplest update rule used in practice is Stochastic Gradient
220
220
# Descent (SGD):
221
221
#
222
- # ``weight = weight - learning_rate * gradient``
222
+ # .. code:: python
223
+ #
224
+ # weight = weight - learning_rate * gradient
223
225
#
224
226
# We can implement this using simple Python code:
225
227
#
@@ -233,18 +235,21 @@ def forward(self, x):
233
235
# update rules such as SGD, Nesterov-SGD, Adam, RMSProp, etc.
234
236
# To enable this, we built a small package, ``torch.optim``, that
235
237
# implements all these methods. Using it is very simple:
236
-
237
- import torch.optim as optim
238
-
239
- # create your optimizer
240
- optimizer = optim.SGD(net.parameters(), lr=0.01)
241
-
242
- # in your training loop:
243
- optimizer.zero_grad()  # zero the gradient buffers
244
- output = net(input)
245
- loss = criterion(output, target)
246
- loss.backward()
247
- optimizer.step()  # Does the update
238
+ #
239
+ # .. code:: python
240
+ #
241
+ # import torch.optim as optim
242
+ #
243
+ # # create your optimizer
244
+ # optimizer = optim.SGD(net.parameters(), lr=0.01)
245
+ #
246
+ # # in your training loop:
247
+ # optimizer.zero_grad() # zero the gradient buffers
248
+ # output = net(input)
249
+ # loss = criterion(output, target)
250
+ # loss.backward()
251
+ # optimizer.step() # Does the update
252
+ #
248
253
249
254
250
255
###############################################################
0 commit comments