Commit f223648

Author: Svetlana Karslioglu
Merge branch 'main' into adversarial-example-vectorization
Parents: 291241d + dcb527d

3 files changed: +31 -21 lines

.jenkins/metadata.json (+3)

@@ -22,6 +22,9 @@
         "duration": 320,
         "needs": "gpu.nvidia.small.multi"
     },
+    "beginner_source/blitz/data_parallel_tutorial.py": {
+        "needs": "gpu.nvidia.small.multi"
+    },
     "intermediate_source/model_parallel_tutorial.py": {
         "needs": "gpu.nvidia.small.multi"
     },
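For context on why this new entry asks for a multi-GPU runner: the blitz data-parallel tutorial wraps its model in nn.DataParallel, which only exercises more than one device when several GPUs are visible. A minimal sketch of that pattern (the toy model below is illustrative, not taken from the tutorial or this commit):

import torch
from torch import nn

# Hypothetical toy model, used only to show the DataParallel wrapping step.
model = nn.Linear(16, 4)

if torch.cuda.device_count() > 1:
    # nn.DataParallel splits each input batch across all visible GPUs
    # and gathers the outputs back on the default device.
    model = nn.DataParallel(model)

device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)
out = model(torch.randn(8, 16).to(device))
print(out.shape)  # torch.Size([8, 4])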

conf.py (+5)

@@ -33,6 +33,8 @@
 sys.path.insert(0, os.path.abspath('./.jenkins'))
 import pytorch_sphinx_theme
 import torch
+import numpy
+import gc
 import glob
 import random
 import shutil
@@ -87,9 +89,12 @@
 # -- Sphinx-gallery configuration --------------------------------------------

 def reset_seeds(gallery_conf, fname):
+    torch.cuda.empty_cache()
     torch.manual_seed(42)
     torch.set_default_device(None)
     random.seed(10)
+    numpy.random.seed(10)
+    gc.collect()

 sphinx_gallery_conf = {
     'examples_dirs': ['beginner_source', 'intermediate_source',
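reset_seeds is a Sphinx-Gallery reset hook (it takes the gallery_conf, fname pair); the added lines clear the CUDA allocator cache, reseed NumPy alongside torch and random, and force a garbage-collection pass between tutorials. The diff does not show how the hook is registered; a typical wiring, sketched here with placeholder values for everything except 'reset_modules', would look like:

# Sketch only: register the hook so Sphinx-Gallery calls it around each example.
# The other keys are placeholders, not the full configuration from conf.py.
sphinx_gallery_conf = {
    'examples_dirs': ['beginner_source', 'intermediate_source'],
    'gallery_dirs': ['beginner', 'intermediate'],
    'reset_modules': ('matplotlib', reset_seeds),  # built-in resetter plus the custom hook
}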

intermediate_source/seq2seq_translation_tutorial.py (+23 -21)

@@ -441,20 +441,20 @@ def forward_step(self, input, hidden):
 # :alt:
 #
 #
-# Bahdanau attention, also known as additive attention, is a commonly used 
-# attention mechanism in sequence-to-sequence models, particularly in neural 
-# machine translation tasks. It was introduced by Bahdanau et al. in their 
-# paper titled `Neural Machine Translation by Jointly Learning to Align and Translate <https://arxiv.org/pdf/1409.0473.pdf>`__. 
-# This attention mechanism employs a learned alignment model to compute attention 
-# scores between the encoder and decoder hidden states. It utilizes a feed-forward 
+# Bahdanau attention, also known as additive attention, is a commonly used
+# attention mechanism in sequence-to-sequence models, particularly in neural
+# machine translation tasks. It was introduced by Bahdanau et al. in their
+# paper titled `Neural Machine Translation by Jointly Learning to Align and Translate <https://arxiv.org/pdf/1409.0473.pdf>`__.
+# This attention mechanism employs a learned alignment model to compute attention
+# scores between the encoder and decoder hidden states. It utilizes a feed-forward
 # neural network to calculate alignment scores.
 #
-# However, there are alternative attention mechanisms available, such as Luong attention, 
-# which computes attention scores by taking the dot product between the decoder hidden 
-# state and the encoder hidden states. It does not involve the non-linear transformation 
+# However, there are alternative attention mechanisms available, such as Luong attention,
+# which computes attention scores by taking the dot product between the decoder hidden
+# state and the encoder hidden states. It does not involve the non-linear transformation
 # used in Bahdanau attention.
 #
-# In this tutorial, we will be using Bahdanau attention. However, it would be a valuable 
+# In this tutorial, we will be using Bahdanau attention. However, it would be a valuable
 # exercise to explore modifying the attention mechanism to use Luong attention.

 class BahdanauAttention(nn.Module):
@@ -467,7 +467,7 @@ def __init__(self, hidden_size):
     def forward(self, query, keys):
         scores = self.Va(torch.tanh(self.Wa(query) + self.Ua(keys)))
         scores = scores.squeeze(2).unsqueeze(1)
-
+
         weights = F.softmax(scores, dim=-1)
         context = torch.bmm(weights, keys)

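The tutorial text above leaves Luong attention as an exercise. A minimal sketch of that dot-product variant, written to match the (query, keys) shapes BahdanauAttention uses in this file; the class name and code are illustrative, not part of the commit:

import torch
from torch import nn
import torch.nn.functional as F

class LuongDotAttention(nn.Module):
    """Dot-product (Luong) attention: no learned alignment network."""
    def forward(self, query, keys):
        # query: (batch, 1, hidden) decoder hidden state
        # keys:  (batch, seq_len, hidden) encoder outputs
        scores = torch.bmm(query, keys.transpose(1, 2))  # (batch, 1, seq_len)
        weights = F.softmax(scores, dim=-1)
        context = torch.bmm(weights, keys)               # (batch, 1, hidden)
        return context, weights

Because it returns the same (context, weights) pair, it should drop into the decoder in place of BahdanauAttention without further changes.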
@@ -605,9 +605,9 @@ def get_dataloader(batch_size):
 # ``teacher_forcing_ratio`` up to use more of it.
 #

-def train_epoch(dataloader, encoder, decoder, encoder_optimizer, 
+def train_epoch(dataloader, encoder, decoder, encoder_optimizer,
                 decoder_optimizer, criterion):
-
+
     total_loss = 0
     for data in dataloader:
         input_tensor, target_tensor = data
@@ -617,7 +617,7 @@ def train_epoch(dataloader, encoder, decoder, encoder_optimizer,

         encoder_outputs, encoder_hidden = encoder(input_tensor)
         decoder_outputs, _, _ = decoder(encoder_outputs, encoder_hidden, target_tensor)
-
+
         loss = criterion(
             decoder_outputs.view(-1, decoder_outputs.size(-1)),
             target_tensor.view(-1)
@@ -628,7 +628,7 @@ def train_epoch(dataloader, encoder, decoder, encoder_optimizer,
         decoder_optimizer.step()

         total_loss += loss.item()
-
+
     return total_loss / len(dataloader)


@@ -671,7 +671,7 @@ def train(train_dataloader, encoder, decoder, n_epochs, learning_rate=0.001,
     plot_losses = []
     print_loss_total = 0  # Reset every print_every
     plot_loss_total = 0  # Reset every plot_every
-
+
     encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
     decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)
     criterion = nn.NLLLoss()
@@ -680,7 +680,7 @@ def train(train_dataloader, encoder, decoder, n_epochs, learning_rate=0.001,
         loss = train_epoch(train_dataloader, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion)
         print_loss_total += loss
         plot_loss_total += loss
-
+
         if epoch % print_every == 0:
             print_loss_avg = print_loss_total / print_every
             print_loss_total = 0
@@ -691,7 +691,7 @@ def train(train_dataloader, encoder, decoder, n_epochs, learning_rate=0.001,
             plot_loss_avg = plot_loss_total / plot_every
             plot_losses.append(plot_loss_avg)
             plot_loss_total = 0
-
+
     showPlot(plot_losses)

 ######################################################################
@@ -736,7 +736,7 @@ def evaluate(encoder, decoder, sentence, input_lang, output_lang):

         _, topi = decoder_outputs.topk(1)
         decoded_ids = topi.squeeze()
-
+
         decoded_words = []
         for idx in decoded_ids:
             if idx.item() == EOS_token:
@@ -793,7 +793,9 @@ def evaluateRandomly(encoder, decoder, n=10):

 ######################################################################
 #
-
+# Set dropout layers to ``eval`` mode
+encoder.eval()
+decoder.eval()
 evaluateRandomly(encoder, decoder)


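The eval() calls added in the hunk above are the one functional change in this file: they put every dropout layer into evaluation mode so dropout no longer perturbs activations while translations are sampled. A standalone sketch of the behavior being switched off (not taken from the tutorial):

import torch
from torch import nn

torch.manual_seed(0)
drop = nn.Dropout(p=0.5)
x = torch.ones(2, 4)

drop.train()    # training mode: roughly half the entries are zeroed, survivors scaled by 2
print(drop(x))

drop.eval()     # what encoder.eval() / decoder.eval() apply to every submodule
print(drop(x))  # identity pass-through, deterministic output

If training were to continue afterwards, matching encoder.train() / decoder.train() calls would be needed to re-enable dropout.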
@@ -807,7 +809,7 @@ def evaluateRandomly(encoder, decoder, n=10):
 # at each time step.
 #
 # You could simply run ``plt.matshow(attentions)`` to see attention output
-# displayed as a matrix. For a better viewing experience we will do the 
+# displayed as a matrix. For a better viewing experience we will do the
 # extra work of adding axes and labels:
 #

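As the comment notes, plt.matshow is the quick way to inspect the attention weights before the tutorial adds axes and word labels. A minimal sketch with hypothetical weights (the tutorial itself plots the weights returned by its evaluate function):

import torch
import matplotlib.pyplot as plt

# Hypothetical attention matrix: one row per output word, one column per input word.
attentions = torch.softmax(torch.randn(6, 8), dim=-1)

plt.matshow(attentions.numpy())
plt.colorbar()
plt.show()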
