import torch
import torch.nn as nn
import numpy as np
import random
import matplotlib.pyplot as plt
# Check for GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")Using device: cpu
Implementing an alphabet generator using an RNN. We will first build the model in PyTorch and then implement it in NumPy.
inputs = np.array([
["A","B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R","S","T","U","V","W","X","Y","Z"],
["Z","Y","X","W","V","U","T","S","R","Q","P","O","N","M","L","K","J","I","H","G","F","E","D","C","B","A"],
["B","D","F","H","J","L","N","P","R","T","V","X","Z","A","C","E","G","I","K","M","O","Q","S","U","W","Y"],
["M","N","O","P","Q","R","S","T","U","V","W","X","Y","Z","A","B","C","D","E","F","G","H","I","J","K","L"],
["H","G","F","E","D","C","B","A","L","K","J","I","P","O","N","M","U","T","S","R","Q","X","W","V","Z","Y"]
])
expected = np.array([
["B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R","S","T","U","V","W","X","Y","Z","A"],
["A","B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R","S","T","U","V","W","X","Y","Z"],
["C","E","G","I","K","M","O","Q","S","U","W","Y","A","B","D","F","H","J","L","N","P","R","T","V","X","Z"],
["N","O","P","Q","R","S","T","U","V","W","X","Y","Z","A","B","C","D","E","F","G","H","I","J","K","L","M"],
["I","J","K","L","M","N","O","P","Q","R","S","T","U","V","W","X","Y","Z","A","B","C","D","E","F","G","H"]
])
# Define input and target tensors
import string
# 1. Create index mapping
char_to_ix = {char: i for i, char in enumerate(string.ascii_uppercase)}
# 2. Convert numpy array of chars to numpy array of integers
# using np.vectorize to apply the map to the whole array at once
input_indices = np.vectorize(char_to_ix.get)(inputs)
target_indices = np.vectorize(char_to_ix.get)(expected)
# 3. Convert to PyTorch LongTensor (Long is required for indices)
input_tensor = torch.from_numpy(input_indices).long().to(device)
target_tensor = torch.from_numpy(target_indices).long().to(device)
print("Input tensor shape:", input_tensor.shape)
print("Target tensor shape:", target_tensor.shape)Input tensor shape: torch.Size([5, 26])
Target tensor shape: torch.Size([5, 26])
The shape (5, 26) comes from the input data: 5 training sequences, each 26 characters long.
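As a quick sanity check (a small sketch, not one of the original cells), we can build the reverse mapping and decode the first row of input_tensor back into letters; it should reproduce the plain alphabet.
# Sanity check (sketch): decode the first input sequence back to letters.
ix_to_char = {i: c for c, i in char_to_ix.items()}
decoded = "".join(ix_to_char[int(i)] for i in input_tensor[0])
print(decoded)  # ABCDEFGHIJKLMNOPQRSTUVWXYZ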
class AlphabetRNN(nn.Module):
def __init__(self, input_size, hidden_size, output_size, n_layers=1):
super(AlphabetRNN, self).__init__()
self.hidden_size = hidden_size
self.n_layers = n_layers
self.embedding = nn.Embedding(input_size, hidden_size)
self.rnn = nn.RNN(hidden_size, hidden_size, n_layers, batch_first=True)
self.fc = nn.Linear(hidden_size, output_size)
def forward(self, x, hidden):
embedded = self.embedding(x)
rnn_out, hidden = self.rnn(embedded, hidden)
output = self.fc(rnn_out)
return output, hidden
def init_hidden(self, batch_size):
        return torch.zeros(self.n_layers, batch_size, self.hidden_size).to(device)
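Before training it helps to confirm the shapes the model produces. The following is a minimal sketch (not in the original notebook) that builds a throwaway model with a small hidden size and runs a single forward pass on the input batch.
# Shape check (sketch): throwaway model, one forward pass, no training.
_check_model = AlphabetRNN(26, 16, 26).to(device)
_h0 = _check_model.init_hidden(input_tensor.size(0))
_out, _hn = _check_model(input_tensor, _h0)
print(_out.shape)  # torch.Size([5, 26, 26]) -> (batch, seq_len, n_classes)
print(_hn.shape)   # torch.Size([1, 5, 16])  -> (n_layers, batch, hidden_size)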
# Define training step function
def train_step(model, criterion, optimizer, input_tensor, target_tensor):
model.train()
batch_size = input_tensor.size(0)
hidden = model.init_hidden(batch_size)
optimizer.zero_grad()
output, hidden = model(input_tensor, hidden)
    # output is (batch, seq_len, n_classes), but nn.CrossEntropyLoss expects the
    # class dimension right after the batch dimension, so flatten both tensors.
    loss = criterion(output.reshape(-1, output.size(-1)), target_tensor.reshape(-1))
loss.backward()
optimizer.step()
    return loss.item()
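A note on the loss shapes: the model outputs logits of shape (batch, seq_len, n_classes), while nn.CrossEntropyLoss expects (N, C) logits with integer class targets of shape (N,), which is why train_step flattens both tensors. A small illustration with dummy tensors (the values are random; only the shapes matter):
# Illustration only: dummy tensors with the same shapes as the real batch.
_logits = torch.randn(5, 26, 26)          # (batch, seq_len, n_classes)
_targets = torch.randint(0, 26, (5, 26))  # (batch, seq_len)
_loss = nn.CrossEntropyLoss()(_logits.reshape(-1, 26), _targets.reshape(-1))
print(_loss.item())  # untrained baseline; uniform predictions would give ln(26) ≈ 3.26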
# Define hyperparameters
n_characters = 26
hidden_size = 128
n_layers = 1
learning_rate = 0.002
epochs = 100
# Model, loss, optimizer
model = AlphabetRNN(n_characters, hidden_size, n_characters,
n_layers=n_layers).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
print(model)
AlphabetRNN(
(embedding): Embedding(26, 128)
(rnn): RNN(128, 128, batch_first=True)
(fc): Linear(in_features=128, out_features=26, bias=True)
)
all_losses = []
running_loss = 0.0
model.train()
for it in range(1, epochs + 1):
loss = train_step(
model=model,
optimizer=optimizer,
criterion=criterion,
input_tensor=input_tensor,
target_tensor=target_tensor,
)
running_loss += loss
all_losses.append(loss)
if it % 10 == 0:
print(f"Epoch {it}/{epochs}, Loss: {loss:.4f}")
print("Training finished.")
Epoch 10/100, Loss: 1.7973
Epoch 20/100, Loss: 0.8749
Epoch 30/100, Loss: 0.3988
Epoch 40/100, Loss: 0.1792
Epoch 50/100, Loss: 0.0642
Epoch 60/100, Loss: 0.0268
Epoch 70/100, Loss: 0.0161
Epoch 80/100, Loss: 0.0114
Epoch 90/100, Loss: 0.0089
Epoch 100/100, Loss: 0.0074
Training finished.
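Since matplotlib is already imported and the per-epoch losses are collected in all_losses, we can plot the training curve (a sketch; this plot is not part of the original output):
# Plot the per-epoch training loss collected in all_losses.
plt.figure(figsize=(6, 3))
plt.plot(all_losses)
plt.xlabel("Epoch")
plt.ylabel("Cross-entropy loss")
plt.title("Training loss")
plt.show()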
def forward_hook(name):
def hook(module, inputs, outputs):
print(f"\n๐น {name}")
def _tensor_stats(t: torch.Tensor) -> str:
shape = tuple(t.shape)
dtype = t.dtype
if t.is_floating_point() or t.is_complex():
return f"shape={shape} dtype={dtype} mean={t.mean().item():.4f}"
# integer/bool tensors (e.g., embedding indices)
t_min = t.min().item() if t.numel() else "n/a"
t_max = t.max().item() if t.numel() else "n/a"
return f"shape={shape} dtype={dtype} min={t_min} max={t_max}"
# inputs is a tuple
for i, x in enumerate(inputs):
if torch.is_tensor(x):
print(f" inp[{i}] {_tensor_stats(x)}")
# outputs can be tensor or tuple
if torch.is_tensor(outputs):
print(f" out {_tensor_stats(outputs)}")
elif isinstance(outputs, tuple):
for i, o in enumerate(outputs):
if torch.is_tensor(o):
print(f" out[{i}] {_tensor_stats(o)}")
    return hook

model.embedding.register_forward_hook(forward_hook("Embedding"))
model.rnn.register_forward_hook(forward_hook("RNN"))
model.fc.register_forward_hook(forward_hook("Linear"))
# pick one sequence: shape (1, 26)
x = input_tensor[0:1]
hidden = model.init_hidden(x.size(0))
with torch.no_grad():
output, hidden = model(x, hidden)
🔹 Embedding
RuntimeError: mean(): could not infer output dtype. Input dtype must be either a floating point or complex dtype. Got: Long
Calling .mean() on the embedding layer's input fails because the input indices are a Long tensor; this is exactly why _tensor_stats checks is_floating_point()/is_complex() first and falls back to min/max for integer tensors.
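One detail worth keeping in mind: register_forward_hook returns a handle, so storing the handles lets us detach the hooks once the inspection is done. A small sketch (the handles list is introduced here only for illustration):
# Keep the handles so the hooks can be removed after inspection (sketch).
handles = [
    model.embedding.register_forward_hook(forward_hook("Embedding")),
    model.rnn.register_forward_hook(forward_hook("RNN")),
    model.fc.register_forward_hook(forward_hook("Linear")),
]
with torch.no_grad():
    model(input_tensor[0:1], model.init_hidden(1))  # instrumented forward pass
for h in handles:
    h.remove()  # later forward passes run without printing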
import string
import torch
char_to_ix = {c: i for i, c in enumerate(string.ascii_uppercase)}
ix_to_char = {i: c for i, c in enumerate(string.ascii_uppercase)}
@torch.no_grad()
def generate_next_letters(model, seed, n_next=5, *, temperature=1.0, greedy=True):
"""
seed: list[str] like ["A","B","C"] OR a string like "ABC"
n_next: how many next letters to generate
temperature: >1 more random, <1 more confident (only used if greedy=False)
greedy: if True uses argmax; if False samples from softmax( logits / temperature )
returns: (seed_letters, generated_letters)
"""
model.eval()
device_ = next(model.parameters()).device
# Normalize seed to list of uppercase letters
if isinstance(seed, str):
seed_letters = [c for c in seed.upper() if c.strip() != ""]
else:
seed_letters = [str(c).upper() for c in seed]
if len(seed_letters) == 0:
raise ValueError("Seed must contain at least 1 letter.")
# Map seed -> indices (batch=1)
seed_ix = [char_to_ix[c] for c in seed_letters]
x = torch.tensor(seed_ix, dtype=torch.long,
device=device_).unsqueeze(0) # (1, seq_len)
# Create hidden on the correct device (avoid relying on global `device`)
hidden = torch.zeros(model.n_layers, 1, model.hidden_size, device=device_)
# Feed the whole seed to update hidden state
logits, hidden = model(x, hidden) # logits: (1, seq_len, 26)
generated = []
last_ix = x[:, -1:] # (1, 1) last token of seed
for _ in range(n_next):
step_logits, hidden = model(last_ix, hidden) # (1, 1, 26)
step_logits = step_logits[:, -1, :] # (1, 26)
if greedy:
next_ix = step_logits.argmax(dim=-1) # (1,)
else:
probs = torch.softmax(
step_logits / max(temperature, 1e-8), dim=-1) # (1, 26)
next_ix = torch.multinomial(
probs, num_samples=1).squeeze(-1) # (1,)
next_char = ix_to_char[int(next_ix.item())]
generated.append(next_char)
last_ix = next_ix.unsqueeze(0) # (1, 1)
return seed_letters, generated
# Example:
seed_letters, next5 = generate_next_letters(model, ["A", "B", "I"], n_next=5)
print("Seed:", seed_letters, "Next:", next5)
seed_letters, next5 = generate_next_letters(model, "JKL", n_next=5)
print("Seed:", seed_letters, "Next:", next5)Seed: ['A', 'B', 'I'] Next: ['E', 'F', 'G', 'H', 'I']
Seed: ['J', 'K', 'L'] Next: ['B', 'M', 'N', 'O', 'P']
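The same function also supports stochastic decoding: with greedy=False it samples from the softmax of the logits scaled by temperature, so repeated calls can return different continuations. A usage sketch (outputs vary from run to run):
# Sampling instead of argmax: higher temperature -> more varied continuations.
seed_letters, sampled = generate_next_letters(
    model, "ABC", n_next=5, temperature=0.8, greedy=False)
print("Seed:", seed_letters, "Sampled:", sampled)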
For the NumPy implementation we reuse the same idea: map each letter to an integer index, keep the reverse mapping from index to letter, and expand the indices into one-hot vectors.
def string_to_one_hot(s):
"""
s: string of uppercase letters, e.g. "ABC"
returns: tensor of shape (len(s), 26) with one-hot encoding
"""
indices = [char_to_ix[c] for c in s]
    # NumPy only: build the one-hot matrix without PyTorch
one_hot = np.zeros((len(s), len(string.ascii_uppercase)), dtype=np.float32)
one_hot[np.arange(len(s)), indices] = 1
    return one_hot
string_to_one_hot("ABC")array([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]], dtype=float32)
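Going the other way, the argmax of each one-hot row recovers the index, and ix_to_char turns it back into a letter. A small sketch (the function name one_hot_to_string is introduced here only for illustration):
def one_hot_to_string(one_hot):
    """
    one_hot: array of shape (seq_len, 26)
    returns: the string of uppercase letters recovered via argmax of each row
    """
    indices = one_hot.argmax(axis=1)
    return "".join(ix_to_char[int(i)] for i in indices)

one_hot_to_string(string_to_one_hot("ABC"))  # 'ABC'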