class BiLSTMNERTagger(nn.Module):
    def __init__(self, emb_dim, hid_dim, n_layers, token_vocab_size, tag_vocab_size):
        super().__init__()
        self.embedding = nn.Embedding(token_vocab_size, emb_dim, padding_idx=0)
        self.rnn = nn.LSTM(emb_dim, hid_dim, num_layers=n_layers, batch_first=True, bidirectional=True)
        self.fc = nn.Linear(2 * hid_dim, tag_vocab_size)
        self.softmax = nn.Softmax()
        self.dropout = nn.Dropout(0.1)
        self.hidden = None

    def forward(self, words, words_len):
        # YOUR CODE HERE
        out = self.dropout(self.embedding(words))
        out = nn.utils.rnn.pack_padded_sequence(out, words_len.cpu().numpy(), enforce_sorted=False, batch_first=True)
        out, (hidden, cell) = self.rnn(out)
        out = hidden[-1, :, :]
        self.hidden = hidden
        out = self.fc(out)
        prediction = self.softmax(out)
        return prediction
torch.manual_seed(42)

model = BiLSTMNERTagger(
    emb_dim=20,
    hid_dim=64,
    n_layers=2,
    token_vocab_size=len(tokens_vocab),
    tag_vocab_size=len(ner_vocab),
)
model = model.to(device)
This is the error I get:
115 def forward(self, input: Tensor) -> Tensor:
--> 116 return F.linear(input, self.weight, self.bias)
117
118 def extra_repr(self) -> str:
RuntimeError: mat1 and mat2 shapes cannot be multiplied (32x64 and 128x10)
So the issue is that I thought that in a bidirectional model, the hidden vector would double in size after each word passes through my LSTM layer, but that doesn't seem to be the case. Because of that, I cannot feed it to my linear layer.
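For context, here is a minimal standalone sketch of the shapes I'm seeing (the batch size of 32 and sequence length of 5 are made up; the LSTM configuration mirrors my model above, with no packing, embedding, or dropout):

import torch
import torch.nn as nn

# Same LSTM configuration as my model: emb_dim=20, hid_dim=64, n_layers=2, bidirectional
rnn = nn.LSTM(20, 64, num_layers=2, batch_first=True, bidirectional=True)
x = torch.randn(32, 5, 20)  # (batch, seq_len, emb_dim) -- dummy input

output, (hidden, cell) = rnn(x)
print(output.shape)      # torch.Size([32, 5, 128]) -> per-token outputs, both directions concatenated (2 * hid_dim)
print(hidden.shape)      # torch.Size([4, 32, 64])  -> (num_layers * num_directions, batch, hid_dim)
print(hidden[-1].shape)  # torch.Size([32, 64])     -> the slice I pass to self.fc

So hidden[-1] is (32, 64), which matches the mat1 shape in the error, while self.fc expects 128 input features.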
Could you please tell me what the issue is, and how to fix it?