
Generating AI Poetry with an LSTM in PyTorch

This is a small demo from my recent study of PyTorch.

What is an LSTM?

Long Short-Term Memory (LSTM) networks are a variant of recurrent neural networks (RNNs) designed to address the vanishing and exploding gradient problems that traditional RNNs suffer from on long sequences. An LSTM introduces a special memory cell and gating mechanisms that let it capture and model long-range dependencies in sequential data more effectively.

Put simply: an LSTM is an improved recurrent neural network. Its key strength is that it can remember information over longer spans, which makes it very effective for sequence data such as text, time series, and speech.
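
To make the tensor shapes concrete before building anything, here is a minimal sketch of a single forward pass through torch.nn.LSTM; the sizes are arbitrary values picked only for illustration and are not part of the demo that follows.

import torch
import torch.nn as nn

# Toy sizes chosen only for illustration
batch_size, seq_length, input_size, hidden_size, num_layers = 2, 5, 8, 16, 2

lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
x = torch.randn(batch_size, seq_length, input_size)    # (batch, seq, features)
h0 = torch.zeros(num_layers, batch_size, hidden_size)  # initial hidden state
c0 = torch.zeros(num_layers, batch_size, hidden_size)  # initial cell state

out, (hn, cn) = lstm(x, (h0, c0))
print(out.shape)  # torch.Size([2, 5, 16]): one hidden vector per time step
print(hn.shape)   # torch.Size([2, 2, 16]): final hidden state for each layer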

The steps are as follows.

Data preparation

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import random
import string
import os

# Load and preprocess the data
def load_data(filepath):
    with open(filepath, 'r', encoding='utf-8') as file:
        text = file.read()
    return text

def preprocess_text(text):
    text = text.lower()
    text = text.translate(str.maketrans('', '', string.punctuation))
    return text

data_path = 'poetry.txt'  # replace with the path to your poetry data file
text = load_data(data_path)
text = preprocess_text(text)

# Build the character-level vocabulary
chars = sorted(list(set(text)))
char_to_idx = {char: idx for idx, char in enumerate(chars)}
idx_to_char = {idx: char for char, idx in char_to_idx.items()}
vocab_size = len(chars)

print(f"Total characters: {len(text)}")
print(f"Vocabulary size: {vocab_size}")
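
For intuition about what this mapping produces, here is a tiny illustrative example on a made-up five-character string; the indices simply follow Unicode code-point order.

sample = "床前明月光"
sample_chars = sorted(set(sample))
sample_map = {c: i for i, c in enumerate(sample_chars)}
print(sample_map)  # {'光': 0, '前': 1, '床': 2, '明': 3, '月': 4}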

Building the model

Define the LSTM model:

class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=2):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, x, hidden):
        lstm_out, hidden = self.lstm(x, hidden)
        output = self.fc(lstm_out[:, -1, :])  # predict the next character from the last time step only
        output = self.softmax(output)         # log-probabilities, to pair with NLLLoss
        return output, hidden

    def init_hidden(self, batch_size):
        # Zero-initialized hidden and cell states with the same dtype/device as the model parameters
        weight = next(self.parameters()).data
        hidden = (weight.new(self.num_layers, batch_size, self.hidden_size).zero_(),
                  weight.new(self.num_layers, batch_size, self.hidden_size).zero_())
        return hidden
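
As a quick sanity check (my own sketch, assuming the data-preparation snippet above has already defined vocab_size), you can push a dummy batch through the model and confirm that it produces one score per vocabulary entry:

# Dummy batch of 4 sequences of length 10, one-hot width vocab_size
check_model = LSTMModel(input_size=vocab_size, hidden_size=256, output_size=vocab_size)
check_h = check_model.init_hidden(4)
dummy = torch.zeros(4, 10, vocab_size)
out, check_h = check_model(dummy, check_h)
print(out.shape)  # expected: torch.Size([4, vocab_size])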

Training the model

Convert the data into the format the LSTM expects, then run the training loop:

def prepare_data(text, seq_length):
    inputs = []
    targets = []
    # Sliding window: each sequence of seq_length characters predicts the next character
    for i in range(0, len(text) - seq_length, 1):
        seq_in = text[i:i + seq_length]
        seq_out = text[i + seq_length]
        inputs.append([char_to_idx[char] for char in seq_in])
        targets.append(char_to_idx[seq_out])
    return inputs, targets

seq_length = 100
inputs, targets = prepare_data(text, seq_length)

# Convert to tensors
inputs = torch.tensor(inputs, dtype=torch.long)
targets = torch.tensor(targets, dtype=torch.long)

batch_size = 64
input_size = vocab_size
hidden_size = 256
output_size = vocab_size
num_epochs = 20
learning_rate = 0.001

model = LSTMModel(input_size, hidden_size, output_size)
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(num_epochs):
    total_loss = 0
    for i in range(0, len(inputs), batch_size):
        x = inputs[i:i + batch_size]
        y = targets[i:i + batch_size]
        # Fresh hidden state sized to the current batch (the last batch may be smaller than batch_size)
        h = model.init_hidden(x.size(0))
        x = nn.functional.one_hot(x, num_classes=vocab_size).float()
        output, h = model(x, h)
        loss = criterion(output, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss/len(inputs):.4f}")
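
A note on the input encoding: the code feeds characters in as one-hot vectors, which is simple but wasteful for a large vocabulary. A common alternative, sketched below as an assumption rather than what this article actually uses, is to let the network learn a dense representation with nn.Embedding (the class name EmbeddingLSTMModel is hypothetical):

class EmbeddingLSTMModel(nn.Module):
    def __init__(self, vocab_size, embed_size, hidden_size, num_layers=2):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, embed_size)  # character index -> dense vector
        self.lstm = nn.LSTM(embed_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, vocab_size)       # raw logits

    def forward(self, x, hidden=None):
        # x: (batch, seq) of character indices; no one-hot step needed
        out, hidden = self.lstm(self.embed(x), hidden)
        return self.fc(out[:, -1, :]), hidden

Because this variant outputs raw logits instead of log-probabilities, it would be trained with nn.CrossEntropyLoss rather than nn.NLLLoss.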

Generating text

def generate_text(model, start_str, length=100):
    model.eval()
    with torch.no_grad():
        # Encode the seed string as a one-hot sequence
        input_eval = torch.tensor([char_to_idx[char] for char in start_str], dtype=torch.long).unsqueeze(0)
        input_eval = nn.functional.one_hot(input_eval, num_classes=vocab_size).float()
        h = model.init_hidden(1)
        predicted_text = start_str
        for _ in range(length):
            output, h = model(input_eval, h)
            prob = torch.exp(output)  # the model returns log-probabilities; exp() recovers probabilities
            predicted_idx = torch.multinomial(prob, num_samples=1).item()
            predicted_char = idx_to_char[predicted_idx]
            predicted_text += predicted_char
            # Feed the sampled character back in as the next input
            input_eval = torch.tensor([[predicted_idx]], dtype=torch.long)
            input_eval = nn.functional.one_hot(input_eval, num_classes=vocab_size).float()
    return predicted_text

start_string = "春眠不觉晓"
generated_text = generate_text(model, start_string)
print(generated_text)
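
Sampling directly from the predicted distribution can make the output either repetitive or chaotic. A common tweak, which is my own suggestion rather than something the article covers, is temperature sampling; the helper below (the name sample_with_temperature is hypothetical) rescales the model's log-probabilities before drawing a character.

def sample_with_temperature(log_probs, temperature=0.8):
    # temperature < 1.0 makes choices more conservative, > 1.0 more random
    scaled = log_probs / temperature
    probs = torch.softmax(scaled, dim=1)
    return torch.multinomial(probs, num_samples=1).item()

Inside generate_text, the torch.exp / torch.multinomial pair would then be replaced by a call to this helper.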

The generated result is certainly not good, but at least it produces something. I only included a few lines of poetry here; you can load more material from an external file.

The complete code below can be run directly:

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import random
import string

# A few predefined lines of classical Chinese poetry as training data
text = """
春眠不觉晓,处处闻啼鸟。
夜来风雨声,花落知多少。
床前明月光,疑是地上霜。
举头望明月,低头思故乡。
红豆生南国,春来发几枝。
愿君多采撷,此物最相思。
"""

# Data preprocessing
def preprocess_text(text):
    text = text.replace('\n', '')
    return text

text = preprocess_text(text)

# Build the character-level vocabulary
chars = sorted(list(set(text)))
char_to_idx = {char: idx for idx, char in enumerate(chars)}
idx_to_char = {idx: char for char, idx in char_to_idx.items()}
vocab_size = len(chars)

print(f"Total characters: {len(text)}")
print(f"Vocabulary size: {vocab_size}")

class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=2):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, x, hidden):
        lstm_out, hidden = self.lstm(x, hidden)
        output = self.fc(lstm_out[:, -1, :])  # predict the next character from the last time step
        output = self.softmax(output)
        return output, hidden

    def init_hidden(self, batch_size):
        weight = next(self.parameters()).data
        hidden = (weight.new(self.num_layers, batch_size, self.hidden_size).zero_(),
                  weight.new(self.num_layers, batch_size, self.hidden_size).zero_())
        return hidden

def prepare_data(text, seq_length):
    inputs = []
    targets = []
    for i in range(0, len(text) - seq_length, 1):
        seq_in = text[i:i + seq_length]
        seq_out = text[i + seq_length]
        inputs.append([char_to_idx[char] for char in seq_in])
        targets.append(char_to_idx[seq_out])
    return inputs, targets

seq_length = 10
inputs, targets = prepare_data(text, seq_length)

# Convert to tensors
inputs = torch.tensor(inputs, dtype=torch.long)
targets = torch.tensor(targets, dtype=torch.long)

batch_size = 64
input_size = vocab_size
hidden_size = 256
output_size = vocab_size
num_epochs = 50
learning_rate = 0.003

model = LSTMModel(input_size, hidden_size, output_size)
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(num_epochs):
    total_loss = 0
    for i in range(0, len(inputs), batch_size):
        x = inputs[i:i + batch_size]
        y = targets[i:i + batch_size]
        # Fresh hidden state sized to the current batch (the last batch may be smaller than batch_size)
        h = model.init_hidden(x.size(0))
        x = nn.functional.one_hot(x, num_classes=vocab_size).float()
        output, h = model(x, h)
        loss = criterion(output, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {total_loss / len(inputs):.4f}")

def generate_text(model, start_str, length=100):
    model.eval()
    with torch.no_grad():
        # Encode the seed string as a one-hot sequence
        input_eval = torch.tensor([char_to_idx[char] for char in start_str], dtype=torch.long).unsqueeze(0)
        input_eval = nn.functional.one_hot(input_eval, num_classes=vocab_size).float()
        h = model.init_hidden(1)
        predicted_text = start_str
        for _ in range(length):
            output, h = model(input_eval, h)
            prob = torch.exp(output)  # log-probabilities -> probabilities
            predicted_idx = torch.multinomial(prob, num_samples=1).item()
            predicted_char = idx_to_char[predicted_idx]
            predicted_text += predicted_char
            # Feed the sampled character back in as the next input
            input_eval = torch.tensor([[predicted_idx]], dtype=torch.long)
            input_eval = nn.functional.one_hot(input_eval, num_classes=vocab_size).float()
    return predicted_text

start_string = "春眠不觉晓"
generated_text = generate_text(model, start_string, length=100)
print(generated_text)
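
If you would rather not retrain on every run, the trained weights can be written out with torch.save and restored later. This is a minimal sketch of my own; the file name poem_lstm.pt is arbitrary, and the vocabulary (char_to_idx / idx_to_char) must be rebuilt identically for the indices to line up.

# Save the trained weights (file name is arbitrary)
torch.save(model.state_dict(), 'poem_lstm.pt')

# Later: rebuild a model with the same sizes and load the weights back
restored = LSTMModel(input_size, hidden_size, output_size)
restored.load_state_dict(torch.load('poem_lstm.pt'))
restored.eval()
print(generate_text(restored, "床前明月光"))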
