循环神经网络(Recurrent Neural Network, RNN)是一种用于处理序列数据的神经网络模型。与传统的前馈神经网络不同,RNN具有内部记忆能力,可以捕捉到序列中元素之间的依赖关系。这种特性使得RNN在自然语言处理、语音识别、时间序列预测等需要考虑上下文信息的任务中表现出色。
RNN的基本结构
RNN的基本结构包括输入层、隐藏层和输出层。在处理序列数据时,RNN会按照序列的时间顺序逐个处理每个元素。对于序列中的每一个时间步,RNN不仅会接收该时间步的输入,还会接收上一个时间步的隐藏状态作为输入。这样,通过将之前的信息传递给后续的处理步骤,RNN能够利用历史信息来影响当前的输出。
方法
- InitializeWeightsAndBiases():使用随机值初始化权重矩阵和偏置向量。
- Sigmoid():激活函数,用于隐藏层的非线性变换。
- RandomMatrix():生成指定大小的随机矩阵,用于权重的初始化。
- Softmax():通常用于多分类问题中的输出层,将输出转换为概率分布。
- Forward():前向传播方法,根据输入数据计算每个时间步的输出。它会更新隐藏状态,并最终返回所有时间步的输出列表。
- Backward():反向传播方法,用于根据预测输出与目标输出之间的差异调整模型参数。它计算梯度并更新权重和偏置。
- UpdateWeights():根据计算出的梯度更新模型的权重和偏置。
- Train():训练模型的方法,通过多次迭代(epoch)对输入数据进行前向传播和反向传播,以优化模型参数。
- Predict():预测方法,根据输入数据返回每个时间步的预测结果索引,即输出概率最高的类别。
说明
这只是一个基础的 RNN 模型实现,实际应用中可能需要考虑更多的优化技术,比如使用长短期记忆网络(LSTM)、门控循环单元(GRU)等更复杂的架构来改善性能。
using System;
using System.Linq;
using System.Collections.Generic;
namespace Project.NeuralNetwork
{
    /// <summary>
    /// A minimal recurrent neural network with one sigmoid hidden layer and a
    /// softmax output layer, trained with back-propagation through time and
    /// plain gradient descent (one update per training sequence).
    /// </summary>
    public class RnnModel
    {
        /// <summary>
        /// Number of components read from every input vector.
        /// </summary>
        private readonly int _inputSize;

        /// <summary>
        /// Number of units in the hidden (recurrent) layer.
        /// </summary>
        private readonly int _hiddenSize;

        /// <summary>
        /// Number of output classes.
        /// </summary>
        private readonly int _outputSize;

        /// <summary>
        /// Single random source used for every weight matrix, so that the
        /// matrices never end up with identical values because of equal
        /// time-based seeds.
        /// </summary>
        private readonly Random _random;

        /// <summary>
        /// Input-to-hidden weights, indexed [input, hidden].
        /// </summary>
        private double[,] _weightsInputHidden;

        /// <summary>
        /// Hidden-to-hidden (recurrent) weights, indexed [previous hidden, hidden].
        /// </summary>
        private double[,] _weightsHiddenHidden;

        /// <summary>
        /// Hidden-to-output weights, indexed [hidden, output].
        /// </summary>
        private double[,] _weightsHiddenOutput;

        /// <summary>
        /// Hidden layer bias.
        /// </summary>
        private double[] _biasHidden;

        /// <summary>
        /// Output layer bias.
        /// </summary>
        private double[] _biasOutput;

        /// <summary>
        /// Hidden state of every time step of the most recent
        /// <see cref="Forward"/> call; read by <see cref="Backward"/>.
        /// </summary>
        private List<double[]> _hiddenStates = new List<double[]>();

        /// <summary>
        /// Creates a model with randomly initialised weights.
        /// </summary>
        /// <param name="inputSize">Length of each input vector; must be positive.</param>
        /// <param name="hiddenSize">Number of hidden units; must be positive.</param>
        /// <param name="outputSize">Number of output classes; must be positive.</param>
        /// <exception cref="ArgumentOutOfRangeException">A size is zero or negative.</exception>
        public RnnModel(int inputSize, int hiddenSize, int outputSize)
            : this(inputSize, hiddenSize, outputSize, new Random())
        {
        }

        /// <summary>
        /// Creates a model whose initial weights are reproducible for a given seed.
        /// </summary>
        /// <param name="inputSize">Length of each input vector; must be positive.</param>
        /// <param name="hiddenSize">Number of hidden units; must be positive.</param>
        /// <param name="outputSize">Number of output classes; must be positive.</param>
        /// <param name="seed">Seed for the weight initialisation.</param>
        /// <exception cref="ArgumentOutOfRangeException">A size is zero or negative.</exception>
        public RnnModel(int inputSize, int hiddenSize, int outputSize, int seed)
            : this(inputSize, hiddenSize, outputSize, new Random(seed))
        {
        }

        /// <summary>
        /// Shared constructor body: validates the sizes and initialises all parameters.
        /// </summary>
        private RnnModel(int inputSize, int hiddenSize, int outputSize, Random random)
        {
            if (inputSize <= 0)
            {
                throw new ArgumentOutOfRangeException(nameof(inputSize), inputSize, "Layer sizes must be positive.");
            }
            if (hiddenSize <= 0)
            {
                throw new ArgumentOutOfRangeException(nameof(hiddenSize), hiddenSize, "Layer sizes must be positive.");
            }
            if (outputSize <= 0)
            {
                throw new ArgumentOutOfRangeException(nameof(outputSize), outputSize, "Layer sizes must be positive.");
            }

            _inputSize = inputSize;
            _hiddenSize = hiddenSize;
            _outputSize = outputSize;
            _random = random;
            InitializeWeightsAndBiases();
        }

        /// <summary>
        /// Fills the weight matrices with random values and the biases with zeros.
        /// </summary>
        private void InitializeWeightsAndBiases()
        {
            _weightsInputHidden = RandomMatrix(_inputSize, _hiddenSize);
            _weightsHiddenHidden = RandomMatrix(_hiddenSize, _hiddenSize);
            _weightsHiddenOutput = RandomMatrix(_hiddenSize, _outputSize);
            _biasHidden = new double[_hiddenSize];
            _biasOutput = new double[_outputSize];
        }

        /// <summary>
        /// Logistic sigmoid, the activation of the hidden layer.
        /// </summary>
        /// <param name="x">Pre-activation value.</param>
        /// <returns>1 / (1 + e^-x).</returns>
        private static double Sigmoid(double x)
        {
            return 1 / (1 + Math.Exp(-x));
        }

        /// <summary>
        /// Creates a matrix of uniformly distributed random values in [-1, 1).
        /// </summary>
        /// <param name="rows">Number of rows.</param>
        /// <param name="cols">Number of columns.</param>
        /// <returns>The new matrix.</returns>
        private double[,] RandomMatrix(int rows, int cols)
        {
            var matrix = new double[rows, cols];
            for (int i = 0; i < rows; i++)
            {
                for (int j = 0; j < cols; j++)
                {
                    matrix[i, j] = _random.NextDouble() * 2 - 1;
                }
            }
            return matrix;
        }

        /// <summary>
        /// Runs the network over a sequence, starting from an all-zero hidden state.
        /// The hidden state of every step is cached for a following backward pass.
        /// </summary>
        /// <param name="inputs">Input vectors, one per time step.</param>
        /// <returns>One probability distribution (softmax output) per time step.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="inputs"/> is null.</exception>
        /// <exception cref="ArgumentException">An input vector is null or shorter than the input size.</exception>
        public List<double[]> Forward(List<double[]> inputs)
        {
            if (inputs == null)
            {
                throw new ArgumentNullException(nameof(inputs));
            }

            var hiddenStates = new List<double[]>(inputs.Count);
            var outputs = new List<double[]>(inputs.Count);
            double[] previousHidden = new double[_hiddenSize];

            for (int t = 0; t < inputs.Count; t++)
            {
                double[] input = inputs[t];
                if (input == null || input.Length < _inputSize)
                {
                    throw new ArgumentException(
                        $"Input vector at time step {t} must contain at least {_inputSize} values.",
                        nameof(inputs));
                }

                var hidden = new double[_hiddenSize];
                for (int h = 0; h < _hiddenSize; h++)
                {
                    double sum = _biasHidden[h];
                    for (int i = 0; i < _inputSize; i++)
                    {
                        sum += _weightsInputHidden[i, h] * input[i];
                    }
                    for (int hh = 0; hh < _hiddenSize; hh++)
                    {
                        sum += _weightsHiddenHidden[hh, h] * previousHidden[hh];
                    }
                    hidden[h] = Sigmoid(sum);
                }

                hiddenStates.Add(hidden);
                outputs.Add(Output(hidden));
                previousHidden = hidden;
            }

            _hiddenStates = hiddenStates;
            return outputs;
        }

        /// <summary>
        /// Computes the output layer for one hidden state.
        /// </summary>
        /// <param name="h">Hidden state of the current time step.</param>
        /// <returns>Softmax probabilities over the output classes.</returns>
        private double[] Output(double[] h)
        {
            double[] y = new double[_outputSize];
            for (int i = 0; i < _outputSize; i++)
            {
                double sum = _biasOutput[i];
                for (int j = 0; j < _hiddenSize; j++)
                {
                    sum += h[j] * _weightsHiddenOutput[j, i];
                }
                y[i] = sum;
            }
            return Softmax(y);
        }

        /// <summary>
        /// Softmax over a vector of logits.
        /// </summary>
        /// <param name="x">Logits.</param>
        /// <returns>Non-negative values that sum to one.</returns>
        private static double[] Softmax(double[] x)
        {
            // Subtracting the maximum keeps Math.Exp from overflowing.
            double max = x.Max();
            double[] exps = x.Select(xi => Math.Exp(xi - max)).ToArray();
            double expSum = exps.Sum();
            return exps.Select(e => e / expSum).ToArray();
        }

        /// <summary>
        /// Back-propagation through time for the sequence processed by the most
        /// recent <see cref="Forward"/> call, followed by one gradient-descent update.
        /// The output error is (softmax output - target), i.e. the gradient of the
        /// cross-entropy loss with respect to the logits.
        /// </summary>
        /// <param name="inputs">The input sequence that was passed to <see cref="Forward"/>.</param>
        /// <param name="targets">Target vectors, at least one per input step.</param>
        /// <param name="outputs">The outputs returned by <see cref="Forward"/>.</param>
        /// <param name="learningRate">Gradient-descent step size.</param>
        private void Backward(List<double[]> inputs, List<double[]> targets, List<double[]> outputs, double learningRate)
        {
            if (_hiddenStates.Count != inputs.Count)
            {
                throw new InvalidOperationException("Forward must be called on the same sequence before Backward.");
            }

            // Gradient accumulators, summed over all time steps.
            double[,] dWeightsInputHidden = new double[_inputSize, _hiddenSize];
            double[,] dWeightsHiddenHidden = new double[_hiddenSize, _hiddenSize];
            double[,] dWeightsHiddenOutput = new double[_hiddenSize, _outputSize];
            double[] dBiasHidden = new double[_hiddenSize];
            double[] dBiasOutput = new double[_outputSize];

            // Error flowing into the hidden state of step t from step t + 1.
            double[] dHiddenFromFuture = new double[_hiddenSize];
            // Forward starts from an all-zero hidden state.
            double[] initialHidden = new double[_hiddenSize];

            for (int t = inputs.Count - 1; t >= 0; t--)
            {
                double[] input = inputs[t];
                double[] target = targets[t];
                double[] output = outputs[t];
                double[] hidden = _hiddenStates[t];
                double[] previousHidden = t > 0 ? _hiddenStates[t - 1] : initialHidden;

                // Output layer error and gradients.
                double[] outputError = new double[_outputSize];
                for (int o = 0; o < _outputSize; o++)
                {
                    outputError[o] = output[o] - target[o];
                    dBiasOutput[o] += outputError[o];
                    for (int h = 0; h < _hiddenSize; h++)
                    {
                        dWeightsHiddenOutput[h, o] += outputError[o] * hidden[h];
                    }
                }

                // Error at the hidden pre-activation: contribution of this step's
                // output plus whatever later steps sent back, times sigmoid'.
                double[] dPreActivation = new double[_hiddenSize];
                for (int h = 0; h < _hiddenSize; h++)
                {
                    double error = dHiddenFromFuture[h];
                    for (int o = 0; o < _outputSize; o++)
                    {
                        error += outputError[o] * _weightsHiddenOutput[h, o];
                    }
                    dPreActivation[h] = error * (hidden[h] * (1 - hidden[h]));

                    dBiasHidden[h] += dPreActivation[h];
                    for (int i = 0; i < _inputSize; i++)
                    {
                        dWeightsInputHidden[i, h] += dPreActivation[h] * input[i];
                    }
                    // The recurrent weights saw the PREVIOUS hidden state.
                    for (int hh = 0; hh < _hiddenSize; hh++)
                    {
                        dWeightsHiddenHidden[hh, h] += dPreActivation[h] * previousHidden[hh];
                    }
                }

                // Send the error one step further back through the recurrent weights.
                double[] dHiddenPrevious = new double[_hiddenSize];
                for (int hh = 0; hh < _hiddenSize; hh++)
                {
                    double sum = 0;
                    for (int h = 0; h < _hiddenSize; h++)
                    {
                        sum += _weightsHiddenHidden[hh, h] * dPreActivation[h];
                    }
                    dHiddenPrevious[hh] = sum;
                }
                dHiddenFromFuture = dHiddenPrevious;
            }

            UpdateWeights(dWeightsInputHidden, dWeightsHiddenHidden, dWeightsHiddenOutput, dBiasHidden, dBiasOutput, learningRate);
        }

        /// <summary>
        /// Applies one gradient-descent step to every weight and bias.
        /// </summary>
        /// <param name="dWeightsInputHidden">Gradient of the input-to-hidden weights.</param>
        /// <param name="dWeightsHiddenHidden">Gradient of the hidden-to-hidden weights.</param>
        /// <param name="dWeightsHiddenOutput">Gradient of the hidden-to-output weights.</param>
        /// <param name="dBiasHidden">Gradient of the hidden bias.</param>
        /// <param name="dBiasOutput">Gradient of the output bias.</param>
        /// <param name="learningRate">Gradient-descent step size.</param>
        private void UpdateWeights(double[,] dWeightsInputHidden, double[,] dWeightsHiddenHidden, double[,] dWeightsHiddenOutput, double[] dBiasHidden, double[] dBiasOutput, double learningRate)
        {
            for (int i = 0; i < _inputSize; i++)
            {
                for (int h = 0; h < _hiddenSize; h++)
                {
                    _weightsInputHidden[i, h] -= learningRate * dWeightsInputHidden[i, h];
                }
            }

            for (int h = 0; h < _hiddenSize; h++)
            {
                for (int hh = 0; hh < _hiddenSize; hh++)
                {
                    _weightsHiddenHidden[h, hh] -= learningRate * dWeightsHiddenHidden[h, hh];
                }
                for (int o = 0; o < _outputSize; o++)
                {
                    _weightsHiddenOutput[h, o] -= learningRate * dWeightsHiddenOutput[h, o];
                }
                _biasHidden[h] -= learningRate * dBiasHidden[h];
            }

            for (int o = 0; o < _outputSize; o++)
            {
                _biasOutput[o] -= learningRate * dBiasOutput[o];
            }
        }

        /// <summary>
        /// Trains the model: for every epoch, each sequence is run forward and
        /// then used for one back-propagation update.
        /// </summary>
        /// <param name="inputs">Training sequences (each a list of input vectors).</param>
        /// <param name="targets">Target sequences, parallel to <paramref name="inputs"/>.</param>
        /// <param name="learningRate">Gradient-descent step size.</param>
        /// <param name="epochs">Number of passes over the training set.</param>
        /// <exception cref="ArgumentNullException"><paramref name="inputs"/> or <paramref name="targets"/> is null.</exception>
        /// <exception cref="ArgumentException">
        /// A sequence is null, or the targets do not cover every input sequence and time step.
        /// </exception>
        public void Train(List<List<double[]>> inputs, List<List<double[]>> targets, double learningRate, int epochs)
        {
            if (inputs == null)
            {
                throw new ArgumentNullException(nameof(inputs));
            }
            if (targets == null)
            {
                throw new ArgumentNullException(nameof(targets));
            }
            if (targets.Count < inputs.Count)
            {
                throw new ArgumentException("A target sequence is required for every input sequence.", nameof(targets));
            }

            // Validate everything up front so a bad sample cannot leave the
            // model half-trained.
            for (int i = 0; i < inputs.Count; i++)
            {
                ValidateSample(inputs[i], targets[i], i);
            }

            for (int epoch = 0; epoch < epochs; epoch++)
            {
                for (int i = 0; i < inputs.Count; i++)
                {
                    List<double[]> input = inputs[i];
                    List<double[]> outputs = Forward(input);
                    Backward(input, targets[i], outputs, learningRate);
                }
            }
        }

        /// <summary>
        /// Checks that one training sample has a usable target vector for every input step.
        /// </summary>
        private void ValidateSample(List<double[]> input, List<double[]> target, int sampleIndex)
        {
            if (input == null)
            {
                throw new ArgumentException($"Input sequence {sampleIndex} is null.", "inputs");
            }
            if (target == null || target.Count < input.Count)
            {
                throw new ArgumentException(
                    $"Target sequence {sampleIndex} must contain a target for each of the {input.Count} input steps.",
                    "targets");
            }
            for (int t = 0; t < input.Count; t++)
            {
                if (target[t] == null || target[t].Length < _outputSize)
                {
                    throw new ArgumentException(
                        $"Target vector {t} of sequence {sampleIndex} must contain at least {_outputSize} values.",
                        "targets");
                }
            }
        }

        /// <summary>
        /// Predicts the most probable class for every time step of a sequence.
        /// </summary>
        /// <param name="inputs">Input vectors, one per time step.</param>
        /// <returns>Index of the highest-probability class for each step (first index on ties).</returns>
        public int[] Predict(List<double[]> inputs)
        {
            return Forward(inputs).Select(ArgMax).ToArray();
        }

        /// <summary>
        /// Returns the index of the first largest value.
        /// </summary>
        private static int ArgMax(double[] values)
        {
            int best = 0;
            for (int i = 1; i < values.Length; i++)
            {
                if (values[i] > values[best])
                {
                    best = i;
                }
            }
            return best;
        }
    }
}
- 准备训练数据
- 训练网络
- 测试并输出结果
/// <summary>
/// Console demo: trains an <c>RnnModel</c> on a few fixed phrase pairs, then
/// answers phrases typed by the user until "exit" is entered or the input ends.
/// Every phrase is treated as one vocabulary item and fed as a one-hot vector.
/// </summary>
public static void Rnn_Predict()
{
    // Data set: (input phrases, expected reply phrases).
    var data = new List<Tuple<string[], string[]>>
    {
        Tuple.Create(new string[] { "早安" }, new string[] { "早上好" }),
        Tuple.Create(new string[] { "午安" }, new string[] { "中午好" }),
        Tuple.Create(new string[] { "晚安" }, new string[] { "晚上好" }),
        Tuple.Create(new string[] { "你好吗?" }, new string[] { "我很好,谢谢。" })
    };

    // Vocabulary: every distinct phrase, and its position in the list.
    var allWords = data.SelectMany(t => t.Item1.Concat(t.Item2)).Distinct().ToList();
    var wordToIndex = allWords
        .Select((word, index) => new { word, index })
        .ToDictionary(entry => entry.word, entry => entry.index);

    // One-hot encode a phrase. The parameter is deliberately not named like the
    // enclosing local: shadowing it is a compile error before C# 8.
    double[] OneHotEncode(string word, Dictionary<string, int> vocabulary)
    {
        var encoding = new double[vocabulary.Count];
        encoding[vocabulary[word]] = 1;
        return encoding;
    }

    // Convert the phrase pairs into one-hot sequences.
    List<List<double[]>> inputsData = new List<List<double[]>>();
    List<List<double[]>> targetsData = new List<List<double[]>>();
    foreach (var item in data)
    {
        inputsData.Add(item.Item1.Select(word => OneHotEncode(word, wordToIndex)).ToList());
        targetsData.Add(item.Item2.Select(word => OneHotEncode(word, wordToIndex)).ToList());
    }

    // Train. Input, hidden and output layers all use the vocabulary size.
    int inputSize = allWords.Count;
    int hiddenSize = allWords.Count;
    int outputSize = allWords.Count;
    RnnModel model = new RnnModel(inputSize, hiddenSize, outputSize);
    int epochs = 10000;
    double learningRate = 0.1;
    model.Train(inputsData, targetsData, learningRate, epochs);

    // Interactive prediction loop.
    while (true)
    {
        Console.Write("你: ");
        string userInput = Console.ReadLine();

        // ReadLine returns null once the input stream is exhausted (Ctrl+Z /
        // Ctrl+D, redirected input); treat that like "exit" instead of crashing.
        if (userInput == null || string.Equals(userInput, "exit", StringComparison.OrdinalIgnoreCase))
        {
            break;
        }

        if (!wordToIndex.ContainsKey(userInput))
        {
            Console.WriteLine("对不起,我不认识这些词。");
            continue;
        }

        var testInput = new List<double[]> { OneHotEncode(userInput, wordToIndex) };
        var prediction = model.Predict(testInput);
        var predictedWords = prediction.Select(index => allWords[index]).ToArray();
        Console.WriteLine($"机器人: {string.Join(", ", predictedWords)}");
    }
}