利用AI和自有数据构建一个专属的对话机器人，尝试快速搭建具备专有知识的机器人。
前端使用tkinter实现一个简单的对话窗口，
后端使用自己的数据不断进行训练，有需要的可以依据自己的实际情况进行修改和优化。
import tkinter as tk
from tkinter import scrolledtext
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import jieba
import string, psycopg2
# Load every (question, answer) row from the PostgreSQL table.
# NOTE(review): connection parameters are hard-coded here; consider moving
# them to configuration or environment variables.
conn = psycopg2.connect(database="postgres", user="postgres", password="postgres", host="localhost", port="5433")
try:
    # The cursor context manager closes the cursor when the block exits.
    with conn.cursor() as cur:
        cur.execute("SELECT questions, answers FROM public.stock_chatbot")
        data_db = cur.fetchall()
finally:
    # Release the connection even when the query or fetch raises.
    conn.close()
# Split the fetched rows into two parallel lists: one of questions, one of
# answers. Each row is expected to be a (question, answer) pair.
questions = [row_question for row_question, _ in data_db]
answers = [row_answer for _, row_answer in data_db]

# Assemble the two lists into a DataFrame with "question" and "answer" columns.
data = dict(question=questions, answer=answers)
df = pd.DataFrame(data)
# Text preprocessing
def preprocess_text(text):
    """Normalize *text* and return its jieba tokens joined by spaces.

    The text is lower-cased, stripped of punctuation, segmented with
    ``jieba.lcut`` and the resulting tokens are joined with ``" "``.

    In addition to the ASCII characters in ``string.punctuation``, every
    character whose Unicode category starts with ``P`` is removed, so
    full-width marks such as ``，``, ``。`` and ``？`` do not survive as
    standalone tokens. Output for ASCII-only input is unchanged.

    Args:
        text: The raw question or user message (a ``str``).

    Returns:
        A single string of space-separated tokens.
    """
    # Imported locally so this function does not depend on a file-level edit.
    import unicodedata

    text = text.lower()
    # string.punctuation also contains ASCII symbols (e.g. "$", "+", "~") that
    # are not in a Unicode "P" category, so this pass is still required.
    text = text.translate(str.maketrans('', '', string.punctuation))
    # Drop the remaining (non-ASCII) punctuation, e.g. CJK full-width marks.
    text = "".join(
        ch for ch in text if not unicodedata.category(ch).startswith("P")
    )
    words = jieba.lcut(text)
    return " ".join(words)
# Replace each stored question with its preprocessed, space-separated form.
processed_questions = df['question'].map(preprocess_text)
df['question'] = processed_questions

# Fit a TF-IDF model on the preprocessed questions and keep their vectors
# for similarity lookups.
vectorizer = TfidfVectorizer()
question_vectors = vectorizer.fit_transform(processed_questions)
def send_message():
    """Handle one user message: read it, pick a reply, and show both.

    Reads the text from the ``entry`` widget and clears the widget. Typing
    ``quit`` (any letter case) destroys the root window. Otherwise the
    message is preprocessed, vectorized, and compared against every stored
    question by cosine similarity:

    * If the best similarity exceeds 0.2, the matching answer is used.
    * Otherwise the stored questions sharing at least one token with the
      message are collected, and the answer of the most similar one among
      them is used.
    * If no stored question shares a token, a fixed apology is used.

    The message and the reply are appended to ``chat_history``.
    """
    user_input = entry.get()
    entry.delete(0, tk.END)
    if user_input.lower() == 'quit':
        root.destroy()
        return

    user_input_preprocessed = preprocess_text(user_input)
    user_vector = vectorizer.transform([user_input_preprocessed])
    # One similarity per stored question, in DataFrame row order.
    similarities = cosine_similarity(user_vector, question_vectors)[0]
    best_match_index = similarities.argmax()

    if similarities[best_match_index] > 0.2:
        response = df['answer'].iloc[best_match_index]
    else:
        keywords = set(user_input_preprocessed.split())
        # Keep the ORIGINAL row positions of the candidates so the chosen
        # answer comes from the same row as the chosen question.
        candidate_indices = [
            row
            for row, stored_question in enumerate(df['question'])
            if keywords.intersection(stored_question.split())
        ]
        if candidate_indices:
            # The stored questions were vectorized by the same fitted
            # vectorizer, so their similarities are already available.
            best_guess_index = max(
                candidate_indices, key=lambda row: similarities[row]
            )
            response = df['answer'].iloc[best_guess_index]
        else:
            response = "很抱歉,我没能理解你的问题。"

    chat_history.insert(tk.END, f"你: {user_input}\n")
    chat_history.insert(tk.END, f"机器人: {response}\n\n")
# Build the main window.
root = tk.Tk()
root.geometry("400x350")
root.title("聊天机器人")

# Scrollable text area that shows the conversation.
chat_history = scrolledtext.ScrolledText(master=root, width=50, height=20)
chat_history.pack(pady=10)

# Single-line input box for the user's message.
entry = tk.Entry(master=root, width=40)
entry.pack(pady=10)

# Button that submits the current input to send_message.
send_button = tk.Button(master=root, command=send_message, text="发送")
send_button.pack()

# Hand control to the Tk event loop.
root.mainloop()