使用免费的 GitHub token 调用 GPT，在不依赖任何框架的情况下实现一个简单的 RAG 自动打分评测系统

1.环境准备

!pip install pymupdf numpy openai

2.导入依赖

import fitz
import os
import numpy as np
import json
from openai import OpenAI

3.pdf提取文本

def extract_text_from_pdf(pdf_path):
    """
    Extract the plain text of every page of a PDF file.

    Args:
        pdf_path (str): Path to the PDF file.

    Returns:
        str: The text of all pages, concatenated in page order.
    """
    # Open the document in a context manager so the underlying file handle
    # is released even if text extraction fails part-way through.
    with fitz.open(pdf_path) as pdf_document:
        # Collect the per-page text and join once instead of growing a
        # string with repeated "+=".
        return "".join(page.get_text("text") for page in pdf_document)

4.将上个步骤提取的文本按固定长度分段

def chunk_text(text, n, overlap):
    """
    Split text into fixed-size chunks with an optional overlap.

    Chunks start every ``n - overlap`` characters and are ``n`` characters
    long, except for the trailing chunk(s), which may be shorter.

    Args:
        text (str): The text to split.
        n (int): Number of characters per chunk. Must be positive.
        overlap (int): Number of characters shared by consecutive chunks.
            Must satisfy ``0 <= overlap < n``.

    Returns:
        List[str]: The chunks in document order (empty for empty text).

    Raises:
        ValueError: If ``n`` is not positive or ``overlap`` is outside
            ``[0, n)``.
    """
    if n <= 0:
        raise ValueError(f"n must be a positive integer, got {n}")
    # overlap >= n would make the step zero or negative (no progress through
    # the text); a negative overlap would silently skip characters.
    if not 0 <= overlap < n:
        raise ValueError(
            f"overlap must satisfy 0 <= overlap < n, got overlap={overlap}, n={n}"
        )

    step = n - overlap
    return [text[start:start + n] for start in range(0, len(text), step)]

5.初始化openai实例

import os
from openai import OpenAI

# Read the GitHub token from the GITHUB_TOKEN environment variable instead of
# hard-coding it in the notebook, so the secret never ends up in the source.
# Falls back to an empty string when the variable is not set; requests will
# then be rejected by the endpoint until a token is provided.
token = os.environ.get("GITHUB_TOKEN", "")
endpoint = "https://models.inference.ai.azure.com"
model_name = "gpt-4o"

# OpenAI-compatible client pointed at the inference endpoint above.
client = OpenAI(
    base_url=endpoint,
    api_key=token,
)

6.从指定pdf文件中提取文本内容并进行分块

# Path of the PDF file to index.
pdf_path = "/content/设计前沿-ai文生图.pdf"

# Extract the plain text of the PDF.
extracted_text = extract_text_from_pdf(pdf_path)

# Split the text into chunks of 1000 characters, with consecutive chunks
# overlapping by 200 characters.
text_chunks = chunk_text(extracted_text, 1000, 200)

# Report how many chunks were produced.
print("Number of text chunks:", len(text_chunks))

# If no text was extracted there are no chunks; fail with a clear message
# here instead of an IndexError on text_chunks[0] below.
if not text_chunks:
    raise ValueError(f"No text could be extracted from {pdf_path!r}")

# Show the first chunk as a sanity check of extraction and chunking.
print("\nFirst text chunk:")
print(text_chunks[0])

7.将文本转换为向量

def create_embeddings(text, model="text-embedding-3-small"):
    """
    Request embeddings for the given text from the embeddings endpoint.

    Args:
        text (str | List[str]): The text, or list of texts, to embed.
        model (str): Name of the embedding model. Defaults to
            "text-embedding-3-small".

    Returns:
        object: The response returned by ``client.embeddings.create``,
        passed through unmodified.
    """
    # Delegates to the module-level ``client``; reading the vectors out of
    # the response is left to the caller.
    request_args = {"model": model, "input": text}
    return client.embeddings.create(**request_args)

8.计算两个向量（用户问题和检索到的文本块）的相似度

def cosine_similarity(vec1, vec2):
    """
    Compute the cosine similarity between two vectors.

    Args:
        vec1 (np.ndarray): The first vector.
        vec2 (np.ndarray): The second vector.

    Returns:
        float: The cosine similarity in [-1, 1]; values closer to 1 mean the
        vectors point in more similar directions. Returns 0.0 when either
        vector has zero length, because the similarity is undefined there.
    """
    dot_product = np.dot(vec1, vec2)

    # Product of the two Euclidean norms (the denominator of the formula).
    norm_product = np.linalg.norm(vec1) * np.linalg.norm(vec2)

    # A zero-length vector would cause a 0/0 division (NaN plus a
    # RuntimeWarning); report "no similarity" instead.
    if norm_product == 0:
        return 0.0

    return dot_product / norm_product

9.检索逻辑

def semantic_search(query, text_chunks, embeddings, k=5):
    """
    Return the text chunks most semantically similar to a query.

    Args:
        query (str): The user's question.
        text_chunks (List[str]): The chunks to search; indexed in parallel
            with ``embeddings``.
        embeddings (Iterable): One item per chunk, each exposing its vector
            through an ``.embedding`` attribute.
        k (int): Maximum number of chunks to return. Defaults to 5.

    Returns:
        List[str]: Up to ``k`` chunks, ordered from most to least similar.
    """
    # Embed the query with the same helper used for the chunks.
    query_vector = create_embeddings(query).data[0].embedding

    # Pair every chunk position with its cosine similarity to the query.
    scored = [
        (
            position,
            cosine_similarity(np.array(query_vector), np.array(item.embedding)),
        )
        for position, item in enumerate(embeddings)
    ]

    # Highest similarity first.
    ranked = sorted(scored, key=lambda pair: pair[1], reverse=True)

    # Map the best-scoring positions back to their chunk text.
    return [text_chunks[position] for position, _ in ranked[:k]]

用户输入 query
↓
生成 query 向量
↓
与每个 chunk 的向量计算余弦相似度
↓
相似度排序 → 选前 k 个
↓
返回最相关的文本块（用于回答生成）

10. 从pdf提取的文本块中，通过向量相似度找出与问题最相关的前两个文本块

# Load the validation data (a list of question/answer pairs) from JSON.
# NOTE(review): the path contains a space before "val.json" — confirm that
# this matches the actual file name.
with open('/content/ val.json', encoding="utf-8") as f:
    data = json.load(f)

# Use the first validation question as the user query.
query = data[0]['question']

# Embed all text chunks. ``response`` was referenced below without ever being
# defined, which raised a NameError; this is the missing step.
response = create_embeddings(text_chunks)

# Retrieve the 2 chunks most relevant to the query.
top_chunks = semantic_search(query, text_chunks, response.data, k=2)

# Show the query.
print("Query:", query)

# Show the retrieved chunks that will serve as context for the answer.
for i, chunk in enumerate(top_chunks):
    print(f"Context {i + 1}:\n{chunk}\n=====================================")

11.定义提示词实现对问题的回答

# System prompt that pins the assistant's behavior: answer strictly from the
# supplied context, and reply with a fixed fallback sentence when the context
# does not contain the answer.
system_prompt = " ".join([
    "You are an AI assistant that strictly answers based on the given context.",
    "If the answer cannot be derived directly from the provided context, respond with:",
    "'I do not have enough information to answer that.'",
])

# Helper that sends a system prompt plus a user message to the chat model.
def generate_response(system_prompt, user_message, model="gpt-4o"):
    """
    Generate a chat completion for a system prompt and a user message.

    Args:
        system_prompt (str): Instructions that steer the model's behavior.
        user_message (str): The user's message (context plus question).
        model (str): Name of the chat model. Defaults to "gpt-4o".

    Returns:
        object: The response returned by ``client.chat.completions.create``;
        callers in this file read the text via
        ``.choices[0].message.content``.
    """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_message},
    ]
    # temperature=0 keeps the output as deterministic as possible.
    return client.chat.completions.create(
        model=model,
        temperature=0,
        messages=conversation,
    )

12.结合上步给出的答案以及真正的答案进行打分


# Build the user prompt from the retrieved context chunks followed by the
# question, then ask the model for an answer. ``ai_response`` was referenced
# below without ever being defined, which raised a NameError; this is the
# missing generation step.
user_prompt = "\n".join(
    f"Context {i + 1}:\n{chunk}\n=====================================\n"
    for i, chunk in enumerate(top_chunks)
)
user_prompt = f"{user_prompt}\nQuestion: {query}"
ai_response = generate_response(system_prompt, user_prompt)

# System prompt for the grader. It tells the model to score the assistant's
# answer against the reference answer:
# 1 = very close, 0.5 = partially aligned, 0 = incorrect or unsatisfactory.
evaluate_system_prompt = (
    "You are an intelligent evaluation system tasked with assessing the AI assistant's responses. "
    "If the AI assistant's response is very close to the true response, assign a score of 1. "
    "If the response is incorrect or unsatisfactory in relation to the true response, assign a score of 0. "
    "If the response is partially aligned with the true response, assign a score of 0.5."
)

# Full grading prompt: the user query, the assistant's answer, the reference
# answer, and the scoring rules.
evaluation_prompt = (
    f"User Query: {query}\n"
    f"AI Response:\n{ai_response.choices[0].message.content}\n"
    f"True Response: {data[0]['ideal_answer']}\n"
    f"{evaluate_system_prompt}"
)

# Ask the model to grade the answer, reusing generate_response.
evaluation_response = generate_response(evaluate_system_prompt, evaluation_prompt)

# Print the grader's output.
print(evaluation_response.choices[0].message.content)