简介
这里不对 langchain 和 chatGPT 进行介绍,仅对实现过程进行整理
环境
Python >=3.8
Flask 2.2.3
Jinja2 3.1.2
langchain 0.0.143
openai 0.27.4
实现总结功能
使用 langchain 和 openai 接口实现总结功能
实现逻辑:通过 text_splitter 将 PDF 分块,送入 langchain 的 summarize_chain 中进行处理。
同样也可以使用 OpenAIEmbeddings 来实现,文档地址:langchain 官方文档
创建文件:summarize.py
from langchain import PromptTemplate
from langchain.callbacks import get_openai_callback
from langchain.chains.summarize import load_summarize_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
def summarize_docs(docs, doc_url, llm):
    """Summarize documents in Chinese with a map-reduce summarization chain.

    The documents are re-split into chunks of at most 1000 characters with no
    overlap, and the chunks are handed to a ``map_reduce`` summarize chain
    that uses the same prompt for the map and the combine step. Token usage
    and cost reported by the OpenAI callback are printed to stdout.

    Args:
        docs: Documents to summarize; each item must expose ``page_content``.
        doc_url: Label of the data source, used only in the printed output.
        llm: Language model passed to ``load_summarize_chain``.

    Returns:
        The dict returned by the chain. Callers read the summary from the
        ``"output_text"`` key. For an empty ``docs`` the chain is not run and
        ``{"intermediate_steps": [], "output_text": ""}`` is returned.
    """
    print(f'You have {len(docs)} document(s) in your {doc_url} data')
    if not docs:
        # Nothing to summarize (e.g. a PDF without extractable text). Indexing
        # docs[0] below would raise IndexError, so return an empty result
        # instead of calling the API.
        return {"intermediate_steps": [], "output_text": ""}
    print(f'There are {len(docs[0].page_content)} characters in your document')

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    split_docs = text_splitter.split_documents(docs)
    print(f'You have {len(split_docs)} split document(s)')

    prompt_template = (
        "Write a concise summary of the following:\n"
        "{text}\n"
        "CONCISE SUMMARY IN CHINESE:"
    )
    PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"])
    chain = load_summarize_chain(
        llm,
        chain_type="map_reduce",
        verbose=False,
        return_intermediate_steps=True,
        map_prompt=PROMPT,
        combine_prompt=PROMPT,
    )

    # The callback collects usage figures for every request made inside the
    # ``with`` block.
    with get_openai_callback() as cb:
        response = chain({"input_documents": split_docs}, return_only_outputs=True)
        print(f"Total Tokens: {cb.total_tokens}")
        print(f"Prompt Tokens: {cb.prompt_tokens}")
        print(f"Completion Tokens: {cb.completion_tokens}")
        print(f"Successful Requests: {cb.successful_requests}")
        print(f"Total Cost (USD): ${cb.total_cost}")
    return response
创建接口
使用 Flask 框架创建简单的接口
创建文件server.py
import os
from flask import Flask, request, make_response, render_template
from langchain import OpenAI
from langchain.document_loaders import PyPDFLoader
from summarize import summarize_docs
# Flask application object; the route decorators below register handlers on it.
app = Flask(__name__)
@app.route('/summarize', methods=['POST'])
def summarize():
    """Store an uploaded PDF and respond with its summary text.

    Expects a multipart POST that carries the PDF in the ``file`` field. The
    upload is saved under ``./upload``, loaded with ``PyPDFLoader`` and passed
    to ``summarize_docs``.

    Returns:
        The ``output_text`` of the summary with status 200, or an error
        message with status 400 when the request contains no usable file.
    """
    index_path = "./upload"
    if 'file' not in request.files:
        return "Please send a POST request with a file", 400

    uploaded_file = request.files["file"]
    # A form posted without a selected file still sends the field, but with an
    # empty filename; saving would then target the upload directory itself.
    filename = os.path.basename(uploaded_file.filename or "")
    if not filename:
        return "Please send a POST request with a file", 400
    # NOTE(review): basename() only strips directory parts for the host OS;
    # consider werkzeug.utils.secure_filename for untrusted uploads.

    # The directory is otherwise only created when the module is run as a
    # script, so make sure it exists for any other way of starting the app.
    os.makedirs(index_path, exist_ok=True)
    filepath = os.path.join(index_path, filename)
    uploaded_file.save(filepath)

    llm = OpenAI(
        temperature=0,
        openai_api_key=OPENAI_API_KEY,
        model_name="text-davinci-003",
        openai_api_base=OPENAI_API_BASE,
    )
    loader = PyPDFLoader(filepath)
    pages = loader.load_and_split()
    result = summarize_docs(pages, filepath, llm)
    return make_response(str(result.get("output_text"))), 200
if __name__ == '__main__':
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs('./upload', exist_ok=True)
    # Keep a key that is already exported in the environment; the placeholder
    # is only a fallback and must be replaced with a real key before use.
    OPENAI_API_KEY = os.environ.setdefault("OPENAI_API_KEY", "sk-XXXXXXXXXXXXXXXXXXXXXXXXX")
    OPENAI_API_BASE = 'https://XXXX/v1'
    app.run(port=19100, host='127.0.0.1')
创建页面
在 server.py 中添加路由地址:
@app.route('/')
def index():
    """Render the upload page, passing the welcome text as ``data``."""
    return render_template("web.html", data="welcome to pdf summarize.")
创建目录 templates,并创建 html 文件 web.html:
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>文件上传</title>
<style>
    /* ---- Page ---- */
    body {
        margin: 0;
        padding: 0;
        font-family: Arial, sans-serif;
        background-color: #f5f5f5;
    }

    /* Centered white card that holds the whole UI. */
    .container {
        max-width: 600px;
        margin: 0 auto;
        padding: 20px;
        border-radius: 10px;
        background-color: #fff;
        box-shadow: 0 0 10px rgba(0, 0, 0, 0.2);
    }

    h1 {
        margin-top: 0;
        font-size: 32px;
        text-align: center;
        color: #333;
    }

    /* ---- Upload form ---- */
    form {
        display: flex;
        flex-direction: column;
        align-items: center;
    }

    input[type="file"] {
        margin-bottom: 20px;
        padding: 10px;
        font-size: 16px;
        color: #333;
        border: 1px solid #ccc;
        border-radius: 5px;
        background-color: #fff;
        box-shadow: 0 0 5px rgba(0, 0, 0, 0.1);
    }

    button {
        padding: 10px;
        color: #fff;
        border: none;
        border-radius: 5px;
        background-color: #4CAF50;
        cursor: pointer;
        transition: background-color 0.2s;
    }

    button:hover {
        background-color: #3e8e41;
    }

    /* ---- Result area; pre-wrap keeps line breaks of the returned text ---- */
    .result {
        margin-top: 20px;
        padding: 20px;
        border-radius: 5px;
        background-color: #f1f1f1;
        white-space: pre-wrap;
    }

    /* ---- Upload progress: .progress is the track, .bar the fill ---- */
    .progress {
        margin-top: 20px;
        width: 100%;
        height: 20px;
        border-radius: 5px;
        background-color: #f1f1f1;
        overflow: hidden;
        box-shadow: 0 0 5px rgba(0, 0, 0, 0.1);
    }

    .bar {
        width: 0;
        height: 100%;
        background-color: #4CAF50;
        transition: width 0.2s;
    }
</style>
</head>
<body>
<div class="container">
    <h1>文件上传</h1>
    <!-- Relative action: the page is served by the same Flask app that
         exposes /summarize, so the request always goes to the right host
         and port (the server listens on 19100, not 5000). -->
    <form id="upload-form" method="POST" action="/summarize" enctype="multipart/form-data">
        <!-- "required" stops the form from being submitted without a file. -->
        <input type="file" name="file" accept="application/pdf" required>
        <button type="submit">生成摘要</button>
    </form>
    <!-- Upload progress track; the script sets the width of .bar. -->
    <div class="progress">
        <div class="bar"></div>
    </div>
    <h2>返回结果</h2>
    <div>目前响应时间较长,700k 文件响应时间为22秒,请耐心等待</div>
    <div class="result">
        <!-- Filled with the response text by the script. -->
        <div id="result-text"></div>
    </div>
    <div>页面生成 power by openai chatGPT-3.5</div>
</div>
<script>
const form = document.querySelector('#upload-form');
const progressBar = document.querySelector('.bar');
form.addEventListener('submit', async (event) => {
event.preventDefault();
const formData = new FormData(form);
const xhr = new XMLHttpRequest();
xhr.upload.addEventListener('progress', (event) => {
const percent = (event.loaded / event.total) * 100;
progressBar.style.width = percent + '%';
});
xhr.onreadystatechange = () => {
if (xhr.readyState === XMLHttpRequest.DONE && xhr.status === 200) {
progressBar.style.width = '0';
document.querySelector('#result-text').textContent = xhr.responseText;
}
};
xhr.open(form.method, form.action);
xhr.send(formData);
});
</script>
</body>
</html>
运行展示
完成后整体项目结构如下:
运行效果如下: