Copy and run the following commands:
python3 -m venv myvenv
source myvenv/bin/activate
pip install modelscope
pip install transformers_stream_generator
pip install transformers
pip install tiktoken
pip install accelerate
pip install bitsandbytes
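Optionally, verify that PyTorch can see your GPU before downloading the model weights. Note that torch is not installed explicitly above, but it is pulled in as a dependency of accelerate and bitsandbytes:
python3 -c "import torch; print('CUDA available:', torch.cuda.is_available())"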
touch run.py
vi run.py
Copy the following code and paste it into run.py:
import os
import platform

from modelscope import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

model_id = 'qwen/Qwen-7B-Chat'
revision = 'v1.0.1'

tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision, trust_remote_code=True)
# Load the model in fp16 and spread it across available devices
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", revision=revision,
                                             trust_remote_code=True, fp16=True).eval()
model.generation_config = GenerationConfig.from_pretrained(
    model_id, trust_remote_code=True)  # generation length, top_p and other hyperparameters can be adjusted here

# Set to True (e.g., from a signal handler) to interrupt streaming;
# this minimal demo never toggles it.
stop_stream = False


def clear_screen():
    if platform.system() == "Windows":
        os.system("cls")
    else:
        os.system("clear")


def print_history(history):
    for query, response in history:
        print(f"\nUser:{query}\nQwen-7B:{response}")


def main():
    history, response = [], ''
    global stop_stream
    clear_screen()
    print("Welcome to Qwen-7B. Type a message to chat, 'clear' to reset the history, 'stop' to quit.")
    while True:
        query = input("\nUser:")
        if query.strip() == "stop":
            break
        if query.strip() == "clear":
            history = []
            clear_screen()
            print("Welcome to Qwen-7B. Type a message to chat, 'clear' to reset the history, 'stop' to quit.")
            continue
        # Stream the reply: each iteration yields the response generated so far,
        # so redraw the screen with the history plus the partial answer.
        for response in model.chat(tokenizer, query, history=history, stream=True):
            if stop_stream:
                stop_stream = False
                break
            else:
                clear_screen()
                print_history(history)
                print(f"\nUser: {query}")
                print("\nQwen-7B:", end="")
                print(response)
        history.append((query, response))


if __name__ == "__main__":
    main()
Then press Esc, type :wq, and press Enter to save and exit vi.
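One optional note before running: bitsandbytes was installed above, but the script loads the model in plain fp16. If GPU memory is tight, you can swap the model = ... line in run.py for a 4-bit load. The sketch below uses the standard transformers BitsAndBytesConfig; whether the Qwen remote code at revision v1.0.1 accepts quantization_config this way is an assumption, so treat it as a starting point rather than a guaranteed recipe:
import torch
from transformers import BitsAndBytesConfig
from modelscope import AutoModelForCausalLM

# Hypothetical alternative to the fp16 loading step above
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # 4-bit NF4 weights via bitsandbytes
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,  # compute in fp16
)
model = AutoModelForCausalLM.from_pretrained(
    'qwen/Qwen-7B-Chat', revision='v1.0.1', device_map="auto",
    quantization_config=quant_config, trust_remote_code=True,
).eval()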
Then run the script:
python run.py
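If you just want a single reply rather than the interactive streaming loop, the same chat method can be called without stream=True; in the Qwen remote code this tutorial targets, it then returns the response together with the updated history. A minimal one-shot sketch (the greeting string is an arbitrary example):
from modelscope import AutoModelForCausalLM, AutoTokenizer

model_id, revision = 'qwen/Qwen-7B-Chat', 'v1.0.1'
tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", revision=revision,
                                             trust_remote_code=True, fp16=True).eval()

# Without stream=True, chat returns (response, history) instead of a generator.
response, history = model.chat(tokenizer, "Hello, who are you?", history=None)
print(response)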