metahuman-stream是基于ER-NeRF模型的流式数字人,实现音视频同步对话。完整部署包含以下三个组件:
metahuman-stream
xtts-streaming-server
srs
部署 srs
# RTMP server: run SRS 5 detached, publishing TCP 1935/1985/8080 and UDP 8000/10080.
docker run --detach --interactive --tty \
  --name srs \
  --publish 1935:1935 \
  --publish 1985:1985 \
  --publish 8080:8080 \
  --publish 8000:8000/udp \
  --publish 10080:10080/udp \
  registry.cn-hangzhou.aliyuncs.com/ossrs/srs:5
部署 xtts-streaming-server
下载XTTS-v2模型到宿主机(例如 /opt/data/model/XTTS-v2,需与下方 -v 挂载的宿主机路径一致)
# Start the XTTS streaming server detached with all GPUs: host port 9000 maps to
# container port 80, and the host XTTS-v2 model directory is mounted at /app/tts_models.
docker run --detach --gpus all \
  --name him-xtts-streaming-server \
  --env COQUI_TOS_AGREED=1 \
  --volume /opt/data/model/XTTS-v2:/app/tts_models \
  --publish 9000:80 \
  ghcr.io/coqui-ai/xtts-streaming-server:latest
部署 metahuman-stream
参考项目的Dockerfile
启动容器
# Launch the CUDA 11.6.1 / cuDNN 8 devel (Ubuntu 20.04) image detached,
# with all GPUs and the host's network.
sudo docker run --detach --interactive --tty \
  --name nerfstream \
  --network host \
  --gpus all \
  nvcr.io/nvidia/cuda:11.6.1-cudnn8-devel-ubuntu20.04
# Open an interactive bash shell inside the running container.
sudo docker exec --interactive --tty nerfstream bash
安装工具和anaconda
# Refresh the package index, then install the build/download tools without prompts.
apt-get update --yes --quiet --fix-missing \
  && DEBIAN_FRONTEND=noninteractive apt-get install --yes --quiet --no-install-recommends \
    cmake curl git pkg-config vim wget
# Download the Miniconda installer and run it with the -b -u -p options,
# targeting ~/miniconda3 as the install prefix.
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
sh Miniconda3-latest-Linux-x86_64.sh -b -u -p "$HOME/miniconda3"
"$HOME/miniconda3/bin/conda" init
# Reload the shell configuration so the changes made by `conda init` take effect.
. "$HOME/.bashrc"
创建conda环境&安装依赖
# Create and activate the Python 3.10 environment, then install the dependencies.
# Every step that would ask for confirmation passes -y: when this block is pasted
# as a whole, a prompt would otherwise consume the next command as its answer.
conda create -y -n nerfstream python=3.10
conda activate nerfstream
pip config set global.index-url https://mirrors.aliyun.com/pypi/simple/
conda install -y pytorch==1.12.1 torchvision==0.13.1 cudatoolkit=11.3 -c pytorch
pip install "git+https://github.com/facebookresearch/pytorch3d.git"
pip install tensorflow-gpu==2.8.0
# Replace whatever protobuf version is installed with 3.20.1.
pip uninstall -y protobuf
pip install protobuf==3.20.1
conda install -y ffmpeg
# PortAudio and Python development packages, installed before pyaudio.
DEBIAN_FRONTEND=noninteractive apt-get install -y portaudio19-dev python-all-dev python3-all-dev
conda install -y pyaudio
克隆源码&安装项目依赖
# Fetch metahuman-stream into /metahuman-stream and install its Python requirements.
git clone https://github.com/lipku/metahuman-stream.git /metahuman-stream
cd /metahuman-stream
pip install --requirement requirements.txt
# Fetch python_rtmpstream into /python_rtmpstream and initialise its submodules.
git clone https://github.com/lipku/python_rtmpstream.git /python_rtmpstream
cd /python_rtmpstream
git submodule update --init
pip install wheel
cd python
# Edit the conda directory in CMakeLists.txt by hand before installing.
vim CMakeLists.txt
pip install .
配置LLM
以Qwen为例,修改app.py中的llm_response函数:
def llm_response(message):
    """Send ``message`` to the Qwen chat model and return its reply.

    The model is created through ``LLM().init_model`` on every call. The
    reply is printed to stdout before it is returned.
    """
    # Imported inside the function so the snippet is self-contained.
    from llm.LLM import LLM

    llm = LLM().init_model('Qwen', model_path='Qwen/Qwen-1_8B-Chat')
    response = llm.chat(message)
    print(response)
    return response
运行
# app.py is expected in the metahuman-stream checkout; the previous section ends
# in /python_rtmpstream/python, so change directory first.
cd /metahuman-stream
export HF_ENDPOINT=https://hf-mirror.com
# Run ONE of the following:
python app.py # default
python app.py --tts xtts --ref_file data/ref.wav # specify the voice to clone