下载HF AutoTrain 模型的配置文件
- 一.在huggingface上创建AutoTrain项目
- 二.通过HF用户名和autotrain项目名,拼接以下url,下载模型列表(json格式)到指定目录
- 三.解析上面的json文件、去重、批量下载模型配置文件(权重以外的文件)
一.在huggingface上创建AutoTrain项目
二.通过HF用户名和autotrain项目名,拼接以下url,下载模型列表(json格式)到指定目录
mkdir model_names
cd model_names
wget https://username-projectname.hf.space/ui/model_choices/llm:sft -O llm_sft.txt
wget https://username-projectname.hf.space/ui/model_choices/llm:orpo -O llm_orpo.txt
wget https://username-projectname.hf.space/ui/model_choices/llm:generic -O llm_generic.txt
wget https://username-projectname.hf.space/ui/model_choices/llm:dpo -O llm_dpo.txt
wget https://username-projectname.hf.space/ui/model_choices/llm:reward -O llm_reward.txt
wget https://username-projectname.hf.space/ui/model_choices/text-classification -O text_classification.txt
wget https://username-projectname.hf.space/ui/model_choices/text-regression -O text_regression.txt
wget https://username-projectname.hf.space/ui/model_choices/seq2seq -O seq2seq.txt
wget https://username-projectname.hf.space/ui/model_choices/token-classification -O token_classification.txt
wget https://username-projectname.hf.space/ui/model_choices/dreambooth -O dreambooth.txt
wget https://username-projectname.hf.space/ui/model_choices/image-classification -O image_classification.txt
wget https://username-projectname.hf.space/ui/model_choices/image-object-detection -O image_object_detection.txt
三.解析上面的json文件、去重、批量下载模型配置文件(权重以外的文件)
from huggingface_hub import snapshot_download
from pathlib import Path
import os
import glob
import json
import tqdm
def download_model(repo_id):
    """Download the configuration files (everything except weights) for one repo.

    Files are stored under ``./models/<repo_id>``.  If that directory already
    contains at least one ``*.json`` file the repo is considered downloaded
    and the call is a no-op, making re-runs idempotent.

    Parameters
    ----------
    repo_id : str
        Hugging Face repository id, e.g. ``"org/model"``.
    """
    models_path = Path.cwd().joinpath("models", repo_id)
    models_path.mkdir(parents=True, exist_ok=True)
    # Skip repos that already have a JSON file — allows resuming a batch run.
    if glob.glob(os.path.join(models_path, "*.json")):
        return
    # Only fetch small config/tokenizer/readme files, never the weight shards.
    # The token is read from the environment (HF_TOKEN) instead of being
    # hard-coded in source control; ``resume_download`` was dropped because
    # it is deprecated and a no-op in current huggingface_hub.
    snapshot_download(
        repo_id=repo_id,
        allow_patterns=["*.json", "tokenizer*", "README.md"],
        local_dir=models_path,
        token=os.environ.get("HF_TOKEN"),
    )
def load_meta_info():
    """Return the de-duplicated collection of repo ids to download.

    First run: parse every ``model_names/*.txt`` file (JSON payload saved
    from the AutoTrain space), de-duplicate the repo ids, and write a
    ``meta.txt`` cache with one ``repo_id, type1;type2`` line per repo.
    Later runs: read the cache instead of re-parsing.

    Returns
    -------
    list[str] | set[str]
        Repo ids — a list when read from the cache, a set when freshly built.
    """
    file_path = "meta.txt"
    if os.path.exists(file_path):
        # Fast path: the repo id is the first comma-separated field of each
        # line; skip blank lines so they don't become empty ids.
        with open(file_path, "r", encoding="utf-8") as f:
            return [line.strip().split(",")[0] for line in f if line.strip()]
    repo_ids = set()
    repo_id_model_type_map = dict()
    for file in sorted(glob.glob("model_names/*.txt")):
        # File name stem doubles as the model type, e.g. "llm_sft.txt".
        model_type = os.path.basename(file).split(".")[0]
        with open(file, "r", encoding="utf-8") as f:
            for item in json.loads(f.read().strip()):
                repo_id = item["id"]
                repo_ids.add(repo_id)
                repo_id_model_type_map.setdefault(repo_id, set()).add(model_type)
    # Sort repos and join type names so meta.txt is deterministic across runs
    # (iterating a raw set gives arbitrary order and writes a Python set repr).
    with open(file_path, "w", encoding="utf-8") as f:
        for repo_id in sorted(repo_ids):
            model_types = ";".join(sorted(repo_id_model_type_map[repo_id]))
            f.write(f"{repo_id}, {model_types}\n")
    return repo_ids
# Repos whose configs we deliberately do not fetch (e.g. gated/problematic).
SKIP_REPOS = {"Corcelio/mobius", "briaai/BRIA-2.3"}
# Walk every known repo id with a progress bar, skipping the blacklist.
for repo_id in tqdm.tqdm(load_meta_info()):
    print(repo_id)
    if repo_id in SKIP_REPOS:
        continue
    download_model(repo_id)