下载huggingface中数据集/模型(保存到本地指定路径)

news2025/2/23 4:09:32

一. snapshot_download

# 1.安装huggingface_hub
# pip install huggingface_hub


import os

from huggingface_hub import snapshot_download


print("downloading entire files...")

# Example 1: fetch the whole dataset repository, passing local_dir as the
# destination folder.
# Original author's note: with this form the files are still stored in the
# cache directory.
# NOTE(review): confirm against the installed huggingface_hub version; how
# local_dir and local_dir_use_symlinks interact with the cache may differ
# between releases.
snapshot_download(
    repo_id="ibrahimhamamci/CT-RATE",
    repo_type="dataset",
    token="hf_***",
    local_dir="本地路径",
    local_dir_use_symlinks=False,
    resume_download=True,
)


# Example 2: fetch the same repository, but pass cache_dir so the download
# is stored under a path of your choosing (per the original author's note).
# NOTE(review): local_dir_use_symlinks is passed here without local_dir;
# presumably it has no effect in this form -- confirm.
snapshot_download(
    repo_id="ibrahimhamamci/CT-RATE",
    repo_type="dataset",
    token="hf_***",
    cache_dir="本地路径",
    local_dir_use_symlinks=False,
    resume_download=True,
)



# download single file...，下载单个文件
# from huggingface_hub import hf_hub_download
# hf_hub_download(repo_id="ibrahimhamamci/CT-RATE",
#                 filename='config.json',  # 换成要下载的文件名
#                 repo_type="dataset",
#                 local_dir="/home/miao/data/dataset/CT-RATE/dataset/train",
#                 local_dir_use_symlinks=False, resume_download=True,
#                 force_download=False, subfolder='dataset/train/train_10006')

注意事项：

获取token的网址
下图的repo_id为“google/gemma-7b”

二. huggingface-cli

# 1. Install huggingface-cli (provided by the "cli" extra of huggingface_hub)
# pip install -U "huggingface_hub[cli]"
# Run `huggingface-cli -h` on the command line to view the built-in help.

# 2. Download a model
# Original author's note: even with --local-dir, the model is still stored in
# the cache directory. NOTE(review): confirm for your installed CLI version.
huggingface-cli download --token hf_*** --resume-download meta-llama/Llama-2-7b-hf --local-dir Llama-2-7b-hf

# 3. Download a dataset
# --cache-dir is set to /home/local-dir here; per the original author's note
# the dataset is therefore saved under "/home/local-dir/".
huggingface-cli download --repo-type dataset --token hf_** --resume-download ibrahimhamamci/CT-RATE --cache-dir /home/local-dir --local-dir-use-symlinks False