Linux | Installing lb-toolkits
I recently needed to download Himawari data again; I shared code for this once before. Today I found that I had accidentally deleted my old environment, and running that code requires the lb-toolkits library, so I took the opportunity to document the installation process.
The version installed here is 1.2.4. Don't ask why this particular version: it is simply the only one I managed to install successfully.
- https://pypi.org/project/lb-toolkits/1.2.4/#files
I originally tried to install it directly with
pip install lb-toolkits==1.2.4
but it kept failing with errors.
Installation process
Below is a record of the steps that finally worked.
Installing Python
First create an environment with Python 3.11:
conda create -n py311 python=3.11
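Then activate the new environment (the name py311 comes from the command above) before installing anything into it:
conda activate py311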
Installing the dependencies
lb-toolkits needs the dependencies listed in the table below:
| Library | Version | Library  | Version | Library | Version |
|---------|---------|----------|---------|---------|---------|
| numpy   | 1.2.0   | pyhdf    | 0.10.0  | h5py    | 1.0.0   |
| netcdf4 | 1.0.0   | tqdm     | 4.0.0   | gdal    | 2.0.0   |
| pillow  | 7.0.0   | paramiko | 2.10.0  | cdsapi  | 0.5.0   |
So install the dependencies first:
conda install conda-forge::pyhdf h5py netcdf4 tqdm gdal pillow paramiko cdsapi
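If you want to confirm that the compiled dependencies installed correctly, a quick import check like the following can help (a minimal sketch; run it with the py311 environment active):

# Quick sanity check for the compiled dependencies installed via conda
import h5py
import netCDF4
import pyhdf
from osgeo import gdal

print('h5py   ', h5py.__version__)
print('netCDF4', netCDF4.__version__)
print('GDAL   ', gdal.__version__)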
Installing lb-toolkits 1.2.4
With the dependencies in place, install the specific version with pip:
pip install lb-toolkits==1.2.4
The installation succeeded.
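To verify, a minimal import check is enough (assuming the py311 environment is still active; ftppro is the FTP helper used in the script below):

# Confirm that lb-toolkits and the FTP helper used later import cleanly
import lb_toolkits
from lb_toolkits.tools import ftppro

print(lb_toolkits.__file__)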
Test script
One more library is needed before running the script:
conda install bs4
Then run the script to download data for August and September 2023:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Apr 6 14:25:52 2023
@author: jianpu
"""
import os
import sys
import datetime
import time
from lb_toolkits.tools import ftppro
#from lb_toolkits.tools import writejson
class downloadH8(object):
def __init__(self, username, password):
self.ftp = ftppro(FTPHOST, username, password)
def search_ahi8_l1_netcdf(self, starttime, endtime=None, pattern=None, skip=False):
        '''
        Download Himawari-8 (AHI) L1 NetCDF data files

        Parameters
        ----------
        starttime : datetime
            Start time of the requested data
        endtime : datetime, optional
            End time of the requested data (defaults to starttime)
        pattern : list or str, optional
            Fuzzy-match pattern(s) for file names

        Returns
        -------
        list
            List of matched files to download
        '''
if endtime is None :
endtime = starttime
downfilelist = []
nowdate = starttime
while nowdate <= endtime :
            # Build the H8 FTP directory path for this day
sourceRoot = os.path.join('/jma/netcdf', nowdate.strftime("%Y%m"), nowdate.strftime("%d"))
sourceRoot = sourceRoot.replace('\\','/')
            # Get the file list under this directory
filelist = self.GetFileList(starttime, endtime, sourceRoot, pattern)
# filelist = [f for f in filelist if f.startswith('NC_H08_') and f.endswith('.06001_06001.nc')]
            if len(filelist) == 0 :
                # Report the unmatched date before advancing to the next day
                print('No files matched for date [%s]' % (nowdate.strftime('%Y-%m-%d')))
                nowdate += datetime.timedelta(days=1)
                continue
nowdate += datetime.timedelta(days=1)
downfilelist.extend(filelist)
return downfilelist
def GetFileList(self, starttime, endtime, srcpath, pattern=None):
        ''' Match H8 L1 data file names against the input time range '''
downfiles = []
srcpath = srcpath.replace('\\', '/')
filelist = self.ftp.listdir(srcpath)
filelist.sort()
for filename in filelist :
namelist = filename.split('_')
nowdate = datetime.datetime.strptime('%s %s' %(namelist[2], namelist[3]), '%Y%m%d %H%M')
if (nowdate < starttime) | (nowdate > endtime) :
continue
downflag = True
            # Check whether the file name contains the given pattern string(s)
if pattern is not None :
if isinstance(pattern, list) :
for item in pattern :
if item in filename :
downflag = True
# break
else:
downflag = False
break
elif isinstance(pattern, str) :
if pattern in filename :
downflag = True
else:
downflag = False
if downflag :
srcname = os.path.join(srcpath, filename)
srcname = srcname.replace('\\','/')
downfiles.append(srcname)
return downfiles
def download(self, outdir, srcfile, blocksize=1*1024, skip=False):
"""通过ftp接口下载H8 L1数据文件"""
if not os.path.exists(outdir):
os.makedirs(outdir)
            print('Created directory: %s' % (outdir))
if isinstance(srcfile, list) :
count = len(srcfile)
for srcname in srcfile:
count -= 1
self._download(outdir, srcname, blocksize=blocksize, skip=skip, count=count+1)
elif isinstance(srcfile, str) :
self._download(outdir, srcfile, blocksize=blocksize, skip=skip)
def _download(self, outdir, srcname, blocksize=1*1024, skip=False, count=1):
print('='*100)
basename = os.path.basename(srcname)
dstname = os.path.join(outdir, basename)
if skip :
return srcname
if os.path.isfile(dstname) :
            print('File already exists, skipping download >> [%s]' % (dstname))
return srcname
stime = time.time()
        print(datetime.datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S'),
              'Start downloading file [%d]: %s' % (count, srcname))
if self.ftp.downloadFile(srcname, outdir, blocksize=blocksize):
            print(datetime.datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S'),
                  'Successfully downloaded file [%d]: %s' % (count, dstname))
else:
            print(datetime.datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S'),
                  'Failed to download file [%d]: %s' % (count, dstname))
etime = time.time()
        print('Download took %.2f seconds' % (etime - stime))
return srcname
def check_data_completeness(file_list, start_time, end_time):
    expected_num_files = (end_time - start_time).days * 48 + 48  # 48 files/day for 30-min data; use 144 for 10-min data
actual_num_files = len(file_list)
if actual_num_files == expected_num_files:
print("已经下载了全部数据。")
else:
print("有 %d 个数据文件缺失。" % (expected_num_files - actual_num_files))
expected_file_names = []
actual_file_names = []
for i in range(expected_num_files):
file_time = start_time + datetime.timedelta(minutes=i * 30)
file_name = "NC_H08_%s_R21_FLDK.06001_06001.nc" % (file_time.strftime("%Y%m%d_%H%M"))
expected_file_names.append(file_name)
for file_path in file_list:
file_name = os.path.basename(file_path)
actual_file_names.append(file_name)
missing_file_names = set(expected_file_names) - set(actual_file_names)
for missing_file_name in missing_file_names:
print("缺失文件:%s" % missing_file_name)
FTPHOST='ftp.ptree.jaxa.jp'
# create an instance of the downloadH8 class
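# NOTE: replace 'xxx', 'xxx' below with your own JAXA P-Tree FTP credentials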
h8_downloader = downloadH8('xxx', 'xxx')
## 2016 1440\0240 loss
# search for H8 files for a specific date
start_time = datetime.datetime(2023, 8, 27)
end_time = datetime.datetime(2023, 9,7,23, 59, 59)
file_list = h8_downloader.search_ahi8_l1_netcdf(start_time, end_time,pattern=['R21','02401_02401'])
# Select only the files on the 00/30-minute marks (one file every 30 minutes)
selected_files = []
for file in file_list:
if file.endswith(".nc"):
if file[40:42] in ["00", "30"]:
selected_files.append(file)
# Print the selected file names
print(selected_files)
check_data_completeness(selected_files,start_time, end_time)
from tqdm import tqdm
for file in tqdm(selected_files):
h8_downloader.download('/DatadiskExt/down_h8_code/', file)
Submit the script to the background with nohup,
nohup python down_kuihua8_30min.py > down_H8-2023-08_09-15.log 2>&1 &
and follow the download log with tail:
tail -f down_H8-2023-08_09-15.log
You can see that the data is being downloaded in the background.
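Once a file has finished downloading, a quick read with netCDF4 confirms it is usable (a minimal sketch; the file name below is hypothetical and just follows the naming pattern matched above):

from netCDF4 import Dataset

# Hypothetical example file name; substitute one that actually exists in the output directory
fname = '/DatadiskExt/down_h8_code/NC_H08_20230827_0000_R21_FLDK.02401_02401.nc'
with Dataset(fname) as nc:
    print(nc.variables.keys())   # list the variables stored in the file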