准备一份股票列表的CSV文件(程序读取的文件名为 wz.csv,放在程序所在目录下,实际文件以逗号分隔),各列内容如下(此处用 | 分隔仅为便于展示)
code | name | close | cmv | date_ipo |
300434 | 金石亚药 | 12.89 | 4279829590 | 20150424 |
300380 | 安硕信息 | 19.31 | 2419934163 | 20140128 |
688123 | 聚辰股份 | 132.82 | 11140872666 | 20191223 |
300586 | 美联新材 | 20.34 | 7908821381 | 20170104 |
300534 | 陇神戎发 | 12.96 | 3894650631 | 20160913 |
300813 | 泰林生物 | 60.55 | 1605296635 | 20200114 |
688259 | 创耀科技 | 88.76 | 1578212358 | 20220112 |
301211 | 亨迪药业 | 30.43 | 1771753855 | 20211222 |
688099 | 晶晨股份 | 75.5 | 31214093954 | 20190808 |
范例CSV下载
程序所在目录下,手工建好3个目录
data/day
data/week
data/month
导入必要的包
import akshare as ak
import numpy as np
import pandas as pd
import warnings
import time
import datetime as dt
warnings.filterwarnings("ignore")
读取股票列表
#需要下载的股票列表
#将 yyyymmdd形式的日期字符串,转换为yyyy-mm-dd形式
def f_str2dtstr(x):
    """Convert a ``yyyymmdd`` date string to ``yyyy-mm-dd``.

    Args:
        x: Date text such as ``'20150424'``.

    Returns:
        The reformatted string, or ``np.nan`` when ``x`` is not a string
        (e.g. a missing value) or does not match the ``%Y%m%d`` layout.
    """
    try:
        return dt.datetime.strptime(x, '%Y%m%d').strftime('%Y-%m-%d')
    except (TypeError, ValueError):
        # TypeError: x is not a string (NaN/None); ValueError: malformed date.
        # ``np.nan`` is used because the ``np.NaN`` alias was removed in NumPy 2.0.
        return np.nan
# Load the stock list with an explicit dtype for every column; codes and IPO
# dates are read as text.
df = pd.read_csv(
    'wz.csv',
    dtype={'code': str, 'name': str, 'close': float, 'cmv': float, 'date_ipo': str},
)
# Rewrite the IPO dates from yyyymmdd to yyyy-mm-dd (unparseable values become NaN).
df['date_ipo'] = df['date_ipo'].map(f_str2dtstr)
df
设置线程工作相关参数
import threading
# Work is split into batches of ``job_per_thread`` stocks, one thread per batch;
# the final thread takes whatever is left over.
job_per_thread = 15
remainder = len(df) % job_per_thread
# Ceiling division: every full batch gets a thread, plus one more thread for a
# partial final batch. (The previous formula under-counted whenever
# len(df) > job_per_thread and the remainder was non-zero.)
count_thread = len(df) // job_per_thread + (1 if remainder else 0)
print(job_per_thread)  # number of stocks handled by each thread
print(count_thread)  # number of threads
print(remainder)  # number of stocks handled by the last thread (0 = a full batch)
线程处理函数
- 参数为线程编号(第X号线程,根据循环的变量来的)
- 网络读取数据,失败时最多重试5次
- 日线,月线,周线全部下载
- 增量下载、任意时间下载(对比本地数据,删除本地末尾的记录,以新下载的为准)
def _period_csv_path(code, period):
    """Return the local CSV path for ``code`` at ``period``, or None for an unknown period."""
    sub_dir = {'daily': 'day', 'weekly': 'week', 'monthly': 'month'}.get(period)
    if sub_dir is None:
        return None
    return "./data/%s/%s.csv" % (sub_dir, code)


def _refresh_history(code, period, path_file):
    """Incrementally update one stock's K-line CSV for one period.

    Reads the existing file at ``path_file`` (if any), downloads forward-adjusted
    bars from the last locally stored date up to today, replaces the last local
    row with the freshly downloaded data and writes the result back.
    The download is attempted up to 5 times; the stock code is printed on every
    failed attempt.
    """
    columns = ['date', 'open', 'high', 'low', 'close', 'volume', 'turnover']
    try:
        # Read the file that belongs to THIS period, not always the daily one.
        local = pd.read_csv(path_file, dtype={'date': str})
    except (OSError, ValueError):
        # Missing, empty or unparseable file: start from scratch.
        # (pandas' EmptyDataError/ParserError are ValueError subclasses.)
        local = pd.DataFrame(data=None, columns=columns)

    # Resume from the last stored bar, or from the epoch when nothing is stored.
    date_s = local.iloc[-1, 0] if len(local) > 0 else '1970-01-01'
    date_e = dt.datetime.now().strftime('%Y%m%d')
    start = dt.datetime.strptime(date_s, '%Y-%m-%d')
    if start >= dt.datetime.strptime(date_e, '%Y%m%d'):
        return  # already up to date

    fresh = None
    for _ in range(5):
        try:
            fresh = ak.stock_zh_a_hist(
                symbol=code,
                period=period,
                # akshare documents start_date/end_date as YYYYMMDD strings.
                start_date=start.strftime('%Y%m%d'),
                end_date=date_e,
                adjust="qfq",
            )
            fresh = fresh[['日期', '开盘', '最高', '最低', '收盘', '成交量', '换手率']]
            fresh.columns = columns
            break
        except Exception:
            # Network error or empty/unexpected response: report and retry.
            fresh = None
            print(code)
            time.sleep(3)

    if fresh is None or len(fresh) == 0:
        return  # nothing new was downloaded; leave the local file untouched

    if len(local) > 0:
        # The download starts AT the last local date, so only that one row is
        # superseded; dropping more would lose the bar before it.
        merged = pd.concat([local.iloc[:-1, :], fresh], axis=0, ignore_index=True)
    else:
        merged = fresh
    merged.to_csv(path_file, index=False, date_format="%Y-%m-%d")


def proc_get(m):
    """Thread worker: download weekly, daily and monthly K-lines for batch ``m``.

    Args:
        m: Zero-based thread index. The worker handles rows
            ``m*job_per_thread`` up to (but excluding) ``(m+1)*job_per_thread``
            of the module-level ``df``; the slice is clipped at the end of the
            list, so the last batch may be shorter.
    """
    first_row = m * job_per_thread
    codes = df.iloc[first_row:first_row + job_per_thread, 0]
    for code in codes:
        for period in ('weekly', 'daily', 'monthly'):
            path_file = _period_csv_path(code, period)
            if path_file is None:
                break
            _refresh_history(code, period, path_file)
开启多线程
# Build one worker per batch index (args must be a tuple, hence the trailing
# comma), launch them all, then wait for every worker to finish.
threads = [
    threading.Thread(target=proc_get, args=(batch_no,))
    for batch_no in range(count_thread)
]
for worker in threads:
    worker.start()
for worker in threads:
    worker.join()
说明:
网络下载数据失败的代码会被打印出来
一般情况下,重试5次仍然失败的,就是真的没有数据
没数据的一般是一些生成了股票代码但没交易的新股
得空就更新一下你的K线数据呗,增量挺快的
取得的数据全部是前复权的
未复权,包含前收盘价的数据可以换一个源取,有兴趣的可以做一下。
任何疑问 turui@163.net
当前接口获取的数据,起码比聚宽的强
以SH000300数据为例,聚宽的数据从2005.04.08才有
看到这种形状,赶紧入吧,涨停板等着你数据