Python 爬虫程序:
# 京东.py
"""Scrape product titles and sales figures from one search-result page.

Importing this module performs the HTTP request immediately and fills the
module-level dict ``name_turnover`` (``raw_title`` -> ``view_sales``), which
the companion database script reads as ``京东.name_turnover``.

Note: despite the file name (京东 = JD.com), the request URL below targets
``s.taobao.com``.
"""
import json
import pprint
import re

import requests

# Result mapping: product title -> sales text. It must exist before the
# fill step at the bottom of the script and is read by importers.
name_turnover = {}

# Upper bound on the number of items taken from the response.
MAX_ITEMS = 44

# Search endpoint; the response is JSONP wrapped in the ``jsonp2291`` callback
# named in the ``callback`` query parameter.
url = "https://s.taobao.com/search?data-key=s&data-value=88&ajax=true&_ksTS=1686118766568_2290&callback=jsonp2291&ie=utf8&initiative_id=staobaoz_20230607&stats_click=search_radio_all%3A1&js=1&imgfile=&q=%E7%94%9F%E9%B2%9C%E8%94%AC%E8%8F%9C&suggest=0_1&_input_charset=utf-8&wq=%E7%94%9F%E9%B2%9C&suggest_query=%E7%94%9F%E9%B2%9C&source=suggest&bcoffset=-2&ntoffset=4&p4ppushleft=2%2C48&s=44"

headers = {
    "referer": "https://s.taobao.com/search?ie=utf8&initiative_id=staobaoz_20230607&stats_click=search_radio_all%3A1&js=1&imgfile=&q=%E7%94%9F%E9%B2%9C%E8%94%AC%E8%8F%9C&suggest=0_1&_input_charset=utf-8&wq=%E7%94%9F%E9%B2%9C&suggest_query=%E7%94%9F%E9%B2%9C&source=suggest&bcoffset=1&ntoffset=1&p4ppushleft=2%2C48&s=44",
    # NOTE(review): a logged-in session cookie is hard-coded here. It is a
    # credential; consider loading it from configuration instead.
    # The value is written as two adjacent literals so that it stays a single
    # string without a physical line break inside the quotes.
    "cookie": (
        "cna=oEmTGn6xI1gCAXWIV32z/Mil; miid=987046928798160839; lLtC1_=1; cookie2=1fd1729b734bce71cfd191f40f41bd6a; t=6a02fb735e3adc573557c6e0ab62f54c; _tb_token_=eb837b375301f; tk_trace=oTRxOWSBNwn9dPyorMJE%2FoPdY8zMG1aAN%2F0TkjYGZjkj6rrK3kv4LgxGhtlxvv5DK7siwhr9c%2BstdFdCVk5KH7UN3Btd4iqEU17g2jJGikl4OiEw5IdUcpsDvhC1WZJPhSJYYp%2B6UrKYwPiRWfSvBW4zK6%2BYutXuF6HE%2F3pjhSu3qOR7542b5NcQn%2FJI4AJlgd81EFPZUBkkGw2XAV0%2Fpgo51zEBDT1kYOSpTYZWgd2bnYHINx1YUUHrAoA5U1t48eAPBi4KOhH9G0rEvJ4yD8ROSwbvaRDs8i2n8fCp2dv8gCS4r8WxXvoVcZRRLUn%2BFloUy0gD0ESCWBuP%2Fw3BLdopZD2d%2BbgE7mvWkdhRUo1LPvt4EftUe524UBNo31Rzlzme07NcxiMJOgiDOcgKFtTCY2I%2Fn0wkBIM2lnXIpUHie240uOKUvQTLXZRlMAcECW42IKet1wXX1fflLXVHluNPUKlvAA%3D%3D; _samesite_flag_=true; xlly_s=1; sgcookie=E100yC3wSCQAjJ8ZAdrVby4vcYf0LCBKEoFxeTliqs9%2Bygxrr1n7lZQfAm9pZ%2BJzi%2Bne2HYq8DFA83iR8Un%2B%2F6a6Yf44YPNJrHD0jJ0AiallGe0%3D; unb=3610730283; uc3=nk2=F5REPhy1f9gdkw%3D%3D&vt3=F8dCsf5xcO7RJvDbgAc%3D&lg2=UtASsssmOIJ0bQ%3D%3D&id2=UNaGuKCjXeSfkQ%3D%3D; csg=0d01eec1; lgc=tb10388584; cancelledSubSites=empty; cookie17=UNaGuKCjXeSfkQ%3D%3D; dnk=tb10388584; skt=78b9e90c3fa18c1a; existShop=MTY4NjExNDU2MA%3D%3D; "
        "uc4=nk4=0%40FY4PbIBD6eDor6y5P%2B1mh74jdY0S&id4=0%40UgGP%2FESshppTHtaPI2%2B4zIkPaqaV; tracknick=tb10388584; _cc_=U%2BGCWk%2F7og%3D%3D; _l_g_=Ug%3D%3D; sg=431; _nk_=tb10388584; cookie1=UUjYFkC38FgBaH1zIGShfVmSWD4lS9gurHNK8Qagryw%3D; _m_h5_tk=4b23f4db07a46f97c8fe25c2a1422a40_1686123228984; _m_h5_tk_enc=36062541f2db73097a52b1d6730a9ae0; mt=ci=10_1; thw=cn; uc1=existShop=false&pas=0&cookie16=URm48syIJ1yk0MX2J7mAAEhTuw%3D%3D&cookie21=V32FPkk%2FgihF%2FS5nr3O5&cookie14=Uoe8jJKLOYaWxQ%3D%3D&cookie15=UIHiLt3xD8xYTw%3D%3D; alitrackid=www.taobao.com; lastalitrackid=www.taobao.com; JSESSIONID=C88639CB9D97037EC0D952DCB6CCA308; tfstk=cHrfBp9wvIAbrvsMmj6zAXszY80NCD7SeZGzh6aKVNEBdFLqwh1DRHlI6q4MNcktF; l=fBMmKTMRTc8OGYMNBO5CKurza77ON3ObcAVzaNbMiIEGC6BRKvvGD7xQ23IdECxRR8XlifT64jvnyCJt1ehu-ykjJ0YEae1VivEDCeX0WOyN.; isg=BDc3yZa8H470bpz5X9-TBA9lxiuB_Atemwyse4n3foK_OHd6lcxtr5ReGphm1uPW"
    ),
    "User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
}

# A timeout keeps the import from hanging forever if the server never answers.
reponse = requests.get(url=url, headers=headers, timeout=10)
html_data = reponse.text

# Strip the JSONP wrapper: capture the object literal passed to jsonp2291(...).
# Raw string so that the backslash-escaped parentheses reach the regex engine
# without relying on Python's handling of unknown string escapes.
c = re.compile(r'jsonp2291\(({.*?})\)')
re_Data = c.findall(html_data)
if not re_Data:
    # Same exception type that indexing the empty list would raise, but with
    # a message that says what is actually missing.
    raise IndexError("no jsonp2291(...) payload found in the response body")
re_Datas = json.loads(re_Data[0])

# The list of result items inside the decoded payload.
dict_index = re_Datas["mods"]["itemlist"]["data"]["auctions"]

# Slice instead of indexing 0..43 so a page with fewer items does not raise.
auctions = dict_index[:MAX_ITEMS]
names_list = [item["raw_title"] for item in auctions]
turnover_list = [item["view_sales"] for item in auctions]

# Fill the exported mapping; a repeated title keeps its last sales value.
name_turnover.update(zip(names_list, turnover_list))
数据库连接与数据存入程序:
# pymysql.py
"""Store the scraped title -> sales mapping in a MySQL table.

Reads ``name_turnover`` from the ``京东`` module (importing it runs the
scraper), creates the table ``inforsmatioss`` and inserts one row per entry.
"""
# NOTE(review): the header above names this file ``pymysql.py``. A script
# saved under that name shadows the PyMySQL package, so ``import pymysql``
# would import this file itself. Saving it under another name avoids that.
import pymysql
import 京东

# Mapping of product title -> sales text produced by the scraper module.
data = 京东.name_turnover

# Connect to the database.
# NOTE(review): credentials are hard-coded; consider moving them to config.
db = pymysql.connect(
    host='127.0.0.1',   # server address
    user='root',        # user name
    passwd='123456',    # password
    port=3307,          # server port (original note mentions MySQL 8.0.26)
    db='jd',            # database name
    charset='utf8',     # connection character set
)

try:
    # The cursor executes the SQL statements; the ``with`` block closes it
    # even when a statement raises.
    with db.cursor() as cursor:
        # NOTE(review): this drops ``address_book``, while the table created
        # and filled below is ``inforsmatioss``. It looks like a leftover from
        # another script. It is left unchanged because dropping a different
        # table would be destructive; confirm which table was intended.
        sql = """drop table if exists address_book"""
        cursor.execute(sql)

        # Create the target table. Best effort: if creation fails (for example
        # because the table already exists), report it and carry on with the
        # inserts.
        try:
            sql = '''
            create table inforsmatioss(
                sname varchar(150),
                numcount varchar(100)
            )
            '''
            cursor.execute(sql)
        except pymysql.MySQLError as e:
            print(e)
            db.rollback()

        # Insert every (title, sales) pair with a parameterized statement.
        sql = "insert into inforsmatioss(sname,numcount) values(%s,%s)"
        rows = [(title, sales) for title, sales in data.items()]
        cursor.executemany(sql, rows)

    db.commit()
finally:
    # Always release the connection, including when an insert fails.
    db.close()
不好做啊,实在不易