# Work in progress (original note: 半成品) - this script is an unfinished draft.
from DrissionPage import ChromiumPage
import time
from selenium import webdriver
# Landing page to scrape: a p4psearch.1688.com result URL including its tracking parameters.
url='https://p4psearch.1688.com/hamlet.html?scene=6&cosite=baidujj_pz&location=re&trackid=885662561117990122602'
# Module-level DrissionPage ChromiumPage instance; key_wof() reads it as a global.
page=ChromiumPage()
# Open the landing page at import time, before any scraping function is called.
page.get(url)
def _parse_offer(offer):
    """Collect the fields of one ``.offer-item`` element into a dict.

    Args:
        offer: A DrissionPage element for a single offer card.

    Returns:
        A dict keyed by the Chinese field names used for output (image,
        link, title, price, sales, seller name, seller status, seller link).
    """
    # The displayed price is assembled from three child elements
    # (presumably currency symbol, integer part and fractional part).
    price = (
        offer.ele('.symbol').inner_html
        + offer.ele('.number n-b').inner_html
        + offer.ele('.number n-s').inner_html
    )

    # Demo link only: the offer id is hard-coded and is NOT taken from the
    # card. TODO: derive the real id from the offer element.
    offer_id = 810703814121
    detail_url = f'https://detail.1688.com/offer/{offer_id}.html'

    # Both the seller status and the seller link come from the same element,
    # so look it up once instead of twice.
    seller_link = offer.ele('.ww-link ww-inline ww-online')

    return {
        '商品图片': offer.ele('.offer-img').attr('src'),
        '商品链接': detail_url,
        '商品名称': offer.ele('.offer-title two-row').inner_html,
        '商品价格': price,
        '商品销量': offer.ele('.solt').inner_html,
        '商品来源商家': offer.ele('.name').inner_html,
        '商品商家状态': seller_link.attr('title'),
        '商品商家链接': seller_link.attr('href'),
    }


def key_wof():
    """Scrape the offer cards of the currently opened page and print them.

    Runs nine passes (numbered 1-9). In each pass every ``.offer-item``
    element found on the module-level ``page`` is parsed into a dict, the
    page is scrolled through a fixed sequence of positions, and the dict is
    printed. Returns ``None``.

    NOTE(review): the original indentation was lost; the scroll sequence and
    the ``print`` are assumed to run once per offer item - confirm.
    NOTE(review): nothing in this loop navigates to another result page, so
    every pass reads the same page. Pagination is still to be implemented.
    """
    # (x, y) scroll targets with the pause that follows each one. The short
    # pauses and stepwise scrolling were added by the original author to
    # reduce the chance of triggering the site's captcha detection.
    scroll_steps = (
        ((200, 700), 0.1),
        ((600, 900), 0.2),
        ((800, 1100), 0.1),
        ((990, 1300), 0),
        ((1200, 1500), 0),
    )

    # range(1, 10) already bounds the loop, so the former duplicate counter
    # and its unreachable "> 10" break are not needed.
    for page_no in range(1, 10):
        print(f'正在爬取第{page_no}页')
        for offer in page.eles('.offer-item'):
            record = _parse_offer(offer)
            for (x, y), pause in scroll_steps:
                page.scroll.to_location(x, y)
                if pause:
                    time.sleep(pause)
            print(record)
def _key():
    """Prompt for a product name and start scraping when one is entered.

    An empty answer only prints a hint and returns; there is no re-prompt.
    """
    # The entered keyword is not processed yet: it only gates the scrape
    # and is not forwarded to key_wof().
    keyword = input('请输入商品名称:')
    if keyword:
        key_wof()
        return
    print('输入为空,请重新输入!')
# Script entry point: runs the prompt as soon as this module is executed.
_key()