需求分析
从 https://pic.netbian.com/4kfengjing/ 网站爬取 4K 风景图片,并保存到本地目录。
Python实现
def get_htmls(pages=None):
    """Fetch the HTML of the listing pages that will be scraped.

    Args:
        pages: Iterable of page numbers to request from
            https://pic.netbian.com/4kfengjing/. When omitted, pages
            2, 3 and 4 are requested.

    Returns:
        A list containing the decoded HTML text of each requested page,
        in the same order as ``pages``.

    Raises:
        requests.HTTPError: If a page answers with an HTTP error status.
    """
    if pages is None:
        # Built on every call so no mutable default is shared between calls.
        pages = range(2, 5)

    pages_list = []
    for page in pages:
        # The URL must not contain spaces: "index_ 2 .html" is not the
        # listing page, "index_2.html" is.
        url = f"https://pic.netbian.com/4kfengjing/index_{page}.html"
        # A timeout keeps one stalled connection from hanging the whole run.
        response = requests.get(url, timeout=10)
        # Fail loudly instead of handing an error page to the parser.
        response.raise_for_status()
        # Decode as GBK, as the original code does (presumably the site's
        # charset -- confirm against the response headers).
        response.encoding = 'gbk'
        pages_list.append(response.text)
    return pages_list
# Demo call: fetch listing pages 2-4 (the returned HTML is not kept here).
get_htmls(pages=[*range(2, 5)])
def get_picturs(htmls):
    """Download every image referenced by the given listing pages.

    Each page is parsed with BeautifulSoup. The ``<img>`` tags are taken
    from the first ``ul.clearfix`` found under ``div.slist`` under
    ``div#main``. Every image is saved as
    ``./practice05/<alt text with spaces replaced by "_">.jpg`` and a
    confirmation line is printed for each saved file.

    Args:
        htmls: Iterable of HTML strings, e.g. the result of ``get_htmls``.

    Returns:
        None. The results are the files written to ``./practice05/``.

    Raises:
        AttributeError: If a page does not contain the expected
            ``div#main`` / ``div.slist`` / ``ul.clearfix`` structure.
        KeyError: If an ``<img>`` tag has no ``alt`` or ``src`` attribute.
    """
    # Imported locally so the block does not rely on file-level imports
    # that are not visible from here.
    import os
    from urllib.parse import urljoin

    base_url = "https://pic.netbian.com/"
    save_dir = "./practice05/"
    # open() does not create missing directories, so make sure it exists.
    os.makedirs(save_dir, exist_ok=True)

    for html in htmls:
        soup = BeautifulSoup(html, 'html.parser')
        pic_li = (
            soup.find('div', id='main')
            .find('div', class_='slist')
            .find('ul', class_='clearfix')
        )
        for img in pic_li.find_all('img'):
            # A "/" in the alt text would be read as a directory separator,
            # so it is replaced just like spaces are.
            safe_alt = img['alt'].replace(" ", '_').replace("/", '_')
            pic_name = save_dir + safe_alt + '.jpg'
            # urljoin yields a clean absolute URL whether or not src starts
            # with "/", and without the stray spaces of the old f-string.
            src = urljoin(base_url, img['src'])
            # A timeout keeps one stalled download from hanging the run.
            response = requests.get(src, timeout=10)
            if not response.ok:
                # Do not save an HTTP error body as if it were a picture.
                print("图片下载失败,已跳过:{}".format(src))
                continue
            with open(pic_name, 'wb') as f:
                f.write(response.content)
            print("图片已下载并保存为:{}".format(pic_name))
# Script entry: fetch listing pages 2-4, then download every image they list.
htmls = get_htmls(pages=[*range(2, 5)])
get_picturs(htmls)
爬取结果展示