【Node.js】用 Node.js 爬取网页图片，并创建目录将爬取到的图片保存到该目录中（附源码）

news 2025/4/26 21:14:46

效果图：

完整代码：

var myRequest = require('request')
var myCheerio = require('cheerio')
const http = require('http');
const fs = require('fs');

// URL of the listing page to scrape.
const myURL = 'https://588ku.com/sheyingtu/0-0-default-0-1/';

// Folder the downloaded images are written to.
const destinationFolder = './images/';

// Create the destination folder. With `recursive: true`, mkdirSync creates any
// missing parent directories and does not throw when the directory already
// exists, so no separate existsSync check (and no check-then-create race) is needed.
fs.mkdirSync(destinationFolder, { recursive: true });

/**
 * Thin wrapper around the `request` library (bound to `myRequest` in this file).
 *
 * A shallow copy of `options` is forwarded, so every setting the caller supplies
 * (url, headers, encoding, ...) reaches the library, while the caller's own
 * object is never handed over directly.
 *
 * @param {Object} options - Request settings; the caller in this file passes
 *     `url`, `headers` and `encoding`.
 * @param {Function} callback - Passed straight through to `myRequest`; the
 *     caller in this file supplies a function taking `(err, res, body)`.
 */
function request(options, callback) {
    // Copy instead of cherry-picking fields: picking only url/headers silently
    // dropped other settings such as `encoding`.
    const forwardedOptions = Object.assign({}, options);
    myRequest(forwardedOptions, callback);
}
// Request configuration for the listing page.
const options = {
    url: myURL,
    headers: {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.65 Safari/537.36'
    },
    encoding: null // asks the request library for the body as a raw Buffer; the callback below accepts a Buffer or a string
};

/**
 * Normalises the value of an image's `data-original` attribute into an
 * absolute `http://` URL.
 *
 * Accepts bare "host/path", protocol-relative "//host/path" and absolute
 * "http(s)://host/path" values. Only the `http` module is available in this
 * file, so an https URL is fetched over plain http.
 *
 * @param {*} rawUrl - Attribute value as returned by cheerio (may be undefined).
 * @returns {string|null} The URL to download, or null when there is nothing usable.
 */
function toHttpUrl(rawUrl) {
    if (typeof rawUrl !== 'string') {
        return null;
    }
    // Drop an optional scheme and any leading slashes, then re-prefix uniformly.
    const hostAndPath = rawUrl.trim().replace(/^(?:https?:)?\/*/i, '');
    return hostAndPath === '' ? null : `http://${hostAndPath}`;
}

/**
 * Builds the output file name for an image from its `title` attribute.
 *
 * Whitespace is removed (as before); characters that are path separators or
 * are not allowed in file names on common file systems are replaced by `_`
 * so a title can never point outside the destination folder.
 *
 * @param {*} title - The image title (may be undefined).
 * @param {string} fallbackName - Name used when the title is missing or empty.
 * @returns {string} File name ending in `.png`.
 */
function toImageFileName(title, fallbackName) {
    const cleaned = (typeof title === 'string' ? title : '')
        .replace(/\s/g, '')
        .replace(/[\\/:*?"<>|]/g, '_');
    return `${cleaned === '' ? fallbackName : cleaned}.png`;
}

/**
 * Downloads one image over HTTP and streams it to disk.
 *
 * Failures (invalid URL, network error, non-200 status, write error) are
 * logged and affect only this image; they never crash the whole run.
 *
 * @param {string} imageUrl - Absolute http URL of the image.
 * @param {string} imagePath - Path of the file to write.
 */
function downloadImage(imageUrl, imagePath) {
    let clientRequest;
    try {
        clientRequest = http.get(imageUrl, function (response) {
            if (response.statusCode !== 200) {
                // Drain the body so the socket is released, and do not save
                // an error/redirect page under a .png name.
                response.resume();
                console.error(`Skipping ${imageUrl}: HTTP ${response.statusCode}`);
                return;
            }

            const fileStream = fs.createWriteStream(imagePath);
            fileStream.on('error', function (writeError) {
                console.error(`Failed to write ${imagePath}: ${writeError.message}`);
                response.destroy();
            });
            response.on('error', function (readError) {
                console.error(`Failed while receiving ${imageUrl}: ${readError.message}`);
                fileStream.destroy();
            });
            // pipe() ends the write stream when the response ends, and the
            // stream closes its file descriptor automatically afterwards.
            response.pipe(fileStream);
        });
    } catch (urlError) {
        // http.get throws synchronously when the URL cannot be parsed.
        console.error(`Invalid image URL ${imageUrl}: ${urlError.message}`);
        return;
    }

    // Without this handler a DNS/connection failure would be an unhandled
    // 'error' event and terminate the process.
    clientRequest.on('error', function (requestError) {
        console.error(`Failed to download ${imageUrl}: ${requestError.message}`);
    });
}

// Fetch the listing page, then download every image found under `.item-img`.
request(options, function (err, res, body) {
    if (err) {
        console.error(`Failed to fetch ${myURL}: ${err.message}`);
        return;
    }
    if (res && res.statusCode !== 200) {
        console.error(`Failed to fetch ${myURL}: HTTP ${res.statusCode}`);
        return;
    }

    // The body may arrive as a Buffer (encoding: null) or as a string.
    const html = Buffer.isBuffer(body) ? body.toString('utf8') : String(body == null ? '' : body);

    // Parse the page once, with cheerio's default options.
    const $ = myCheerio.load(html);

    $('.item-img img').each(function (index) {
        const img = $(this);

        // The image address is read from `data-original`.
        const imageUrl = toHttpUrl(img.attr('data-original'));
        if (imageUrl === null) {
            return; // nothing to download for this element; continue with the next one
        }

        const fileName = toImageFileName(img.attr('title'), `image-${index}`);
        downloadImage(imageUrl, `${destinationFolder}${fileName}`);
    });
});