长截图问题
问题:使用dom-to-image和html2canvas来进行长截图会出现一个问题,如果图片非常长,一些图片会只加载一半,如果图片再长一些,截图就会为空。
目前我测试的结果:当截图大小达到 8MB 左右时,开始出现图片内容缺失的情况。
截图为空不是库的bug,是浏览器到了极限,无法输出了,这时cpu飙到快100%。
图片缺失这个情况还可以优化,可以想办法把dom进行拆分,再分别截图,最后拼接图片
优化长截图的思路:
- 还是用js里的dom-to-image,不断拆分dom,分别截图,最后合并
- 使用selenium截图,滚动截图,再用opencv拼接图片
上面两个思路,是不是能实现无限长的截图呢?
方法一:按理说应该能无限长的,但是浏览器扛不住,我的测试结果是截图大小在18mb左右,再大一些就不输出了。
方法二:这个方法能实现无限长的截图,但太慢了
代码实现
方法一:推荐做法
核心思想:把如下图所示拆分的dom分别使用dom-to-image截图,再通过canvas合并图片,最后下载
要截图的完整dom
拆分的dom1
拆分的dom2
拆分的dom3
/**
 * Runs the whole long-screenshot pipeline: split the chat DOM into groups,
 * render each group to a PNG, stitch the PNGs on a canvas, then download.
 *
 * Instead of sleeping a fixed 5 s after each asynchronous stage (too short
 * for big pages, wasted time for small ones), every stage is awaited when it
 * exposes a promise and otherwise polled via the state it writes
 * (`imageList` for captures, `mergeFlag` for merged chunks).
 *
 * NOTE(review): `this` is the enclosing component/object; `download` and
 * `getFilename` are defined outside this snippet.
 */
const start = async () => {
  // Upper bound for each asynchronous stage, and how often progress is checked.
  const maxWaitMs = 60000;
  const pollIntervalMs = 100;

  // Polls `isReady` until it returns true; resolves to false on timeout.
  const waitFor = async (isReady) => {
    const deadline = Date.now() + maxWaitMs;
    while (!isReady()) {
      if (Date.now() >= deadline) {
        return false;
      }
      await this.sleep(pollIntervalMs);
    }
    return true;
  };

  // Drop results of a previous run so stale entries cannot satisfy the polls.
  this.imageList = [];
  this.heightList = [];
  this.mergeFlag = [];

  // Split the DOM into groups of child elements
  const domArr = this.splitChatDom();
  if (domArr.length === 0) {
    console.warn('Long screenshot: nothing to capture');
    return;
  }

  // Render every group into imageList
  const captures = domArr.map((domList, index) => {
    const dom = this.mergeChat(domList);
    document.body.appendChild(dom);
    return this.createImage(dom, index);
  });
  // Harmless when createImage returns nothing; the poll below covers that case.
  await Promise.all(captures);
  const isCaptured = await waitFor(() =>
    domArr.every((_, index) => Boolean(this.imageList[index]))
  );
  if (!isCaptured) {
    console.warn('Long screenshot: timed out waiting for some chunks to render');
  }

  // Stitch the images together
  await this.mergeImage();
  const isMerged = await waitFor(() =>
    domArr.every((_, index) => Boolean(this.mergeFlag[index]))
  );
  if (!isMerged) {
    console.warn('Long screenshot: timed out waiting for some chunks to be drawn');
  }

  // Download
  this.download(this.canvas.toDataURL('image/png'), this.getFilename());
};
start().catch((err) => {
  console.error('Long screenshot failed', err);
});
把需要截图的子元素取出来,放到数组中
// 拆分chat子元素
splitChatDom() {
let chat = document.querySelector("#chat")
let chat_inner = chat.getElementsByClassName("chat-inner")[0]
const dom_length = chat_inner.children.length
let childs = chat_inner.children
let domArr = []
let i = 0;
// 拆分个数
const num = 20
while (i < dom_length) {
let arr = []
for (let j=0; j< num && i+j<dom_length; j++) {
const element = childs[i+j];
arr.push(element)
}
domArr.push(arr)
i+=num
}
return domArr
},
为拆分出的子元素重建父节点(注意:appendChild 会把这些子元素从原页面中移走,放入新建的父节点)
mergeChat(domList) {
let chat = document.createElement("div")
chat.id = "chat"
let chat_inner = document.createElement("div")
chat_inner.className = "chat-inner"
chat.appendChild(chat_inner)
// console.log(domList);
domList.forEach((dom) => {
chat_inner.appendChild(dom)
})
return chat
},
分别截图
// 根据dom创建图片
createImage(dom,index) {
domtoimage
.toPng(dom, { quality: 1 })
.then((dataUrl) => {
this.imageList[index] = dataUrl
this.heightList[index] = dom.offsetHeight
})
},
合并截图
// 合并图片
mergeImage() {
this.canvas = null
let canvas = document.createElement("canvas")
let context = canvas.getContext("2d")
context.scale(0.8,0.8)
const scale = 0.8;
const width = 1000 * scale; // 确保canvas的尺寸和图片一样
const canvas_height = this.heightList.reduce((a,b)=>a+b-170) * scale;
canvas.width = width
canvas.height = canvas_height
let sumHeight = 0
for (let i = 0; i < this.imageList.length; i++) {
const dataUrl = this.imageList[i];
const height = this.heightList[i]
let img = new Image()
img.src = dataUrl
img.width = width
img.height = height
img.onload = () => {
context.drawImage(img,0,sumHeight,width,height * scale)
sumHeight += (height - 170) * scale
this.mergeFlag[i] = true
}
}
this.canvas = canvas
},
暂停方法:返回一个 Promise,需要在 async 方法中配合 await 调用
sleep(time){
return new Promise((resolve)=>{
setTimeout(()=>{
resolve();
}, time)
})
},
方法二:效率低
def save_screenshot(url, fp_pic):
    """Capture the ``#chat`` element of a page as one tall PNG.

    Opens ``url`` in Chrome, scrolls through the element one viewport step at
    a time, screenshots each position and stacks the screenshots vertically.

    Args:
        url: Address of the page to open.
        fp_pic: Path of the PNG file to write.

    Temporary screenshots (``temp.png``, ``temp_<i>.png``) are written to the
    current working directory and removed afterwards; the browser is closed
    even when an error occurs.
    """
    # find_element_by_id was removed in Selenium 4.3; the By-based lookup works
    # on old and new releases alike.
    from selenium.webdriver.common.by import By

    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_experimental_option("excludeSwitches", ['enable-automation'])
    driver = webdriver.Chrome(options=chrome_options)
    temp_files = []

    def grab(path):
        # Screenshot the current viewport and return its pixels as an array.
        temp_files.append(path)
        driver.save_screenshot(path)
        # Close the file handle right away so the temp file can be deleted later.
        with Image.open(path) as shot:
            return np.array(shot)

    try:
        driver.get(url)
        time.sleep(5)
        # Height and page offset of the element to capture
        chat = driver.find_element(By.ID, "chat")
        dom_height = chat.get_attribute("offsetHeight")
        offset_top = chat.get_attribute('offsetTop')
        page_height = 500
        driver.set_window_size(1080, page_height)
        # Scroll to the top of the element
        driver.execute_script(
            'window.scrollBy(0,{});'.format(int(offset_top))
        )
        # NOTE(review): 85 is presumably the height of the browser chrome that
        # the window size includes but the viewport does not; confirm.
        step = page_height - 85
        n = int(dom_height) // step  # number of additional scroll steps

        strips = [grab('temp.png')]
        for i in range(n):
            print(i,page_height*(i+1))
            driver.execute_script(
                'window.scrollBy(0,{});'.format(step)
            )
            strips.append(grab(f'temp_{i}.png'))

        # One concatenation instead of growing the array on every iteration.
        Image.fromarray(np.concatenate(strips, axis=0)).save(fp_pic, format='PNG')
    finally:
        try:
            for path in temp_files:
                if os.path.exists(path):
                    os.remove(path)
        finally:
            driver.quit()
if __name__ == "__main__":
    # Page to capture; an empty placeholder in this script.
    url = ''
    output_path = 'aaa.png'
    save_screenshot(url, output_path)
    # Post-process: keep the leftmost 1250 pixel columns, then scale to 64%.
    stitched = cv2.imread(output_path)
    cropped = stitched[:, :1250]
    resized = cv2.resize(
        cropped,
        None,
        fx=0.64,
        fy=0.64,
        interpolation=cv2.INTER_CUBIC,
    )
    cv2.imwrite(output_path, resized)