网站地址:公司介绍-泰州名列新材料有限公司 (zhaosw.com)
问题如下:
在网站中看到的电话号码在页面源码中无法查看
破解步骤:
1.找到woff文件
查找字体的class属性,全文搜索font-face-encrypted可以找到如下内容。可以看到css样式中存在一个url,其内容是经过base64编码的字体数据:
将其进行base64解码后,即可保存为woff文件。代码如下:
import base64
woff_str = 'AAEAAAAKAIAAAwAgT1MvMpadtGoAAACsAAAAYGNtYXAADUG6AAABDAAAAUJnbHlmuscfOQAAAlAAAAOAaGVhZBw6tLIAAAXQAAAANmhoZWEGVAICAAAGCAAAACRobXR4GYgAAQAABiwAAAAsbG9jYQQ+BUwAAAZYAAAAGG1heHAADwAtAAAGcAAAACBuYW1lDSyMNwAABpAAAAGGcG9zdAtgCzgAAAgYAAAATAAEAlIBkAAFAAACmQLMAAAAjwKZAswAAAHrADMBCQAAAgAGAwAAAAAAAAAAAAEQAAAAAAAAAAAAAABQZkVkAMBAC0AUAyz/LABcAywA1AAAAAEAAAAAAxgAAAAAACAAAQAAAAMAAAADAAAAHAABAAAAAAA8AAMAAQAAABwABAAgAAAABAAEAAEAAEAU//8AAEAL//+/9gABAAAAAAAAAQYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAD/LAGYAywAAwAHAAAXESERJSERIQABmP6bATL+ztQEAPwAMwOaAAAAAAEAAP8sATwDLAAIAAATETMRIwYHFTbEeEhflXcCYfzLBACJZXFHAAACAAD/JwKVAywADwAYAAA3Bh4BPgIuAgcTIwMOARMuAT4BFhQGJgUFQKrvhzUGQLuf9ofvNTSvNQtRtpJ1r5RVtWMLe5ieh2kpAYL+ilh1/uo4qZgYXvaADAAAAAIAAP8WAnQDLAAMABcAABMGHgI+ARICJiIOARc+AR4CFA4BLgEGBiNRxsZiEi6Yy4xMdRJukVcSQIuHOQFpmrSjYlHRAUsBAKl0tXeglwaXuePAQGj6AAACAAD/LALKAywACgANAAAlFTM1MzUjESMBFSUhAQHKcY+PWf4eAcr+swFND+PjZQK4/UhlZQHWAAIAAP8sAoADLAAPABwAABczEjY0LgIiDgIeATcGAyY+ARcyHgEHDgImqYDjdCNddG6MYy8pwJJSoAg/byM6YxcXI0yAbtQBgPSdY14uLm+p0XoWrgG1S4wuBVJ6V0Y6DGMAAAABAAH/KQJnAzUAHQAANx4CPgEmJz4BLgEOAQcXPgEeAQYHFR4CDgEmJwEXo9+vHlhSby9AntN7I2kvno0Lb3t7bwxenmkXN4GHBo3kjCRFvJ5MGIxpEoc6TKppDEsSWJ57BWNMAAAAAAEAAP8sAmwDMAAXAAAXFSE1IRI+AS4CBw4BBxc+Ah4BBgIGGAJU/lfsphFAZV48ZJQvcB1xfF8RR+yZvBhrAQ/+t4JAEwYGfIgSU1MLTXal/tOOAAMAAP8sAngDLAAZACMALAAANxQeAjI+AjQmJz4BLgIiDgIeARcOARc+Ah4BDgEuARM0PgEeAQYiJgAdUnuqb0A1UmNvNC5qXXVkUikGL0ZSRngDZJeBEjuYjDUXR6lkBliwUlxGb1IpKUZpsHsdNLyMTBgdUl6AZCkph0BGXgxYgYEjHWoCDjtYC0aSamoAAAEAAP8sAogDLAAIAAAXMxIBNSEVIQCLhVsBHf14Agn+79QBzAHNZ23+OQAAAQAA/ycCjgMsAB0AADcHHgI+Ay4DBzY3ITUhAgcXPgEeAg4BJmtrHqGyiV8vBh1Cj5pZHR4Bff4qZQxlMJR3TgU1fcpWHo53DDZZiYNxcEIGKl+ha/4kJAxCJCRrm31NJAAAAAEA
AAABAABQ06IJXw889QALBAAAAAAA3Si4tQAAAADdKLi1AAD/FgLKAzUAAAAIAAIAAAAAAAAAAQAAAyz/LABcAsoAAAAAAsoAAQAAAAAAAAAAAAAAAAAAAAsBmAAAATwAAAKVAAACdAAAAsoAAAKAAAACZwABAmwAAAJ4AAACiAAAAo4AAAAAABYAKgBYAIQAoADSAQYBMAF4AY4BwAABAAAACwAtAAMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADACWAAEAAAAAAAEACgAAAAEAAAAAAAIACgAKAAEAAAAAAAMACgAUAAEAAAAAAAQACgAeAAEAAAAAAAUAHgAoAAEAAAAAAAYACgBGAAMAAQQJAAEAFABQAAMAAQQJAAIAFABkAAMAAQQJAAMAFAB4AAMAAQQJAAQAFACMAAMAAQQJAAUAPACgAAMAAQQJAAYAFADcaWNvbm51bWJlcmljb25udW1iZXJpY29ubnVtYmVyaWNvbm51bWJlclZlcnNpb24gMS4wOyBGb250RWRpdG9yICh2MS4wKWljb25udW1iZXIAaQBjAG8AbgBuAHUAbQBiAGUAcgBpAGMAbwBuAG4AdQBtAGIAZQByAGkAYwBvAG4AbgB1AG0AYgBlAHIAaQBjAG8AbgBuAHUAbQBiAGUAcgBWAGUAcgBzAGkAbwBuACAAMQAuADAAOwAgAEYAbwBuAHQARQBkAGkAdABvAHIAIAAoAHYAMQAuADAAKQBpAGMAbwBuAG4AdQBtAGIAZQByAAAAAgAAAAAAAAAyAAAAAAAAAAAAAAAAAAAAAAAAAAAACwALAAABAgEDAQQBBQEGAQcBCAEJAQoBCwExATYBMAE0ATkBMwEyATgBNwE1'
# Decode the base64 payload taken from the page's CSS back into raw font bytes.
contents = base64.b64decode(woff_str)
# Write through a context manager so the handle is flushed and closed
# before font.woff is opened again in the next step.
with open('font.woff', 'wb') as font_file:
    font_file.write(contents)
2.使用TTFont将woff转换成xml文件
代码如下:
from fontTools.ttLib import TTFont,BytesIO
# Load the saved font and dump it as XML so its tables can be inspected.
TTFont('font.woff').saveXML('font.xml')
运行结果如下:
3.拿到xml中的数据存放到字典中
代码如下:
# Parse the dumped XML file and take its root element.
xml_root = ElementTree.parse('font.xml').getroot()
# Collect the <map> entries of the first cmap_format_4 table under <cmap>.
map_nodes = xml_root.find('cmap').findall('cmap_format_4')[0].findall('map')
# Build the lookup table: each entry's "code" attribute with its first two
# characters dropped (presumably the "0x" prefix) -> the entry's "name".
dicts = {node.attrib['code'][2:]: node.attrib['name'] for node in map_nodes}
上面代码的意思就是在xml文件中找到下图部分的内容,然后创建一个字典用来保存对应关系,例如:400b对应1。
4.使用字典替换页面源码对应的部分
由于woff文件是动态变化的,所以每次请求的时候都要重新下载并转换woff文件。
全程源码如下:
import base64
import re
import requests
from fontTools.ttLib import TTFont,BytesIO
from xml.etree import ElementTree
def download():
    """Fetch the company "about" page and save its embedded font to disk.

    The page embeds a font as a base64 ``data:`` URL inside its CSS. That
    payload is extracted, decoded and written to ``font.woff`` in the
    current working directory.

    Returns:
        str: The HTML source of the page (``response.text``).

    Raises:
        IndexError: If no embedded base64 font is found in the page.
        requests.RequestException: If the HTTP request fails or times out.
    """
    headers = {
        "authority": "mlxcl.zhaosw.com",
        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "accept-language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
        "cache-control": "no-cache",
        "pragma": "no-cache",
        "referer": "https://mlxcl.zhaosw.com/products",
        "sec-ch-ua": "\"Not_A Brand\";v=\"8\", \"Chromium\";v=\"120\", \"Microsoft Edge\";v=\"120\"",
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": "\"Windows\"",
        "sec-fetch-dest": "document",
        "sec-fetch-mode": "navigate",
        "sec-fetch-site": "same-origin",
        "sec-fetch-user": "?1",
        "upgrade-insecure-requests": "1",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0"
    }
    # Request the page; the timeout keeps a stalled server from hanging the
    # script forever (requests waits indefinitely by default).
    url = "https://mlxcl.zhaosw.com/about"
    response = requests.get(url, headers=headers, timeout=10).text
    # Pull the base64 font payload out of the CSS url(...). A raw string is
    # used so that "\(" reaches the regex engine without relying on an
    # invalid string escape.
    matches = re.findall(
        r'url\(data:application/x-font-ttf;charset=utf-8;base64,(.*?)\)',
        response,
    )
    if not matches:
        # Same exception type as indexing an empty list, but with a message
        # that explains what went wrong.
        raise IndexError('no embedded base64 font found in the page source')
    contents = base64.b64decode(matches[0])
    # Close the file deterministically so later readers see the full font.
    with open('font.woff', 'wb') as font_file:
        font_file.write(contents)
    return response
def transform():
    """Convert ``font.woff`` to XML and return its code-to-name table.

    Reads ``font.woff``, writes it out as ``font.xml``, then reads the
    ``map`` entries of the first ``cmap_format_4`` table in that XML.

    Returns:
        dict: Maps each entry's ``code`` attribute, minus its first two
        characters (presumably the ``0x`` prefix), to the entry's ``name``
        attribute.
    """
    # Dump the font as XML so the cmap can be read with ElementTree.
    TTFont('font.woff').saveXML('font.xml')
    xml_root = ElementTree.parse('font.xml').getroot()
    # Only the first cmap_format_4 table under <cmap> is consulted.
    first_cmap = xml_root.find('cmap').findall('cmap_format_4')[0]
    return {
        entry.attrib['code'][2:]: entry.attrib['name']
        for entry in first_cmap.findall('map')
    }
def replace_text(text, all_dict):
    """Replace hexadecimal character references in *text* and print the result.

    For every key in *all_dict*, each occurrence of ``&#x<key>;`` in *text*
    is replaced with the corresponding value.

    Args:
        text: The source text (e.g. page HTML) containing the references.
        all_dict: Mapping from hex digit strings to replacement strings.

    Returns:
        str: The text after all replacements. It is also printed, as before,
        so existing callers that rely on the printed output keep working.
    """
    for code, glyph_name in all_dict.items():
        text = text.replace('&#x' + code + ';', glyph_name)
    print(text)
    # Hand the decoded text back so callers can process it further instead
    # of only seeing it on stdout.
    return text
if __name__ == '__main__':
    # Fetch the page first; this also saves a fresh font.woff to disk.
    page_source = download()
    # Convert that font to XML and build the code -> glyph-name table.
    glyph_table = transform()
    # Substitute the character references in the page and print the result.
    replace_text(page_source, glyph_table)