文章目录
- 1. 写在前面
- 2. 接口分析
- 3. 分析还原
【🏠作者主页】:吴秋霖
【💼作者介绍】:擅长爬虫与JS加密逆向分析!Python领域优质创作者、CSDN博客专家、阿里云博客专家、华为云享专家。一路走来长期坚守并致力于Python与爬虫领域研究与开发工作!
【🌟作者推荐】:对爬虫领域以及JS逆向分析感兴趣的朋友可以关注《爬虫JS逆向实战》《深耕爬虫领域》
未来作者会持续更新所用到、学到、看到的技术知识!包括但不限于:各类验证码突防、爬虫APP与JS逆向分析、RPA自动化、分布式爬虫、Python领域等相关文章
作者声明:文章仅供学习交流与参考!严禁用于任何商业与非法用途!否则由此产生的一切后果均与作者无关!如有侵权,请联系作者本人进行删除!
1. 写在前面
做爬虫脑子一定要灵光~不然怎么当大佬!正所谓大路不通走水路,水路不通走山路!相信很多工程师在面对一个端采集遇到瓶颈的时候什么M端、APP端、小程序端、Web端都几乎会挑一个或多个去摸排分析一下。风控这个东西很奇妙也很玄学,有时候还真有那么一些低风控的接口或者端存在
小程序端这么说吧,体量大点的风控基本都拉得很强,而且有的跟某信有因果!除非真的没有其他端的数据源了,一般少有人去(总之非可选的主流战场)
分析目标:
5Lmd5YS/572R57uc
2. 接口分析
随便通过关键词在搜索接口看看发包情况,直接Curl重放是403无效的,会出现签名错误。这个如下所示:
也就是说上面的sign是动态实时生成的,另外还有一个token参数也是有时效性的,将请求参数拿出来看看,如下所示:
# Request parameters captured from the search request.
# Note that "params" is not a nested object: it is a JSON document serialized into a string.
data = {
"method": "serverless.function.runtime.invoke",
"params": "{\"functionTarget\":\"DCloud-clientDB\",\"functionArgs\":{\"command\":{\"$db\":[{\"$method\":\"collection\",\"$param\":[\"a_novels\"]},{\"$method\":\"where\",\"$param\":[{\"novels_name\":{\"$regexp\":{\"source\":\"玄幻\",\"flags\":\"\"}}}]},{\"$method\":\"get\",\"$param\":[]}]},\"clientInfo\":{\"PLATFORM\":\"mp-weixin\",\"OS\":\"mac\",\"APPID\":\"__UNI__1B787A1\",\"DEVICEID\":\"17356293548006764541\",\"scene\":1089,\"deviceId\":\"17356293548006764541\",\"appId\":\"__UNI__1B787A1\",\"appName\":\"\",\"appVersion\":\"2.0.1\",\"appVersionCode\":\"201\",\"appLanguage\":\"zh-Hans\",\"uniCompilerVersion\":\"4.36\",\"uniRuntimeVersion\":\"4.36\",\"uniPlatform\":\"mp-weixin\",\"deviceBrand\":\"apple\",\"deviceModel\":\"MacBookPro15,2\",\"deviceType\":\"pc\",\"osName\":\"mac\",\"osVersion\":\"OS\",\"hostVersion\":\"3.8.7\",\"hostName\":\"WeChat\",\"locale\":\"zh-Hans\",\"LOCALE\":\"zh-Hans\"},\"uniIdToken\":\"\"}}",
"spaceId": "mp-f510ce55-6e44-40b0-b249-d278726b813d",
# Millisecond-resolution Unix timestamp (13 digits).
"timestamp": 1735785506387,
"token": "63bc0ed6-f8a1-40ab-bb07-d11bf678ab21"
}
初看大概是MD5标准加密,拼接一下路径、参数加个时间戳啥的
3. 分析还原
在开始分析JS之前,可以通过反编译小程序的方式去分析也可以使用大佬开源的调试工具来辅助分析,有些Web端分析比较多的使用这个辅助还是蛮不错的。如下所示:
这里在页面直接搜索的话有时候资源多会比较慢,可以直接通过堆栈或者编译好的JS文件简单的进行静态分析,如下所示:
可以看到Ae就是生成签名的调用方法,而clientSecret是一段密文,后续它将参与加密用到,如下所示:
直接跳转到Ae方法处,可以看到Object.keys(e).sort()获取e对象里面的所有键进行一个排序,存在的话则使用&拼接,然后去掉所拼接字符串开头多余的一个&确保格式正确,如下所示:
接下来需要分析P方法,也就是最终的加密算法实现,进入当前方法直接来到了如下所示:
// Factory: given a hasher `e`, returns a function (t, n) that constructs d.HMAC with
// (e, n) and finalizes it with t. NOTE(review): `d` is defined outside this excerpt;
// presumably t is the message and n is the key, confirm against the HMAC implementation.
_createHmacHelper: function(e) {
return function(t, n) {
return new d.HMAC.init(e,n).finalize(t)
}
}
这里推测d.HMAC是所用加密库中的HMAC实现,而init(e, n)与finalize(t)分别是该HMAC对象的初始化方法和最终计算方法。注意入口处调用时是传入了密钥的,所以要判断具体是什么算法难度不大,可以肯定不是一个单纯的MD5,对应JS代码如下所示:
var k = A((function(e, t) {
var n;
e.exports = n = n || function(e, t) {
// Inheritance helper: use Object.create when it exists, otherwise fall back to a
// temporary constructor whose prototype is pointed at the parent object.
var n = Object.create || function() {
function e() {}
return function(t) {
var n;
return e.prototype = t,
n = new e,
e.prototype = null,
n
}
}()
// r is the library root object returned by this module; o is its `lib` namespace.
, r = {}
, o = r.lib = {}
// Base: the root object every other library type is derived from through extend().
, i = o.Base = {
// Creates an object that inherits from `this` and mixes the overrides in `e` into it.
extend: function(e) {
var t = n(this);
return e && t.mixIn(e),
// When the subtype brings no init of its own, install one that forwards to the parent's init.
t.hasOwnProperty("init") && this.init !== t.init || (t.init = function() {
t.$super.init.apply(this, arguments)
}
),
// Pointing init.prototype at the subtype makes `new X.init(...)` yield objects inheriting from it.
t.init.prototype = t,
t.$super = this,
t
},
// Extends `this` and runs init on the new object with the caller's arguments.
create: function() {
var e = this.extend();
return e.init.apply(e, arguments),
e
},
// Default initializer: intentionally empty.
init: function() {},
// Copies the own properties of `e` onto `this`; an own toString is copied explicitly too.
mixIn: function(e) {
for (var t in e)
e.hasOwnProperty(t) && (this[t] = e[t]);
e.hasOwnProperty("toString") && (this.toString = e.toString)
},
// Returns a new object extended from this object's type, with this object's own properties mixed in.
clone: function() {
return this.init.prototype.extend(this)
}
}
// WordArray: a byte sequence kept as an array of 32-bit words (`words`) together with
// the count of meaningful bytes (`sigBytes`). Within a word the first byte occupies
// the highest 8 bits (see the `24 - i % 4 * 8` shifts below).
, a = o.WordArray = i.extend({
// e: initial words (defaults to []); t: significant byte count (defaults to 4 per word).
init: function(e, t) {
e = this.words = e || [],
this.sigBytes = null != t ? t : 4 * e.length
},
// Serializes with the given encoder, or with Hex (`c`) when none is passed.
toString: function(e) {
return (e || c).stringify(this)
},
// Appends the bytes of WordArray `e` to this one in place and returns `this`.
concat: function(e) {
var t = this.words
, n = e.words
, r = this.sigBytes
, o = e.sigBytes;
// Drop stale bits past sigBytes first; if the current length is not word-aligned,
// the incoming bytes have to be merged one at a time.
if (this.clamp(),
r % 4)
for (var i = 0; i < o; i++) {
var a = n[i >>> 2] >>> 24 - i % 4 * 8 & 255;
t[r + i >>> 2] |= a << 24 - (r + i) % 4 * 8
}
else
// Word-aligned: whole words can be copied across.
for (i = 0; i < o; i += 4)
t[r + i >>> 2] = n[i >>> 2];
return this.sigBytes += o,
this
},
// Zeroes the bits beyond sigBytes in the last word and truncates `words` to fit.
// `e` here is the enclosing function's parameter, which is passed Math.
clamp: function() {
var t = this.words
, n = this.sigBytes;
t[n >>> 2] &= 4294967295 << 32 - n % 4 * 8,
t.length = e.ceil(n / 4)
},
// Copies the object and gives the copy its own `words` array.
clone: function() {
var e = i.clone.call(this);
return e.words = this.words.slice(0),
e
},
// Builds a WordArray of `t` bytes from a multiply-with-carry style generator.
// NOTE(review): the generator is seeded from Math.random, so the output should not
// be treated as cryptographically secure randomness.
random: function(t) {
for (var n, r = [], o = function(t) {
t = t;
var n = 987654321
, r = 4294967295;
return function() {
var o = ((n = 36969 * (65535 & n) + (n >> 16) & r) << 16) + (t = 18e3 * (65535 & t) + (t >> 16) & r) & r;
return o /= 4294967296,
(o += .5) * (e.random() > .5 ? 1 : -1)
}
}, i = 0; i < t; i += 4) {
var u = o(4294967296 * (n || e.random()));
n = 987654071 * u(),
r.push(4294967296 * u() | 0)
}
return new a.init(r,t)
}
})
// `enc` namespace: converters between strings and WordArrays.
, u = r.enc = {}
// Hex: two lowercase hexadecimal digits per byte.
, c = u.Hex = {
// WordArray -> hex string; only the first sigBytes bytes are emitted.
stringify: function(e) {
for (var t = e.words, n = e.sigBytes, r = [], o = 0; o < n; o++) {
var i = t[o >>> 2] >>> 24 - o % 4 * 8 & 255;
r.push((i >>> 4).toString(16)),
r.push((15 & i).toString(16))
}
return r.join("")
},
// Hex string -> WordArray; every two characters become one byte.
parse: function(e) {
for (var t = e.length, n = [], r = 0; r < t; r += 2)
n[r >>> 3] |= parseInt(e.substr(r, 2), 16) << 24 - r % 8 * 4;
return new a.init(n,t / 2)
}
}
// Latin1: one character per byte.
, s = u.Latin1 = {
// WordArray -> string built from the byte values via String.fromCharCode.
stringify: function(e) {
for (var t = e.words, n = e.sigBytes, r = [], o = 0; o < n; o++) {
var i = t[o >>> 2] >>> 24 - o % 4 * 8 & 255;
r.push(String.fromCharCode(i))
}
return r.join("")
},
// String -> WordArray; only the low 8 bits of each char code are kept.
parse: function(e) {
for (var t = e.length, n = [], r = 0; r < t; r++)
n[r >>> 2] |= (255 & e.charCodeAt(r)) << 24 - r % 4 * 8;
return new a.init(n,t)
}
}
// Utf8: layered on Latin1 using the escape/unescape + URI-component round trip.
, l = u.Utf8 = {
// WordArray -> string; any decoding failure is rethrown as "Malformed UTF-8 data".
stringify: function(e) {
try {
return decodeURIComponent(escape(s.stringify(e)))
} catch (e) {
throw new Error("Malformed UTF-8 data")
}
},
// String -> WordArray holding the UTF-8 bytes of the string.
parse: function(e) {
return s.parse(unescape(encodeURIComponent(e)))
}
}
// BufferedBlockAlgorithm: accumulates input in `_data` and feeds it to
// `_doProcessBlock` (supplied by subtypes) one block of `blockSize` words at a time.
, f = o.BufferedBlockAlgorithm = i.extend({
// Clears the buffer and the running count of appended bytes.
reset: function() {
this._data = new a.init,
this._nDataBytes = 0
},
// Appends data to the buffer; strings are converted to bytes with the Utf8 encoder first.
_append: function(e) {
"string" == typeof e && (e = l.parse(e)),
this._data.concat(e),
this._nDataBytes += e.sigBytes
},
// Processes buffered blocks and returns the consumed words as a WordArray.
// t truthy: round the block count up so everything buffered is processed.
// t falsy: process only whole blocks, keeping `_minBufferSize` blocks back.
_process: function(t) {
var n = this._data
, r = n.words
, o = n.sigBytes
, i = this.blockSize
, u = o / (4 * i)
, c = (u = t ? e.ceil(u) : e.max((0 | u) - this._minBufferSize, 0)) * i
, s = e.min(4 * c, o);
if (c) {
for (var l = 0; l < c; l += i)
this._doProcessBlock(r, l);
// Remove the processed words from the front of the buffer.
var f = r.splice(0, c);
n.sigBytes -= s
}
return new a.init(f,s)
},
// Copies the object and gives the copy its own `_data` buffer.
clone: function() {
var e = i.clone.call(this);
return e._data = this._data.clone(),
e
},
_minBufferSize: 0
});
// Hasher: common base for hash algorithms. Subtypes provide `_doReset`,
// `_doProcessBlock` and `_doFinalize`.
o.Hasher = f.extend({
cfg: i.extend(),
// e: optional configuration overrides, layered on top of `cfg`.
init: function(e) {
this.cfg = this.cfg.extend(e),
this.reset()
},
// Clears the buffer, then lets the subtype reinitialize its state.
reset: function() {
f.reset.call(this),
this._doReset()
},
// Feeds more data into the hash; returns `this` so calls can be chained.
update: function(e) {
return this._append(e),
this._process(),
this
},
// Optionally appends a last chunk `e`, then returns the subtype's final result.
finalize: function(e) {
return e && this._append(e),
this._doFinalize()
},
// Block size in 32-bit words (16 words = 64 bytes).
blockSize: 16,
// Returns a shortcut function (t = message, n = config) for hasher `e`.
_createHelper: function(e) {
return function(t, n) {
return new e.init(n).finalize(t)
}
},
// Returns a shortcut function (t = message, n = key) that runs HMAC over hasher `e`.
// `d` is the `algo` namespace declared right after this statement; it is only read
// when the returned helper is actually called.
_createHmacHelper: function(e) {
return function(t, n) {
return new d.HMAC.init(e,n).finalize(t)
}
}
});
var d = r.algo = {};
return r
}(Math)
}
))
, P = (A((function(e, t) {
var n;
e.exports = (n = k,
function(e) {
// Local aliases into the core library (`n` is the core module object `k`).
var t = n
, r = t.lib
, o = r.WordArray
, i = r.Hasher
, a = t.algo
// u: table of the 64 per-step additive constants.
, u = [];
// Fill the table: u[i] = 2^32 * |sin(i + 1)|, truncated to a 32-bit integer by `| 0`.
// `e` is the enclosing function's parameter, which is passed Math.
!function() {
for (var t = 0; t < 64; t++)
u[t] = 4294967296 * e.abs(e.sin(t + 1)) | 0
}();
// MD5 hasher, registered on the `algo` namespace as MD5.
var c = a.MD5 = i.extend({
// Initial state words: 0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476.
_doReset: function() {
this._hash = new o.init([1732584193, 4023233417, 2562383102, 271733878])
},
// Processes one 16-word block of `e` starting at word offset `t`, updating this._hash.
_doProcessBlock: function(e, t) {
// Reverse the byte order of each word in place
// (16711935 = 0x00FF00FF, 4278255360 = 0xFF00FF00).
for (var n = 0; n < 16; n++) {
var r = t + n
, o = e[r];
e[r] = 16711935 & (o << 8 | o >>> 24) | 4278255360 & (o << 24 | o >>> 8)
}
// a..k (in declaration order) are the 16 message words; P, T, j, E are working
// copies of the four state words.
var i = this._hash.words
, a = e[t + 0]
, c = e[t + 1]
, p = e[t + 2]
, h = e[t + 3]
, g = e[t + 4]
, v = e[t + 5]
, y = e[t + 6]
, m = e[t + 7]
, b = e[t + 8]
, w = e[t + 9]
, _ = e[t + 10]
, x = e[t + 11]
, S = e[t + 12]
, O = e[t + 13]
, A = e[t + 14]
, k = e[t + 15]
, P = i[0]
, T = i[1]
, j = i[2]
, E = i[3];
// Steps 0-15 use helper s (shifts 7/12/17/22).
P = s(P, T, j, E, a, 7, u[0]),
E = s(E, P, T, j, c, 12, u[1]),
j = s(j, E, P, T, p, 17, u[2]),
T = s(T, j, E, P, h, 22, u[3]),
P = s(P, T, j, E, g, 7, u[4]),
E = s(E, P, T, j, v, 12, u[5]),
j = s(j, E, P, T, y, 17, u[6]),
T = s(T, j, E, P, m, 22, u[7]),
P = s(P, T, j, E, b, 7, u[8]),
E = s(E, P, T, j, w, 12, u[9]),
j = s(j, E, P, T, _, 17, u[10]),
T = s(T, j, E, P, x, 22, u[11]),
P = s(P, T, j, E, S, 7, u[12]),
E = s(E, P, T, j, O, 12, u[13]),
j = s(j, E, P, T, A, 17, u[14]),
// Step 15 is nested in the next call; steps 16-31 use helper l (shifts 5/9/14/20).
P = l(P, T = s(T, j, E, P, k, 22, u[15]), j, E, c, 5, u[16]),
E = l(E, P, T, j, y, 9, u[17]),
j = l(j, E, P, T, x, 14, u[18]),
T = l(T, j, E, P, a, 20, u[19]),
P = l(P, T, j, E, v, 5, u[20]),
E = l(E, P, T, j, _, 9, u[21]),
j = l(j, E, P, T, k, 14, u[22]),
T = l(T, j, E, P, g, 20, u[23]),
P = l(P, T, j, E, w, 5, u[24]),
E = l(E, P, T, j, A, 9, u[25]),
j = l(j, E, P, T, h, 14, u[26]),
T = l(T, j, E, P, b, 20, u[27]),
P = l(P, T, j, E, O, 5, u[28]),
E = l(E, P, T, j, p, 9, u[29]),
j = l(j, E, P, T, m, 14, u[30]),
// Step 31 is nested in the next call; steps 32-47 use helper f (shifts 4/11/16/23).
P = f(P, T = l(T, j, E, P, S, 20, u[31]), j, E, v, 4, u[32]),
E = f(E, P, T, j, b, 11, u[33]),
j = f(j, E, P, T, x, 16, u[34]),
T = f(T, j, E, P, A, 23, u[35]),
P = f(P, T, j, E, c, 4, u[36]),
E = f(E, P, T, j, g, 11, u[37]),
j = f(j, E, P, T, m, 16, u[38]),
T = f(T, j, E, P, _, 23, u[39]),
P = f(P, T, j, E, O, 4, u[40]),
E = f(E, P, T, j, a, 11, u[41]),
j = f(j, E, P, T, h, 16, u[42]),
T = f(T, j, E, P, y, 23, u[43]),
P = f(P, T, j, E, w, 4, u[44]),
E = f(E, P, T, j, S, 11, u[45]),
j = f(j, E, P, T, k, 16, u[46]),
// Step 47 is nested in the next call; steps 48-63 use helper d (shifts 6/10/15/21).
P = d(P, T = f(T, j, E, P, p, 23, u[47]), j, E, a, 6, u[48]),
E = d(E, P, T, j, m, 10, u[49]),
j = d(j, E, P, T, A, 15, u[50]),
T = d(T, j, E, P, v, 21, u[51]),
P = d(P, T, j, E, S, 6, u[52]),
E = d(E, P, T, j, h, 10, u[53]),
j = d(j, E, P, T, _, 15, u[54]),
T = d(T, j, E, P, c, 21, u[55]),
P = d(P, T, j, E, b, 6, u[56]),
E = d(E, P, T, j, k, 10, u[57]),
j = d(j, E, P, T, y, 15, u[58]),
T = d(T, j, E, P, O, 21, u[59]),
P = d(P, T, j, E, g, 6, u[60]),
E = d(E, P, T, j, x, 10, u[61]),
j = d(j, E, P, T, p, 15, u[62]),
T = d(T, j, E, P, w, 21, u[63]),
// Add the working values back into the state, wrapping to 32 bits with `| 0`.
i[0] = i[0] + P | 0,
i[1] = i[1] + T | 0,
i[2] = i[2] + j | 0,
i[3] = i[3] + E | 0
},
// Pads the buffered data, processes the remaining blocks and returns the digest
// as a WordArray.
_doFinalize: function() {
var t = this._data
, n = t.words
, r = 8 * this._nDataBytes
, o = 8 * t.sigBytes;
// Append a single 1 bit (byte 0x80) directly after the data.
n[o >>> 5] |= 128 << 24 - o % 32;
// i / a: high and low 32 bits of the total message length in bits.
var i = e.floor(r / 4294967296)
, a = r;
// Store the length in the last two words of the final block, byte-swapped like the data words.
n[15 + (o + 64 >>> 9 << 4)] = 16711935 & (i << 8 | i >>> 24) | 4278255360 & (i << 24 | i >>> 8),
n[14 + (o + 64 >>> 9 << 4)] = 16711935 & (a << 8 | a >>> 24) | 4278255360 & (a << 24 | a >>> 8),
t.sigBytes = 4 * (n.length + 1),
this._process();
// Byte-swap each of the four state words to produce the output byte order.
for (var u = this._hash, c = u.words, s = 0; s < 4; s++) {
var l = c[s];
c[s] = 16711935 & (l << 8 | l >>> 24) | 4278255360 & (l << 24 | l >>> 8)
}
return u
},
// Copies the hasher and gives the copy its own `_hash` state.
clone: function() {
var e = i.clone.call(this);
return e._hash = this._hash.clone(),
e
}
});
// MD5 step helper used by steps 0-15 of _doProcessBlock.
// Mixes with (t & n) | (~t & r), adds the message word `o` and the constant `a`
// to `e`, rotates the sum left by `i` bits and finally adds `t`.
function s(e, t, n, r, o, i, a) {
    var mixed = (t & n) | (~t & r);
    var total = e + mixed + o + a;
    var rotated = (total << i) | (total >>> (32 - i));
    return rotated + t;
}
// MD5 step helper used by steps 16-31 of _doProcessBlock.
// Mixes with (t & r) | (n & ~r), adds the message word `o` and the constant `a`
// to `e`, rotates the sum left by `i` bits and finally adds `t`.
function l(e, t, n, r, o, i, a) {
    var mixed = (t & r) | (n & ~r);
    var total = e + mixed + o + a;
    var rotated = (total << i) | (total >>> (32 - i));
    return rotated + t;
}
// MD5 step helper used by steps 32-47 of _doProcessBlock.
// Mixes with t ^ n ^ r, adds the message word `o` and the constant `a` to `e`,
// rotates the sum left by `i` bits and finally adds `t`.
function f(e, t, n, r, o, i, a) {
    var mixed = t ^ n ^ r;
    var total = e + mixed + o + a;
    var rotated = (total << i) | (total >>> (32 - i));
    return rotated + t;
}
// MD5 step helper used by steps 48-63 of _doProcessBlock.
// Mixes with n ^ (t | ~r), adds the message word `o` and the constant `a` to `e`,
// rotates the sum left by `i` bits and finally adds `t`.
function d(e, t, n, r, o, i, a) {
    var mixed = n ^ (t | ~r);
    var total = e + mixed + o + a;
    var rotated = (total << i) | (total >>> (32 - i));
    return rotated + t;
}
// Publish the shortcuts on the library root: MD5(message) and HmacMD5(message, key),
// both built from the MD5 hasher `c`.
t.MD5 = i._createHelper(c),
t.HmacMD5 = i._createHmacHelper(c)
}(Math),
n.MD5)
}
)),
// Module that registers the HMAC construction on the core library's `algo` namespace.
A((function(e, t) {
var n;
e.exports = (n = k,
void function() {
var e = n
, t = e.lib.Base
, r = e.enc.Utf8;
e.algo.HMAC = t.extend({
// e: hasher type to instantiate; t: key, given as a string or a WordArray.
init: function(e, t) {
e = this._hasher = new e.init,
// String keys are converted to bytes with the Utf8 encoder.
"string" == typeof t && (t = r.parse(t));
var n = e.blockSize
, o = 4 * n;
// A key longer than one block (o bytes) is replaced by its hash.
t.sigBytes > o && (t = e.finalize(t)),
t.clamp();
// Derive the outer and inner keys by XOR-ing the key words with
// 0x5C5C5C5C (1549556828) and 0x36363636 (909522486) respectively.
for (var i = this._oKey = t.clone(), a = this._iKey = t.clone(), u = i.words, c = a.words, s = 0; s < n; s++)
u[s] ^= 1549556828,
c[s] ^= 909522486;
// Both derived keys span exactly one block.
i.sigBytes = a.sigBytes = o,
this.reset()
},
// Restarts the inner hash and primes it with the inner key.
reset: function() {
var e = this._hasher;
e.reset(),
e.update(this._iKey)
},
// Feeds message data into the inner hash; returns `this` so calls can be chained.
update: function(e) {
return this._hasher.update(e),
this
},
// Finishes the inner hash (optionally appending `e`), then hashes
// outer key || inner digest and returns that result.
finalize: function(e) {
var t = this._hasher
, n = t.finalize(e);
return t.reset(),
t.finalize(this._oKey.clone().concat(n))
}
})
}())
}
)),
A((function(e, t) {
e.exports = k.HmacMD5
}
通过对上面的JS代码进行静态分析可以发现,最终使用的算法为HmacMD5。既然有密钥参与计算,可以直接用Python还原算法并进行测试验证,算法实现如下所示:
def sign(e, t):
    """Compute the request signature as an HMAC-MD5 hex digest.

    The keys of ``e`` are sorted, entries whose value is falsy are skipped,
    and the remaining entries are joined as ``key=value`` pairs separated by
    ``&``. That string is signed with ``t`` as the HMAC key.

    Args:
        e: Mapping of request parameter names to values.
        t: Secret key as a ``str``; it is UTF-8 encoded before use.

    Returns:
        The HMAC-MD5 digest as a 32-character lowercase hexadecimal string.
    """
    # Imported here so the snippet runs on its own without a separate import block.
    import hashlib
    import hmac

    # join() yields exactly the string the "&"-prefix-then-strip approach would build,
    # including the empty string when no parameter qualifies.
    message = "&".join(f"{key}={e[key]}" for key in sorted(e.keys()) if e[key])
    return hmac.new(t.encode("utf-8"), message.encode("utf-8"), hashlib.md5).hexdigest()
HMAC-MD5是基于MD5哈希算法的消息认证码,HMAC是一种利用密钥的哈希算法,用于生成一个“签名”,并且常用于消息的完整性验证和认证(如API请求签名)
最终简单的编写一个搜索代码查看一下是否可以正常获取到相关的数据,如下所示:
案例比较简单,适合正在学习或想要学习逆向的新手小伙伴练练手