1. 写在前面
最近在写爬虫的时候发现一些小网站上的MD5加密,在我们团队开发的千把个网站爬虫内发现这种MD5加密的案例还不少!话不多说,直接拿出来分析一波
之后我准备抽时间总结出一个关于如何快速搜索与定位JS加密函数与分析与辨别常见加密算法特征的些个小技巧
目标站点:
aHR0cHM6Ly93d3cub2Fwa3p5aC5jbi9zL3MtJUU2JThCJTlCJUU1JTk1JTg2JUU5JTkzJUI2JUU4JUExJThDPyZzaz1kMmRiMGJjZjI1MTExMzlkMzZiNzUxNTg5YTc1ZTllOA==
2. 分析
这个网站主要讲的就是搜索接口那个地方,可以看到搜索URL中有一个sk的参数,这个参数就是我们要分析还原的目标
sk参数的加密是通过外部JS文件的调用处理,所以在网站内部我们无法直接搜索到相关加密入口
这里我以招商银行搜索为例,sk参数的加密值如下:
d2db0bcf2511139d36b751589a75e9e8(这是固定的)
可以看到这是一个32位的加密值,我们先假设不添加sk参数是否能够正常从搜索接口拿到数据
通过简单的构造请求发现并不能拿到搜索数据,给到我们的是一个简短的HTML代码,重点就在这串HTML代码内
HTML代码内有如下几个参数,这是调用MD5加密JS文件所需的参数
hexcase、b64pad、chrsz、s_wb
其中s_wb就是我们加密传的参数值,每一个关键词搜索对应的s_wb跟sk值都是固定的
3. 扣代码验证
需要注意的是返回的HTML代码中有两个JS文件,我们先看第一个5dm.js文件
function hex_md5(s){ return binl2hex(core_md5(str2binl(s), s.length * chrsz));}
function b64_md5(s){ return binl2b64(core_md5(str2binl(s), s.length * chrsz));}
function hex_hmac_md5(key, data) { return binl2hex(core_hmac_md5(key, data)); }
function b64_hmac_md5(key, data) { return binl2b64(core_hmac_md5(key, data)); }
function calcMD5(s){ return binl2hex(core_md5(str2binl(s), s.length * chrsz));}
function md5_vm_test()
{
return hex_md5("abc") == "900150983cd24fb0d6963f7d28e17f72";
}
function cxx(a, d) {
var e;
a = a - 0,e = str_spl[a]
return e;
}
function core_md5(x, len)
{
x[len >> 5] |= 0x80 << ((len) % 32);
x[(((len + 64) >>> 9) << 4) + 14] = len;
var a = 1732584193;
var b = -271733879;
var c = -1732584194;
var d = 271733878;
for(var i = 0; i < x.length; i += 16)
{
var olda = a;
var oldb = b;
var oldc = c;
var oldd = d;
a = md5_ff(a, b, c, d, x[i+ 0], 7 , -680876936);
d = md5_ff(d, a, b, c, x[i+ 1], 12, -389564586);
c = md5_ff(c, d, a, b, x[i+ 2], 17, 606105819);
b = md5_ff(b, c, d, a, x[i+ 3], 22, -1044525330);
a = md5_ff(a, b, c, d, x[i+ 4], 7 , -176418897);
d = md5_ff(d, a, b, c, x[i+ 5], 12, 1200080426);
c = md5_ff(c, d, a, b, x[i+ 6], 17, -1473231341);
b = md5_ff(b, c, d, a, x[i+ 7], 22, -45705983);
a = md5_ff(a, b, c, d, x[i+ 8], 7 , 1770035416);
d = md5_ff(d, a, b, c, x[i+ 9], 12, -1958414417);
c = md5_ff(c, d, a, b, x[i+10], 17, -42063);
b = md5_ff(b, c, d, a, x[i+11], 22, -1990404162);
a = md5_ff(a, b, c, d, x[i+12], 7 , 1804603682);
d = md5_ff(d, a, b, c, x[i+13], 12, -40341101);
c = md5_ff(c, d, a, b, x[i+14], 17, -1502002290);
b = md5_ff(b, c, d, a, x[i+15], 22, 1236535329);
a = md5_gg(a, b, c, d, x[i+ 1], 5 , -165796510);
d = md5_gg(d, a, b, c, x[i+ 6], 9 , -1069501632);
c = md5_gg(c, d, a, b, x[i+11], 14, 643717713);
b = md5_gg(b, c, d, a, x[i+ 0], 20, -373897302);
a = md5_gg(a, b, c, d, x[i+ 5], 5 , -701558691);
d = md5_gg(d, a, b, c, x[i+10], 9 , 38016083);
c = md5_gg(c, d, a, b, x[i+15], 14, -660478335);
b = md5_gg(b, c, d, a, x[i+ 4], 20, -405537848);
a = md5_gg(a, b, c, d, x[i+ 9], 5 , 568446438);
d = md5_gg(d, a, b, c, x[i+14], 9 , -1019803690);
c = md5_gg(c, d, a, b, x[i+ 3], 14, -187363961);
b = md5_gg(b, c, d, a, x[i+ 8], 20, 1163531501);
a = md5_gg(a, b, c, d, x[i+13], 5 , -1444681467);
d = md5_gg(d, a, b, c, x[i+ 2], 9 , -51403784);
c = md5_gg(c, d, a, b, x[i+ 7], 14, 1735328473);
b = md5_gg(b, c, d, a, x[i+12], 20, -1926607734);
a = md5_hh(a, b, c, d, x[i+ 5], 4 , -378558);
d = md5_hh(d, a, b, c, x[i+ 8], 11, -2022574463);
c = md5_hh(c, d, a, b, x[i+11], 16, 1839030562);
b = md5_hh(b, c, d, a, x[i+14], 23, -35309556);
a = md5_hh(a, b, c, d, x[i+ 1], 4 , -1530992060);
d = md5_hh(d, a, b, c, x[i+ 4], 11, 1272893353);
c = md5_hh(c, d, a, b, x[i+ 7], 16, -155497632);
b = md5_hh(b, c, d, a, x[i+10], 23, -1094730640);
a = md5_hh(a, b, c, d, x[i+13], 4 , 681279174);
d = md5_hh(d, a, b, c, x[i+ 0], 11, -358537222);
c = md5_hh(c, d, a, b, x[i+ 3], 16, -722521979);
b = md5_hh(b, c, d, a, x[i+ 6], 23, 76029189);
a = md5_hh(a, b, c, d, x[i+ 9], 4 , -640364487);
d = md5_hh(d, a, b, c, x[i+12], 11, -421815835);
c = md5_hh(c, d, a, b, x[i+15], 16, 530742520);
b = md5_hh(b, c, d, a, x[i+ 2], 23, -995338651);
a = md5_ii(a, b, c, d, x[i+ 0], 6 , -198630844);
d = md5_ii(d, a, b, c, x[i+ 7], 10, 1126891415);
c = md5_ii(c, d, a, b, x[i+14], 15, -1416354905);
b = md5_ii(b, c, d, a, x[i+ 5], 21, -57434055);
a = md5_ii(a, b, c, d, x[i+12], 6 , 1700485571);
d = md5_ii(d, a, b, c, x[i+ 3], 10, -1894986606);
c = md5_ii(c, d, a, b, x[i+10], 15, -1051523);
b = md5_ii(b, c, d, a, x[i+ 1], 21, -2054922799);
a = md5_ii(a, b, c, d, x[i+ 8], 6 , 1873313359);
d = md5_ii(d, a, b, c, x[i+15], 10, -30611744);
c = md5_ii(c, d, a, b, x[i+ 6], 15, -1560198380);
b = md5_ii(b, c, d, a, x[i+13], 21, 1309151649);
a = md5_ii(a, b, c, d, x[i+ 4], 6 , -145523070);
d = md5_ii(d, a, b, c, x[i+11], 10, -1120210379);
c = md5_ii(c, d, a, b, x[i+ 2], 15, 718787259);
b = md5_ii(b, c, d, a, x[i+ 9], 21, -343485551);
a = safe_add(a, olda);
b = safe_add(b, oldb);
c = safe_add(c, oldc);
d = safe_add(d, oldd);
}
return Array(a, b, c, d);
}
function md5_cmn(q, a, b, x, s, t)
{
return safe_add(bit_rol(safe_add(safe_add(a, q), safe_add(x, t)), s),b);
}
function md5_ff(a, b, c, d, x, s, t)
{
return md5_cmn((b & c) | ((~b) & d), a, b, x, s, t);
}
function md5_gg(a, b, c, d, x, s, t)
{
return md5_cmn((b & d) | (c & (~d)), a, b, x, s, t);
}
function md5_hh(a, b, c, d, x, s, t)
{
return md5_cmn(b ^ c ^ d, a, b, x, s, t);
}
function md5_ii(a, b, c, d, x, s, t)
{
return md5_cmn(c ^ (b | (~d)), a, b, x, s, t);
}
function core_hmac_md5(key, data)
{
var bkey = str2binl(key);
if(bkey.length > 16) bkey = core_md5(bkey, key.length * chrsz);
var ipad = Array(16), opad = Array(16);
for(var i = 0; i < 16; i++)
{
ipad[i] = bkey[i] ^ 0x36363636;
opad[i] = bkey[i] ^ 0x5C5C5C5C;
}
var hash = core_md5(ipad.concat(str2binl(data)), 512 + data.length * chrsz);
return core_md5(opad.concat(hash), 512 + 128);
}
function safe_add(x, y)
{
var lsw = (x & 0xFFFF) + (y & 0xFFFF);
var msw = (x >> 16) + (y >> 16) + (lsw >> 16);
return (msw << 16) | (lsw & 0xFFFF);
}
function bit_rol(num, cnt)
{
return (num << cnt) | (num >>> (32 - cnt));
}
function str2binl(str)
{
var bin = Array();
var mask = (1 << chrsz) - 1;
for(var i = 0; i < str.length * chrsz; i += chrsz)
bin[i>>5] |= (str.charCodeAt(i / chrsz) & mask) << (i%32);
return bin;
}
function binl2hex(binarray)
{
var hex_tab = hexcase ? "0123456789ABCDEF" : "0123456789abcdef";
var str = "";
for(var i = 0; i < binarray.length * 4; i++)
{
str += hex_tab.charAt((binarray[i>>2] >> ((i%4)*8+4)) & 0xF) +
hex_tab.charAt((binarray[i>>2] >> ((i%4)*8 )) & 0xF);
}
return str;
}
function binl2b64(binarray)
{
var tab = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
var str = "";
for(var i = 0; i < binarray.length * 4; i += 3)
{
var triplet = (((binarray[i >> 2] >> 8 * ( i %4)) & 0xFF) << 16)
| (((binarray[i+1 >> 2] >> 8 * ((i+1)%4)) & 0xFF) << 8 )
| ((binarray[i+2 >> 2] >> 8 * ((i+2)%4)) & 0xFF);
for(var j = 0; j < 4; j++)
{
if(i * 8 + j * 6 > binarray.length * 32) str += b64pad;
else str += tab.charAt((triplet >> 6*(3-j)) & 0x3F);
}
}
return str;
}
这是一个典型的MD5加密算法,MD5、SHA1、BASE64、16进制这类加密都有一些关键词的特征,后续文章着重总结
现在我们拿到了MD5的加密算法,也拿到了HTML里面的参数,仔细看那几个参数变量正是JS加密代码所需
这里我们在控制台测试验证一下,如下图可以看到MD5的加密算法是没有任何问题的,s_wb经过加密得到了一个32位的值
不过跟上面网站得到的值并不一样,因为之前提到过每一个关键词的搜索加密值都是固定的
显然加密的参数s_wb的值似乎加了盐,加盐则是通过在任意固定位置插入特定字符串,让散列后的结果和使用原始密码的散列结果不相符
现在我们分析另外一个JS文件,这个JS代码是混淆了的
eval( function ( p, a, c, k, e, d ) {
e = function ( c ) {
return ( c < a ? "" : e( parseInt( c / a ) ) ) + ( ( c = c % a ) > 35 ? String.fromCharCode( c + 29 ) : c.toString( 36 ) )
};
if ( !''.replace( /^/, String ) ) {
while ( c-- ) d[ e( c ) ] = k[ c ] || e( c );
k = [ function ( e ) {
return d[ e ]
} ];
e = function () {
return '\\w+'
};
c = 1;
};
while ( c-- )
if ( k[ c ] ) p = p.replace( new RegExp( '\\b' + e( c ) + '\\b', 'g' ), k[ c ] );
return p;
}( 'g 1Y=\'6,1,3,a,c,a,b,b,v,v,j,j,0,r,o,c,e,a,d,c,a,c,a,c,o,p,c,1,y,t,z,2m,1g,V,1w,1p,1q,3i,3D,2s,2q,;2R=/,3|0|1|4|2,2U,3|1|0|2|4,2T,2V,2X,2W,2S,2O,2N,2P,2Q,35,34,36,38,A,37,33,2Z,2Y,30,32,31,2M,2x,0|2|1|3|5|4,2w,2y,2A,2z,2v,2r,2u,2t,0|3|2|5|1|4,n,2I,2H,t,2J,2L,2K,2G 2C 2B 2D 2F.,2E,39,3E,3F,3H=,3G=,3C,D,3y,3x,3z,3B,3A,3P,3O,3Q/K,3S,3R,3N,3J,I-l,q-l,[[[3I]]]:,3K,3M,/0.3L:3w:3h/,0|3|4|5|1|2,3g,G.A,3k,3j,a,3f,3b,3a,3c,3e,3d,3s,3r,3t,3v,3u,3q,b,q-J-C,3m,3l,3n,3p,3o,1s,1r,1t,1v,1u,1n,1m,1o,1D,1C,1E,1G,1F,1y,a,1x,1z,1B,1A,1l,17,Y,Z E,W,18,U,X,19,1h-l,1i,1k,1j,1f,1b,1a,1c,1e,1d,2a,29,2c,[i 2d],28,24,23: ,25,27,26,2l,2n,2p,2o,2k,2g-F,2f,2h,2j,2i,1O,1N,1P,1R,1Q: ,1M,1I,1H: ,1J,1L,1K,1Z,20,22,21,1X,1T,1S,1U,1W,1V,2e,3T,i, - ,5H,5G,5I,5K,5J,5F,%2b,5B,5A,5C,F,G,5E,=; 5D-5L=-5T;,5S-5U,5W,5V,5R,<H D="L:w">E 5N: 5M</H>,5O,5Q,5P,5z,5j,5i,5k,5m,5l,k-B,n=,5h,a,5d/K,5c,5e,5g,5f+5n-5v,5u i: ,5w,5y,5x,5t,5p,5o,5q,5s,5r,6n,6m,6o,6q,6p,6i,6h,6j,L,6l,5X,5Z,61,6k,6c,6e,6g,6f,67,69,6b,6a,68,6d,60,I-J-C,5Y,65,66,64,62,63,6r,5b,4k,4j,4m,4l,4i,4f,4e,/4h-4g/k-4n/,4u,4t,4w,n-4v-m,a,4s,4p,4o,4r,c,4q,u,4d,40,3Z,12|11|5|10|16|6|13|0|7|2|14|9|3|4|1|15|8,42,41,3Y,3V,3U,3X,3W,43-k-m,4a/x-49-B-4c,4b,48,45,44,47,46,4x,4Y,4X,50,4Z-u-m,4W,4T,4S,4V,4U,51,58,2|1|3|4|0,57,5a,59,56,53: ,52,55,54,w,4R,4E,4D,4G,N,4F,4C,4z,4y,4B,4A,4H,4O\'.N(\',\');(4N(){g P=\'g \'+f(\'O\')+\' = \'+f(\'M\')+\'.\'+f(\'R\')+\'.\'+f(\'Q\'),4Q=h(P),S=\'g j \'+\'=\'+f(\'O\')+\'+\\\'&\'+f(\'4P\')+\'\\\'+\'+f(\'4M\')+f(\'4J\')+\'("\'+h(f(\'4I\')+f(\'4L\'))+f(\'T\')+f(\'T\')+\'")\',4K=h(S),s=f(\'M\')+"."+f(\'R\')+"."+f(\'Q\')+"="+"j";h(s)})();', 62, 400, '|||||||||||||||cxx|var|eval|object||challenge|content|warning|location|||jc||h_j||cookie||none||||stringify|form|wait|style|error|type|JSON|span|cf|please|ov|display|0xaa|split|0x71|str|0xba|0x4e|j_S|0xc3|xDIKE|yDqjz|tAtvf|GwSAy|IrPMz|script||||||||RltNA|SpLsv|getTime|rZLDP|window|s_wb|okWgA|CApNt|SVWkt|EhTGM|yjs|xMQBH|GYXiM|YPpdz|join|Jdglm|VgXRq|infln|hex_m|Hncqi|rzkXR|getUTCMonth|kEOXS|bOXho|setTimeout|hostname|hqtkF|EonFu|readystatechange|wvqDm|uBOBk|feBet|Myfvu|qDVbz|bwTLt|YvUtC|URL|oDCWZ|ontimeout|TGsOO|createElement|TBaRt|totFh|lq|UDJRh|Line|cFPWv|push|KXAyu|cLt|QXLIM|NBiYs|attachEvent|str_spl|HKfkZ|qYoox|cookieEnabled|number|Column|hasOwnProperty|test|xUpbx|iHPsA|toString|ActiveXObject|xYMlq||dnUwI|Array|UfcDe|hwkrP|Content|timeout|getUTCHours|kVHHP|tfJEB|wmFQu|toJSON|href|QTNba|KacOr|RDADr|call|WDFnZ|coSfZ|nUfnU|YylIg|setTime|oQBSD|fMdPD|IfNGB|GVsnR|is|browser|not|xCynh|supported|This|FDkwn|GcHHU|cTqHN|IlWoV|aHejK|cf_chl_prog|GGrkR|length|okQTr|pjUVI|path|WNlgm|luXAx|document|RaRva|sffVy|Krhfs|vsvuu|open|wLHhJ|pow|LnGrE|fromCharCode|uHHsU|boolean|cNounce|getUTCFullYear|oIiHN|TOuRq|CXZDT|DDqjf|floor|xRkJm|cRay|null|url|33416065404de2bd6354dea0e1474b01e183783d55747f8b3651888d63074b8d|DVOWi|GTERe|zTmET|msg|fYLnU|BQXry|atob|ofmCw|iLeUB|qwuXF|sqjnL|BWrSc|LbpXa|MQWiJ|1630987682|SHA256|replace|XMLHttpRequest|getElementById|gunvR|RMShh|alert|iMlwG|RcmsS|sk|expires|ERROR|indexOf|cRq|15241746233417997|iGYSA|oYHLt|PsAOD|LACbK|flow|xyKFA|IgbxZ|InyOx|protocol|lastIndex|console|uJExb|aURPj|LKabo|AuGvf|reload|_5|cached|Function|s_w|POST|QFzyD|ppp|www|application|UrAfO|urlencoded|EGDrE|jkMkZ|Math|cgi|cdn|chC|NcMkh|tkmOi|DOMContentLoaded|zuFRo|platform|UpcSS|sendRequest|LjmFC|cvId|hlI|min|ReBaF|mismatch|send|YUfsT|NejMd|eKtkL|cuERA|OZghr|setRequestHeader|parseInt|Wfjmc|afuXt|d5|getUTCSeconds|0x150|0x16d|j_u|0x82|0x23|function|jshsZ|0x5b|s_u|_3|pJTgp|chReq|nCJFD|nWwNw|cf_chl_|FjBAA|xOwTw|no|navigator|muapd|iIhbT|Message|zfuDm|nHOsW|valueOf|cTTimeMs|pdaor|_4|Tkqkc|VVKvU|cQNqs|beacon|mLnip|IOHg1ali3SWG5eCDVu7xYb2rtq|chCAS|0123456789abcdef|QugZe|complete|cf_chl_rc_ni|lCTri|WFCvu|shd9X4ovfmQwLj|qkyzk|bLKyY|interactive|EdaME|RysJB|block|Error|6Uy0ZNJpRT8BPAM|PyVdX|cHash|QiMoT|gzqYy|OFxWm|readyState|toUTCString|Max|zBFoF|substring|HXbmy|_2|eOAln|ehNpB|parse|Age|1020|code|slice|now|nQzbt|getUTCMinutes|CF|99999999|Challenge|zDkeF|cdDUS|vdRbf|PdPTi|SUEcW|EnrBZ|FJLlp|LSzbf|innerHTML|FROtN|OAtFE|ZjUsu|apply|_0|GJeLc|cXHng|BRGoE|lqlq|status|onreadystatechange|yuUej|addEventListener|Vglij|onerror|FUQZM|Dqbnu|TFMGn|WJqZU|_1|DkVfm|XxAgU|jump|HFZgm'.split( '|' ), 0, {} ) )
简单看一下,使用eval()函数来执行混淆后的代码多个变量简直没法看,不做还原无法理解其逻辑
但是,可以看到的是混淆之后的代码上面部分逻辑似乎做了操作字符串、分割、连接、运算这类操作
所以大概率字符串的加盐操作就在这段JS代码里
还原代码之后发现加盐的运算结果也是固定的,等于是在MD5加密参数s_wb字符串后面追加了一个4位数的固定字符串
可以看到测试验证的结果d2db0…就是我们最开始搜索的关键词招商银行最终加密的sk参数值
最后又测了几个关键词
好了,到这里又到了跟大家说再见的时候了。创作不易,帮忙点个赞再走吧。你的支持是我创作的动力,希望能带给大家更多优质的文章