爬虫实战:汽车之家配置页面 破解伪元素和混淆JS
本篇介绍如何破解汽车之家配置页面的伪元素和混淆的JS。
**温馨提示:如需转载本文,请注明内容出处。**
本文链接:https://www.cnblogs.com/grom/p/9242156.html
(本文分多次编辑,可从原文章查看最新更新)
笔者爬取过的网站中,印象最为深刻的就是汽车之家了,它也是麻烦最多的网站之一。其特点是:页面大面积使用伪元素代替关键字;解析伪元素的JS进行了动态混淆,每次刷新后的JS都不相同;页面被禁用右键菜单,无法选中或复制。
(因为破解花了一周,怕分享出来后汽车之家就改了,所以等程序运行了半年之后才分享出来= =)
网站地址:http://car.autohome.com.cn/config/spec/25898.html
基本就是这样,如果只是单纯地抓取页面元素,得到的结果会是这样:
开始分析:
1.整个页面及配置数据都是直接Write出来的:配置项的详情写在页面JS里,同页面一起生成,并非通过接口获取。
2.配置项数据在页面上
(PS小妙招:将网页保存本地后发现文字依旧显示,然后大面积删除JS后刷新页面,如果文字依旧显示,继续删,直到找到加载数据的JS为止)
事后发现第一个变量keyLink是左边配置名称的超链接
第二个变量config是我们要的配置上半页(到车轮制动那),
第三个变量option是主/被动安全装备及以下,
第四、五个变量color和innerColor是外观内饰颜色
其他的没什么用,第六个可能是什么运动套装之类的,豪车才有,没仔细看。
3.解密JS在这里
这个JS是被混淆过的,不可以根据变量名去获取。
4.破解流程:先拿到这个配置JSON串,再找到解析JS,计算JS中的变量得到字典集(一大串文字)和下标集(一大串数字集合);然后根据下标取字典里对应的文字,得到真正的数据字典,最后替换指定的伪元素。
5.解析被混淆的JS,格式化后可以得到这样的一串JS
提供一个完整的JS,有兴趣的小伙伴可以去研究研究
function(nv_) { var pk_ = function () { \'return pk_\'; return \'S\'; }; function AH_() { function _A() { return \'UV\'; }; if (_A() == \'UV,\') { return \'AH_\'; } else { return _A(); } } function cU_() { \'return cU_\'; return \'万价\'; } var xN_ = \'元全准\'; function $GetCustomStyle$() { var $customstyle$ = \'\'; try { if (HS_GetCustomStyle) { $customstyle$ = HS_GetCustomStyle(); } else { if (navigator.userAgent.indexOf(\'Windows NT 5\') != -1) { $customstyle$ = \'margin-bottom:-4.8px;\'; } else { $customstyle$ = \'margin-bottom:-5px;\'; } } } catch (e) { } return $customstyle$; } var Qz_ = \'前力功\'; var rC_ = function () { \'rC_\'; var _r = function () { return \'动助华\'; }; return _r(); }; var cO_ = function () { \'cO_\'; var _c = function () { return \'压\'; }; return _c(); }; function ts_() { \'return ts_\'; return \'号合\'; } var vO_ = function (vO__) { var _v = function (vO__) { \'return vO_\'; return vO__; }; return _v(vO__); }; var zS_ = \'喷\'; function Gm_() { function _G() { return \'Gm_\'; }; if (_G() == \'Gm__\') { return _G(); } else { return \'器国\'; } } function Fo_() { function _F() { return \'地\'; }; if (_F() == \'地\') { return \'地\'; } else { return _F(); } } var wo_ = function (wo__) { var _w = function (wo__) { \'return wo_\'; return wo__; }; return _w(wo__); }; var zk_ = function (zk__) { var _z = function (zk__) { \'return zk_\'; return zk__; }; return _z(zk__); }; function WT_() { function _W() { return \'子实容\'; }; if (_W() == \'子实容\') { return \'子实容\'; } else { return _W(); } } var Ma_ = \'宽\'; var vk_ = function () { \'vk_\'; var _v = function () { return \'寸导小\'; }; return _v(); }; var zl_ = \'度式弗\'; var ZS_ = function () { \'ZS_\'; var _Z = function () { return \'径\'; }; return _Z(); }; function Wh_() { \'return Wh_\'; return \'悬\'; } function fG_() { function _f() { return \'成\'; }; if (_f() == \'成\') { return \'成\'; } else { return _f(); } } function $GetClassName$($index$) { return \'.hs_kw\' + $index$ + \'_configMd\'; } function 
$RuleCalss1$() { return \'::before {content:\' } function kE_() { function _k() { return \'或\'; }; if (_k() == \'或\') { return \'或\'; } else { return _k(); } } function wp_() { \'return wp_\'; return \'扭\'; } var yW_ = \'承\'; function bc_() { \'return bc_\'; return \'指\'; } function tk_() { function _t() { return \'tk__\'; }; if (_t() == \'tk__\') { return \'排\'; } else { return _t(); } } var Yp_ = function () { \'return Yp_\'; return \'数\'; }; function pR_() { function _p() { return \'pR__\'; }; if (_p() == \'pR__\') { return \'整\'; } else { return _p(); } } function BS_() { function _B() { return \'最\'; }; if (_B() == \'最\') { return \'最\'; } else { return _B(); } } var Bi_ = \'构\'; var fQ_ = \'架\'; function $GetWindow$() { return this[\'\' + YE_() + (function (MR__) { \'return MR_\'; return MR__; })(\'in\') + zh_()]; } var Bh_ = function () { \'Bh_\'; var _B = function () { return \'标\'; }; return _B(); }; var JW_ = function () { \'return JW_\'; return \'格\'; }; function wd_() { function _w() { return \'wd__\'; }; if (_w() == \'wd__\') { return \'梁\'; } else { return _w(); } } function UX_() { function _U() { return \'UX__\'; }; if (_U() == \'UX__\') { return \'械\'; } else { return _U(); } } function QU_() { function _Q() { return \'气油\'; }; if (_Q() == \'气油,\') { return \'QU_\'; } else { return _Q(); } } var Ed_ = function () { \'return Ed_\'; return \'测\'; }; function cZ_() { \'return cZ_\'; return \'海液\'; } var UZ_ = function (UZ__) { var _U = function (UZ__) { \'return UZ_\'; return UZ__; }; return _U(UZ__); }; var vI_ = function () { \'return vI_\'; return \'燃\'; }; var EI_ = function () { \'EI_\'; var _E = function () { return \'版独率\'; }; return _E(); }; function DT_() { function _D() { return \'盖\'; }; if (_D() == \'盖\') { return \'盖\'; } else { return _D(); } } var JI_ = function (JI__) { var _J = function (JI__) { \'return JI_\'; return JI__; }; return _J(JI__); }; function $Split$($item$, $index$) { if ($item$) { return $item$[\'\' + jn_() + Dg_() + 
iu_()]($index$); } else { return \'\'; } } function YY_() { \'return YY_\'; return \'积\'; } function hb_() { function _h() { return \'称程立\'; }; if (_h() == \'称程立\') { return \'称程立\'; } else { return _h(); } } var DC_ = function () { \'return DC_\'; return \'箱\'; }; var ec_ = function () { \'return ec_\'; return \'综\'; }; var $ruleDict$ = \'\'; var $rulePosList$ = \'\'; var Wr_ = function () { \'Wr_\'; var _W = function () { return \'缩\'; }; return _W(); }; function zq_() { function _z() { return \'zq_\'; }; if (_z() == \'zq__\') { return _z(); } else { return \'胎自\'; } } var YS_ = function (YS__) { \'return YS_\'; return YS__; }; var Hj_ = \'距车转\'; function Du_() { function _D() { return \'轮\'; }; if (_D() == \'轮\') { return \'轮\'; } else { return _D(); } } var cQ_ = function () { \'return cQ_\'; return \'轴载进\'; }; function WM_() { \'return WM_\'; return \'适\'; } function yQ_() { \'return yQ_\'; return \'速\'; } var uC_ = function () { \'return uC_\'; return \'配量铝\'; }; var lz_ = function (lz__) { var _l = function (lz__) { \'return lz_\'; return lz__; }; return _l(lz__); }; var Te_ = \'间隙风\'; var Ph_ = function () { \'Ph_\'; var _P = function () { return \'马\'; }; return _P(); }; function UO_() { function _U() { return \'驱驻\'; }; if (_U() == \'驱驻,\') { return \'UO_\'; } else { return _U(); } } function Iw_() { \'return Iw_\'; return \'高麦\'; } var KE_ = \'7;107;3\'; function HA_() { function _H() { return \';9\'; }; if (_H() == \';9,\') { return \'HA_\'; } else { return _H(); } } function PI_() { function _P() { return \'PI_\'; }; if (_P() == \'PI__\') { return _P(); } else { return \'5;70\'; } } function yr_() { \'return yr_\'; return \'82,29\'; } var mK_ = function () { \'return mK_\'; return \'1\'; }; var Ff_ = \'16,117;\'; function $Innerhtml$($item$, $index$) { var $tempArray$ = $GetElementsByCss$($GetClassName$($item$)); for (x in $tempArray$) { $tempArray$[x].innerHTML = $index$; try { $tempArray$[x].currentStyle = \'\'; } catch (e) { } } } function vs_() { 
function _v() { return \'vs_\'; }; if (_v() == \'vs__\') { return _v(); } else { return \'5,31\'; } } var Ds_ = \';102,11\'; function DV_() { function _D() { return \'0;42,\'; }; if (_D() == \'0;42,\') { return \'0;42,\'; } else { return _D(); } } function lU_() { function _l() { return \'49;57,3\'; }; if (_l() == \'49;57,3\') { return \'49;57,3\'; } else { return _l(); } } var yc_ = function (yc__) { \'return yc_\'; return yc__; }; function lf_() { function _l() { return \'66,\'; }; if (_l() == \'66,\') { return \'66,\'; } else { return _l(); } } var IN_ = function () { \'return IN_\'; return \'115\'; }; function Fb_() { function _F() { return \'Fb__\'; }; if (_F() == \'Fb__\') { return \',54;1\'; } else { return _F(); } } function $InsertRule$($index$, $item$) { $sheet$[\'\' + Mn_() + BP_ + Ni_() + FS_() + qg_() + KK_() + (function (cT__) { \'return cT_\'; return cT__; })(\'e\')]($GetClassName$($index$) + $RuleCalss1$() + \'"\' + $item$ + \'" }\', 0); var $tempArray$ = $GetElementsByCss$($GetClassName$($index$)); for (x in $tempArray$) { try { $tempArray$[x].currentStyle = \'\'; } catch (e) { } } } var GE_ = function () { \'GE_\'; var _G = function () { return \'01,11\'; }; return _G(); }; function Xq_() { function _X() { return \'5\'; }; if (_X() == \'5\') { return \'5\'; } else { return _X(); } } var UE_ = function () { \'return UE_\'; return \',54;7\'; }; var Xv_ = function () { \'return Xv_\'; return \'4\'; }; var wv_ = \';40\'; function Kb_() { function _K() { return \',3\'; }; if (_K() == \',3,\') { return \'Kb_\'; } else { return _K(); } } var Ej_ = \'0,0,1\'; function Xm_() { function _X() { return \'Xm_\'; }; if (_X() == \'Xm__\') { return _X(); } else { return \';1\'; } } function NT_() { \'return NT_\'; return \'21,101\'; } function rN_() { \'return rN_\'; return \';\'; } var Fc_ = function () { \'Fc_\'; var _F = function () { return \'7,60;\'; }; return _F(); }; function $ChartAt$($item$) { return $ruleDict$[\'\' + (function () { \'return Sm_\'; 
return \'c\' })() + aT_() + wF_()](parseInt($item$)); } function vC_() { \'return vC_\'; return \'98;53\'; } var iB_ = function () { \'iB_\'; var _i = function () { return \',\'; }; return _i(); }; function sn_() { \'return sn_\'; return \'11\'; } function ZU_() { function _Z() { return \'ZU_\'; }; if (_Z() == \'ZU__\') { return _Z(); } else { return \'2;51\'; } } function lM_() { \'return lM_\'; return \',105,\'; } function CF_() { function _C() { return \'44;67,9\'; }; if (_C() == \'44;67,9\') { return \'44;67,9\'; } else { return _C(); } } function Ri_() { \'return Ri_\'; return \'2;6,67\'; } function Ye_() { function _Y() { return \'Ye_\'; }; if (_Y() == \'Ye__\') { return _Y(); } else { return \';111\'; } } function HB_() { \'return HB_\'; return \',66;1\'; } function EW_() { \'return EW_\'; return \'3,10\'; } var cW_ = function () { \'return cW_\'; return \'3\'; }; function $GetDefaultView$() { return nv_[\'\' + Tb_() + Vo_() + \'au\' + FI_() + ak_() + (function () { \'return Ya_\'; return \'Vie\' })() + (function () { \'return Ki_\'; return \'w\' })()]; } function Yf_() { \'return Yf_\'; return \',100;37\'; } var oh_ = function (oh__) { var _o = function (oh__) { \'return oh_\'; return oh__; }; return _o(oh__); }; var Jn_ = \'3\'; function tl_() { function _t() { return \';48,\'; }; if (_t() == \';48,,\') { return \'tl_\'; } else { return _t(); } } var xY_ = function () { \'return xY_\'; return \'15;88,2\'; }; var AD_ = function () { \'AD_\'; var _A = function () { return \'1;4\'; }; return _A(); }; var iX_ = function (iX__) { var _i = function (iX__) { \'return iX_\'; return iX__; }; return _i(iX__); }; var Cy_ = function () { \'Cy_\'; var _C = function () { return \';90,79;\'; }; return _C(); }; function CV_() { \'return CV_\'; return \'1,10;94\'; } function Xx_() { function _X() { return \'Xx__\'; }; if (_X() == \'Xx__\') { return \',\'; } else { return _X(); } } var QW_ = function () { \'QW_\'; var _Q = function () { return \'7\'; }; return _Q(); }; 
function Vh_() { function _V() { return \'Vh__\'; }; if (_V() == \'Vh__\') { return \'2\'; } else { return _V(); } } function Bw_() { \'return Bw_\'; return \';13,1\'; } var Vs_ = \'2,1\'; var Sq_ = \'6\'; function ed_() { function _e() { return \',27;1\'; }; if (_e() == \',27;1\') { return \',27;1\'; } else { return _e(); } } function Tn_() { function _T() { return \'Tn_\'; }; if (_T() == \'Tn__\') { return _T(); } else { return \'23,45,\'; } } function pr_() { function _p() { return \'pr__\'; }; if (_p() == \'pr__\') { return \'8\'; } else { return _p(); } } var aZ_ = function () { \'return aZ_\'; return \';31,9\'; }; var CL_ = \'116\'; function fk_() { function _f() { return \'fk__\'; }; if (_f() == \'fk__\') { return \';78\'; } else { return _f(); } } var pz_ = function (pz__) { \'return pz_\'; return pz__; }; function bC_() { function _b() { return \'bC__\'; }; if (_b() == \'bC__\') { return \'5\'; } else { return _b(); } } function $ResetSystemFun$() { if ($GetWindow$()[\'\' + iH_() + Ct_() + Ap_() + XV_() + GP_() + BJ_() + fB_() + iz_()] != undefined) { if (window.hs_fuckyou == undefined) { window.hs_fuckyou = $GetWindow$()[\'\' + iH_() + Ct_() + Ap_() + XV_() + GP_() + BJ_() + fB_() + iz_()]; } } if ($GetDefaultView$()) { if ($GetDefaultView$()[\'\' + iH_() + Ct_() + Ap_() + XV_() + GP_() + BJ_() + fB_() + iz_()] != undefined) { if (window.hs_fuckyou_dd == undefined) { window.hs_fuckyou_dd = $GetDefaultView$()[\'\' + iH_() + Ct_() + Ap_() + XV_() + GP_() + BJ_() + fB_() + iz_()]; } } } } var YD_ = function () { \'return YD_\'; return \'8,64;15\'; }; var Dl_ = \',76;5\'; function $InsertRuleRun$() { for ($index$ = 0; $index$ < $rulePosList$.length; $index$++) { var $tempArray$ = $Split$($rulePosList$[$index$], \',\'); var $temp$ = \'\'; for ($itemIndex$ = 0; $itemIndex$ < $tempArray$.length; $itemIndex$++) { $temp$ += $ChartAt$($tempArray$[$itemIndex$]) + \'\'; } $InsertRule$($index$, $temp$); } } var dl_ = function (dl__) { var _d = function (dl__) { 
\'return dl_\'; return dl__; }; return _d(dl__); }; function jK_() { function _j() { return \'jK__\'; }; if (_j() == \'jK__\') { return \'3,91;32\'; } else { return _j(); } } function fI_() { function _f() { return \',71;\'; }; if (_f() == \',71;,\') { return \'fI_\'; } else { return _f(); } } function Wm_() { function _W() { return \'24,\'; }; if (_W() == \'24,\') { return \'24,\'; } else { return _W(); } } var CP_ = function () { \'return CP_\'; return \'6\'; }; var Ga_ = function (Ga__) { var _G = function (Ga__) { \'return Ga_\'; return Ga__; }; return _G(Ga__); }; function pT_() { \'return pT_\'; return \';12\'; } function Ae_() { function _A() { return \'2,43;\'; }; if (_A() == \'2,43;\') { return \'2,43;\'; } else { return _A(); } } var Ry_ = function () { \'Ry_\'; var _R = function () { return \'1\'; }; return _R(); }; var rM_ = \'23,103,\'; function XI_() { function _X() { return \'XI_\'; }; if (_X() == \'XI__\') { return _X(); } else { return \'93;9\'; } } var gk_ = \'7,6\'; function oQ_() { function _o() { return \'2;4;1\'; }; if (_o() == \'2;4;1\') { return \'2;4;1\'; } else { return _o(); } } function kp_() { \'return kp_\'; return \'04\'; } function NC_() { function _N() { return \'100;28\'; }; if (_N() == \'100;28,\') { return \'NC_\'; } else { return _N(); } } function NP_() { function _N() { return \'NP_\'; }; if (_N() == \'NP__\') { return _N(); } else { return \';52;\'; } } var sT_ = \'50,14,6\'; function ux_() { function _u() { return \'ux__\'; }; if (_u() == \'ux__\') { return \'3;50,81\'; } else { return _u(); } } function hT_() { function _h() { return \'hT__\'; }; if (_h() == \'hT__\') { return \';\'; } else { return _h(); } } function tL_() { \'return tL_\'; return \'90,5;\'; } var sX_ = \'114,4\'; function qx_() { \'return qx_\'; return \'14;78,\'; } var kS_ = function () { \'return kS_\'; return \'26;96,8\'; }; var OC_ = function (OC__) { \'return OC_\'; return OC__; }; var eT_ = function (eT__) { var _e = function (eT__) { \'return 
eT_\'; return eT__; }; return _e(eT__); }; function yV_() { \'return yV_\'; return \'8;90,\'; } function $GetLocationURL$() { return $GetWindow$()[\'\' + Kp_() + Ka_() + Lw_][\'\' + rI_() + hw_() + MU_(\'f\')]; } function Ra_() { function _R() { return \'Ra__\'; }; if (_R() == \'Ra__\') { return \'46;25\'; } else { return _R(); } } function Hh_() { \'return Hh_\'; return \';18\'; } function $SystemFunction1$($item$) { $ResetSystemFun$(); if ($GetWindow$()[\'\' + iH_() + Ct_() + Ap_() + XV_() + GP_() + BJ_() + fB_() + iz_()] != undefined) { $GetWindow$()[\'\' + iH_() + Ct_() + Ap_() + XV_() + GP_() + BJ_() + fB_() + iz_()] = function (element, pseudoElt) { if (pseudoElt != undefined && typeof (pseudoElt) == \'string\' && pseudoElt.toLowerCase().indexOf(\':before\') > -1) { var obj = {}; obj.getPropertyValue = function (x) { return x; }; return obj; } else { return window.hs_fuckyou(element, pseudoElt); } }; } return $item$; } function Wc_() { function _W() { return \';\'; }; if (_W() == \';\') { return \';\'; } else { return _W(); } } var $imgPosList$ = \'\'; var Rd_ = function () { \'Rd_\'; var _R = function () { return \'75,86;7\'; }; return _R(); }; var uZ_ = function () { \'uZ_\'; var _u = function () { return \'3\'; }; return _u(); }; function nn_() { function _n() { return \',67;9\'; }; if (_n() == \',67;9\') { return \',67;9\'; } else { return _n(); } } function Kj_() { function _K() { return \'Kj__\'; }; if (_K() == \'Kj__\') { return \',41,3\'; } else { return _K(); } } var Zk_ = \'8;36,\'; function JK_() { function _J() { return \'83;35,6\'; }; if (_J() == \'83;35,6\') { return \'83;35,6\'; } else { return _J(); } } var Zn_ = function (Zn__) { var _Z = function (Zn__) { \'return Zn_\'; return Zn__; }; return _Z(Zn__); }; function hV_() { function _h() { return \'hV_\'; }; if (_h() == \'hV__\') { return _h(); } else { return \',93;\'; } } var JL_ = \'58,\'; function $SuperInsertRule$() { if ($sheet$ !== undefined && $sheet$[\'\' + Mn_() + BP_ + Ni_() + 
FS_() + qg_() + KK_() + (function (cT__) { \'return cT_\'; return cT__; })(\'e\')]) { return true; } else { return false; } } var UA_ = function () { \'UA_\'; var _U = function () { return \'59;106,\'; }; return _U(); }; var bQ_ = \'6\'; var zR_ = function () { \'zR_\'; var _z = function () { return \'6\'; }; return _z(); }; var JD_ = function (JD__) { var _J = function (JD__) { \'return JD_\'; return JD__; }; return _J(JD__); }; function gs_() { function _g() { return \'gs_\'; }; if (_g() == \'gs__\') { return _g(); } else { return \'7;66,9\'; } } function pf_() { function _p() { return \'0;\'; }; if (_p() == \'0;,\') { return \'pf_\'; } else { return _p(); } } var Hz_ = function (Hz__) { \'return Hz_\'; return Hz__; }; function Ix_() { \'return Ix_\'; return \'20;\'; } var fV_ = function () { \'return fV_\'; return \'6\'; }; function xQ_() { function _x() { return \'xQ_\'; }; if (_x() == \'xQ__\') { return _x(); } else { return \'9,119;\'; } } function CE_() { function _C() { return \'CE__\'; }; if (_C() == \'CE__\') { return \'2\'; } else { return _C(); } } var fN_ = \'3,12,16\'; function DG_() { function _D() { return \',27\'; }; if (_D() == \',27\') { return \',27\'; } else { return _D(); } } function JZ_() { \'return JZ_\'; return \';19,\'; } function uk_() { function _u() { return \'89,65;1\'; }; if (_u() == \'89,65;1\') { return \'89,65;1\'; } else { return _u(); } } var jW_ = function () { \'return jW_\'; return \'09,11\'; }; var Hu_ = function () { \'Hu_\'; var _H = function () { return \'8;23,10\'; }; return _H(); }; function Jw_() { function _J() { return \'Jw_\'; }; if (_J() == \'Jw__\') { return _J(); } else { return \'3,\'; } } var nP_ = \'1\'; var ZL_ = \'00;20;3\'; var Dw_ = function () { \'return Dw_\'; return \'9\'; }; function iH_() { \'return iH_\'; return \'get\'; } function Ct_() { function _C() { return \'Co\'; }; if (_C() == \'Co,\') { return \'Ct_\'; } else { return _C(); } } function Ap_() { function _A() { return \'Ap__\'; }; if (_A() == 
\'Ap__\') { return \'m\'; } else { return _A(); } } var XV_ = function () { \'return XV_\'; return \'put\'; }; function GP_() { \'return GP_\'; return \'edS\'; } var BJ_ = function () { \'BJ_\'; var _B = function () { return \'t\'; }; return _B(); }; var fB_ = function () { \'return fB_\'; return \'y\'; }; function iz_() { function _i() { return \'le\'; }; if (_i() == \'le,\') { return \'iz_\'; } else { return _i(); } } function Mn_() { function _M() { return \'i\'; }; if (_M() == \'i\') { return \'i\'; } else { return _M(); } } var BP_ = \'nse\'; var Ni_ = function () { \'Ni_\'; var _N = function () { return \'r\'; }; return _N(); }; function FS_() { \'return FS_\'; return \'t\'; } var qg_ = function () { \'qg_\'; var _q = function () { return \'R\'; }; return _q(); }; function KK_() { \'return KK_\'; return \'ul\'; } function YE_() { \'return YE_\'; return \'w\'; } function zh_() { function _z() { return \'zh__\'; }; if (_z() == \'zh__\') { return \'dow\'; } else { return _z(); } } var Tb_ = function () { \'Tb_\'; var _T = function () { return \'d\'; }; return _T(); }; function Vo_() { function _V() { return \'Vo_\'; }; if (_V() == \'Vo__\') { return _V(); } else { return \'ef\'; } } var FI_ = function () { \'FI_\'; var _F = function () { return \'l\'; }; return _F(); }; function ak_() { function _a() { return \'t\'; }; if (_a() == \'t\') { return \'t\'; } else { return _a(); } } function $SystemFunction2$($item$) { $ResetSystemFun$(); if ($GetDefaultView$()) { if ($GetDefaultView$()[\'\' + iH_() + Ct_() + Ap_() + XV_() + GP_() + BJ_() + fB_() + iz_()] != undefined) { $GetDefaultView$()[\'\' + iH_() + Ct_() + Ap_() + XV_() + GP_() + BJ_() + fB_() + iz_()] = function (element, pseudoElt) { if (pseudoElt != undefined && typeof (pseudoElt) == \'string\' && pseudoElt.toLowerCase().indexOf(\':before\') > -1) { var obj = {}; obj.getPropertyValue = function (x) { return x; }; return obj; } else { return window.hs_fuckyou_dd(element, pseudoElt); } }; } } return $item$; } 
function $FillDicData$() { $ruleDict$ = $GetWindow$()[\'\' + ht_() + Sc_() + (function () { \'return vW_\'; return \'e\' })() + (function () { \'return FC_\'; return \'URI\' })() + UU_ + gA_ + Qg_() + Ec_ + ZP_()](\'\' + pk_() + AH_() + cU_() + (function () { \'return KF_\'; return \'体供保\' })() + xN_ + \'列制\' + Qz_ + rC_() + cO_() + ts_() + \'名后\' + vO_(\'吸商\') + zS_ + Gm_() + Fo_() + wo_(\'型备\') + zk_(\'多大\') + WT_() + Ma_ + vk_() + \'尺年\' + zl_ + ZS_() + Wh_() + fG_() + kE_() + wp_() + yW_ + bc_() + tk_() + Yp_() + pR_() + (function () { \'return KX_\'; return \'时\' })() + BS_() + (function () { \'return Ty_\'; return \'机\' })() + Bi_ + fQ_ + Bh_() + JW_() + wd_() + UX_() + (function () { \'return PM_\'; return \'比\' })() + QU_() + Ed_() + cZ_() + UZ_(\'点然\') + vI_() + EI_() + (function (eL__) { \'return eL_\'; return eL__; })(\'环电\') + DT_() + JI_(\'盘矩\') + (function () { \'return ez_\'; return \'离\' })() + YY_() + hb_() + DC_() + ec_() + Wr_() + (function () { \'return xW_\'; return \'缸\' })() + (function (gW__) { \'return gW_\'; return gW__; })(\'置耗\') + zq_() + YS_(\'舒行\') + (function () { \'return BN_\'; return \'规豪质\' })() + Hj_ + Du_() + cQ_() + WM_() + (function (yl__) { \'return yl_\'; return yl__; })(\'逊通\') + yQ_() + uC_() + lz_(\'长门\') + Te_ + Ph_() + UO_() + Iw_() + $SystemFunction1$(\'\')); $rulePosList$ = $Split$(($SystemFunction1$(\'\') + \'\' + (function () { \'return Xs_\'; return \'77,\' })() + KE_ + HA_() + PI_() + (function (vJ__) { \'return vJ_\'; return vJ__; })(\',19;\') + yr_() + (function () { \'return Uj_\'; return \',\' })() + mK_() + Ff_ + (function () { \'return lX_\'; return \'67,87;5\' })() + vs_() + Ds_ + DV_() + lU_() + yc_(\'3;\') + lf_() + IN_() + Fb_() + GE_() + Xq_() + UE_() + Xv_() + wv_ + Kb_() + Ej_ + \',2\' + Xm_() + NT_() + rN_() + (function (qt__) { \'return qt_\'; return qt__; })(\'23,4\') + Fc_() + vC_() + iB_() + sn_() + ZU_() + lM_() + CF_() + Ri_() + Ye_() + HB_() + EW_() + cW_() + Yf_() + oh_(\',4\') + Jn_ + tl_() 
+ xY_() + AD_() + iX_(\'7,60\') + Cy_() + (function () { \'return zK_\'; return \'6\' })() + CV_() + Xx_() + QW_() + Vh_() + Bw_() + Vs_ + Sq_ + ed_() + Tn_() + \'10\' + pr_() + aZ_() + (function () { \'return VT_\'; return \'3;56,\' })() + CL_ + fk_() + pz_(\',34;\') + bC_() + YD_() + Dl_ + dl_(\'3,11;1\') + jK_() + fI_() + Wm_() + CP_() + Ga_(\'6;13\') + \',47,60\' + pT_() + Ae_() + Ry_() + (function () { \'return VR_\'; return \'1\' })() + (function (YX__) { \'return YX_\'; return YX__; })(\'3;\') + rM_ + XI_() + gk_ + oQ_() + kp_() + (function () { \'return eq_\'; return \',\' })() + NC_() + NP_() + sT_ + ux_() + hT_() + tL_() + sX_ + (function () { \'return FK_\'; return \'3;17,\' })() + qx_() + kS_() + OC_(\'5;80\') + eT_(\',44;\') + yV_() + Ra_() + Hh_() + Wc_() + Rd_() + uZ_() + nn_() + Kj_() + Zk_ + JK_() + Zn_(\'8;\') + (function (GM__) { \'return GM_\'; return GM__; })(\'13,103\') + hV_() + JL_ + UA_() + bQ_ + zR_() + JD_(\';22,84\') + (function (Wf__) { \'return Wf_\'; return Wf__; })(\';99,\') + gs_() + pf_() + (function () { \'return Ia_\'; return \'99,112;\' })() + Hz_(\'13,1\') + Ix_() + fV_() + xQ_() + CE_() + fN_ + DG_() + JZ_() + uk_() + jW_() + Hu_() + Jw_() + nP_ + ZL_ + Dw_()), $SystemFunction2$(\';\')); $imgPosList$ = $Split$((\'##imgPosList_jsFuns##\' + $SystemFunction2$(\';\')), $SystemFunction1$(\';\')); $RenderToHTML$(); return \';\'; } function $GetElementsByCss$($item$) { return document.querySelectorAll($item$); } function Rm_() { function _R() { return \'g\'; }; if (_R() == \'g\') { return \'g\'; } else { return _R(); } } var sf_ = function () { \'sf_\'; var _s = function () { return \'e\'; }; return _s(); }; var kJ_ = function () { \'kJ_\'; var _k = function () { return \'P\'; }; return _k(); }; var VZ_ = function (VZ__) { \'return VZ_\'; return VZ__; }; function Bf_() { function _B() { return \'Bf__\'; }; if (_B() == \'Bf__\') { return \'p\'; } else { return _B(); } } var UF_ = function () { \'UF_\'; var _U = function () { return 
\'e\'; }; return _U(); }; var pB_ = function () { \'return pB_\'; return \'r\'; }; function ry_() { function _r() { return \'ry_\'; }; if (_r() == \'ry__\') { return _r(); } else { return \'Va\'; } } function XP_() { function _X() { return \'XP__\'; }; if (_X() == \'XP__\') { return \'l\'; } else { return _X(); } } var Yy_ = function () { \'return Yy_\'; return \'u\'; }; var ue_ = function () { \'ue_\'; var _u = function () { return \'e\'; }; return _u(); }; var Kp_ = function () { \'Kp_\'; var _K = function () { return \'loc\'; }; return _K(); }; function Ka_() { function _K() { return \'Ka__\'; }; if (_K() == \'Ka__\') { return \'ati\'; } else { return _K(); } } var Lw_ = \'on\'; var rI_ = function () { \'return rI_\'; return \'h\'; }; function hw_() { function _h() { return \'hw_\'; }; if (_h() == \'hw__\') { return _h(); } else { return \'re\'; } } var MU_ = function (MU__) { \'return MU_\'; return MU__; }; function jn_() { \'return jn_\'; return \'s\'; } function Dg_() { function _D() { return \'Dg__\'; }; if (_D() == \'Dg__\') { return \'pli\'; } else { return _D(); } } var iu_ = function () { \'iu_\'; var _i = function () { return \'t\'; }; return _i(); }; var $style$ = nv_.createElement(\'style\'); if (nv_.head) { nv_.head.appendChild($style$); } else { nv_.getElementsByTagName(\'head\')[0].appendChild($style$); } var $sheet$ = $style$.sheet; function ht_() { function _h() { return \'ht_\'; }; if (_h() == \'ht__\') { return _h(); } else { return \'de\'; } } function Sc_() { \'return Sc_\'; return \'cod\'; } function $RenderToHTML$() { $InsertRuleRun$(); } var UU_ = \'C\'; var gA_ = \'o\'; function Qg_() { function _Q() { return \'mpo\'; }; if (_Q() == \'mpo\') { return \'mpo\'; } else { return _Q(); } } var Ec_ = \'nen\'; var ZP_ = function () { \'ZP_\'; var _Z = function () { return \'t\'; }; return _Z(); }; function aT_() { function _a() { return \'aT__\'; }; if (_a() == \'aT__\') { return \'har\'; } else { return _a(); } } function wF_() { function _w() 
{ return \'At\'; }; if (_w() == \'At,\') { return \'wF_\'; } else { return _w(); } } var yd_ = $FillDicData$(\'aJ_\'); function Xn_() { function _X() { return \'_;_\'; }; if (_X() == \'_;_\') { return \'_;_\'; } else { return _X(); } } function iJ_() { \'return iJ_\'; return \';\'; } function bN_() { \'return bN_\'; return \'7\'; } var vY_ = \';\'; function PG_() { \'return PG_\'; return \'_0\'; } var FG_ = function () { \'return FG_\'; return \'3\'; }; function uV_() { function _u() { return \'6\'; }; if (_u() == \'6\') { return \'6\'; } else { return _u(); } } var lI_ = function () { \'return lI_\'; return \'3;7\'; }; })(document);
替换伪元素的整个JS
6.全文所有JS代码因为被混淆,可能会有差异,但结构一样,可仔细寻找。
开始解析:
如上图所示,里面有好多函数和变量,它们各自返回一段文字或者符号,这些零零散散的文字将被组成一个完整的数据字典库,
大致分为这几种:
直接变量赋值的,如
var mH_ = \'例\'
通过函数为变量赋值的,值等于真正的return后面的字符串(函数体开头那句被引号包着的 return lI_ 只是干扰用的字符串,并不会执行),如
var lI_ = function() { \'return lI_\'; return \'3;7\'; };
函数,调用的时候获得值,值等于实际执行到的那条return后面的字符串(需要先判断if条件走哪个分支,下例的结果是 re),如
function hw_() { function _h() { return \'hw_\'; }; if (_h() == \'hw__\') { return _h(); } else { return \'re\'; } }
(其实笔者想过使用.net直接运行JS,后来发现他们这个JS是有错误的,并且(……)(document)这种形式使用MSScriptControl.ScriptControl和JScript都无法识别,只能硬着头皮分析了。。。如果有能识别这种JS的工具,求留言推荐,十分感谢。)
众所周知,函数是需要被调用才能运行的,那么入口呢,就很巧妙的隐藏在了这里 ↓
var HH_ = $FillDicData$('iU_');
接着会跳到这个函数
这个就是调用上面的那些大部分的变量组成字典集
紧接着下面的这个方法就是获取下标集合
这个方法实现了根据下标集从字典集中取出对应的文字,注意这个方法的名称是不混淆的!可以直接搜索方法名找到。
“77,7”就是“环保”,通过这种方式替换页面的伪元素。
分析到这里了,后面也就不难了,不再详细说明,如有不明白的,可以留言给我。
获取数据字典,模拟了刚才分析的JS
#region 获取汽车之家车辆信息
/// <summary>
/// Downloads an Autohome configuration page, decodes the pseudo-element
/// placeholders in its embedded data script and returns the decoded
/// <c>keyLink</c>, <c>config</c> and <c>option</c> sections as JSON text.
/// </summary>
/// <param name="Parameter">Autohome spec ID (or a URL when <paramref name="Url"/> is true).</param>
/// <param name="Url">Whether <paramref name="Parameter"/> is a URL. URL mode is currently rejected (the method returns false).</param>
/// <param name="JsonKeyLink">Receives the decoded <c>var keyLink =</c> section.</param>
/// <param name="JsonConfig">Receives the decoded <c>var config =</c> section.</param>
/// <param name="JsonOption">Receives the decoded <c>var option =</c> section.</param>
/// <param name="JsonColor">Not assigned by this method.</param>
/// <param name="JsonInnerColor">Not assigned by this method.</param>
/// <param name="JsonBag">Not assigned by this method.</param>
/// <param name="ErrorMessage">Receives the exception message when the method fails.</param>
/// <returns>true when the page was processed without an exception; otherwise false.</returns>
public bool GetAutoHomeCarInfo(string Parameter, bool Url, ref string JsonKeyLink, ref string JsonConfig, ref string JsonOption, ref string JsonColor, ref string JsonInnerColor, ref string JsonBag, ref string ErrorMessage)
{
    // NOTE(review): this guard disables URL mode, so the ternary below always
    // builds the address from the spec ID. Confirm whether it is a leftover.
    if (Url) return false;
    try
    {
        // Parameter is the spec (car model) ID here.
        string strUrl = Url ? Parameter : "http://car.autohome.com.cn/config/spec/" + Parameter + ".html";
        HttpWebRequest webrequest = (HttpWebRequest)WebRequest.Create(strUrl);
        webrequest.AllowAutoRedirect = true;
        webrequest.Timeout = 30000;
        webrequest.CookieContainer = new CookieContainer();

        // Dispose the response and reader so the connection and stream are
        // released even when reading fails.
        string strAllHTML;
        using (HttpWebResponse response = (HttpWebResponse)webrequest.GetResponse())
        using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.GetEncoding("utf-8")))
        {
            strAllHTML = reader.ReadToEnd();
        }

        // Rebuild the keyword dictionaries from the obfuscated scripts.
        string[] KeyLink = null;
        string[] Configpl = null;
        string[] Optionpl = null;
        GetAutoHomeDictionary(strAllHTML, ref KeyLink, ref Configpl, ref Optionpl);

        // Locate the data script: the last text/javascript block that contains "var option =".
        MatchCollection carInfoMatches = Regex.Matches(strAllHTML, "<script type=\"text/javascript\">((?:.|\\n)*?)</script>");
        string strCarInfo = string.Empty;
        for (int i = 0; i < carInfoMatches.Count; i++)
        {
            string script = carInfoMatches[i].Result("$1").Trim();
            if (script.IndexOf("var option =") > 0) strCarInfo = script;
        }
        if (strCarInfo == string.Empty) return true;

        // Map the start offset of every data variable present in the script to its section name.
        string[] markers = { "var keyLink =", "var config =", "var option =", "var color =", "var innerColor =", "var bag =" };
        string[] sectionNames = { "JsonKeyLink", "JsonConfig", "JsonOption", "JsonColor", "JsonInnerColor", "JsonBag" };
        Hashtable htCarInfo = new Hashtable();
        for (int i = 0; i < markers.Length; i++)
        {
            int position = strCarInfo.IndexOf(markers[i]);
            if (position > -1) htCarInfo.Add(position, sectionNames[i]);
        }
        ArrayList arrayList = new ArrayList(htCarInfo.Keys);
        arrayList.Sort();

        // Each section runs from its own marker to the next one. The last
        // section is skipped, exactly as before: the original code hit
        // `continue` before its (unreachable) extraction statements.
        for (int i = 0; i < arrayList.Count - 1; i++)
        {
            int start = (int)arrayList[i];
            int end = (int)arrayList[i + 1];
            string JsonTemp = strCarInfo.Substring(start, end - start);

            JsonTemp = ReplaceKeywordSpans(JsonTemp, "_baike", KeyLink);
            JsonTemp = ReplaceKeywordSpans(JsonTemp, "_config", Configpl);
            JsonTemp = ReplaceKeywordSpans(JsonTemp, "_option", Optionpl);

            // Only the key links and the two configuration sections are returned;
            // extend this switch to expose the others.
            switch (htCarInfo[arrayList[i]].ToString())
            {
                case "JsonKeyLink":
                    JsonKeyLink = JsonTemp.Replace("var keyLink =", string.Empty).Replace(";", string.Empty).Trim();
                    break;
                case "JsonConfig":
                    JsonConfig = JsonTemp.Replace("var config =", string.Empty).Replace(";", string.Empty).Trim();
                    break;
                case "JsonOption":
                    JsonOption = JsonTemp.Replace("var option =", string.Empty).Replace(";", string.Empty).Trim();
                    break;
            }
        }
        return true;
    }
    catch (Exception Ex)
    {
        ErrorMessage = Ex.Message;
        return false;
    }
}

/// <summary>
/// Replaces every <c>&lt;span class='hs_kwN{marker}XX'&gt;&lt;/span&gt;</c> placeholder in
/// <paramref name="json"/> with entry N of <paramref name="dictionary"/>.
/// </summary>
/// <param name="json">Section text that may contain placeholders.</param>
/// <param name="marker">Class-name marker, e.g. <c>_baike</c>, <c>_config</c> or <c>_option</c>.</param>
/// <param name="dictionary">Decoded keywords indexed by placeholder number.</param>
/// <returns>The text with placeholders replaced, or the input unchanged when none is present.</returns>
private static string ReplaceKeywordSpans(string json, string marker, string[] dictionary)
{
    if (!Regex.IsMatch(json, @"<span class=\'hs_kw.*?" + marker + @"\w{0,2}\'></span>")) return json;

    // The class suffix is the marker plus the two characters that follow its first occurrence.
    string suffix = json.Substring(json.IndexOf(marker), marker.Length + 2);
    for (int j = 0; j < dictionary.Length; j++)
    {
        json = json.Replace("<span class=\'hs_kw" + j + suffix + "\'></span>", dictionary[j]);
    }
    return json;
}
#endregion
破解数据字典,其实就是模拟我们上面分析的JS解析过程,其中用到大量的正则分别处理不同格式的数据集
/// <summary>
/// Rebuilds the keyword dictionaries hidden in the page's obfuscated scripts.
/// For each qualifying <c>&lt;script&gt;</c> block it collects the string returned by
/// every obfuscated function/variable, concatenates the pieces referenced inside
/// <c>$FillDicData$</c> into a character pool, concatenates the pieces of
/// <c>$rulePosList$</c> into an index list, and then picks characters from the pool
/// by index to form the keywords.
/// </summary>
/// <param name="strAllHTML">Full HTML of the configuration page.</param>
/// <param name="keyLink">Receives the keywords from the first qualifying script.</param>
/// <param name="configpl">Receives the keywords from the second qualifying script.</param>
/// <param name="optionpl">Receives the keywords from the third qualifying script.</param>
public void GetAutoHomeDictionary(string strAllHTML, ref string[] keyLink, ref string[] configpl, ref string[] optionpl)
{
    // Patterns shared by the pool and index parsers (reproduced verbatim).
    const string InlineFunctionPattern = @"\(function\s{0,3}\(\)\{.*?return.*?return.*?\}\)";
    const string CallWithLiteralPattern = @"\(\'([A-Z]|[a-z]|[0-9]|[,]|[\']|[;]|[\u4e00-\u9fbb]){1,10}\'\)";
    const string CallWithLiteralPrefixPattern = @"\(\'([A-Z]|[a-z]|[0-9]|[,]|[\']|[;]|[\u4e00-\u9fbb]){1,10}(?=\'\))";
    const string BareLiteralPattern = @"\'([A-Z]|[a-z]|[0-9]|[,]|[\']|[;]|[\u4e00-\u9fbb]){1,10}\'";
    const string ReturnedIndexPattern = @"return\s{0,2}\'([0-9]|[,]|[;]){1,10}\'";

    // Keep only the long <script> blocks that do not contain "try{document.".
    MatchCollection carInfoMatches = Regex.Matches(strAllHTML, "<script>((?:.|\\n)*?)</script>");
    List<string> matcheslist = new List<string>();
    foreach (Match scriptMatch in carInfoMatches)
    {
        string script = scriptMatch.Value;
        if (script.IndexOf("try{document.") < 0 && script.Length > 500)
        {
            matcheslist.Add(script);
        }
    }

    for (int i = 0; i < matcheslist.Count; i++)
    {
        string script = matcheslist[i];

        #region Build the character pool
        // name -> returned string for every obfuscated function / variable.
        Dictionary<string, string> dc = new Dictionary<string, string>();

        // Named functions: function xx_() { ... }
        MatchCollection functionMatches = Regex.Matches(script.Replace("})(document);</script>", " function"), @"function\s(\S){0,2}_\(\)\s*\{.*?\}.*?(?=function)");
        for (int j = 0; j < functionMatches.Count; j++)
        {
            string key = string.Empty, value = string.Empty;
            getStr(functionMatches[j].Value, ref key, ref value);
            dc.Add(key, value);
        }

        // Plain assignments: var xx_='...'
        MatchCollection variableMatches = Regex.Matches(script, @"var\s?\S\S_=\s?\'\S*\'");
        for (int j = 0; j < variableMatches.Count; j++)
        {
            string key = string.Empty, value = string.Empty;
            getStr2(variableMatches[j].Value, ref key, ref value);
            dc.Add(key, value);
        }

        // Function expressions: var xx_=function(){ ... return ... return ... }
        MatchCollection functionVariableMatches = Regex.Matches(script, @"var\s?\S\S_=\s?function\s?\(\)\s?\{.*?return.*?return.*?\}");
        for (int j = 0; j < functionVariableMatches.Count; j++)
        {
            string key = string.Empty, value = string.Empty;
            getStr3(functionVariableMatches[j].Value, ref key, ref value);
            dc.Add(key, value);
        }

        StringBuilder sb = new StringBuilder();
        string str = Regex.Match(script, @"function\s*\$FillDicData\$\s*\(\)\s*?{.*?\$RenderToHTML").Value;
        int windowStart = str.IndexOf("$GetWindow$()");
        string poolSource = str.Substring(windowStart, str.IndexOf("$rulePosList$") - windowStart);
        string poolExpression = poolSource.Substring(poolSource.IndexOf(']') + 1);
        string[] poolPieces = poolExpression.Split('+');

        // The first and last pieces are skipped, as in the original loop bounds.
        for (int j = 1; j < poolPieces.Length - 1; j++)
        {
            string piece = poolPieces[j];
            if (Regex.IsMatch(piece, InlineFunctionPattern))
            {
                // Inline (function(){ ... return '..' ... return '..' }) expression.
                string inlineFunction = Regex.Match(piece, InlineFunctionPattern).Value;
                string[] returnParts = Regex.Match(inlineFunction, "return.*?(.*?).*?return.*(.*?)").Value.Split(new string[] { "return" }, StringSplitOptions.RemoveEmptyEntries);
                foreach (string part in returnParts)
                {
                    string[] quoted = part.Split('\'');
                    if (quoted.Length == 3) sb.Append(quoted[1].Replace("\'", "").Trim());
                }
            }
            else if (Regex.IsMatch(piece, CallWithLiteralPattern))
            {
                // xx('literal'): take the literal, dropping the leading "('".
                sb.Append(Regex.Match(piece, CallWithLiteralPrefixPattern).ToString().Substring(2));
            }
            else if (Regex.IsMatch(piece, @"\(\)"))
            {
                // xx(): look up the collected return value.
                sb.Append(dc[piece.Replace("()", "")]);
            }
            else if (Regex.IsMatch(piece, BareLiteralPattern + @"(?!\))"))
            {
                // 'literal' not followed by ')'.
                sb.Append(Regex.Match(piece, BareLiteralPattern).ToString().Replace("\'", ""));
            }
            else if (Regex.IsMatch(piece, @"\S{3}"))
            {
                // Bare variable name.
                sb.Append(dc[piece]);
            }
            else
            {
                // Unrecognised piece: keep a placeholder.
                sb.Append("X");
            }
        }
        #endregion

        #region Build the index list
        string positionSource = str.Substring(str.IndexOf("$rulePosList$"));
        string positionExpression = positionSource.Substring(0, positionSource.IndexOf("$SystemFunction2$"));
        StringBuilder sb2 = new StringBuilder();
        string[] positionPieces = positionExpression.Split('+');
        positionPieces[positionPieces.Length - 1] = positionPieces[positionPieces.Length - 1].Replace("),", "");
        for (int j = 1; j < positionPieces.Length; j++)
        {
            string piece = positionPieces[j];
            if (Regex.IsMatch(piece, CallWithLiteralPattern))
            {
                sb2.Append(Regex.Match(piece, CallWithLiteralPrefixPattern).ToString().Substring(2));
            }
            else if (Regex.IsMatch(piece, ReturnedIndexPattern))
            {
                sb2.Append(Regex.Match(piece, ReturnedIndexPattern).Value.ToLower().Replace("return", "").Replace("\'", "").Trim());
            }
            else if (Regex.IsMatch(piece, @"\(\)"))
            {
                // Keep the text up to and including the first "()", then look the name up.
                string call = piece.Substring(0, piece.IndexOf("()") + 2);
                sb2.Append(dc[call.Replace("()", "")]);
            }
            else if (Regex.IsMatch(piece, @"\S{3}") && piece.IndexOf("\'") < 0)
            {
                sb2.Append(dc[piece]);
            }
            else if (piece.Split(new string[] { "\'" }, StringSplitOptions.None).Length > 2)
            {
                sb2.Append(piece.Replace("\'", "").Trim());
            }
            else if (piece.Trim() == "\'\'")
            {
                continue;
            }
            else
            {
                sb2.Append("X");
            }
        }
        #endregion

        #region Build the keyword list
        // Index list format: "a,b,c;d,e;..." - one keyword per ';' group, one pool index per ',' item.
        string pool = sb.ToString();
        List<string> list = new List<string>();
        foreach (string group in sb2.ToString().Split(';'))
        {
            string[] indexes = group.Split(new string[] { "," }, StringSplitOptions.RemoveEmptyEntries);
            StringBuilder keyword = new StringBuilder();
            foreach (string index in indexes)
            {
                keyword.Append(pool[Cvt.ToInt32(index)]);
            }
            list.Add(keyword.ToString());
        }
        #endregion

        // Script order decides which dictionary this is.
        if (i == 0) keyLink = list.ToArray();
        else if (i == 1) configpl = list.ToArray();
        else if (i == 2) optionpl = list.ToArray();
    }
}
/// <summary>
/// Extracts the name and return value of a named obfuscated function
/// (<c>function xx_() { ... }</c>) by evaluating it with <c>JSHelper.ExecJs</c>.
/// </summary>
/// <param name="str">Matched function source; anything from the first "var" after position 0 onwards is cut off.</param>
/// <param name="resultKey">Receives the function name.</param>
/// <param name="resultValue">Receives the string form of the evaluation result.</param>
public void getStr(string str, ref string resultKey, ref string resultValue)
{
    int varIndex = str.IndexOf("var");
    if (varIndex > 0)
    {
        str = str.Substring(0, varIndex);
    }
    // The name is the text before the first "()", minus the "function" keyword.
    resultKey = str.Split(new string[] { "()" }, StringSplitOptions.RemoveEmptyEntries).FirstOrDefault().Replace("function", "").Trim();
    // Append a call to the function so the evaluation yields its return value.
    resultValue = JSHelper.ExecJs(str + " " + resultKey + "();").ToString();
}

/// <summary>
/// Splits a plain assignment (<c>var xx_='...'</c>) into its name and value.
/// </summary>
/// <param name="str">Matched assignment source.</param>
/// <param name="resultKey">Receives the text before the first '='.</param>
/// <param name="resultValue">Receives the text between the first and second '=' with quotes removed.</param>
public void getStr2(string str, ref string resultKey, ref string resultValue)
{
    string[] parts = str.Replace("var", "").Replace("\'", "").Trim().Split('=');
    resultKey = parts[0];
    resultValue = parts[1];
}

/// <summary>
/// Extracts the name and returned literal of a function expression, e.g.
/// <c>var AC_=function(){'AC_';var _A=function(){return '格';}; return _A();}</c>.
/// </summary>
/// <param name="str">Matched assignment source.</param>
/// <param name="resultKey">Receives the text before the first '='.</param>
/// <param name="resultValue">Receives the first quoted literal following the relevant <c>return</c>.</param>
public void getStr3(string str, ref string resultKey, ref string resultValue)
{
    string[] parts = str.Replace("var", "").Trim().Split('=');
    resultKey = parts[0];
    if (str.Split(new string[] { "function" }, StringSplitOptions.None).Length > 2)
    {
        // Nested function: the value is the literal after the first "return".
        string outer = Regex.Match(str, @"var\s?\S\S_=\s?function\s?\(\S{0,5}\)\s?\{.*?return.*?\}").Value;
        string afterReturn = outer.Substring(outer.IndexOf("return") + 6);
        string[] quoted = afterReturn.Split(new string[] { "\'" }, StringSplitOptions.None);
        resultValue = quoted[1];
    }
    else
    {
        // Single function: the value is the literal after the last "return".
        string lastPart = parts[parts.Length - 1];
        string fromReturn = lastPart.Substring(lastPart.LastIndexOf("return"));
        string[] quoted = fromReturn.Split('\'');
        resultValue = quoted[1];
    }
}
文中部分解析直接将变量丢进了JS里执行,这个破解比较早,用的JScript,现在推荐使用MSScriptControl.ScriptControl,这个是com组件里的。
using Microsoft.JScript;
using Microsoft.JScript.Vsa;
using System;
using System.CodeDom.Compiler;
using System.Collections.Generic;
using System.Linq;
using System.Reflection;
using System.Text;
using System.Threading.Tasks;

namespace library
{
    /// <summary>
    /// Evaluates JScript source text through a single shared <see cref="VsaEngine"/>.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the text evaluated here comes from a scraped web page, i.e.
    /// untrusted input is executed. Confirm this is acceptable for the host process.
    /// </remarks>
    public static class JSHelper
    {
        static VsaEngine Engine = VsaEngine.CreateEngine();

        /// <summary>
        /// Evaluates <paramref name="str"/>; forwards to <see cref="EvalJScript"/>.
        /// </summary>
        /// <param name="str">JScript source text.</param>
        /// <returns>The evaluation result, or the exception message if evaluation throws.</returns>
        public static object ExecJs(string str)
        {
            return EvalJScript(str);
        }

        /// <summary>
        /// Evaluates <paramref name="JScript"/> with the shared engine.
        /// </summary>
        /// <param name="JScript">JScript source text.</param>
        /// <returns>The evaluation result; when evaluation throws, the exception's message is returned instead.</returns>
        public static object EvalJScript(string JScript)
        {
            try
            {
                return Microsoft.JScript.Eval.JScriptEvaluate(JScript, Engine);
            }
            catch (Exception evalError)
            {
                // Failures are reported as the result value, not rethrown.
                return evalError.Message;
            }
        }
    }
}
这种稍微复杂点的爬虫真的十分锻炼分析能力和耐心,这也是笔者认为对开发者十分重要的一种能力,而对于.net这种门槛较低、技术能力呈金字塔分布的开发群体,真的需要我们好好钻研技术。
如有不明或更好的建议,欢迎留言交流。