在现在各个网站使用的反爬措施中,使用 JavaScript 加密算是很常用的了,通常会使用 JavaScript 加密某个参数,例如 token 或者 sign。在这次的例子中,就采取了这种措施来反爬,使用 JavaScript 加密了一个参数 antitoken,而本篇博客要写的就是如何应对和解决的。

 

  本次爬取的站点链接为:https://www.ly.com/hotel/beijing53/?spm0=10002.2001.1.0.1.4.17

  等页面加载完毕后打开开发者工具,切换到 XHR 选项,然后找到如下请求:

  

  注意到参数中有一个 antitoken,这是一个加密后的字符串,那要怎么得到这个加密参数 antitoken 呢?

 

  在开发者工具中全局搜索 antitoken,找到名为 list-newest.js 的 JS 文件,切换到 Sources 页面,找到这个 JS 文件并打开,点击左下角的 “{}” 进行格式化便于我们进行查阅,如下图:

  

  在这个 JS 文件中搜索 antitoken,通过查找可以定位到一个获取 antitoken 的方法,具体代码如下:

// Site code (list-newest.js) that produces the antitoken request parameter.
// Reads the "wangba" cookie; when it is missing or empty, the current
// millisecond timestamp (as a string) is used instead and written back to the
// cookie for path "/" on domain "ly.com". The value is then passed to
// r["default"], which is defined outside this excerpt.
e.getantitoken = function() {
    var t = $.cookie("wangba");
    t && void 0 !== t || (t = (new Date).getTime().toString(),
    $.cookie("wangba", t, {
        path: "/",
        domain: "ly.com"
    }));
    // (0, fn)(t) calls r["default"] without binding `this` to r.
    return (0,
    r["default"])(t)
}
;

  可以看到先是要从 Cookie 中获取一个名为 wangba 字段的值,wangba ?网吧?谁知道呢。如果 wangba 为空,则重新创建一个,而创建的其实就是一个十三位的时间戳。

var t = $.cookie("wangba");
// Excerpt: when the cookie is missing or empty, fall back to the current
// millisecond timestamp converted to a string.
t && void 0 !== t || (t = (new Date).getTime().toString());

  在 return 那一行打上断点,然后刷新页面进行调试,跳转到 return 返回的方法,如下图:

  

  为了知道 antitoken 是怎么生成的,我们需要知道这个函数里各个参数 n,i,o,r 的含义,所以又得继续打断点进行调试了。

  首先是 n,通过代码知道 n = a(30),打断点后找到 n 参数对应的代码如下:

  // Bit/byte conversion helpers that the hashing routine uses as `n`.
  // NOTE(review): `a`, used by the two Base64 helpers, is defined outside this
  // excerpt; presumably it is the Base64 alphabet string - confirm against
  // list-newest.js before calling bytesToBase64 / base64ToBytes.
  var n = {
      // Rotates the 32-bit value t left by e bits.
      rotl: function(t, e) {
          return t << e | t >>> 32 - e
      },
      // Rotates the 32-bit value t right by e bits.
      rotr: function(t, e) {
          return t << 32 - e | t >>> e
      },
      // Reverses the byte order of a 32-bit number; for an array, swaps every
      // element in place and returns the same array.
      endian: function(t) {
          if (t.constructor == Number)
              return 16711935 & n.rotl(t, 8) | 4278255360 & n.rotl(t, 24);
          for (var e = 0; e < t.length; e++)
              t[e] = n.endian(t[e]);
          return t
      },
      // Returns an array of t pseudo-random integers in the range 0-255.
      randomBytes: function(t) {
          for (var e = []; t > 0; t--)
              e.push(Math.floor(256 * Math.random()));
          return e
      },
      // Packs bytes into 32-bit words, first byte in the most significant position.
      bytesToWords: function(t) {
          for (var e = [], a = 0, n = 0; a < t.length; a++,
          n += 8)
              e[n >>> 5] |= t[a] << 24 - n % 32;
          return e
      },
      // Unpacks 32-bit words into bytes, most significant byte first.
      wordsToBytes: function(t) {
          for (var e = [], a = 0; a < 32 * t.length; a += 8)
              e.push(t[a >>> 5] >>> 24 - a % 32 & 255);
          return e
      },
      // Converts bytes to a lowercase hex string, two digits per byte.
      bytesToHex: function(t) {
          for (var e = [], a = 0; a < t.length; a++)
              e.push((t[a] >>> 4).toString(16)),
              e.push((15 & t[a]).toString(16));
          return e.join("")
      },
      // Parses a hex string into bytes, two characters per byte.
      hexToBytes: function(t) {
          for (var e = [], a = 0; a < t.length; a += 2)
              e.push(parseInt(t.substr(a, 2), 16));
          return e
      },
      // Encodes bytes as Base64 text using the external lookup `a`.
      bytesToBase64: function(t) {
          for (var e = [], n = 0; n < t.length; n += 3)
              for (var i = t[n] << 16 | t[n + 1] << 8 | t[n + 2], r = 0; r < 4; r++)
                  8 * n + 6 * r <= 8 * t.length ? e.push(a.charAt(i >>> 6 * (3 - r) & 63)) : e.push("=");
          return e.join("")
      },
      // Decodes Base64 text into bytes using the external lookup `a`;
      // characters outside A-Z, a-z, 0-9, "+" and "/" are stripped first.
      base64ToBytes: function(t) {
          t = t.replace(/[^A-Z0-9+\/]/gi, "");
          for (var e = [], n = 0, i = 0; n < t.length; i = ++n % 4)
              0 != i && e.push((a.indexOf(t.charAt(n - 1)) & Math.pow(2, -2 * i + 8) - 1) << 2 * i | a.indexOf(t.charAt(n)) >>> 6 - 2 * i);
          return e
      }
  };

View Code

  然后是 i,通过代码知道 i = a(12).utf8,打断点后找到 i 参数对应的代码如下:

  // UTF-8 string <-> byte-array helpers (the `utf8` member of module 12).
  // NOTE(review): `a` is bound outside this excerpt; presumably it is the module
  // object whose `bin` helpers do the per-character conversion - confirm.
  var utf8 = {
      // Returns the UTF-8 bytes of t: encodeURIComponent + unescape turn the text
      // into a string with one character per byte, which a.bin then converts.
      stringToBytes: function(t) {
          return a.bin.stringToBytes(unescape(encodeURIComponent(t)))
      },
      // Inverse of stringToBytes: decodes UTF-8 bytes back into a string.
      bytesToString: function(t) {
          return decodeURIComponent(escape(a.bin.bytesToString(t)))
      }
  };

  然后是 s,通过代码知道 s = a(12).bin,打断点后找到 s 参数对应的代码如下:

  // Raw ("binary") string <-> byte-array helpers (the `bin` member of module 12).
  var bin = {
      // Returns one byte per character: the low 8 bits of each char code.
      stringToBytes: function (t) {
          for (var e = [], a = 0; a < t.length; a++)
              e.push(255 & t.charCodeAt(a));
          return e
      },
      // Builds a string with one character per byte value.
      bytesToString: function (t) {
          for (var e = [], a = 0; a < t.length; a++)
              e.push(String.fromCharCode(t[a]));
          return e.join("")
      }
  };

  这里可以定义一个 a12,然后从其中取出相应的方法就行了。

  // Stand-in for the site's module 12: string <-> byte-array helpers.
  var a12 = {
      // UTF-8 aware conversions, built on top of a12.bin.
      utf8: {
          // Returns the UTF-8 bytes of e.
          stringToBytes: function (e) {
              return a12.bin.stringToBytes(unescape(encodeURIComponent(e)))
          },
          // Decodes UTF-8 bytes back into a string. This must go through
          // a12.bin: no `a` exists once the code is lifted out of the bundle.
          bytesToString: function (e) {
              return decodeURIComponent(escape(a12.bin.bytesToString(e)))
          }
      },
      // Raw conversions: one character per byte.
      bin: {
          // Returns the low 8 bits of every char code in e.
          stringToBytes: function (e) {
              for (var t = [], a = 0; a < e.length; a++)
                  t.push(255 & e.charCodeAt(a));
              return t
          },
          // Builds a string with one character per byte value.
          bytesToString: function (e) {
              for (var t = [], a = 0; a < e.length; a++)
                  t.push(String.fromCharCode(e[a]));
              return t.join("")
          }
      }
  };

View Code

  最后还剩一个 o 参数,通过断点调试可以定位到如下代码:

  

 

 

  可见这个参数 o 赋值为 null 就够了。到这里为止就已经得到加密方法里的各个参数了,接下来要说的就是如何实现加密得到 antitoken。

  要实现加密方法,还需要知道一点,就是加密时传入了两个参数,一个是十三位时间戳,另一个是空值,通过调试可知,截图如下:

  

   将前面的各个参数和方法进行整理,得到如下 JavaScript 代码:

  /**
   * Computes the `antitoken` request parameter for the given value.
   *
   * The routine lifted from list-newest.js is the MD5 algorithm (RFC 1321);
   * the result is the digest as 32 lowercase hex characters.
   *
   * @param {string|number|Array<number>|Uint8Array} e Value to hash. The site
   *   passes the 13-digit millisecond timestamp as a string. Strings are hashed
   *   as UTF-8, byte arrays / typed arrays as-is, and anything else (for
   *   example a numeric timestamp) is converted with String() first.
   * @returns {string} Lowercase hexadecimal MD5 digest of `e`.
   * @throws {TypeError} If `e` is null or undefined.
   */
  function antitoken(e) {
    if (e === null || e === undefined) {
      throw new TypeError("antitoken: illegal argument " + e);
    }

    // String <-> byte helpers (module 12 of the site bundle).
    var a12 = {
      utf8: {
        stringToBytes: function (str) {
          return a12.bin.stringToBytes(unescape(encodeURIComponent(str)));
        },
        bytesToString: function (bytes) {
          return decodeURIComponent(escape(a12.bin.bytesToString(bytes)));
        }
      },
      bin: {
        stringToBytes: function (str) {
          var bytes = [];
          for (var k = 0; k < str.length; k++) {
            bytes.push(255 & str.charCodeAt(k));
          }
          return bytes;
        },
        bytesToString: function (bytes) {
          var chars = [];
          for (var k = 0; k < bytes.length; k++) {
            chars.push(String.fromCharCode(bytes[k]));
          }
          return chars.join("");
        }
      }
    };

    // Bit/byte helpers (the `n` object of the bundle). Only the members the hash
    // needs are kept; the Base64 helpers of the original depended on a lookup
    // string that does not exist outside the bundle.
    var n = {
      rotl: function (word, bits) {
        return word << bits | word >>> 32 - bits;
      },
      // Reverses the byte order of one 32-bit word, or of every word of an
      // array (in place).
      endian: function (value) {
        if (typeof value === "number") {
          return 0x00ff00ff & n.rotl(value, 8) | 0xff00ff00 & n.rotl(value, 24);
        }
        for (var k = 0; k < value.length; k++) {
          value[k] = n.endian(value[k]);
        }
        return value;
      },
      bytesToWords: function (bytes) {
        var words = [];
        for (var k = 0, bit = 0; k < bytes.length; k++, bit += 8) {
          words[bit >>> 5] |= bytes[k] << 24 - bit % 32;
        }
        return words;
      },
      wordsToBytes: function (words) {
        var bytes = [];
        for (var bit = 0; bit < 32 * words.length; bit += 8) {
          bytes.push(words[bit >>> 5] >>> 24 - bit % 32 & 255);
        }
        return bytes;
      },
      bytesToHex: function (bytes) {
        var hex = [];
        for (var k = 0; k < bytes.length; k++) {
          hex.push((bytes[k] >>> 4).toString(16));
          hex.push((15 & bytes[k]).toString(16));
        }
        return hex.join("");
      }
    };

    // The original called a helper `o` for non-string input; with `o = null`
    // that call threw. A direct typed-array check (which also covers Node
    // Buffers) takes its place.
    var isBuffer = function (value) {
      return typeof Uint8Array !== "undefined" && value instanceof Uint8Array;
    };

    // Per-step additive constants, defined by RFC 1321 as
    // floor(abs(sin(i + 1)) * 2^32); they are the unsigned forms of the literals
    // hard-coded in the bundle.
    var SINE = [];
    for (var step = 0; step < 64; step++) {
      SINE[step] = Math.floor(Math.abs(Math.sin(step + 1)) * 4294967296);
    }
    // Left-rotation amounts: one row per round, cycled every four steps.
    var SHIFTS = [
      [7, 12, 17, 22],
      [5, 9, 14, 20],
      [4, 11, 16, 23],
      [6, 10, 15, 21]
    ];

    // MD5 core: returns the four 32-bit state words in digest byte order.
    var r = function (message) {
      var bytes;
      if (typeof message === "string") {
        bytes = a12.utf8.stringToBytes(message);
      } else if (isBuffer(message)) {
        bytes = Array.prototype.slice.call(message, 0);
      } else if (Array.isArray(message)) {
        bytes = message;
      } else {
        bytes = a12.utf8.stringToBytes(String(message));
      }

      // MD5 reads the message as little-endian words.
      var words = n.endian(n.bytesToWords(bytes));
      var bitLength = 8 * bytes.length;

      // Padding: a single 1 bit after the message, then the bit length in the
      // 15th word of the final 16-word block.
      words[bitLength >>> 5] |= 128 << bitLength % 32;
      words[14 + (bitLength + 64 >>> 9 << 4)] = bitLength;

      var roundFns = [r._ff, r._gg, r._hh, r._ii];
      var A = 1732584193;
      var B = -271733879;
      var C = -1732584194;
      var D = 271733878;

      for (var base = 0; base < words.length; base += 16) {
        var savedA = A;
        var savedB = B;
        var savedC = C;
        var savedD = D;

        for (var i = 0; i < 64; i++) {
          var round = i >>> 4;
          // Which message word each step consumes (RFC 1321, section 3.4).
          var index;
          if (round === 0) {
            index = i;
          } else if (round === 1) {
            index = (5 * i + 1) % 16;
          } else if (round === 2) {
            index = (3 * i + 5) % 16;
          } else {
            index = (7 * i) % 16;
          }
          var next = roundFns[round](A, B, C, D, words[base + index], SHIFTS[round][i % 4], SINE[i]);
          A = D;
          D = C;
          C = B;
          B = next;
        }

        A = A + savedA >>> 0;
        B = B + savedB >>> 0;
        C = C + savedC >>> 0;
        D = D + savedD >>> 0;
      }
      return n.endian([A, B, C, D]);
    };

    // Round functions. `x >>> 0` turns a missing (padding) word into 0.
    r._ff = function (a, b, c, d, x, s, t) {
      var sum = a + (b & c | ~b & d) + (x >>> 0) + t;
      return (sum << s | sum >>> 32 - s) + b | 0;
    };
    r._gg = function (a, b, c, d, x, s, t) {
      var sum = a + (b & d | c & ~d) + (x >>> 0) + t;
      return (sum << s | sum >>> 32 - s) + b | 0;
    };
    r._hh = function (a, b, c, d, x, s, t) {
      var sum = a + (b ^ c ^ d) + (x >>> 0) + t;
      return (sum << s | sum >>> 32 - s) + b | 0;
    };
    r._ii = function (a, b, c, d, x, s, t) {
      var sum = a + (c ^ (b | ~d)) + (x >>> 0) + t;
      return (sum << s | sum >>> 32 - s) + b | 0;
    };
    r._blocksize = 16;
    r._digestsize = 16;

    return n.bytesToHex(n.wordsToBytes(r(e)));
  }

  这就是使用 JavaScript 实现的加密方法了,传入的参数 e 是一个十三位时间戳,之后无论使用 JS 还是 Python 进行调用都可以了,这里可以进行一下验证。

  首先是开发者工具里的截图:

  

  然后是代码的运行结果:

  

版权声明:本文为TM0831原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。
本文链接:https://www.cnblogs.com/TM0831/p/12642007.html