Typecho 反垃圾评论原理和 Python 模拟
最近在看 Typecho 的时候,在页面中发现了这样一段奇怪的代码:
1var _FKbCJ = //'l'
2'l'+//'h'
3'df'+'f'//'pga'
4+'6a4'//'ZR'
5+'ff0'//'fwM'
6+'b21'//'exn'
7+'fe'//'vl'
8+//'K'
9'1'+''///*'7d1'*/'7d1'
10+'4a'//'XI'
11+//'erP'
12'dbe'+//'9'
13'f'+'ab'//'3U'
14+'6'//'e'
15+''///*'hjW'*/'hjW'
16+//'Jb'
17'f'+//'0UM'
18'5'+//'MNp'
19'MNp'+''///*'Zd'*/'Zd'
20+//'gt'
21'gt'+'uOV'//'uOV'
22+//'1uK'
23'76'+'8f9'//'K'
24+''///*'o'*/'o'
25+//'5'
26'5'+/* 'db1'//'db1' */''+'e'//'CN'
27, _cYDj = [[0,1],[26,29],[26,28],[26,29],[31,32]];
28
29 for (var i = 0; i < _cYDj.length; i ++) {
30 _FKbCJ = _FKbCJ.substring(0, _cYDj[i][0]) + _FKbCJ.substring(_cYDj[i][1]);
31 }
32
33 return _FKbCJ;
猜测这段代码和反垃圾评论有关。因为机器人提交评论时,一般都是直接 POST 评论数据;如果在评论之前需要先运行一段 js,然后带上这段 js 生成的值再 POST,就能挡住一大批低级的机器人了。现在很多地方都用到了这种做法,比如一些云 WAF 在遇到可疑请求的时候也是返回一段需要运行的 js,更高级一点的还可以检测浏览器环境、鼠标手势等等。相对于验证码,这种方式对真实用户更友好一些。
看 Typecho 的源码也确实是这样的。
我用 Python 写了一个类似的实现,主要是通过各种注释和换行来混淆 js。虽然不运行 js、直接进行字符串分析肯定也能得到结果,但是相比直接 POST 数据,难度大大增加了,而且我们可以随时更换混淆规则,这样我们的目的也就达到了。
1# coding=utf-8
2import time
3import random
4import hashlib
5import json
6
7
def split_str(string):
    """Split ``string`` into consecutive chunks of random length.

    Each step draws a random chunk length between 0 and 3 (inclusive), so
    empty chunks can occur. Joining the returned chunks in order always
    reproduces ``string`` exactly.

    :param string: the text to split.
    :return: a list of substrings; it always holds at least one element
        (``['']`` for an empty input).
    """
    result = []
    length = len(string)
    start = 0
    while True:
        r = random.randint(0, 3)
        if start + r < length:
            result.append(string[start:start + r])
            start += r
        else:
            # The drawn step reaches or passes the end: emit whatever is
            # left as the final chunk and stop.
            result.append(string[start:])
            break
    return result
21
22
def rand_str(length=32):
    """Return the first ``length`` characters of a random MD5 hex digest.

    The digest is computed over the current timestamp concatenated with a
    random integer, so the value is hard to guess casually but is NOT
    cryptographically secure.

    :param length: number of characters to keep. An MD5 hex digest has 32
        characters, so larger values still yield at most 32.
    :return: a lowercase hexadecimal string.
    """
    seed = str(time.time()) + str(random.randrange(1, 9999999900))
    # hashlib only accepts bytes on Python 3; encoding the ASCII-only seed
    # leaves the Python 2 behaviour unchanged.
    digest = hashlib.md5(seed.encode("utf-8")).hexdigest()
    return digest[0:length]
26
27
def confuse_string():
    """Generate an obfuscated JavaScript challenge and its expected answer.

    The generated source defines ``function check()``. It first rebuilds a
    random 32-character hex string from quoted fragments interleaved with
    decoy ``//`` and ``/* */`` comments, then returns a concatenation of
    prefixes of that string (``substr(0, n)``) and numeric string literals.

    :return: a ``(js_source, expected)`` tuple, where ``expected`` is the
        string that ``check()`` returns when ``js_source`` is executed.
    """
    s = rand_str()
    str_list = split_str(s)
    str_len = len(s)
    js_var1 = "v" + rand_str(3)

    # Collect the fragments in a list and join once at the end instead of
    # repeatedly growing a string.
    js_parts = ["function check(){var " + js_var1 + " = "]

    # Part 1: the secret, emitted as 'frag'+ pieces separated by junk comments.
    for item in str_list:
        js_parts.append("'" + item + "'+" + random.randint(0, 2) * ' ')
        if random.randint(0, 3):
            # Line comment (may itself contain a fake "/*" or "*/"); the
            # newline ends it so the next fragment is live code again.
            js_parts.append("//" + random.randint(0, 1) * "/*" +
                            rand_str(random.randint(1, 5)) +
                            random.randint(0, 1) * "*/" + "\n")
        else:
            # Block comment, optionally starting with fake "/" characters.
            js_parts.append("/*" + "/" * random.randint(0, 2) +
                            rand_str(random.randint(2, 4)) + "*/")

    # Part 2: an array of random offsets; in the JS only its length is used
    # (as the loop bound), the offsets themselves are inlined below.
    js_var2 = "l" + rand_str(3)
    js_parts.append("'';var " + js_var2 + " = ")
    offsets = []
    for _ in range(random.randint(10, 25)):
        offsets.append(random.randint(0, str_len))
    js_parts.append(json.dumps(offsets) + ";" + "\n")

    # Part 3: the loop body that assembles the answer. Every term is either
    # a prefix of the secret or a decoy number rendered as a string literal;
    # the same term is appended to `result` so both sides stay in sync.
    js_var3 = "r" + rand_str(3)
    js_parts.append("for(var i = 0;i < " + js_var2 + ".length;i++){var " +
                    js_var3 + "= ")
    result_parts = []
    for offset in offsets:
        if random.randint(0, 1):
            result_parts.append(s[0:offset])
            js_parts.append(js_var1 + "." + "/*" + random.randint(0, 2) * "/" +
                            rand_str(random.randint(3, 5)) + "*/" +
                            "substr(0, " + str(offset) + ") +" +
                            random.randint(0, 2) * " ")
        else:
            r = random.randint(-100, 30)
            result_parts.append(str(offset + r))
            js_parts.append("'" + str(offset + r) + "' +" +
                            random.randint(0, 3) * " ")
        # Random line breaks between terms, after either kind of term.
        if random.randint(0, 1):
            js_parts.append("\n")

    js_parts.append("'';}return " + js_var3 + ";}")
    return "".join(js_parts), "".join(result_parts)
64
65
# Generate one challenge and print the JS source followed by the expected answer.
s = confuse_string()
# A single pre-formatted string prints identically on Python 2 and Python 3
# (the original `print a, b` statement is a syntax error on Python 3).
print("%s %s" % (s[0], s[1]))
运行后会生成这样的一段 js:
// Sample generated challenge. check() concatenates the quoted fragments into
// v1fa (every `//...` and `/*...*/` run between them is a decoy comment), then
// builds r45a from string literals and v1fa.substr(0, n) prefixes and returns
// it. The lfdd array only supplies the loop count; each iteration recomputes
// the same r45a value.
1function check(){var v1fa = '44f'+ ///*7661*/
2'2d6'+ ///*4f2
3'02'+ //07
4'90f'+ /*/00*/'f4'+ ///*7
5'22'+/*8c8b*/'c2e'+ ///*c*/
6'b'+ //77f4
7'd'+ ///*2*/
8'b'+/*/4360*/'d'+ ///*72*/
9'7'+ //d0*/
10''+///*b*/
11'be'+ //dd*/
12'7b'+ /*deea*/'82'+ ///*c3eaf*/
13'9b'+//7*/
14''+//142e*/
15'd'+ ///*ba*/
16'';var lfdd = [10, 4, 11, 24, 28, 5, 7, 21, 32, 2, 28, 22, 32, 18, 1, 6, 11, 4, 19];
17for(var i = 0;i < lfdd.length;i++){var r45a= '0' +'-39' +
18v1fa./*fc6*/substr(0, 11) + '-36' + '24' +
19'-23' + '-34' +
20v1fa./*//eada*/substr(0, 21) +'37' + '29' +
21'-60' +
22v1fa./*/d71b*/substr(0, 22) +
23v1fa./*//dd759*/substr(0, 32) + v1fa./*/67ec*/substr(0, 18) + '-30' +
24v1fa./*a98e*/substr(0, 6) + '-77' +
25v1fa./*5de*/substr(0, 4) +
26v1fa./*//bf2d*/substr(0, 19) +
27'';}return r45a;}
js 的运行结果是 `0-3944f2d60290f-3624-23-3444f2d60290ff422c2ebdb3729-6044f2d60290ff422c2ebdbd44f2d60290ff422c2ebdbd7be7b829bd44f2d60290ff422c2e-3044f2d6-7744f244f2d60290ff422c2eb`,而这个结果在生成 js 的时候就已经确定了,只要拿到评论数据后和 session 中保存的值比较一下就好了~