最近的答題贏錢很火爆,我也參與了幾次,有些題目確實很難答,但是10秒鐘的時間根本不夠百度的,所以寫了個輔助掛,這樣可以出現題目時自動百度,這個時間也就花掉2秒鐘,剩下的7、8秒鐘可以進行分析和作答,提升了贏錢概率。
源碼可以見我的github:點擊鏈接
原理分析:使用adb命令抓取手機視頻播放界面的截圖,然後通過python裁剪圖片並進行OCR識別,得到題目和答案,再用百度檢索得到結果。這個環境怎麼搭建,有需要的童鞋可以聯繫我;因為使用的是本地的OCR,所以解析不花錢,也沒有使用次數的限制。
github上的代碼中
ocr_bw.py,這個是自動根據題目去百度,然后打開瀏覽器,展示檢索結果
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
|
# -*- coding: utf-8 -*-
"""Quiz helper: OCR the question from a phone screenshot and search Baidu.

Each round waits for the user to press Enter, grabs a screenshot from the
phone over ADB, crops the question area, runs Tesseract OCR on it and opens
the Baidu result page for the recognised question in the web browser.
"""
import subprocess
import time
import webbrowser
from urllib.parse import quote

# pytesseract and Pillow are imported inside the functions that use them, so
# the prompt/text helpers stay importable when those packages are missing.

# Location of the Tesseract executable handed to pytesseract.
TESSERACT_CMD = 'E:/Program Files (x86)/Tesseract-OCR/tesseract'
# File the raw screenshot is written to before it is opened as an image.
SCREENSHOT_PATH = 'autojump.png'
# Crop box passed to Image.crop for the question text
# (rows 260 to 570 counted from the top of the screenshot).
QUESTION_BOX = (70, 260, 1025, 570)
# Argument list (no shell involved) that makes adb print a PNG to stdout.
ADB_SCREENCAP = ['adb', 'shell', 'screencap', '-p']
BAIDU_SEARCH_URL = 'https://www.baidu.com/s?wd={}'


def main():
    """Ask the user to confirm the setup, then start the recognition loop."""
    op = yes_or_no('請確保手機打開了 ADB 并連接了電腦,'
                   '然后打開西瓜視頻后再用本程序,確定開始?')
    if not op:
        print('bye')
        return
    # Core loop: one capture/recognise/search round per Enter key press.
    # (For offline debugging, ocr_subject() can be fed saved screenshots
    # opened with PIL.Image.open instead.)
    ocr_subject_parent()


def yes_or_no(prompt, true_value='y', false_value='n', default=True):
    """Ask a yes/no question on the console and return the answer as a bool.

    An empty first answer returns ``default``. Any answer other than
    ``true_value`` or ``false_value`` triggers a re-prompt until one of the
    two is entered.
    """
    default_value = true_value if default else false_value
    prompt = '{} {}/{} [{}]: '.format(
        prompt, true_value, false_value, default_value)
    i = input(prompt)
    if not i:
        return default
    while True:
        if i == true_value:
            return True
        if i == false_value:
            return False
        prompt = 'Please input {} or {}: '.format(true_value, false_value)
        i = input(prompt)


def screenImg(true_value='', default=True):
    """Wait for the user to signal that a question is on screen.

    Returns ``default`` when the user just presses Enter; otherwise returns
    whether the typed text equals ``true_value``.
    """
    prompt = '當出現題目時,請按下回車進行識別 '
    i = input(prompt)
    if not i:
        return default
    return i == true_value


def extract_question(text):
    """Return the question text from raw OCR output.

    All whitespace is removed, then everything up to and including the first
    '.' (the question number prefix) is dropped. Text without a '.' is
    returned whole instead of raising, and later periods inside the question
    are preserved.
    """
    compact = ''.join(text.split())
    _number, dot, question = compact.partition('.')
    return question if dot else compact


def ocr_text(image):
    """Run Tesseract (Simplified Chinese model) on ``image``; return its text."""
    import pytesseract

    pytesseract.pytesseract.tesseract_cmd = TESSERACT_CMD
    return pytesseract.image_to_string(image, lang='chi_sim')


def ocr_subject(p):
    """Recognise the question in screenshot ``p`` and open a search for it."""
    subject = extract_question(ocr_text(cut_img(p)))
    if not subject:
        # Nothing recognised: an empty search page would not help the user.
        print('No question text recognised; nothing to search for.')
        return
    print(subject)
    openPage(subject)


def capture_screenshot():
    """Grab a screenshot over ADB, save it to SCREENSHOT_PATH and open it.

    Raises:
        RuntimeError: if adb cannot be started, exits with a non-zero status
            or produces no output.
    """
    from PIL import Image

    try:
        completed = subprocess.run(ADB_SCREENCAP, stdout=subprocess.PIPE)
    except OSError as exc:
        raise RuntimeError('Could not run adb: {}'.format(exc)) from exc
    if completed.returncode != 0 or not completed.stdout:
        raise RuntimeError(
            'adb screencap failed (exit code {})'.format(completed.returncode))
    # Presumably undoes LF -> CRLF translation applied to adb shell output,
    # which would corrupt the PNG -- confirm for the adb version in use.
    binary_screenshot = completed.stdout.replace(b'\r\n', b'\n')
    with open(SCREENSHOT_PATH, 'wb') as f:
        f.write(binary_screenshot)
    return Image.open(SCREENSHOT_PATH)


def ocr_subject_parent():
    """Run capture/recognise/search rounds until the user declines.

    Implemented as a loop (not mutual recursion with ocr_subject) so a long
    session cannot hit the interpreter's recursion limit.
    """
    while screenImg():
        start = time.time()
        try:
            img = capture_screenshot()
        except RuntimeError as exc:
            # Report the failure and let the user try again on the next round.
            print(exc)
            continue
        print("耗時:" + str(time.time() - start))
        ocr_subject(img)


def search_url(subject):
    """Return the Baidu search URL for ``subject`` with the query escaped."""
    return BAIDU_SEARCH_URL.format(quote(subject))


def openPage(subject):
    """Open the Baidu search results for ``subject`` in the web browser."""
    webbrowser.open(search_url(subject))


def cut_img(img):
    """Return the part of the screenshot that contains the question text."""
    return img.crop(QUESTION_BOX)


if __name__ == '__main__':
    main()
ocr_bw2.py,這個是根據題目+答案,去百度檢索,通過爬蟲抓取百度的收錄數,然后在控制臺打印結果
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
|
# -*- coding:utf-8 -*-
"""Quiz helper: OCR question and answers, then compare Baidu result counts.

Each round waits for the user to press Enter, grabs a screenshot from the
phone over ADB, recognises the question and the three answer options with
Tesseract, searches Baidu for "question + answer" for every option and
prints each option together with the reported number of results.
"""
import re
import subprocess
import threading
import time
import urllib
import urllib.request
import webbrowser
from urllib.parse import quote

__author__ = 'zjy'

# pytesseract and Pillow are imported inside the functions that use them, so
# the prompt/text helpers stay importable when those packages are missing.

# Location of the Tesseract executable handed to pytesseract.
TESSERACT_CMD = 'E:/Program Files (x86)/Tesseract-OCR/tesseract'
# File the raw screenshot is written to before it is opened as an image.
SCREENSHOT_PATH = 'autojump.png'
# Crop box passed to Image.crop for the question text
# (rows 260 to 570 counted from the top of the screenshot).
QUESTION_BOX = (70, 260, 1025, 570)
# Crop boxes for the three answer options, top to bottom.
ANSWER_BOXES = (
    (70, 590, 1025, 768),
    (70, 769, 1025, 947),
    (70, 948, 1025, 1130),
)
# Argument list (no shell involved) that makes adb print a PNG to stdout.
ADB_SCREENCAP = ['adb', 'shell', 'screencap', '-p']
# Text that precedes the result count in the search result page.
# NOTE(review): this literal is in Traditional script; if Baidu serves the
# page in Simplified Chinese it will never match -- confirm against a live page.
RESULT_MARKER = '百度為您找到相關結果約'
RESULT_COUNT_RE = re.compile(re.escape(RESULT_MARKER) + r'\s*([\d,]+)')
# Returned instead of a count when the page cannot be fetched or parsed.
COUNT_UNKNOWN = 'N/A'


def main():
    """Ask the user to confirm the setup, then start the recognition loop."""
    op = yes_or_no('請確保手機打開了 ADB 并連接了電腦,'
                   '然后打開西瓜視頻后再用本程序,確定開始?')
    if not op:
        print('bye')
        return
    # Core loop: one capture/recognise/search round per Enter key press.
    # (For offline debugging, ocr_subject() can be fed saved screenshots
    # opened with PIL.Image.open instead.)
    ocr_subject_parent()


def yes_or_no(prompt, true_value='y', false_value='n', default=True):
    """Ask a yes/no question on the console and return the answer as a bool.

    An empty first answer returns ``default``. Any answer other than
    ``true_value`` or ``false_value`` triggers a re-prompt until one of the
    two is entered.
    """
    default_value = true_value if default else false_value
    prompt = '{} {}/{} [{}]: '.format(
        prompt, true_value, false_value, default_value)
    i = input(prompt)
    if not i:
        return default
    while True:
        if i == true_value:
            return True
        if i == false_value:
            return False
        prompt = 'Please input {} or {}: '.format(true_value, false_value)
        i = input(prompt)


def screenImg(true_value='', default=True):
    """Wait for the user to signal that a question is on screen.

    Returns ``default`` when the user just presses Enter; otherwise returns
    whether the typed text equals ``true_value``.
    """
    prompt = '當出現題目時,請按下回車進行識別 \n'
    i = input(prompt)
    if not i:
        return default
    return i == true_value


def extract_question(text):
    """Return the question text from raw OCR output.

    All whitespace is removed, then everything up to and including the first
    '.' (the question number prefix) is dropped and double quotes are
    stripped. Text without a '.' is used whole instead of raising, and later
    periods inside the question are preserved.
    """
    compact = ''.join(text.split())
    _number, dot, question = compact.partition('.')
    if not dot:
        question = compact
    return question.replace('"', '')


def ocr_text(image):
    """Run Tesseract (Simplified Chinese model) on ``image``.

    Returns the recognised text with all whitespace removed.
    """
    import pytesseract

    pytesseract.pytesseract.tesseract_cmd = TESSERACT_CMD
    raw = pytesseract.image_to_string(image, lang='chi_sim')
    return ''.join(raw.split())


def ocr_subject(p):
    """Recognise the question in screenshot ``p`` and look up every answer."""
    subject = extract_question(ocr_text(cut_img(p)))
    if not subject:
        # Without a question the per-answer result counts mean nothing.
        print('No question text recognised; nothing to search for.')
        return
    print(subject)
    ocr_answer(p, subject)


def parse_result_count(page):
    """Return the result count (e.g. '1,230,000') found in ``page``.

    Returns COUNT_UNKNOWN when the marker text is not present, instead of
    raising.
    """
    match = RESULT_COUNT_RE.search(page)
    return match.group(1) if match else COUNT_UNKNOWN


def getSearchNum(key, timeout=5):
    """Search Baidu for ``key`` and return the reported result count as text.

    Args:
        key: search text; it is URL-escaped before being put in the query.
        timeout: seconds to wait for the HTTP request.

    Returns:
        The count as it appears in the page (digits and commas), or
        COUNT_UNKNOWN when the request fails or no count is found.
    """
    url = 'http://www.baidu.com/s?wd={}'.format(quote(key))
    try:
        with urllib.request.urlopen(url, timeout=timeout) as response:
            page = response.read().decode('utf-8', errors='replace')
    except OSError:
        # URLError and socket timeouts are OSError subclasses; a failed lookup
        # should not kill the worker thread with a traceback.
        return COUNT_UNKNOWN
    return parse_result_count(page)


def ocr_answer(p, subject):
    """Start one worker thread per answer option found in screenshot ``p``.

    The threads are not joined; each prints its own result when it finishes.
    """
    for option_img in cut_question(p):
        worker = threading.Thread(
            target=ocr_answer_thread, args=(option_img, subject))
        worker.start()


def ocr_answer_thread(p, subject):
    """Recognise one answer image and print it with its search result count."""
    answer = ocr_text(p)
    v = getSearchNum(subject + ' ' + answer)
    print(answer + ' ' + v)


def capture_screenshot():
    """Grab a screenshot over ADB, save it to SCREENSHOT_PATH and open it.

    Raises:
        RuntimeError: if adb cannot be started, exits with a non-zero status
            or produces no output.
    """
    from PIL import Image

    try:
        completed = subprocess.run(ADB_SCREENCAP, stdout=subprocess.PIPE)
    except OSError as exc:
        raise RuntimeError('Could not run adb: {}'.format(exc)) from exc
    if completed.returncode != 0 or not completed.stdout:
        raise RuntimeError(
            'adb screencap failed (exit code {})'.format(completed.returncode))
    # Presumably undoes LF -> CRLF translation applied to adb shell output,
    # which would corrupt the PNG -- confirm for the adb version in use.
    binary_screenshot = completed.stdout.replace(b'\r\n', b'\n')
    with open(SCREENSHOT_PATH, 'wb') as f:
        f.write(binary_screenshot)
    return Image.open(SCREENSHOT_PATH)


def ocr_subject_parent():
    """Run capture/recognise/search rounds until the user declines.

    Implemented as a loop (not mutual recursion with ocr_subject) so a long
    session cannot hit the interpreter's recursion limit.
    """
    while screenImg():
        try:
            img = capture_screenshot()
        except RuntimeError as exc:
            # Report the failure and let the user try again on the next round.
            print(exc)
            continue
        ocr_subject(img)


def search_url(subject):
    """Return the Baidu search URL for ``subject`` with the query escaped."""
    return 'https://www.baidu.com/s?wd={}'.format(quote(subject))


def openPage(subject):
    """Open the Baidu search results for ``subject`` in the web browser."""
    webbrowser.open(search_url(subject))


def cut_img(img):
    """Return the part of the screenshot that contains the question text."""
    return img.crop(QUESTION_BOX)


def cut_question(img):
    """Return the three answer-option regions of the screenshot as a list."""
    return [img.crop(box) for box in ANSWER_BOXES]


if __name__ == '__main__':
    main()
由於很多題目是「下列哪個不是……」這類否定式問法,此時單看百度收錄數的多少並不能直接判斷答案,所以我更喜歡用第一個方式,基本上識別時間在0.5-0.6秒之間。
最后里面的ocr_zh.py是可以用來抓取頭腦王者的輔助。
以上就是本文的全部內容,希望對大家的學習有所幫助,也希望大家多多支持服務器之家。
原文鏈接:http://blog.csdn.net/zjy105/article/details/79034520