完整代码如下:
'''
Created on Dec 10, 2019
@author: admin
'''
import time, random, pyperclip , traceback
from pymouse import PyMouse
from pykeyboard import PyKeyboard
from com.fy.utils.date.DateUtils import Date_Utils
from com.fy.utils.http.HttpUtils import HttpUtils
from com.fy.utils.file.FileUtils import File_Utils
from com.fy.utils.hash.HashUtils import Hash_Utils
class WeChatMouseInfo:
    """Collect WeChat article data by driving the desktop client with simulated input.

    Every mouse click targets a hard-coded screen coordinate, so the class
    presumably only works with the exact window layout / screen resolution it
    was written for (TODO confirm the expected setup).

    Collected records are accumulated in ``self.datas`` as dictionaries.
    """

    def __init__(self):
        self.du = Date_Utils()  # timestamp helper used for log prefixes
        self.pm = PyMouse()  # simulated mouse
        self.kb = PyKeyboard()  # simulated keyboard
        self.httpUtil = HttpUtils()
        self.hash = Hash_Utils()
        self.datas = []  # records gathered by crawlerSearchResult()

    def _now(self):
        """Return the log timestamp prefix.

        The first 11 characters of the full timestamp string are dropped,
        presumably the ``YYYY-MM-DD `` date part (verify against Date_Utils).
        """
        return self.du.getCurrentTimeStr()[11:]

    def control(self, kw, isTheme: bool):
        """Open the WeChat article search, search for ``kw`` and pick a sort order.

        Args:
            kw: Search text; it is pasted into the search box via the clipboard.
            isTheme: Whether this is a theme collection. Theme collection uses
                the default (relevance) ordering; keyword collection orders by
                publication time.
        """
        print(self._now() + " 点击微信快捷方式\n")
        self.pm.click(575, 755)
        time.sleep(2)
        print(self._now() + " 点击微信搜索框\n")
        self.pm.click(122, 39)
        time.sleep(2)
        print(self._now() + " 点击搜索框下的【文章搜索】\n")
        self.pm.click(156, 173)
        time.sleep(2)
        # NOTE(review): the next two clicks are not logged and their purpose is
        # not evident from the code; presumably they focus/arrange the search
        # window. Confirm before changing.
        self.pm.click(105, 12)
        time.sleep(2)
        self.pm.click(800, 15)
        time.sleep(2)
        print(self._now() + " 点击输入区,把鼠标置于输入状态\n")
        self.pm.click(500, 345)
        time.sleep(2)
        # The keyword goes through the clipboard and is pasted with Ctrl+V
        # instead of being typed key by key.
        pyperclip.copy(kw)
        print(self._now() + " 模拟键盘点击ctrl+v\n")
        self.kb.press_key(self.kb.control_key)
        try:
            self.kb.tap_key('v')
        finally:
            # Always release Ctrl so a failure cannot leave the key held down.
            self.kb.release_key(self.kb.control_key)
        time.sleep(2)
        self.kb.tap_key(self.kb.enter_key)
        print(self._now() + " 按下回车键,进行搜索\n")
        time.sleep(5)
        if isTheme:
            self.pm.click(780, 153)
            print(self._now() + " 点击【相关度排序】完毕\n")
        else:
            self.pm.click(885, 153)
            print(self._now() + " 点击【时间排序】完毕\n")
        print(self._now() + " +++++++++++++打开搜索界面,并执行搜索操作完毕+++++++++++++\n")
        time.sleep(4)

    def crawlerSearchResult(self):
        """Open the first five search results and append one record per parsed article.

        A record is only stored when both the URL and the title were obtained.
        """
        print(self._now() + " +++++++++++++解析搜索结果列表++++++++【开始】+++++\n")
        for high in range(5):
            # Result rows are clicked at a fixed vertical pitch of 111 px,
            # starting at y=233.
            moveHigh = 233 + 111 * high
            print(self._now() + " 点击第【", high + 1, "】条信息")
            self.pm.click(660, moveHigh)
            time.sleep(6)
            title, url, author = self.parseInfo()
            # Presumably closes the article window that was just opened
            # (TODO confirm what sits at this coordinate).
            self.pm.click(1352, 14)
            if url is not None and title is not None:
                data = {
                    "title": title,
                    "url": url,
                    # The "author" field stores an MD5 of "<author>#<title>",
                    # not the plain author name.
                    "author": self.hash.getMd5HashUtils(author + "#" + title),
                    "dicName": "Cralwer_WeChat_List",
                    "keyField": "author",
                }
                self.datas.append(data)
            print()
        print(self._now() + " +++++++++++++解析搜索结果列表++++++++【完毕】+++++\n")

    def closeSearchPanle(self):
        """Click the position that closes the search result page."""
        print(self._now() + " 关闭搜索结果页\n")
        self.pm.click(988, 25)

    def parseInfo(self):
        """Read title, URL and author of the currently opened article via the clipboard.

        Returns:
            A ``(title, url, author)`` tuple, or ``(None, None, None)`` when any
            step fails (the traceback is printed).
        """
        try:
            print(self.du.getCurrentTimeStr() + " 点击微信快捷方式\n")
            # Presumably this click puts the article link on the clipboard
            # (TODO confirm); the clipboard content is then taken as the URL.
            self.pm.click(180, 56)
            time.sleep(2)
            url = pyperclip.paste()
            # Click into the page, then select all and copy to grab its text.
            self.pm.click(180, 120)
            time.sleep(2)
            self.kb.press_keys([self.kb.control_r_key, 'a'])
            time.sleep(2)
            self.kb.press_keys([self.kb.control_r_key, 'c'])
            time.sleep(2)
            text = pyperclip.paste()
            # The first line of the copied text is used as the title.
            title = text.split("\n")[0].strip()
            author = self.getAuthor(text)
            print(self._now() + " 标题:", title)
            print(self._now() + " 链接:", url)
            print(self._now() + " 作者:", author)
            return title, url, author
        except Exception:
            # print_exc() writes the traceback itself and returns None, so it
            # must not be wrapped in print().
            traceback.print_exc()
            return None, None, None

    def getAuthor(self, text):
        """Extract the author from the copied article text.

        Lines are stripped and lines of two characters or fewer are ignored.
        The author is the second-to-last whitespace-separated token of the
        second remaining line (or its only token when there is just one).

        Args:
            text: Full text copied from the article page.

        Returns:
            The author token, or ``""`` when fewer than two usable lines exist.
        """
        lineNumber = 0
        for line in text.split("\n"):
            line = line.strip()
            if len(line) <= 2:
                continue
            lineNumber += 1
            if lineNumber == 2:
                # split() without arguments collapses runs of whitespace, so
                # repeated or non-breaking spaces never yield empty tokens.
                tokens = line.split()
                return tokens[-2] if len(tokens) >= 2 else tokens[0]
        return ""

    def saveDatas(self, datas):
        """POST ``datas`` to the dedup/storage endpoint, retrying once after 5 seconds.

        Failures are reported with a traceback but never raised, so saving
        stays best-effort.
        """
        url = "http://XXX.XXX.XXX.XXX:XXXX/hashInter/addListHashRepeat?isChecked=false"
        try:
            result = self.httpUtil.post(url, datas)
            if result is None:
                # A None result is treated as a failed attempt: wait and retry once.
                time.sleep(5)
                result = self.httpUtil.post(url, datas)
            print(result, "\n\n")
        except Exception:
            traceback.print_exc()
if __name__ == '__main__':
    # Endless crawl loop: each round searches every keyword from ./kw.txt,
    # then searches again using the collected article titles as themes.
    owc = WeChatMouseInfo()
    fu = File_Utils("./kw.txt")
    while True:
        theme = []
        owc.datas = []
        # The keyword file is re-read every round.
        kws = list(fu.read_To_List("utf-8"))
        startTime = owc.du.getCurrentTimeLong()

        # Pass 1: keyword collection, ordered by publication time.
        for kw in kws:
            owc.control(kw, False)
            owc.crawlerSearchResult()
            owc.closeSearchPanle()

        # Titles gathered in pass 1 become the search terms of pass 2.
        for data in owc.datas:
            theme.append(data["title"])
        endTime = owc.du.getCurrentTimeLong()
        print(owc.du.getCurrentTimeStr()[11:] + " 已获取数据【" + str(len(owc.datas)) + "】条,历时【" + str(endTime - startTime) + "】秒\n")
        time.sleep(100)

        # Pass 2: theme collection. control() documents that theme collection
        # uses the default (relevance) ordering, so isTheme is True here.
        for tkw in theme:
            owc.control(tkw, True)
            owc.crawlerSearchResult()
            owc.closeSearchPanle()
        endTime = owc.du.getCurrentTimeLong()
        print(owc.du.getCurrentTimeStr()[11:] + " 已获取数据【" + str(len(owc.datas)) + "】条,历时【" + str(endTime - startTime) + "】秒\n")
        # NOTE(review): owc.saveDatas() is never called, so the collected
        # records are discarded at the start of the next round - confirm
        # whether that is intended.

        # Random pause between rounds (180-540 seconds).
        num = random.randint(180, 540)
        print(owc.du.getCurrentTimeStr() + " " + str(num) + " seconds later for the nex round of processing....")
        time.sleep(num)