有一个解析器:
class AppWindow(QtWidgets.QMainWindow):
    """Main window of the application; launches the parser in the background."""

    def __init__(self):
        super().__init__()
        # ...

    def start(self):
        """Create a ``parser`` and execute its ``run`` on a daemon thread."""
        self.thread1 = parser()
        # NOTE(review): ``run`` is driven by a plain ``threading.Thread`` here,
        # not by ``QThread.start()`` -- confirm that this is intentional.
        worker = threading.Thread(target=self.thread1.run, daemon=True)
        worker.start()
        # ...
class parser(QtCore.QThread):
    """Worker that hands the items to parse over to child processes."""

    def __init__(self):
        # One cooperative call is enough to initialise the QThread base; the
        # second, explicit ``QtCore.QThread.__init__(self)`` was redundant.
        super(parser, self).__init__()
        # ...

    def run(self):
        """Queue every item of ``data`` and start ten daemon processes.

        Each process runs ``parse_data`` with the shared queue as its only
        argument.
        """
        # ...
        data = [...]  # placeholder for the real input (elided in the source)
        queue = mp.Queue()
        for item in data:
            queue.put(item)
        # ``args`` has to be a tuple.  ``(queue)`` is just ``queue`` in
        # parentheses, which ``mp.Process`` would try to iterate and fail.
        processes = [
            mp.Process(target=parse_data, args=(queue,)) for _ in range(10)
        ]
        for process in processes:
            process.daemon = True
            process.start()
        # ...
def _iter_elems(data):
    """Yield the elements of ``data``, which is an iterable or a queue."""
    import queue as queue_module  # local import: only needed for ``Empty``

    if not hasattr(data, 'get_nowait'):
        # Ordinary iterable (list, tuple, generator, ...).
        yield from data
        return
    # Queue-like object (e.g. ``multiprocessing.Queue``): it cannot be
    # iterated directly, so drain it until it stays empty for one second.
    while True:
        try:
            yield data.get(timeout=1)
        except queue_module.Empty:
            return


def parse_data(data):
    """Parse every element of ``data`` with a 60 second limit per element.

    Args:
        data: an iterable of elements, or a queue-like object holding them.
            Each element is indexed with ``'url'`` by the inner parser.
    """

    def parse_elem(elem):
        # ...
        html = Client(elem['url'])
        # ...

    for elem in _iter_elems(data):
        # The single-thread pool exists only to put a timeout on the parsing
        # of one element.  The ``with`` block terminates the pool on every
        # path, including timeouts and errors.
        # NOTE(review): ``Client`` is therefore constructed in the pool's
        # worker thread, not in the process's main thread; if it sets up a
        # QApplication there, Qt is expected to warn about it -- confirm.
        with ThreadPool(processes=1) as pool:
            try:
                # ``(elem,)`` is a one-element tuple; a bare ``(elem)`` would
                # be unpacked as the positional arguments themselves.
                p_result = pool.apply_async(parse_elem, (elem,))
                res = p_result.get(timeout=60)
            except Exception:
                # ... (the original handler is elided in the source; restore
                # it here -- ``pass`` only keeps the snippet syntactically
                # valid)
                pass
class Client(QWebEnginePage):  # source: https://stackoverflow.com/questions/8049520/web-scraping-javascript-page-with-python#answer-51341435
    """Load ``url`` and block until the page's HTML is stored in ``self.html``."""

    def __init__(self, url):
        print(url)
        # Only one QApplication may exist per process, so reuse the current
        # one when this class is instantiated more than once.
        self.app = QApplication.instance() or QApplication(sys.argv)
        QWebEnginePage.__init__(self)
        self.html = ""
        self.loadFinished.connect(self.on_load_finished)
        self.load(QUrl(url))
        # Blocks here until ``Callable`` quits the event loop.
        self.app.exec_()

    def on_load_finished(self):
        """Request the page's HTML once loading has finished."""
        # ``toHtml`` hands its result to the callback, so its return value
        # is not assigned to ``self.html``.
        self.toHtml(self.Callable)

    def Callable(self, data):
        """Store the received HTML and stop the event loop."""
        self.html = data
        self.app.quit()
def main():
    """Start the Qt application, show the main window and exit with its code."""
    # Called before anything else in the entry point.
    mp.freeze_support()
    app = QtWidgets.QApplication(sys.argv)
    window = AppWindow()
    window.show()
    exit_code = app.exec_()
    sys.exit(exit_code)


if __name__ == "__main__":
    main()
但是调用 `Client` 时失败,并输出警告:`WARNING: QApplication was not created in the main() thread.`。
我还没有完全弄清楚它的工作原理。有没有办法让 `Client` 在这种情况下正常工作?
这是我最终得到的方案,它会把 5 个文件保存到磁盘:
P.S. 起初我在单个进程中写了一个同步版本,本想直接推荐它;但在明确了需要在不同进程中加载多个页面之后,我开始研究如何在另一个进程中启动 `QApplication`。经过一番尝试,我先用一个子进程做了一个示例,然后又扩展到多个子进程。它运行正常,效果很好。这样一来,把同步版本放到多个进程中运行也就不再困难了。