代码
<?xml version="1.0" encoding="UTF-8"?>
<ui version="4.0">
<class>Dialog</class>
<widget class="QDialog" name="Dialog">
<property name="geometry">
<rect>
<x>0</x>
<y>0</y>
<width>490</width>
<height>369</height>
</rect>
</property>
<property name="windowTitle">
<string>Dialog</string>
</property>
<widget class="QDialogButtonBox" name="buttonBox">
<property name="geometry">
<rect>
<x>330</x>
<y>330</y>
<width>156</width>
<height>24</height>
</rect>
</property>
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
<property name="standardButtons">
<set>QDialogButtonBox::Cancel|QDialogButtonBox::Ok</set>
</property>
</widget>
<widget class="QLabel" name="label_5">
<property name="geometry">
<rect>
<x>180</x>
<y>10</y>
<width>131</width>
<height>31</height>
</rect>
</property>
<property name="font">
<font>
<family>华文楷体</family>
<pointsize>15</pointsize>
</font>
</property>
<property name="text">
<string>知网信息获取</string>
</property>
</widget>
<widget class="QWidget" name="gridLayoutWidget">
<property name="geometry">
<rect>
<x>70</x>
<y>50</y>
<width>361</width>
<height>251</height>
</rect>
</property>
<layout class="QGridLayout" name="gridLayout">
<item row="1" column="0">
<widget class="QLabel" name="label_2">
<property name="font">
<font>
<family>宋体</family>
<pointsize>10</pointsize>
</font>
</property>
<property name="text">
<string>请输入数量:</string>
</property>
</widget>
</item>
<item row="1" column="1">
<widget class="QLineEdit" name="lineEdit_2">
<property name="text">
<string/>
</property>
</widget>
</item>
<item row="5" column="1">
<widget class="QTextBrowser" name="textBrowser"/>
</item>
<item row="2" column="0">
<widget class="QLabel" name="label_3">
<property name="font">
<font>
<family>宋体</family>
<pointsize>10</pointsize>
</font>
</property>
<property name="text">
<string>路径选择:</string>
</property>
</widget>
</item>
<item row="2" column="2">
<widget class="QToolButton" name="toolButton">
<property name="text">
<string>...</string>
</property>
</widget>
</item>
<item row="0" column="0">
<widget class="QLabel" name="label_1">
<property name="font">
<font>
<family>宋体</family>
<pointsize>10</pointsize>
</font>
</property>
<property name="text">
<string>请输入关键词:</string>
</property>
</widget>
</item>
<item row="0" column="1">
<widget class="QLineEdit" name="lineEdit_1">
<property name="text">
<string/>
</property>
</widget>
</item>
<item row="5" column="0">
<widget class="QLabel" name="label_4">
<property name="font">
<font>
<pointsize>9</pointsize>
</font>
</property>
<property name="text">
<string>输出结果:</string>
</property>
</widget>
</item>
<item row="2" column="1">
<widget class="QLineEdit" name="lineEdit_3">
<property name="text">
<string/>
</property>
</widget>
</item>
<item row="3" column="1">
<layout class="QHBoxLayout" name="horizontalLayout">
<item>
<widget class="QPushButton" name="pushButton">
<property name="text">
<string>开始</string>
</property>
</widget>
</item>
<item>
<widget class="QPushButton" name="pushButton_2">
<property name="text">
<string>结束</string>
</property>
</widget>
</item>
</layout>
</item>
</layout>
</widget>
</widget>
<resources/>
<connections>
<!-- Signal/slot wiring for the dialog's OK / Cancel button box. -->
<!-- buttonBox.accepted() is routed to Dialog.accept(). -->
<connection>
<sender>buttonBox</sender>
<signal>accepted()</signal>
<receiver>Dialog</receiver>
<slot>accept()</slot>
<!-- NOTE(review): the x/y hints look like editor-only label coordinates, not runtime data; confirm before relying on them. -->
<hints>
<hint type="sourcelabel">
<x>248</x>
<y>254</y>
</hint>
<hint type="destinationlabel">
<x>157</x>
<y>274</y>
</hint>
</hints>
</connection>
<!-- buttonBox.rejected() is routed to Dialog.reject(). -->
<connection>
<sender>buttonBox</sender>
<signal>rejected()</signal>
<receiver>Dialog</receiver>
<slot>reject()</slot>
<hints>
<hint type="sourcelabel">
<x>316</x>
<y>260</y>
</hint>
<hint type="destinationlabel">
<x>286</x>
<y>274</y>
</hint>
</hints>
</connection>
</connections>
</ui>
# Form implementation generated from reading ui file 'd:\Cumtb_Code\PyQT\cnki.ui'
#
# Created by: PyQt6 UI code generator 6.1.0
#
# WARNING: Any manual changes made to this file will be lost when pyuic6 is
# run again. Do not edit this file unless you know what you are doing.
from PyQt6 import QtCore, QtGui, QtWidgets
class Ui_Dialog(object):
    """Widget tree of the CNKI search dialog (pyuic6 output for cnki.ui).

    ``setupUi`` builds the child widgets on a host dialog and stores each
    one as an attribute of this object; ``retranslateUi`` assigns the
    visible captions.
    """

    def setupUi(self, Dialog):
        """Create, configure and lay out every child widget of *Dialog*.

        Widgets are created in the same order as in the generated code so
        that anything depending on creation order stays unchanged.
        """

        def make_font(point_size, family=None):
            # Build a QFont; the family is only set when one is given.
            font = QtGui.QFont()
            if family is not None:
                font.setFamily(family)
            font.setPointSize(point_size)
            return font

        def place(widget, name, row, column):
            # Name the widget, then drop it into a single grid cell.
            widget.setObjectName(name)
            self.gridLayout.addWidget(widget, row, column, 1, 1)

        Dialog.setObjectName("Dialog")
        Dialog.resize(490, 369)

        # OK / Cancel buttons, positioned absolutely on the dialog.
        buttons = QtWidgets.QDialogButtonBox.StandardButton
        self.buttonBox = QtWidgets.QDialogButtonBox(Dialog)
        self.buttonBox.setGeometry(QtCore.QRect(330, 330, 156, 24))
        self.buttonBox.setOrientation(QtCore.Qt.Orientation.Horizontal)
        self.buttonBox.setStandardButtons(buttons.Cancel | buttons.Ok)
        self.buttonBox.setObjectName("buttonBox")

        # Heading label.
        self.label_5 = QtWidgets.QLabel(Dialog)
        self.label_5.setGeometry(QtCore.QRect(180, 10, 131, 31))
        self.label_5.setFont(make_font(15, "华文楷体"))
        self.label_5.setObjectName("label_5")

        # Container widget that owns the grid layout.
        self.gridLayoutWidget = QtWidgets.QWidget(Dialog)
        self.gridLayoutWidget.setGeometry(QtCore.QRect(70, 50, 361, 251))
        self.gridLayoutWidget.setObjectName("gridLayoutWidget")
        self.gridLayout = QtWidgets.QGridLayout(self.gridLayoutWidget)
        self.gridLayout.setContentsMargins(0, 0, 0, 0)
        self.gridLayout.setObjectName("gridLayout")
        holder = self.gridLayoutWidget

        # Row 1: label + line edit.
        self.label_2 = QtWidgets.QLabel(holder)
        self.label_2.setFont(make_font(10, "宋体"))
        place(self.label_2, "label_2", 1, 0)
        self.lineEdit_2 = QtWidgets.QLineEdit(holder)
        self.lineEdit_2.setText("")
        place(self.lineEdit_2, "lineEdit_2", 1, 1)

        # Row 5, column 1: text browser.
        self.textBrowser = QtWidgets.QTextBrowser(holder)
        place(self.textBrowser, "textBrowser", 5, 1)

        # Row 2: label and tool button (the line edit is added further down).
        self.label_3 = QtWidgets.QLabel(holder)
        self.label_3.setFont(make_font(10, "宋体"))
        place(self.label_3, "label_3", 2, 0)
        self.toolButton = QtWidgets.QToolButton(holder)
        place(self.toolButton, "toolButton", 2, 2)

        # Row 0: label + line edit.
        self.label_1 = QtWidgets.QLabel(holder)
        self.label_1.setFont(make_font(10, "宋体"))
        place(self.label_1, "label_1", 0, 0)
        self.lineEdit_1 = QtWidgets.QLineEdit(holder)
        self.lineEdit_1.setText("")
        place(self.lineEdit_1, "lineEdit_1", 0, 1)

        # Row 5, column 0: label (point size only, no explicit family).
        self.label_4 = QtWidgets.QLabel(holder)
        self.label_4.setFont(make_font(9))
        place(self.label_4, "label_4", 5, 0)

        # Row 2, column 1: line edit.
        self.lineEdit_3 = QtWidgets.QLineEdit(holder)
        self.lineEdit_3.setText("")
        place(self.lineEdit_3, "lineEdit_3", 2, 1)

        # Row 3, column 1: two push buttons side by side.
        self.horizontalLayout = QtWidgets.QHBoxLayout()
        self.horizontalLayout.setObjectName("horizontalLayout")
        self.pushButton = QtWidgets.QPushButton(holder)
        self.pushButton.setObjectName("pushButton")
        self.horizontalLayout.addWidget(self.pushButton)
        self.pushButton_2 = QtWidgets.QPushButton(holder)
        self.pushButton_2.setObjectName("pushButton_2")
        self.horizontalLayout.addWidget(self.pushButton_2)
        self.gridLayout.addLayout(self.horizontalLayout, 3, 1, 1, 1)

        self.retranslateUi(Dialog)
        self.buttonBox.accepted.connect(Dialog.accept)
        self.buttonBox.rejected.connect(Dialog.reject)
        QtCore.QMetaObject.connectSlotsByName(Dialog)

    def retranslateUi(self, Dialog):
        """Set the window title and the caption of every text-bearing widget."""
        translate = QtCore.QCoreApplication.translate
        Dialog.setWindowTitle(translate("Dialog", "Dialog"))
        captions = (
            (self.label_5, "知网信息获取"),
            (self.label_2, "请输入数量:"),
            (self.label_3, "路径选择:"),
            (self.toolButton, "..."),
            (self.label_1, "请输入关键词:"),
            (self.label_4, "输出结果:"),
            (self.pushButton, "开始"),
            (self.pushButton_2, "结束"),
        )
        for widget, caption in captions:
            widget.setText(translate("Dialog", caption))
'''
Description: henggao_note
version: v1.0.0
Date: 2022-04-07 15:19:37
LastEditors: henggao
LastEditTime: 2022-04-07 19:24:54
'''
import random
from PyQt6.QtWidgets import QApplication, QDialog
import time
from matplotlib.pyplot import cla
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from urllib.parse import urljoin
from selenium.webdriver.common.keys import Keys
from Ui_cnki import Ui_Dialog
from PyQt6 import QtWidgets, QtGui, QtCore
from PyQt6.QtCore import QThread, pyqtSignal
class Cnkiprogrammer(QDialog, Ui_Dialog):
    """Dialog that runs a background ``WorkThread`` and shows its output.

    The start button launches the worker, every string the worker emits on
    its ``trigger`` signal is appended to the text browser, and the tool
    button opens a file dialog to choose the save path.
    """

    def __init__(self, parent=None):
        """Build the UI and wire all signals exactly once.

        Args:
            parent: optional parent widget forwarded to ``QDialog``.
        """
        super(Cnkiprogrammer, self).__init__(parent)
        self.setupUi(self)
        # Worker thread instance.
        self.work = WorkThread()
        # Connect the worker's signal here rather than in startBtn():
        # connecting on every click stacks duplicate connections (each
        # emitted string would then be displayed several times), and
        # connecting after start() could miss early emissions.
        self.work.trigger.connect(self.display)
        # Start / stop buttons.
        self.pushButton.clicked.connect(self.startBtn)
        self.pushButton_2.clicked.connect(self.stopBtn)
        # Save-path chooser.
        self.toolButton.clicked.connect(self.savePath)

    def savePath(self):
        """Ask for a save file and put the chosen path into ``lineEdit_3``."""
        directory = QtWidgets.QFileDialog.getSaveFileName(
            self, "设置路径", "./", "All Files (*);;Text Files (*.tsv)")
        # getSaveFileName returns (path, selected_filter); the path is empty
        # when the dialog is cancelled, in which case the old value is kept.
        if directory[0]:
            self.lineEdit_3.setText(directory[0])

    def startBtn(self):
        """Start the worker thread if a keyword has been entered."""
        if not self.lineEdit_1.text():
            print("请输入关键词")
            # Show the prompt as placeholder text; writing it into the field
            # with setText() would turn the prompt itself into the keyword
            # on the next click.
            self.lineEdit_1.setPlaceholderText('请输入关键词')
            return
        # QThread.start() does nothing when the thread is already running,
        # so repeated clicks are harmless.
        self.work.start()

    def stopBtn(self):
        """Ask the worker thread to stop.

        This only sets Qt's interruption flag; the worker ends early only
        if its ``run()`` polls ``isInterruptionRequested()``.
        """
        print("结束检索")
        self.work.requestInterruption()

    def display(self, str):
        """Append one line of worker output to the text browser.

        Args:
            str: text carried by the worker's ``trigger`` signal. The name
                shadows the builtin but is kept so the slot's signature
                stays unchanged.
        """
        self.textBrowser.append(str)
class WorkThread(QThread):
    """Background thread that emits the numbers 0..19, one per second."""

    # Custom signal carrying one string to the connected slot.
    trigger = pyqtSignal(str)

    def __init__(self, parent=None):
        """Initialise the thread.

        The original spelled this method ``__int__``, so it never ran as a
        constructor.

        Args:
            parent: optional parent ``QObject`` forwarded to ``QThread``.
        """
        super(WorkThread, self).__init__(parent)

    def run(self):
        """Thread body: sleep one second, then emit the counter as text.

        The loop ends early when an interruption has been requested via
        ``QThread.requestInterruption()``.
        """
        for i in range(20):
            if self.isInterruptionRequested():
                break
            time.sleep(1)
            # Hand the string to the GUI through the signal instead of
            # touching widgets from this thread.
            self.trigger.emit(str(i))
def main():
    """Create the application, show the dialog and run the event loop.

    Returns:
        The value returned by ``QApplication.exec()``, so the caller can
        use it as the process exit status.
    """
    import sys
    app = QApplication(sys.argv)
    pr = Cnkiprogrammer()
    pr.show()
    return app.exec()


if __name__ == '__main__':
    # Propagate the event loop's result as the process exit status.
    raise SystemExit(main())
效果
import sys,time
from PyQt5.QtWidgets import QWidget,QPushButton,QApplication,QListWidget,QGridLayout
class WinForm(QWidget):
    """Demo window: a list that is filled item by item while the UI repaints."""

    def __init__(self, parent=None):
        """Build the window: a list widget above a start button.

        Args:
            parent: optional parent widget forwarded to ``QWidget``.
        """
        super(WinForm, self).__init__(parent)
        # Title and layout.
        self.setWindowTitle('实时刷新界面的例子')
        layout = QGridLayout()
        # List and button widgets.
        self.listFile = QListWidget()
        self.btnStart = QPushButton('开始')
        # The list spans both columns of row 0; the button sits in row 1.
        layout.addWidget(self.listFile, 0, 0, 1, 2)
        layout.addWidget(self.btnStart, 1, 1)
        # Clicking the button runs slotAdd.
        self.btnStart.clicked.connect(self.slotAdd)
        self.setLayout(layout)

    def slotAdd(self):
        """Add ten items to the list, one per second, repainting after each.

        The start button is disabled while the loop runs: processEvents()
        dispatches pending clicks, so an enabled button would let a click
        start a second, nested run of this slot in the middle of the first.
        """
        self.btnStart.setEnabled(False)
        try:
            for n in range(10):
                # Item text.
                str_n = 'File index{0}'.format(n)
                self.listFile.addItem(str_n)
                # Let Qt process pending events so the new item is drawn now.
                QApplication.processEvents()
                time.sleep(1)
        finally:
            self.btnStart.setEnabled(True)
if __name__ == '__main__':
    # Script entry point: show the demo window and exit with the event
    # loop's return value.
    application = QApplication(sys.argv)
    window = WinForm()
    window.show()
    exit_status = application.exec_()
    sys.exit(exit_status)
'''
Description: henggao_note
version: v1.0.0
Date: 2022-04-07 15:19:37
LastEditors: henggao
LastEditTime: 2022-04-08 14:37:34
'''
import random
from PyQt6.QtWidgets import QApplication, QDialog
import time
from matplotlib.pyplot import cla
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from urllib.parse import urljoin
from selenium.webdriver.common.keys import Keys
from Ui_cnki import Ui_Dialog
from PyQt6 import QtWidgets, QtGui, QtCore
from PyQt6.QtGui import QTextCursor
from PyQt6.QtCore import QThread, pyqtSignal, QObject, QTimer
import sys
class Cnkiprogrammer(QDialog, Ui_Dialog):
    """Dialog that searches cnki.net for a keyword and saves result rows.

    The user enters a keyword, the number of records wanted and a save
    path. ``linkSearch`` drives a headless Chrome through the result list
    and appends one tab-separated line per record to the chosen file,
    reporting progress in the text browser.

    The search runs on the GUI thread; the window is kept alive only by the
    ``QApplication.processEvents()`` call made after each record.
    """

    # Rows per result page assumed by the row/page arithmetic.
    _PAGE_SIZE = 20
    # Seconds to wait for an element before giving up.
    _WAIT_SECONDS = 10
    # Absolute XPath of one cell in the result table.
    _CELL_XPATH = ("/html/body/div[3]/div[2]/div[2]/div[2]/form/div/table"
                   "/tbody/tr[{row}]/td[{col}]")

    def __init__(self, parent=None):
        """Build the UI and connect the buttons.

        Args:
            parent: optional parent widget forwarded to ``QDialog``.
        """
        super(Cnkiprogrammer, self).__init__(parent)
        self.setupUi(self)
        # Set by stopBtn(), polled by the search loop.
        self._stop_requested = False
        # Start / stop buttons.
        self.pushButton.clicked.connect(self.startBtn)
        self.pushButton_2.clicked.connect(self.stopBtn)
        # Save-path chooser.
        self.toolButton.clicked.connect(self.savePath)

    def savePath(self):
        """Ask for a save file and put the chosen path into ``lineEdit_3``."""
        directory = QtWidgets.QFileDialog.getSaveFileName(
            self, "设置路径", "./", "All Files (*);;Text Files (*.tsv)")
        # The path is empty when the dialog is cancelled; keep the old value.
        if directory[0]:
            self.lineEdit_3.setText(directory[0])

    def startBtn(self):
        """Validate the keyword and run the search."""
        if not self.lineEdit_1.text():
            print("请输入关键词")
            # Placeholder text instead of setText(): otherwise the prompt
            # itself becomes the keyword on the next click.
            self.lineEdit_1.setPlaceholderText('请输入关键词')
            return
        self._stop_requested = False
        # processEvents() inside the search dispatches pending clicks, so
        # the start button is disabled to prevent a nested second search.
        self.pushButton.setEnabled(False)
        try:
            self.linkSearch()
        finally:
            self.pushButton.setEnabled(True)

    def stopBtn(self):
        """Request that a running search stop.

        The request takes effect the next time the search loop checks the
        flag, i.e. after the record currently being fetched.
        """
        print("结束检索")
        self._stop_requested = True

    def linkSearch(self):
        """Open Chrome, search cnki.net and save the requested records.

        Reads the keyword, record count and save path from the three line
        edits. Invalid input and browser failures are reported in the text
        browser instead of being raised, and the browser is always shut
        down.
        """
        from selenium.common.exceptions import WebDriverException

        theme = self.lineEdit_1.text()
        save_path = self.lineEdit_3.text()
        try:
            papers_need = int(self.lineEdit_2.text())
        except ValueError:
            papers_need = 0
        if papers_need <= 0:
            self.textBrowser.append('请输入有效的数量' + '\n')
            return
        if not save_path:
            self.textBrowser.append('请选择保存路径' + '\n')
            return
        print(save_path)

        options = webdriver.ChromeOptions()
        # Make get() return without waiting for the page to finish loading.
        # Set on the options object rather than by mutating the shared
        # DesiredCapabilities.CHROME dictionary.
        options.set_capability("pageLoadStrategy", "none")
        # Do not load images, to speed things up.
        options.add_experimental_option(
            "prefs", {"profile.managed_default_content_settings.images": 2})
        # No visible browser window.
        options.add_argument('--headless')

        driver = None
        try:
            driver = webdriver.Chrome(options=options)
            self._collect(driver, theme, papers_need, save_path)
        except (WebDriverException, ValueError) as exc:
            self.textBrowser.append('检索失败: ' + str(exc) + '\n')
        finally:
            if driver is not None:
                # quit() ends the whole browser session; close() would only
                # close the current window.
                driver.quit()

    def _collect(self, driver, theme, papers_need, save_path):
        """Run the search and walk the result pages, saving each record."""
        from selenium.common.exceptions import WebDriverException

        wait = WebDriverWait(driver, self._WAIT_SECONDS)
        driver.get("https://www.cnki.net")
        # get() returns immediately (page load strategy "none"), so wait
        # for the search box instead of looking it up right away.
        search_box = wait.until(EC.presence_of_element_located(
            (By.CSS_SELECTOR, ".search-input")))
        search_box.send_keys(theme)
        search_box.send_keys(Keys.ENTER)
        time.sleep(3)
        # Switch to Chinese-language literature.
        wait.until(EC.presence_of_element_located(
            (By.XPATH, "//div[@class='switch-ChEn']/a[@class='ch']"))).click()
        time.sleep(1)
        # Total number of hits; strip the thousands separators first.
        total_text = wait.until(EC.presence_of_element_located(
            (By.XPATH, "//span[@class='pagerTitleCell']/em"))).text
        res_unm = int(total_text.replace(",", ''))
        # Ceiling division: an exact multiple of the page size must not
        # report one page too many.
        page_unm = -(-res_unm // self._PAGE_SIZE)
        self.textBrowser.append(
            '共查询到' + str(res_unm) + '条数据,' + str(page_unm) + '页' + '\n')
        QApplication.processEvents()

        # Never ask for more records than the search returned.
        papers_need = min(papers_need, res_unm)
        main_window = driver.current_window_handle
        count = 1   # sequence number of the record being attempted
        saved = 0   # records actually written to the file
        while count <= papers_need and not self._stop_requested:
            title_list = wait.until(
                EC.presence_of_all_elements_located((By.CLASS_NAME, "fz14")))
            for index, title_link in enumerate(title_list):
                if count > papers_need or self._stop_requested:
                    break
                try:
                    # The table row is derived from the link's position so
                    # the metadata and the clicked title refer to the same
                    # record.
                    res = self._read_record(
                        driver, wait, count, index + 1, title_link)
                    # errors='replace' keeps a record that contains a
                    # character GBK cannot encode instead of losing it.
                    with open(save_path, 'a', encoding='gbk',
                              errors='replace') as f:
                        f.write(res)
                except Exception as exc:
                    # Best effort: report the failure and move on.
                    self.textBrowser.append(
                        '第' + str(count) + '条数据抓取失败: '
                        + type(exc).__name__ + '\n')
                else:
                    saved += 1
                    self.textBrowser.append(
                        '第' + str(count) + '条数据抓取成功' + '\n')
                finally:
                    self._close_extra_windows(driver, main_window)
                    count += 1
                    # Repaint and dispatch pending clicks (e.g. stop).
                    QApplication.processEvents()
            if count <= papers_need and not self._stop_requested:
                try:
                    wait.until(EC.presence_of_element_located(
                        (By.XPATH, "//a[@id='PageNext']"))).click()
                except WebDriverException:
                    # No usable next-page link: stop with what we have.
                    break
        self.textBrowser.append(
            '抓取数据结束,共抓取' + str(saved) + '条数据' + '\n')

    def _read_record(self, driver, wait, count, row, title_link):
        """Read one result row and its detail page; return the output line."""
        from selenium.common.exceptions import WebDriverException

        # Columns 2..6 of the row: title, authors, source, date, database.
        cells = []
        for col in range(2, 7):
            xpath = self._CELL_XPATH.format(row=row, col=col)
            cells.append(wait.until(
                EC.presence_of_element_located((By.XPATH, xpath))).text)
        title, authors, source, date, database = cells

        # Open the record and switch to the newest window.
        title_link.click()
        driver.switch_to.window(driver.window_handles[-1])
        abstract = wait.until(EC.presence_of_element_located(
            (By.CLASS_NAME, "abstract-text"))).text
        try:
            institute = wait.until(EC.presence_of_element_located(
                (By.XPATH, "/html/body/div[2]/div[1]/div[3]/div/div/div[3]"
                           "/div/h3[2]/span/a"))).text
        except WebDriverException:
            institute = '无'
        try:
            # The last character of the keyword text is dropped.
            keywords = wait.until(EC.presence_of_element_located(
                (By.CLASS_NAME, "keywords"))).text[:-1]
        except WebDriverException:
            keywords = '无'
        url = driver.current_url
        # One tab-separated line per record, embedded newlines removed.
        return (f"{count}\t{title}\t{authors}\t{institute}\t{date}\t{source}"
                f"\t{database}\t{keywords}\t{abstract}\t{url}"
                ).replace("\n", "") + "\n"

    @staticmethod
    def _close_extra_windows(driver, main_window):
        """Close every window except *main_window* and switch back to it."""
        for handle in driver.window_handles:
            if handle != main_window:
                driver.switch_to.window(handle)
                driver.close()
        driver.switch_to.window(main_window)
def main():
    """Create the application, show the dialog and run the event loop.

    Returns:
        The value returned by ``QApplication.exec()``, so the caller can
        use it as the process exit status.
    """
    import sys
    app = QApplication(sys.argv)
    pr = Cnkiprogrammer()
    pr.show()
    return app.exec()


if __name__ == '__main__':
    # Propagate the event loop's result as the process exit status.
    raise SystemExit(main())