前几天我开始学习 bs4。我的第一个小程序是解析 COVID 统计数据。请对我的代码提出建议或意见,因为我无法判断自己的解析写得好不好。一开始我想用正则表达式来做,但后来意识到最好不要这样做。这是我的代码:
from bs4 import BeautifulSoup
from requests import get
from prettytable import PrettyTable
import tkinter as tk
# Address of the page that get_stats() downloads (Cyrillic host name).
SOURSE = 'https://стопкоронавирус.рф/'
def get_code(sourse, timeout=10):
    """Download a page and parse it into a BeautifulSoup tree.

    Args:
        sourse: URL of the page to fetch.
        timeout: Seconds to wait for the server before giving up; without
            a timeout ``requests`` can block indefinitely.

    Returns:
        The response body parsed with the built-in ``html.parser``.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with a 4xx/5xx status.
    """
    response = get(sourse, timeout=timeout)
    # Fail loudly on an error status instead of parsing an error page.
    response.raise_for_status()
    return BeautifulSoup(response.text, 'html.parser')
def get_container(contents):
    """Return the first ``div`` with class ``cv-countdown`` inside *contents*.

    The result is whatever BeautifulSoup's ``find`` returns for that query.
    """
    container_filter = {'class': "cv-countdown"}
    return contents.find('div', attrs=container_filter)
def get_items(container):
    """Collect every ``div`` with class ``cv-countdown__item`` in *container*."""
    item_filter = {'class': "cv-countdown__item"}
    return container.find_all('div', attrs=item_filter)
def remove_br(code):
    """Replace every HTML line-break tag in *code* with a single space.

    Matches ``<br>``, ``<br/>`` and ``</br>`` as well as variants with inner
    whitespace (``<br />``) or different letter case (``<BR>``).

    Args:
        code: HTML markup as a string.

    Returns:
        The markup with each line-break tag replaced by one space.
    """
    import re

    # One regex pass covers all spellings instead of a chain of literal
    # replace() calls that misses the common ``<br />`` form.
    return re.sub(r'<\s*/?\s*br\s*/?\s*>', ' ', code, flags=re.IGNORECASE)
def to_dict(items):
    """Map each item's label text to its displayed value text.

    Args:
        items: Iterable of BeautifulSoup tags, each containing a
            ``div.cv-countdown__item-label`` and a
            ``div.cv-countdown__item-value`` with a ``span`` inside.

    Returns:
        dict: Stripped label text -> text of the value's ``span``, in the
        iteration order of *items*.
    """
    import copy

    def label_text(item):
        # Work on a detached copy: decomposing nested divs in place would
        # strip them from the caller's parsed tree as a side effect.
        label = copy.copy(item.find('div', class_='cv-countdown__item-label'))
        # Drop nested <div>s so their text is excluded from the label.
        for nested in label.select('div'):
            nested.decompose()
        return label.get_text().strip()

    def value_text(item):
        value = item.find('div', class_='cv-countdown__item-value')
        return value.span.get_text()

    return {label_text(item): value_text(item) for item in items}
def get_stats():
    """Fetch the page at ``SOURSE`` and return its counters as a dict."""
    page = get_code(SOURSE)
    container = get_container(page)
    items = get_items(container)
    return to_dict(items)
def convert_to_table(dict_):
    """Render a mapping as a two-column ``PrettyTable``.

    Args:
        dict_: Mapping whose keys fill the first column and whose values
            fill the second.

    Returns:
        A ``PrettyTable`` with one row per entry of *dict_*.
    """
    table = PrettyTable()
    table.field_names = ['Параметр', 'Значение (чел.)']
    # items() already yields the (key, value) pairs; zipping keys() with
    # values() only rebuilt the same sequence.
    table.add_rows(dict_.items())
    return table
def show_in_window(data, title='Stats', font_size=15):
    """Show *data* as text in a new Tk window and run its event loop.

    Args:
        data: Object to display; its ``str()`` form becomes the label text.
        title: Title of the window.
        font_size: Size used for the Consolas font.
    """
    root = tk.Tk()
    root.title(title)
    # Attach the label to this window explicitly instead of relying on
    # tkinter's implicit default root, and convert data to text up front so
    # tkinter never interprets a non-string object as a special option value.
    label = tk.Label(root, text=str(data), font=f'Consolas {font_size}')
    label.pack()
    root.mainloop()
if __name__ == '__main__':
    # Run the pipeline only when executed as a script, so that importing
    # this module does not trigger a network request or open a window.
    stats = get_stats()
    table = convert_to_table(stats)
    # show_in_window() runs the Tk main loop, so the table is printed to
    # stdout only after that loop ends.
    show_in_window(table)
    print(table)
更新:代码已更新。现在不再通过一连串方法调用来查找元素,而是通过嵌套的 CSS 选择器来查找。
from bs4 import BeautifulSoup as bs
from requests import get
from prettytable import PrettyTable
import tkinter as tk
# Address of the page to scrape (Cyrillic host name).
SOURSE = r'https://стопкоронавирус.рф/'
# NOTE: the page is downloaded and parsed at import time; get_items() reads
# the resulting SOUP. The timeout keeps a stalled server from hanging the
# import forever, and raise_for_status() stops an HTTP error page from being
# parsed as if it were data.
_RESPONSE = get(SOURSE, timeout=10)
_RESPONSE.raise_for_status()
HTML = _RESPONSE.text
SOUP = bs(HTML, 'html.parser')
# CSS selector for every item element nested inside the countdown container.
HTML_PATH = 'html body .cv-countdown .cv-countdown__item'
def get_items():
    """Return the elements of the module-level ``SOUP`` matching ``HTML_PATH``."""
    return SOUP.select(HTML_PATH)
def to_dict(items):
    """Map each item's label text to its displayed value text.

    Args:
        items: Iterable of BeautifulSoup tags, each containing a
            ``div.cv-countdown__item-label`` and a
            ``div.cv-countdown__item-value`` with a ``span`` inside.

    Returns:
        dict: Stripped label text -> text of the value's ``span``, in the
        iteration order of *items*.
    """
    import copy

    def label_text(item):
        # Work on a detached copy: decomposing nested divs in place would
        # permanently strip them from the shared parsed tree.
        label = copy.copy(item.find('div', class_='cv-countdown__item-label'))
        # Drop nested <div>s so their text is excluded from the label.
        for nested in label.select('div'):
            nested.decompose()
        return label.get_text().strip()

    def value_text(item):
        value = item.find('div', class_='cv-countdown__item-value')
        return value.span.get_text()

    return {label_text(item): value_text(item) for item in items}
def get_stats():
    """Build the label -> value dict from the items found by ``get_items``."""
    items = get_items()
    return to_dict(items)
def convert_to_table(dict_):
    """Render a mapping as a two-column ``PrettyTable``.

    Args:
        dict_: Mapping whose keys fill the first column and whose values
            fill the second.

    Returns:
        A ``PrettyTable`` with one row per entry of *dict_*.
    """
    table = PrettyTable()
    table.field_names = ['Параметр', 'Значение (чел.)']
    # items() already yields the (key, value) pairs; zipping keys() with
    # values() only rebuilt the same sequence.
    table.add_rows(dict_.items())
    return table
def show_in_window(data, title='Stats', font_size=15):
    """Show *data* as text in a new Tk window and run its event loop.

    Args:
        data: Object to display; its ``str()`` form becomes the label text.
        title: Title of the window.
        font_size: Size used for the Consolas font.
    """
    root = tk.Tk()
    root.title(title)
    # Attach the label to this window explicitly instead of relying on
    # tkinter's implicit default root, and convert data to text up front so
    # tkinter never interprets a non-string object as a special option value.
    label = tk.Label(root, text=str(data), font=f'Consolas {font_size}')
    label.pack()
    root.mainloop()
if __name__ == '__main__':
    # Build and display the table only when executed as a script, so that
    # importing this module does not open a window.
    stats = get_stats()
    table = convert_to_table(stats)
    # show_in_window() runs the Tk main loop, so the table is printed to
    # stdout only after that loop ends.
    show_in_window(table)
    print(table)
1 个回答