有一个 Flask 项目:
from flask import Flask, request, render_template, send_file
import requests
# import csv
import pandas as pd
from io import BytesIO
import logging
import json
# import pandas as pd
# from io import BytesIO
# -*- coding: utf-8 -*-
import sys
sys.stdout.reconfigure(encoding='utf-8')
logging.basicConfig(level=logging.DEBUG)
app = Flask(__name__)
# Функция для поиска вакансий
def search_vacancies(keyword):
BASE_URL = "https://api.hh.ru/vacancies"
vacancies_found = [] # Список для хранения найденных вакансий
per_page = 100
page = 0
while True:
params = {
'text': keyword,
'page': page,
'per_page': per_page,
'only_with_salary': True # Только вакансии с зарплатой
}
response = requests.get(BASE_URL, params=params)
if response.status_code != 200:
print("Ошибка при обращении к API")
break
lan = response.json()
if not lan['items']:
break
for item in lan['items']:
# vacancy_name = item['name']
vacancy_name = item['name'].lower() # Приводим название вакансии к нижнему регистру для сравнения
# Проверяем, содержит ли название вакансии ключевое слово
if keyword in vacancy_name:
salary_from = item['salary']['from'] if item['salary'] else None
salary_to = item['salary']['to'] if item['salary'] else None
currency = item['salary']['currency'] if item['salary'] else None
city = item['area']['name'] if 'area' in item else None
link = item['alternate_url'] if 'alternate_url' in item else None
discription = item['snippet']['responsibility'] if 'snippet' in item else None
vacancies_found.append({
'name': vacancy_name,
'salary_from': salary_from,
'salary_to': salary_to,
'currency': currency,
'city': city,
'link': link,
'discription': discription
})
page += 1
return vacancies_found
@app.route('/download', methods=['POST'])
def download():
keyword = request.form['work_name']
vacancies = search_vacancies(keyword)
df = pd.DataFrame(vacancies)
output = BytesIO()
df.to_excel(output, index=False)
output.seek(0)
return send_file(output, as_attachment=True, download_name='vacancies.xlsx')
@app.route('/', methods=['GET', 'POST'])
def index():
if request.method == 'POST':
keyword = request.form['keyword']
vacancies = search_vacancies(keyword)
print(len(vacancies))
all_vacancies = (len(vacancies))
if vacancies:
# filename = save_to_json(vacancies)
return render_template('index.html', vacancies=vacancies, download=True, all_vacancies=all_vacancies)
else:
return render_template('index.html', all_vacancies=0)
return render_template('index.html')
if __name__ == '__main__':
app.run(debug=True)
<h1>Парсер вакансий</h1>
<form method="POST">
<label for="keyword">Введите вакансию для поиска:</label>
<input type="text" id="keyword" style="margin-left: -2%;" name="keyword" required>
<button type="submit">Искать</button>
</form>
{% if vacancies %}
<h2>Найденные вакансии: {{ all_vacancies }}</h2>
<form method="post" action="/download">
<input type="hidden" name="work_name" value="{{ keyword }}">
<input type="submit" value="Скачать в Excel">
</form>
<table>
<tr>
<th>Название</th>
<th>Зарплата от</th>
<th>Зарплата до</th>
<th>Валюта</th>
<th>Город</th>
<th>Описание</th>
<th>Ссылка</th>
</tr>
{% for vacancy in vacancies %}
<tr>
<td>{{ vacancy.name }}</td>
<td>{{ vacancy.salary_from }}</td>
<td>{{ vacancy.salary_to }}</td>
<td>{{ vacancy.currency }}</td>
<td>{{ vacancy.city }}</td>
<td>{{ vacancy.discription }}</td>
<td><a href="{{ vacancy.link }}" class="vacancy-link">{{ vacancy.link }}</a></td>
</tr>
{% endfor %}
</table>
{% elif message %}
<p>{{ message }}</p>
{% endif %}
<form method="post" action="/download">
<input type="hidden" name="work_name" value="{{ keyword }}">
<input type="submit" value="Скачать в Excel">
</form>
<!--{#</table>#}-->
</body>
</html>
问题是屏幕上显示的解析结果与导出到excel文件的数据不匹配。而且,我对某个关键字请求不同的数据,并且该关键字对应的数据显示在屏幕上,导出到excel文件的数据总是相同的,尽管它必须与屏幕上显示的数据相对应。我不明白为什么会发生这种情况,如果有人知道,请告诉我问题是什么。
关于浏览器中的数据和Excel中的数据的区别,有两点需要注意。
首先,您没有传入表单中存储的
render_template值。保存到 Excel 文件时,您使用As中的值重新查询数据,但其中没有任何内容。即每次对空行的查询结果都存储在表中。keyword"work_name""work_name"keyword其次,第一次请求后,数据可能会发生变化(空缺被删除或添加),结果可能与屏幕不符。最好将数据保存到客户端的表中,或者至少将其缓存在服务器上,以避免重复请求数据源。
使应用程序正常运行的最小更改:
为了避免保存时重复请求而进行的最小更改:
这种缓存的缺点是,对同一单词的重复请求会返回过时的结果。例如,如果客户端发出请求,最小化浏览器,一天后返回并重复相同的搜索,那么他将收到相同的答案。经过一段时间后清除缓存是值得的。