无法将结果保存到 CSV,错误:
writer.writerow([item['Product'], item['Price']])
TypeError: list indices must be integers or slices, not str
代码:
# Output file name and the category page to scrape.
CSV = 'cards.csv'
URL = 'https://www.ozon.ru/category/mayonez-9286/?delivery=2'

# Headers sent with the plain `requests` call made by get_html().
HEADERS = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/'
        '100.0.4896.75 Safari/537.36'
    ),
    'accept': '*/*',
}

# Start Chrome through the chromedriver binary at this local path.
s = Service('C:/Users/user/Desktop/полезно/chromedriver.exe')
browser = webdriver.Chrome(service=s)
def get_html(url, params=''):
    """Send a GET request to *url* with the module-level HEADERS.

    Args:
        url: Address to request.
        params: Query parameters forwarded to ``requests.get``.

    Returns:
        The response object returned by ``requests.get``.
    """
    return requests.get(url, headers=HEADERS, params=params)
# Request the page with `requests` first, then open the URL reported by that
# response in the Selenium-driven browser.
webAdr = get_html(URL)
browser.get(webAdr.url)
# Click the control that opens the city input.
button = browser.find_element(By.XPATH, '//*[@id="layoutPage"]/div[1]/div[1]/div/div/button/span/span')
button.click()
# Type the city name into the input field.
city_inp = browser.find_element(By.CLASS_NAME, 'ui-h8')
city = "Санкт Петербург"
# NOTE(review): the fixed sleeps presumably give the page time to react
# between steps — confirm; no explicit wait condition is used here.
time.sleep(1)
city_inp.send_keys(city)
time.sleep(1)
# `city_list` is not used again in the visible code; the input is confirmed
# with ENTER instead.
city_list = browser.find_element(By.CLASS_NAME,'s4g')
city_inp.send_keys(Keys.ENTER)
# Pause before the page is parsed.
time.sleep(3)
#soup = BeautifulSoup(browser.page_source,'lxml')
#print(len(soup.find_all('div', class_='n9i')))
#html = browser.page_source
def get_content():
    """Parse the page currently loaded in ``browser`` into product cards.

    Returns:
        A list with one dict per product card found on the page, with the
        keys ``'Cat'``, ``'City'``, ``'Product'`` and ``'Price'``. A value is
        an empty string when the corresponding element is not present.
    """
    def text_of(node):
        # BeautifulSoup's find() returns None when nothing matches; calling
        # get_text() on None would raise AttributeError and abort the whole
        # run because of a single incomplete card.
        return node.get_text(strip=True) if node is not None else ''

    soup = BeautifulSoup(browser.page_source, 'lxml')

    # Category and city are looked up on the whole page, not on a card, so
    # they are resolved once instead of once per card.
    category = text_of(soup.find('h1', class_='ip5'))
    city = text_of(soup.find('span', class_='ui-f0'))

    return [
        {
            'Cat': category,
            'City': city,
            'Product': text_of(item.find('span', class_='ed d0e e0d ed2 tsBodyL i5m')),
            'Price': text_of(item.find('div', class_='ui-s2')),
        }
        for item in soup.find_all('div', class_='oi0 i1o')
    ]
#html = get_html(browser.current_url)
#print(get_content())
def save_doc(items, path):
    """Write the scraped cards to a semicolon-delimited CSV file.

    Args:
        items: Iterable of dicts with the keys ``'Cat'``, ``'City'``,
            ``'Product'`` and ``'Price'``.
        path: Destination file; it is overwritten and encoded as UTF-32.

    Raises:
        KeyError: If a card lacks one of the expected keys.
    """
    header = ['Категория','Город','Продукт', 'Цена']
    columns = ('Cat', 'City', 'Product', 'Price')
    with open(path, 'w', encoding="utf-32", newline='') as out:
        table = csv.writer(out, delimiter=';')
        table.writerow(header)
        # Each card becomes one row, with values in header order.
        table.writerows([card[key] for key in columns] for card in items)
def parser():
    """Scrape the page open in ``browser`` and save the cards to ``CSV``.

    The current browser URL is re-requested through ``get_html``; the cards
    are parsed and saved only when that request answers with status 200,
    otherwise ``'Error'`` is printed.
    """
    response = get_html(browser.current_url)
    if response.status_code != 200:
        print('Error')
        return
    # Pass the list of dicts straight through — it is not wrapped in
    # another list before saving.
    save_doc(get_content(), CSV)
该错误意味着您正在用字符串(而不是整数索引)访问列表的元素,也就是说,`item` 并不是字典,而是一个列表。会触发此错误的示例:`[{'Product': 'x'}]['Product']`。
`get_content` 返回的字典列表被嵌套进了另一个列表。`save_doc` 需要的是字典列表,而不是一个把整个字典列表作为第一个元素的外层列表。这很容易修复——按原样传递列表,无需这些额外操作。而且,如果您确实想创建所需格式的新列表,请使用 `extend` 而不是 `append`:这样字典列表就不会整体变成新列表中的一个元素,而是逐个展开到新列表中,新列表仍然是字典列表,例如 `cards.extend(get_content())`。