import csv
import requests
from bs4 import BeautifulSoup

url = 'https://www.alta.ru/currency/'
r = requests.get(url)
soup = BeautifulSoup(r.text, 'lxml')

# currency names live in <td class="t-left">, rates in elements with class "t-right"
name = soup.find_all('td', class_='t-left')
price = soup.find_all(class_='t-right')

with open('data.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    for names in name:
        writer.writerow([names.text])
    for prices in price:
        writer.writerow([prices.text])
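As written, this puts every name in its own row and then every rate in its own row, so the two columns never meet. If the intent is one name/rate pair per row, a minimal sketch (assuming the two lists line up one-to-one, which the markup does not guarantee) could zip them:

import csv
import requests
from bs4 import BeautifulSoup

url = 'https://www.alta.ru/currency/'
soup = BeautifulSoup(requests.get(url).text, 'lxml')
names = soup.find_all('td', class_='t-left')
prices = soup.find_all(class_='t-right')

with open('data.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    # zip() pairs the i-th name with the i-th rate and stops at the
    # shorter list, so mismatched counts silently drop rows
    for n, p in zip(names, prices):
        writer.writerow([n.get_text(strip=True), p.get_text(strip=True)])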
Kalmankantaja
Asked:
2022-08-10 16:11:05 +0800 CST
There is a file with contents like the following (only the first two lines are given; some rows have values around 1500):
Id_1C;Id;Group_id;Producer;Partnumber;Prefix;Name;Addon;Selling;Status;Тип механизма;Материал корпуса;Циферблат;Цифры;Бой;Маятник;Ночной режим;Ширина;Высота;Глубина
CB002590925;1706267;0;TFA;TFA;60.3062.51;Настеные часы 60.3062 PICUS (Зеленый дятел), розовое золото;;0;0;*кварцевые;*алюминий;*золотистый;*отсутствуют;*нет;*да;*нет;
How do I turn this into C# objects? Apart from a Visual Basic approach I have not found anything decent, and I would really rather not go that route. It could of course be done by hand, but I also do not want to parse 20 properties manually.
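In C# the usual route for this is a mapping library such as CsvHelper, which binds columns to class properties from the header row. To illustrate the underlying technique of letting the header drive the mapping instead of hand-writing all 20 properties, here is a sketch in Python (the language used elsewhere on this page), with a truncated, hypothetical sample:

import csv
from io import StringIO
from types import SimpleNamespace

# truncated sample: only the first four columns of the real header
sample = (
    "Id_1C;Id;Group_id;Producer\n"
    "CB002590925;1706267;0;TFA\n"
)

# DictReader takes every field name from the header row, so none of
# the 20 columns has to be spelled out; SimpleNamespace then turns
# each row dict into an object with attribute access
rows = [SimpleNamespace(**row) for row in csv.DictReader(StringIO(sample), delimiter=';')]
print(rows[0].Producer, rows[0].Id_1C)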
Max Watson
Asked:
2022-06-26 17:56:33 +0800 CST
Timur
Asked:
2022-06-18 14:51:17 +0800 CST
The results will not save to CSV; the error is:
writer.writerow([item['Product'], item['Price']])
TypeError: list indices must be integers or slices, not str
The code:
import csv
import time

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

s = Service('C:/Users/user/Desktop/полезно/chromedriver.exe')
browser = webdriver.Chrome(service=s)
CSV = 'cards.csv'
URL = 'https://www.ozon.ru/category/mayonez-9286/?delivery=2'
HEADERS = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/'
                         '100.0.4896.75 Safari/537.36',
           'accept': '*/*'}

def get_html(url, params=''):
    r = requests.get(url, headers=HEADERS, params=params)
    return r

webAdr = get_html(URL)
browser.get(webAdr.url)

# click the city-selection button
button = browser.find_element(By.XPATH, '//*[@id="layoutPage"]/div[1]/div[1]/div/div/button/span/span')
button.click()

# enter the city
city_inp = browser.find_element(By.CLASS_NAME, 'ui-h8')
city = "Санкт Петербург"
time.sleep(1)
city_inp.send_keys(city)
time.sleep(1)
city_list = browser.find_element(By.CLASS_NAME, 's4g')
city_inp.send_keys(Keys.ENTER)

# parse the page
time.sleep(3)
#soup = BeautifulSoup(browser.page_source, 'lxml')
#print(len(soup.find_all('div', class_='n9i')))
#html = browser.page_source

def get_content():
    soup = BeautifulSoup(browser.page_source, 'lxml')
    items = soup.find_all('div', class_='oi0 i1o')
    cards = []
    for item in items:
        cards.append(
            {
                'Cat': soup.find('h1', class_='ip5').get_text(strip=True),
                'City': soup.find('span', class_='ui-f0').get_text(strip=True),
                'Product': item.find('span', class_='ed d0e e0d ed2 tsBodyL i5m').get_text(strip=True),
                'Price': item.find('div', class_='ui-s2').get_text(strip=True)
            }
        )
    return cards

#html = get_html(browser.current_url)
#print(get_content())

def save_doc(items, path):
    with open(path, 'w', encoding="utf-32", newline='') as file:
        writer = csv.writer(file, delimiter=';')
        writer.writerow(['Категория', 'Город', 'Продукт', 'Цена'])
        for item in items:
            writer.writerow([item['Cat'], item['City'], item['Product'], item['Price']])

def parser():
    ht = get_html(browser.current_url)
    if ht.status_code == 200:
        #cards = []
        cards = get_content()
        save_doc(cards, CSV)
    else:
        print('Error')
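The TypeError means that item is a list, not a dict: indexing a list with a string key like item['Product'] is exactly what raises "list indices must be integers or slices, not str". With the code above, get_content() returns a flat list of dicts, so the likely culprit is that save_doc() was called with that list wrapped in another list (or that rows were appended as lists in an earlier version of get_content). A minimal sketch with hypothetical data showing the failing and the working shape:

import csv

# hypothetical rows, shaped like get_content() output
cards = [{'Cat': 'Майонез', 'City': 'Санкт Петербург',
          'Product': 'Провансаль', 'Price': '100'}]

with open('cards_demo.csv', 'w', encoding='utf-8', newline='') as file:
    writer = csv.writer(file, delimiter=';')
    # wrong: iterating over [cards] makes `item` the inner list, and
    # list indices must be integers -> the TypeError above
    # for item in [cards]:
    #     writer.writerow([item['Product'], item['Price']])

    # right: iterate over the flat list of dicts
    for item in cards:
        writer.writerow([item['Product'], item['Price']])

Separately, utf-8 (or utf-8-sig, which Excel recognizes) is usually a safer output encoding than utf-32.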
Artmanin
Asked:
2020-05-30 04:30:11 +0800 CST
There is a CSV file. When it is imported with readr::read_csv2(), the text comes in with the wrong encoding. How can this be fixed? By selecting the text encoding? Or with some decoding function?
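In readr the encoding is set through the locale argument, e.g. read_csv2(file, locale = locale(encoding = "windows-1251")). The same idea in Python, the language used elsewhere on this page (file name hypothetical; windows-1251 is a common encoding for Russian-language CSV exports):

import csv

# open the file with an explicit encoding instead of the default;
# cp1251 here is an assumption about the source file
with open('data.csv', encoding='cp1251', newline='') as file:
    for row in csv.reader(file, delimiter=';'):
        print(row)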