0
0 комментариев
#!/usr/bin/env python3
import csv
import urllib.request
from bs4 import BeautifulSoup
 
def get_html(url):
    """Download *url* and return the raw response body.

    Args:
        url: Address to fetch; anything ``urllib.request.urlopen`` accepts.

    Returns:
        The undecoded response body as ``bytes``.
    """
    # The context manager closes the response (and its socket) even when
    # read() raises, instead of leaving that to garbage collection.
    with urllib.request.urlopen(url) as response:
        return response.read()
 
 
# Print every row of urls.csv at import time.
# The file is opened in a `with` block so the handle is closed once the rows
# have been read, and with newline="" as the csv module requires so that
# newlines embedded in quoted fields are parsed correctly.
# NOTE(review): the UnicodeDecodeError reported for this loop (byte 0x8b at
# position 1) suggests urls.csv may not be plain UTF-8 text -- possibly a
# gzip-compressed file; confirm the file's actual format.
with open("urls.csv", newline="") as urls_file:
    reader = csv.reader(urls_file)
    for row in reader:
        print(row)
 
def parse(html):
    """Extract the page title and result count from *html* and save them.

    Looks up the ``div`` with class ``search-total js-search-total`` and the
    ``span`` with class ``search-message js-page-title``, prints both texts
    and writes them to ``project.csv`` through ``save``.

    Args:
        html: Page markup to parse (``str`` or ``bytes``).

    Raises:
        AttributeError: If either element is absent from the page, because
            ``find`` then yields ``None`` and the ``.text`` access fails.
    """
    # Name the parser explicitly: without it BeautifulSoup picks whichever
    # parser happens to be installed (and warns), so results could differ
    # from one machine to another.
    soup = BeautifulSoup(html, 'html.parser')
    search = soup.find('div', class_='search-total js-search-total')
    span = soup.find('span', class_='search-message js-page-title')
    spantext = span.text
    searchtext = search.text
    print(spantext, searchtext)

    save(searchtext, spantext, 'project.csv')
 
def save(searchtext, spantext, path):
    """Write a two-row CSV (header plus one data row) to *path*.

    The file is overwritten on every call. The data row holds *spantext*
    in the first column and *searchtext* in the second, matching the
    header order.

    Args:
        searchtext: Value for the second ('Кол-во') column.
        spantext: Value for the first ('Название') column.
        path: Destination file path.
    """
    # newline='' lets the csv writer control line endings itself (otherwise
    # Windows gets an extra blank line after each row); an explicit UTF-8
    # encoding keeps the Cyrillic header writable whatever the locale is.
    with open(path, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(('Название', 'Кол-во'))
        writer.writerow((spantext, searchtext))
def main():
    """Fetch the hard-coded catalogue page and pass its HTML to ``parse``."""
    url = 'http://www.abitant.com/catalogues/bra-i-nastennye-svetilniki/companies/robers'
    page = get_html(url)
    parse(page)


# Run the scraper only when the file is executed as a script.
if __name__ == '__main__':
    main()

Помогите, пожалуйста, решить задачу.

В urls.csv в первой колонке находятся урлы.

for row in reader:

File "/usr/lib/python3.4/codecs.py", line 319, in decode

(result, consumed) = self._buffer_decode(data, self.errors, final)

UnicodeDecodeError: 'utf-8' codec can't decode byte 0x8b in position 1: invalid start byte


Добавить комментарий