0
0 комментариев

[apcode language="python"]

import csv
from urllib.parse import parse_qs, urlparse

import requests
from bs4 import BeautifulSoup
  def get_html(url):
    r = requests.get(url)
    return r.text
  def get_total_pages(html):
    soup = BeautifulSoup(html, 'lxml')
    divs = soup.find('div', class_='pagination-pages').find_all('a', class_='pagination-page')[-1].get('href')
    total_pages = divs.split('=')[0].split('&')[1]
    return int(total_pages)
   def write_csv(data):
    with open('avito.csv', 'a') as f:
        writer = csv.writer(f)
        writer.writerow((data['title'],
                         data['price'],
                         data['metro'],
                         data['url']))
   def get_page_data(html):
    soup = BeautifulSoup(html, 'lxml')
    divs = soup.find('div', class_='js-catalog_serp')
    ads = divs.find_all('div', class_='item__line')
    for ad in ads:
        try:
            title = ad.find('div', class_='snippet-title-row').find('h3').text.strip()    
        except:
            title = ''
        try:
            url = 'https://www.avito.ru' + ad.find('div', class_='snippet-title-row').find('h3').find('a').get('href')    
        except:
            url = ''
        try:
            price = ad.find('div', class_='about').text.strip()
        except:
            price = ''
        try:
            metro = ad.find('div', class_='data').find('span', class_='item-address-georeferences-item__content').text   
        except:
           metro = ''
        data = {'title':title,
                'price':price,
                'metro':metro,
                'url':url}
        write_csv(data)
 def main():
    url = "https://www.avito.ru/moskva/telefony?q=samsung&p=1"
    base_url = "https://www.avito.ru/moskva/telefony?"
    page_part = "&p="
    query_par = "q=samsung"
    #total_pages = get_total_pages(get_html(url))
    # total_pages = get_total_pages(get_html(url))
     for i in range(1, 5):
        url_gen = base_url + query_par + page_part + str(i)
        print(url_gen)
          if __name__ == '__main__':
    main()

[/apcode]

Решенный вопрос
Добавить комментарий