0
0 комментариев

Я парсю сайт. Мне нужно распарсить 2 категории Fresh и Bekery и занести данные в БД. Когда запускаю скрипт, то все данные с обоих категорий заносятся в обе таблицы в БД. А мне нужно, чтобы товары с Fresh занеслись в таблицу fresh, а товары Bekery в таблицу bekery, по отдельности.

Python-код

import re
import requests
import mysql.connector
from mysql.connector import MySQLConnection, Error
from bs4 import BeautifulSoup
 
 
class DataBase:
    def __init__(self, user, password, db):
        self.user = user
        self.password = password
        self.db = db
        try:
            conn = self.get_conn()
            # if conn.is_connected():
            #     print('Подключение прошло успешно.')
        except Error as e:
            print(e)
 
    def get_conn(self):
        conn = mysql.connector.connect(user=self.user,
                                       password=self.password,
                                       db=self.db)
        return conn
 
    def add_products(self, title, price, url):
        try:
            sql_fresh = 'INSERT INTO tesco.fresh (title, price, url) VALUES (%s, %s, %s)'
            sql_bakery = 'INSERT INTO tesco.bakery (title, price, url) VALUES (%s, %s, %s)'
            args = (title, price, url)
            conn = self.get_conn()
            c = conn.cursor()
            c.execute(sql_fresh, args)
            c.execute(sql_bakery, args)
            conn.commit()
        except Error as e:
            print(e)
 
 
def write_database(data):
    db = DataBase('root', '123456', 'tesco')
    db.add_products(data['title'], data['price'], data['url'])
 
 
class Tesco:
    def get_html(self, url):
        r = requests.get(url)
        return r.text
 
    def get_total_pages(self, html):
        soup = BeautifulSoup(html, 'lxml')
        total_pages = \
            soup.find('nav', class_='pagination--page-selector-wrapper').find_all('a', class_='pagination--button')[
                -2].find('span').text
 
        return int(total_pages)
 
    def get_page_data(self, html):
        soup = BeautifulSoup(html, 'lxml')
        products = soup.find('div', class_='product-lists').find_all('li', class_='product-list--list-item')
 
        for product in products:
            try:
                offer = product.find('div', class_='yellow-square').text
            except:
                offer = ''
 
            if len(offer) == 0:
                try:
                    title = product.find('div', class_='product-details--content').find('a',
                                                                                        class_='product-tile--title').text
                except:
                    title = ''
 
                try:
                    price = float(product.find('div', class_='controls').find('span', class_='value').text)
                except:
                    price = ''
 
                try:
                    url = 'https://www.tesco.com' + product.find('div', class_='product-details--content').find('a',
                                                                                                                class_='product-tile--title').get(
                        'href')
                except:
                    url = ''
 
                data = {
                    'title': title,
                    'price': price,
                    'url': url
                }
 
                write_database(data)
 
 
def main():
    tesco = Tesco()
 
    base_url_fresh = 'https://www.tesco.com/groceries/en-GB/shop/fresh-food/all?offset='
    base_url_bakery = 'https://www.tesco.com/groceries/en-GB/shop/bakery/all?offset='
 
    print('Записываю Fresh.')
    for i in range(0, 2):
        print((i / 2) * 100)
        current_page = i * 24
        url_gen = base_url_fresh + str(current_page)
        html = tesco.get_html(url_gen)
        tesco.get_page_data(html)
 
    print('Записываю Bakery.')
    for i in range(0, 2):
        print((i / 2) * 100)
        current_page = i * 24
        url_gen = base_url_bakery + str(current_page)
        html = tesco.get_html(url_gen)
        tesco.get_page_data(html)
 
 
if __name__ == '__main__':
    main()

Фото с комментарием
введите сюда описание изображения


Добавить комментарий