0
0 комментариев
import requests
from tqdm import tqdm
from prettytable import PrettyTable
from bs4 import BeautifulSoup
 
 
def fetch_page(url, params=None):
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/64.0.3282.140 Safari/537.36',
        'Accept-Language': 'ru,en;q=0.9'
    }
    return requests.get(url, params=params, headers=headers)
 
 
def parse_afisha_list(raw_html):
    films_information = {}
    min_showing_cinemas_count = 50
    soup = BeautifulSoup(raw_html, 'html.parser')
    tags = soup.find_all(class_='m-disp-table')
    for tag in tags:
        film_title = tag.h3.string
        cinemas_count = len(
            tag.next_sibling.next_sibling.find_all(class_='b-td-item')
        )
        if min_showing_cinemas_count < cinemas_count:
            films_information[film_title] = cinemas_count
    return films_information
 
 
def fetch_movie_page(movie_title):
    url = 'https://www.kinopoisk.ru/index.php'
    params = {'kp_query': movie_title, 'first': 'yes', 'what': ''}
    response = fetch_page(url, params)
    return response
 
 
def parse_film_rating(film_html_raw):
    soup = BeautifulSoup(film_html_raw, 'html.parser')
    try:
        rating = soup.find(class_='rating_ball').string
        rating_count = soup.find(
            class_='ratingCount'
        ).string.replace(u'\xa0', '')
    except AttributeError:
        rating = 0
        rating_count = 0
    return float(rating), int(rating_count)
 
 
def output_movies_to_console(movies_info_list):
    first_ten_movies = 10
    movie_info_table = PrettyTable(
        ['Movie title', 'Rating', 'Rating count', 'Votes']
    )
    movie_info_table.align['Movie title'] = 'l'
    for movie in movies_info_list[:first_ten_movies]:
        print(movie)
        movie_info_table.add_row(movie)
    print(movie_info_table)
 
 
if __name__ == '__main__':
    afisha_page_url = 'https://www.afisha.ru/msk/schedule_cinema/'
    afisha_page_raw = fetch_page(afisha_page_url)
    print('Get a list of movies shown in the cinema from Afisha.ru...')
    showing_films = parse_afisha_list(afisha_page_raw.text)
    print('Get movies rating from Kinopoisk.ru...')
    movie_list_info = []
    for movie_name, votes in tqdm(
            showing_films.items(), desc='Collecting data:'
    ):
        movie_page = fetch_movie_page(movie_name)
        movie_rating, movie_rating_count = parse_film_rating(movie_page.text)
        movie_info = [movie_name, movie_rating, movie_rating_count, votes]
        movie_list_info.append(movie_info)
    sorted_movie_list = (sorted(
        movie_list_info, key=lambda movie: movie[1], reverse=True
    ))
    output_movies_to_console(sorted_movie_list)

вылетает ошибка

File "C:/devman/13_cinemas/cinemas.py", line 81, in <module>
    output_movies_to_console(list)
  File "C:/devman/13_cinemas/cinemas.py", line 61, in output_movies_to_console
    print(info_table)
 
  куча повторяющихся строк...
 
  File "C:\Program Files (x86)\Python36-32\lib\copy.py", line 240, in _deepcopy_dict
    y[deepcopy(key, memo)] = deepcopy(value, memo)
  File "C:\Program Files (x86)\Python36-32\lib\copy.py", line 150, in deepcopy
    y = copier(x, memo)
  File "C:\Program Files (x86)\Python36-32\lib\copy.py", line 215, in _deepcopy_list
    append(deepcopy(a, memo))
  File "C:\Program Files (x86)\Python36-32\lib\copy.py", line 180, in deepcopy
    y = _reconstruct(x, memo, *rv)
  File "C:\Program Files (x86)\Python36-32\lib\copy.py", line 274, in _reconstruct
    y = func(*args)
  File "C:\Program Files (x86)\Python36-32\lib\copyreg.py", line 88, in __newobj__
    return cls.__new__(cls, *args)
RecursionError: maximum recursion depth exceeded

берем из списка данные и пихает их в строку для prettytable такова вида [‘Лед’, 6171, 165]
если добавить print(movie) выведет [‘Лед’, 6171, 165]
при выводи таблицы выдает ошибку,
если строку добавлять ручками а не из списка, так info_table.add_row([‘Лед’, 6171, 165]) тогда prettytable отрабатывает как надо типы у обоих _list, в чем разница ?


Добавить комментарий