Problem when scraping the 100 Movie titles.

Fabian Joseph fabianjoseph063 at gmail.com
Thu Sep 22 03:36:43 EDT 2022


# Try this: the data is stored in JSON format inside the web page itself:
import json
import requests
from bs4 import BeautifulSoup


url = "https://www.empireonline.com/movies/features/best-movies-2/"

# Without a timeout, requests waits forever on a stalled connection; and
# raise_for_status() turns an HTTP error into an exception instead of letting
# an error page be parsed as if it were the article.
response = requests.get(url, timeout=30)
response.raise_for_status()

soup = BeautifulSoup(response.content, "html.parser")

# The page embeds its data as JSON in the element with id "__NEXT_DATA__".
# select_one() returns None when nothing matches, so fail with a clear message
# rather than an opaque AttributeError on `.contents`.
next_data = soup.select_one("#__NEXT_DATA__")
if next_data is None:
    raise RuntimeError("could not find the #__NEXT_DATA__ element in " + url)

data = json.loads(next_data.contents[0])

# uncomment this to print all data:
#print(json.dumps(data, indent=4))


def find_articles(data):
    """Recursively yield the image name of every ``ImageMeta:`` entry.

    Walks arbitrarily nested dicts and lists. For each dict item whose key
    starts with ``"ImageMeta:"``, yields ``value["image"]["name"]`` and does
    not descend into that value; every other dict value and every list item
    is searched recursively. Anything that is neither a dict nor a list
    (strings, numbers, ``None``) yields nothing.

    ``ImageMeta:`` entries that do not have the expected
    ``{"image": {"name": ...}}`` shape (e.g. a ``None`` value or a missing
    key) are skipped instead of aborting the whole traversal.
    """
    if isinstance(data, dict):
        for key, value in data.items():
            # Keys decoded from JSON are always strings, but guard anyway so
            # a hand-built dict with non-string keys is still traversed.
            if isinstance(key, str) and key.startswith("ImageMeta:"):
                try:
                    name = value["image"]["name"]
                except (KeyError, TypeError):
                    # Malformed or empty entry: nothing to report for it.
                    continue
                yield name
            else:
                yield from find_articles(value)
    elif isinstance(data, list):
        for item in data:
            yield from find_articles(item)


# Print each image name found in the page data, one per line.
for image_name in find_articles(data):
    print(image_name)


More information about the Python-list mailing list