How would one scrape a property listing website like this?

Fri Sep 23 19:14:56 EDT 2022

On Thu, 22 Sep 2022 09:36:47 -0700 (PDT), tripd... at gmail.com wrote:

> https://nigeriapropertycentre.com/
> Has anyone scraped something like this before?
> Perhaps I should try Power BI first to see if it can do it?

You can try something like this.

import urllib.request
from bs4 import BeautifulSoup

# Page whose listings are scraped.
URL = "https://nigeriapropertycentre.com/for-rent/lagos"
# Browser-like User-Agent sent with the request. The literal is split with
# implicit string concatenation inside parentheses; a raw line break inside
# the quotes would be an unterminated string (SyntaxError).
UA = (
    "Mozilla/5.0 (X11; Linux x86_64; rv:105.0) "
    "Gecko/20100101 Firefox/105.0"
)

def fetch_url(url):
    """Download *url* and return the response body as text.

    The request carries the module-level ``UA`` string as its User-Agent
    header.

    Args:
        url: URL string to fetch.

    Returns:
        The response body decoded to ``str``. The charset declared in the
        response's Content-Type header is used when present; otherwise the
        body is decoded as UTF-8.

    Raises:
        urllib.error.URLError: If the request fails (``HTTPError`` for HTTP
            error statuses).
        UnicodeDecodeError: If the body is not valid in the chosen charset.
        LookupError: If the server declares a charset Python does not know.
    """
    headers = {"User-Agent": UA}
    req = urllib.request.Request(url, headers=headers)
    # The context manager closes the connection even if read()/decode() raises.
    with urllib.request.urlopen(req) as resp:
        charset = resp.headers.get_content_charset() or "utf-8"
        return resp.read().decode(charset)

def _text(node, strip=True):
    """Return the text of *node*, or "" when the lookup found nothing."""
    if node is None:
        return ""
    return node.text.strip() if strip else node.text


def _attr(node, name):
    """Return attribute *name* of *node*, or "" if the node or attribute is missing."""
    return "" if node is None else node.get(name, "")


html = fetch_url(URL)
soup = BeautifulSoup(html, "html.parser")

# One row is printed per element carrying the schema.org ListItem itemtype.
# find() returns None when nothing matches, so every lookup goes through the
# None-safe helpers: a listing with a missing field yields "" (or []) for that
# field instead of aborting the whole run with AttributeError.
for item in soup.find_all(itemtype="https://schema.org/ListItem"):
    price_span = item.find("span", class_="price")
    aux_list = item.find("ul", class_="aux-info")

    row = {
        "name": _text(item.find(itemprop="name"), strip=False),
        "url": _attr(item.find(itemprop="url"), "href"),
        "image": _attr(item.find(itemprop="image"), "src"),
        "content-title": _text(item.find(class_="content-title"), strip=False),
        "address": _text(item.find("address")),
        "description": _text(item.find(itemprop="description")),
        "added-on": _text(item.find("span", class_="added-on")),
        # The price text is read from the span's parent element, not the
        # span itself.
        "price": _text(price_span.parent if price_span is not None else None),
        "aux": (
            [li.text.strip() for li in aux_list.find_all("li")]
            if aux_list is not None
            else []
        ),
    }

    print(row)