[Solved] How to scrape multiple results having the same tags and class

Question

You need to parse your data from the script tag rather than the spans and divs.

Try this:

import requests
from bs4 import BeautifulSoup
import re
import pandas as pd
from pandas import json_normalize
import json

def get_page(url):
    """Download ``url`` and parse the response body into a BeautifulSoup tree.

    Args:
        url: Address to fetch with ``requests.get``.

    Returns:
        A ``BeautifulSoup`` object built from the response text with the
        ``'lxml'`` parser.

    Raises:
        requests.HTTPError: If the server answers with an error status. The
            status code is printed before the exception is raised.
    """
    response = requests.get(url)
    if not response.ok:
        print('server responded:', response.status_code)
        # There is no document to parse on an error response, so fail with
        # the real HTTP error rather than returning a name that was never
        # assigned.
        response.raise_for_status()
    return BeautifulSoup(response.text, 'lxml')

def get_detail_data(url):
    """Fetch one detail page and return its embedded site records as a DataFrame.

    The page is expected to carry its state in an inline
    ``<script> window.__INITIAL_STATE__=...</script>`` tag. That payload is
    decoded and the entries under ``data["sites"]["list"]`` are turned into
    rows.

    Args:
        url: Address of the detail page to download.

    Returns:
        A DataFrame restricted to the columns named in ``cols`` (without
        ``primary_location``), extended with the flattened fields of the
        first row's ``primary_location``.

    Raises:
        IndexError: If the page contains no ``__INITIAL_STATE__`` script tag.
        KeyError: If the payload lacks ``sites``/``list`` or one of the
            selected columns.
    """
    res = requests.get(url)

    marker = "<script> window.__INITIAL_STATE__="
    pieces = res.text.split(marker)
    if len(pieces) < 2:
        # Same exception type a bare ``[1]`` lookup would raise, but with a
        # message that says which page failed and why.
        raise IndexError(
            f"no __INITIAL_STATE__ script found at {url} "
            f"(HTTP {res.status_code})"
        )
    raw = pieces[1].split("</script>")[0]

    # The payload is normally a JSON string literal that itself contains
    # JSON, hence the second decode; skip it if the first pass already
    # produced the decoded structure.
    data = json.loads(raw)
    if isinstance(data, str):
        data = json.loads(data)

    cols = ['abn', 'address', 'name', 'primary_location', 'service_area', 'state', 'suburb', 'website']

    # presumably ``sites.list`` maps a site id to its record, so transposing
    # yields one row per site -- TODO confirm against a live payload
    df = pd.DataFrame(data["sites"]["list"]).T
    df = df[cols].reset_index(drop=True)

    # NOTE(review): only the first row's location is flattened; any further
    # rows get NaN in these columns after the concat.
    primary_location = json_normalize(df.primary_location[0])
    df = pd.concat([df, primary_location], axis=1)
    to_drop = ["primary_location", "is_primary", "suburb_seo_key", "capital_city_seo_key"]
    # The helper keys come from the flattened location and may be absent for
    # some sites; their absence should not abort the scrape.
    return df.drop(to_drop, axis=1, errors="ignore")


def get_index_data(soup):
    """Build the list of absolute detail-page URLs found on a listing page.

    Every ``h3`` carrying the class ``sc-bZQynM sc-iwsKbI dpKmnV`` is looked
    up, and the ``href`` of the element immediately preceding it in the parse
    (presumably the enclosing link -- confirm against the page markup) is
    appended to the site root.

    Args:
        soup: Parsed listing page exposing ``findAll``.

    Returns:
        A list of URL strings in document order. An element without an
        ``href`` contributes the site root followed by the text ``None``.
    """
    headings = soup.findAll("h3", {'class': 'sc-bZQynM sc-iwsKbI dpKmnV'})
    return [
        f"https://hipages.com.au{heading.previous_element.get('href')}"
        for heading in headings
    ]

def Main():
    """Scrape the Sydney antenna-services listing and every page it links to.

    The listing page is downloaded once, the detail URLs are extracted from
    it, and each detail page is then fetched in turn.

    Returns:
        A list with one ``get_detail_data`` result per detail URL, in the
        order the URLs appear on the listing page.
    """
    mainurl = "https://hipages.com.au/find/antenna_services/nsw/sydney"
    listing_soup = get_page(mainurl)
    detail_urls = get_index_data(listing_soup)
    return [get_detail_data(detail_url) for detail_url in detail_urls]

# Run the scrape: one DataFrame per detail page.
data = Main()

# Stack the per-page frames and renumber the rows from zero.
df = pd.concat(data)
df = df.reset_index(drop=True)

# NOTE(review): `display` is neither defined nor imported in this snippet --
# presumably the IPython/Jupyter builtin; confirm before running as a script.
display(df)

This gives you much more detailed data, by the way.

Accepted Answer

You need to parse your data from the script tag rather than the spans and divs.

Try this:

import requests
from bs4 import BeautifulSoup
import re
import pandas as pd
from pandas import json_normalize
import json

def get_page(url):
    """Download ``url`` and parse the response body into a BeautifulSoup tree.

    Args:
        url: Address to fetch with ``requests.get``.

    Returns:
        A ``BeautifulSoup`` object built from the response text with the
        ``'lxml'`` parser.

    Raises:
        requests.HTTPError: If the server answers with an error status. The
            status code is printed before the exception is raised.
    """
    response = requests.get(url)
    if not response.ok:
        print('server responded:', response.status_code)
        # There is no document to parse on an error response, so fail with
        # the real HTTP error rather than returning a name that was never
        # assigned.
        response.raise_for_status()
    return BeautifulSoup(response.text, 'lxml')

def get_detail_data(url):
    """Fetch one detail page and return its embedded site records as a DataFrame.

    The page is expected to carry its state in an inline
    ``<script> window.__INITIAL_STATE__=...</script>`` tag. That payload is
    decoded and the entries under ``data["sites"]["list"]`` are turned into
    rows.

    Args:
        url: Address of the detail page to download.

    Returns:
        A DataFrame restricted to the columns named in ``cols`` (without
        ``primary_location``), extended with the flattened fields of the
        first row's ``primary_location``.

    Raises:
        IndexError: If the page contains no ``__INITIAL_STATE__`` script tag.
        KeyError: If the payload lacks ``sites``/``list`` or one of the
            selected columns.
    """
    res = requests.get(url)

    marker = "<script> window.__INITIAL_STATE__="
    pieces = res.text.split(marker)
    if len(pieces) < 2:
        # Same exception type a bare ``[1]`` lookup would raise, but with a
        # message that says which page failed and why.
        raise IndexError(
            f"no __INITIAL_STATE__ script found at {url} "
            f"(HTTP {res.status_code})"
        )
    raw = pieces[1].split("</script>")[0]

    # The payload is normally a JSON string literal that itself contains
    # JSON, hence the second decode; skip it if the first pass already
    # produced the decoded structure.
    data = json.loads(raw)
    if isinstance(data, str):
        data = json.loads(data)

    cols = ['abn', 'address', 'name', 'primary_location', 'service_area', 'state', 'suburb', 'website']

    # presumably ``sites.list`` maps a site id to its record, so transposing
    # yields one row per site -- TODO confirm against a live payload
    df = pd.DataFrame(data["sites"]["list"]).T
    df = df[cols].reset_index(drop=True)

    # NOTE(review): only the first row's location is flattened; any further
    # rows get NaN in these columns after the concat.
    primary_location = json_normalize(df.primary_location[0])
    df = pd.concat([df, primary_location], axis=1)
    to_drop = ["primary_location", "is_primary", "suburb_seo_key", "capital_city_seo_key"]
    # The helper keys come from the flattened location and may be absent for
    # some sites; their absence should not abort the scrape.
    return df.drop(to_drop, axis=1, errors="ignore")


def get_index_data(soup):
    """Build the list of absolute detail-page URLs found on a listing page.

    Every ``h3`` carrying the class ``sc-bZQynM sc-iwsKbI dpKmnV`` is looked
    up, and the ``href`` of the element immediately preceding it in the parse
    (presumably the enclosing link -- confirm against the page markup) is
    appended to the site root.

    Args:
        soup: Parsed listing page exposing ``findAll``.

    Returns:
        A list of URL strings in document order. An element without an
        ``href`` contributes the site root followed by the text ``None``.
    """
    headings = soup.findAll("h3", {'class': 'sc-bZQynM sc-iwsKbI dpKmnV'})
    return [
        f"https://hipages.com.au{heading.previous_element.get('href')}"
        for heading in headings
    ]

def Main():
    """Scrape the Sydney antenna-services listing and every page it links to.

    The listing page is downloaded once, the detail URLs are extracted from
    it, and each detail page is then fetched in turn.

    Returns:
        A list with one ``get_detail_data`` result per detail URL, in the
        order the URLs appear on the listing page.
    """
    mainurl = "https://hipages.com.au/find/antenna_services/nsw/sydney"
    listing_soup = get_page(mainurl)
    detail_urls = get_index_data(listing_soup)
    return [get_detail_data(detail_url) for detail_url in detail_urls]

# Run the scrape: one DataFrame per detail page.
data = Main()

# Stack the per-page frames and renumber the rows from zero.
df = pd.concat(data)
df = df.reset_index(drop=True)

# NOTE(review): `display` is neither defined nor imported in this snippet --
# presumably the IPython/Jupyter builtin; confirm before running as a script.
display(df)

This gives you much more detailed data, by the way.