Article publié précédemment :
« Convertir en CSV le PDF de la liste des magasins membres Go To EAT de la préfecture de Niigata » https://qiita.com/barobaro/items/74fb5bdedbf1ae7267a0
Le PDF est introuvable ; la liste est donc créée par scraping (extraction des données depuis le site web).
import re
import time
import requests
from bs4 import BeautifulSoup
# Scrape the shop listing at niigata-gte.com page by page and collect one dict
# per shop in `result`. Optional fields (icon columns, home page, latitude /
# longitude) are only present for the shops whose markup provides them.
url = "https://niigata-gte.com/shop/"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko"
}
# Both patterns are compiled once here rather than on every loop iteration.
# Captures the two comma-separated values between "@" and the ",<zoom>z" part
# of a Google Maps link; they are stored below as latitude / longitude.
gps_pattern = re.compile(r"(?<=@)(.+?),(.+?)(?=,\d{1,2}z)")
# Raw string so that "\d" is a regex class rather than an invalid string
# escape; the dots are escaped so they only match literal dots.
next_page_pattern = re.compile(r"https://niigata-gte\.com/shop/page/\d+/")
result = []
while True:
    # The timeout keeps a stalled connection from hanging the scrape forever.
    r = requests.get(url, headers=headers, timeout=30)
    r.raise_for_status()
    soup = BeautifulSoup(r.content, "html.parser")
    for shop in soup.select("div#result > div.cont"):
        data = {}
        # Keep only the text after the first ":" of the "div.no" element.
        data["Code revendeur"] = (
            shop.select_one("div.no").get_text(strip=True).split(":", 1)[-1]
        )
        span = shop.select("div.tag > span")
        data["surface"] = span[0].get_text(strip=True)
        data["Genre"] = span[1].get_text(strip=True)
        if len(span) > 2:
            # Each icon in the optional third span becomes its own column,
            # named after the image's alt text and marked with "○".
            temp = {i.get("alt"): "○" for i in span[2].select("img")}
            data.update(temp)
        h4 = shop.select_one("h4")
        data["Nom du magasin"] = h4.get_text(strip=True)
        if h4.select_one("a"):
            link = h4.a.get("href")
            if link:
                data["page d'accueil"] = link
        p_add = shop.select_one("p.add").contents
        # The first child is text: postal code, whitespace, then the address.
        postcode, address = p_add[0].split(sep=None, maxsplit=1)
        # Extract latitude / longitude from the Google Maps link.
        gps = gps_pattern.search(p_add[1].a.get("href"))
        if gps:
            data["latitude"] = float(gps.group(1))
            data["longitude"] = float(gps.group(2))
        data["Code postal"] = postcode.strip()
        data["emplacement"] = address.strip()
        data["numéro de téléphone"] = shop.select_one("p.tel").get_text(strip=True)
        result.append(data)
    # The next page's URL is embedded in the onclick attribute of the link
    # inside "li.next".
    tag = soup.select_one("li.next")
    onclick = tag.a.get("onclick") if tag and tag.a else None
    m = next_page_pattern.search(onclick) if onclick else None
    if not m:
        # Stop when there is no next page. Also stop when the pager is present
        # but yields no URL: continuing would re-fetch the same page forever
        # and append its rows again on every pass.
        break
    url = m.group(0)
    # Pause between page requests to avoid hammering the server.
    time.sleep(3)
# Bare expression: displays the collected rows when it ends a notebook / REPL
# cell; it has no effect when the file runs as a plain script.
result
import pandas as pd
# Tabulate the scraped rows; keys missing from a row become empty cells.
df = pd.DataFrame(result)
# Number the rows from 1 instead of pandas' default 0.
df.index = pd.RangeIndex(start=1, stop=len(df) + 1)
# "utf_8_sig" writes a UTF-8 byte-order mark at the start of the file.
df.to_csv(path_or_buf="niigata.csv", encoding="utf_8_sig")