import os, re, glob

# Lookup table: venue name -> street address.
# Keys are looked up by exact string match, so they must be spelled exactly
# like the "name" value found in the pages being patched.

# Award-show venues (not exclusively US, e.g. Sydney Opera House).
_AWARD_SHOW_VENUES = {
    "Dolby Theatre": "6801 Hollywood Blvd",
    "Dorothy Chandler Pavilion": "135 N Grand Ave",
    "Shrine Auditorium": "665 W Jefferson Blvd",
    "Peacock Theater": "777 Chick Hearn Ct",
    "Pasadena Civic Auditorium": "300 E Green St",
    "Royal Festival Hall": "Belvedere Rd",
    "Beverly Hilton": "9876 Wilshire Blvd",
    "Barker Hangar": "3021 Airport Ave",
    "Sydney Opera House": "Bennelong Point",
    "Crypto.com Arena": "1111 S Figueroa St",
    "Radio City Music Hall": "1260 6th Ave",
    "Barclays Center": "620 Atlantic Ave",
    "Fairmont Century Plaza": "2025 Avenue of the Stars",
    "Beverly Wilshire": "9500 Wilshire Blvd",
    "Santa Monica Beach": "1550 Pacific Coast Hwy",
    "TCL Chinese Theatre": "6925 Hollywood Blvd",
}

# Festival venues.
_FESTIVAL_VENUES = {
    "Berlinale Palast": "Marlene-Dietrich-Platz 1",
    "Palais des Festivals": "1 Bd de la Croisette",
    "Palazzo del Cinema": "Lungomare Marconi 94",
    "TIFF Bell Lightbox": "350 King St W",
    "Carolina Theatre": "309 W Morgan St",
    "Hot Docs Ted Rogers Cinema": "506 Bloor St W",
    "Arlington Resort": "239 Central Ave",
    "Tuschinski Theater": "Reguliersbreestraat 26-34",
    "Piazza Grande": "Via alla Morettina 2",
    "Lincoln Center": "10 Lincoln Center Plaza",
    "De Doelen": "Schouwburgplein 50",
    "Kursaal": "Zurriola Hiribidea 1",
    "Busan Cinema Center": "120 Suyeonggangbyeon-daero",
    "Civic Theatre": "269-287 Queen St",
    "Austin Convention Center": "500 E Cesar Chavez St",
    "Showplace ICON": "1 W Lake St",
    "Missouri Theatre": "203 S 9th St",
    "Cinematheque": "4105 N Oakland Ave",
    "Palm Springs Convention Center": "277 N Avenida Caballeros",
    "St. Anthony Main Theatre": "115 SE Main St",
    "Park City": "328 Main St",
    "Telluride": "110 N Oak St",
    "Sheffield": "15 Paternoster Row",
    "Tribeca": "50 Varick St",
}

# Single merged table used for lookups; award-show entries come first,
# followed by the festival entries, preserving the original ordering.
VENUES = {**_AWARD_SHOW_VENUES, **_FESTIVAL_VENUES}

# Shape of the JSON-LD fragment this regex recognises (optional whitespace is
# accepted around every ':' and ',' and just inside the braces):
#   "name":"VENUE","address":{"@type":"PostalAddress","addressLocality":"CITY",
#                             "addressCountry":"CC","addressRegion":"RR"}
# Capture groups: 1 = venue name, 2 = locality, 3 = country, 4 = region.
# The keys must appear in exactly this order; the missing streetAddress is
# meant to be inserted right after "@type":"PostalAddress",
_ADDRESS_FRAGMENTS = (
    r'"name"\s*:\s*"([^"]+)"\s*,\s*"address"\s*:\s*\{',
    r'\s*"@type"\s*:\s*"PostalAddress"\s*,',
    r'\s*"addressLocality"\s*:\s*"([^"]+)"\s*,',
    r'\s*"addressCountry"\s*:\s*"([^"]+)"\s*,',
    r'\s*"addressRegion"\s*:\s*"([^"]+)"\s*\}',
)
pattern = re.compile("".join(_ADDRESS_FRAGMENTS))

def add_street_addresses(content, venues, address_re):
    """Insert a ``streetAddress`` into every matching PostalAddress in *content*.

    Args:
        content: Full text of one HTML file.
        venues: Mapping of venue name -> street address.
        address_re: Compiled regex whose groups 1-4 capture, in order, the
            venue name, addressLocality, addressCountry and addressRegion.

    Returns:
        A ``(new_content, unknown)`` tuple. ``new_content`` is the rewritten
        text; ``unknown`` is the set of venue names that matched the regex but
        have no (non-empty) entry in *venues*. Those matches are left
        untouched.
    """
    unknown = set()

    def replacer(m):
        venue, city, country, region = m.group(1, 2, 3, 4)
        street = venues.get(venue)
        if not street:
            unknown.add(venue)
            return m.group(0)
        # The fragment is re-emitted in compact form, with streetAddress
        # right after @type and addressRegion placed before addressCountry.
        return (
            f'"name":"{venue}","address":{{'
            f'"@type":"PostalAddress",'
            f'"streetAddress":"{street}",'
            f'"addressLocality":"{city}",'
            f'"addressRegion":"{region}",'
            f'"addressCountry":"{country}"}}'
        )

    return address_re.sub(replacer, content), unknown


def fix_file(filepath, venues, address_re):
    """Patch one HTML file in place, adding street addresses where known.

    Args:
        filepath: Path of the HTML file to process.
        venues: Mapping of venue name -> street address.
        address_re: Compiled regex, as described in ``add_street_addresses``.

    Returns:
        A ``(changed, unknown)`` tuple: whether the file was rewritten, and
        the set of matched venue names that had no known street address.
    """
    # newline='' disables newline translation so CRLF files keep their line
    # endings; surrogateescape lets undecodable bytes round-trip unchanged
    # instead of being replaced by U+FFFD and written back corrupted.
    with open(filepath, 'r', encoding='utf-8',
              errors='surrogateescape', newline='') as f:
        content = f.read()

    # A file that already contains any streetAddress is treated as done and
    # skipped as a whole.
    if '"streetAddress"' in content:
        return False, set()
    # Cheap pre-filter. It must not be stricter than the regex, which allows
    # whitespace between "@type", ':' and "PostalAddress".
    if '"PostalAddress"' not in content:
        return False, set()

    new_content, unknown = add_street_addresses(content, venues, address_re)
    if new_content == content:
        return False, unknown

    with open(filepath, 'w', encoding='utf-8',
              errors='surrogateescape', newline='') as f:
        f.write(new_content)
    return True, unknown


fixed = 0  # number of files actually rewritten
missing_venue = set()  # matched venue names with no entry in VENUES

for filepath in sorted(glob.glob('**/*.html', recursive=True)):
    changed, unknown = fix_file(filepath, VENUES, pattern)
    missing_venue |= unknown
    if changed:
        fixed += 1

print(f"Fixed {fixed} files")
if missing_venue:
    print(f"Missing venues: {missing_venue}")
