"""Fetch Rightmove outcode→ID mapping for all outcodes in postcode.parquet.""" import argparse import json from pathlib import Path import httpx import polars as pl TYPEAHEAD_URL = "https://los.rightmove.co.uk/typeahead" def fetch_outcode_ids(postcodes_path: Path, output: Path) -> None: df = pl.read_parquet(postcodes_path, columns=["Postcode"]) outcodes = sorted(set(df["Postcode"].str.split(" ").list.first().to_list()) - {""}) print(f"Querying Rightmove typeahead for {len(outcodes)} outcodes...") mapping: dict[str, str] = {} missed: list[str] = [] client = httpx.Client(timeout=10) for i, oc in enumerate(outcodes): try: resp = client.get(TYPEAHEAD_URL, params={"query": oc, "limit": "5"}) data = resp.json() found = False for m in data.get("matches", []): if m["type"] == "OUTCODE" and m["displayName"].upper().replace( " ", "" ) == oc.upper().replace(" ", ""): mapping[oc] = str(m["id"]) found = True break if not found: missed.append(oc) except Exception as e: missed.append(oc) print(f" Error for {oc}: {e}") if (i + 1) % 200 == 0: print(f" {i + 1}/{len(outcodes)} done ({len(mapping)} found)") client.close() output.parent.mkdir(parents=True, exist_ok=True) with open(output, "w") as f: json.dump(mapping, f, sort_keys=True) print(f"Wrote {output} ({len(mapping)} outcodes, {len(missed)} missed)") if missed: print(f"Missed: {missed}") def main() -> None: parser = argparse.ArgumentParser(description="Fetch Rightmove outcode ID mapping") parser.add_argument( "--postcodes", type=Path, required=True, help="postcode.parquet path" ) parser.add_argument( "--output", type=Path, required=True, help="Output JSON file path" ) args = parser.parse_args() fetch_outcode_ids(args.postcodes, args.output) if __name__ == "__main__": main()