68 lines
2.1 KiB
Python
68 lines
2.1 KiB
Python
"""Fetch Rightmove outcode→ID mapping for all outcodes in postcode.parquet."""
|
|
|
|
import argparse
|
|
import json
|
|
from pathlib import Path
|
|
|
|
import httpx
|
|
import polars as pl
|
|
|
|
|
|
# Rightmove typeahead endpoint; fetch_outcode_ids sends one GET per outcode here.
TYPEAHEAD_URL = "https://los.rightmove.co.uk/typeahead"
|
|
|
|
|
|
def _find_outcode_id(data: dict, outcode: str) -> "str | None":
    """Return the ID of the OUTCODE match for *outcode* in a typeahead response.

    Only entries under the response's "matches" key whose type is "OUTCODE"
    and whose display name equals *outcode* (ignoring case and spaces) are
    accepted. Returns None when no such entry exists.
    """
    wanted = outcode.upper().replace(" ", "")
    for match in data.get("matches", []):
        if (
            match["type"] == "OUTCODE"
            and match["displayName"].upper().replace(" ", "") == wanted
        ):
            return str(match["id"])
    return None


def fetch_outcode_ids(postcodes_path: Path, output: Path) -> None:
    """Build the outcode -> Rightmove ID mapping and write it to *output* as JSON.

    The outcode of each postcode is the text before its first space. Every
    distinct outcode is looked up once via the typeahead endpoint. Outcodes
    with no exact OUTCODE match, or whose request fails, are collected as
    "missed" and reported at the end instead of aborting the run.

    Args:
        postcodes_path: Parquet file containing a "Postcode" column.
        output: Destination JSON file; parent directories are created.
    """
    df = pl.read_parquet(postcodes_path, columns=["Postcode"])
    first_parts = df["Postcode"].str.split(" ").list.first().to_list()
    # Drop empty strings and None (presumably what null postcodes become —
    # TODO confirm): a None mixed with str would make sorted() raise TypeError.
    outcodes = sorted({oc for oc in first_parts if oc})
    print(f"Querying Rightmove typeahead for {len(outcodes)} outcodes...")

    mapping: dict[str, str] = {}
    missed: list[str] = []

    # The context manager closes the connection pool even on Ctrl-C or an
    # unexpected error part-way through the run.
    with httpx.Client(timeout=10) as client:
        for done, oc in enumerate(outcodes, start=1):
            try:
                resp = client.get(TYPEAHEAD_URL, params={"query": oc, "limit": "5"})
                # Surface HTTP errors explicitly rather than as a confusing
                # JSON-decoding failure on an error page.
                resp.raise_for_status()
                outcode_id = _find_outcode_id(resp.json(), oc)
            except Exception as e:
                # Deliberately best-effort: one bad outcode must not stop the run.
                missed.append(oc)
                print(f"  Error for {oc}: {e}")
            else:
                if outcode_id is None:
                    missed.append(oc)
                else:
                    mapping[oc] = outcode_id

            if done % 200 == 0:
                print(f"  {done}/{len(outcodes)} done ({len(mapping)} found)")

    output.parent.mkdir(parents=True, exist_ok=True)
    with open(output, "w", encoding="utf-8") as f:
        json.dump(mapping, f, sort_keys=True)

    print(f"Wrote {output} ({len(mapping)} outcodes, {len(missed)} missed)")
    if missed:
        print(f"Missed: {missed}")
|
|
|
|
|
|
def main() -> None:
    """Parse the command-line options and run the outcode ID fetch."""
    cli = argparse.ArgumentParser(description="Fetch Rightmove outcode ID mapping")
    # Both options are required filesystem paths; they differ only in flag and help.
    path_options = (
        ("--postcodes", "postcode.parquet path"),
        ("--output", "Output JSON file path"),
    )
    for flag, help_text in path_options:
        cli.add_argument(flag, type=Path, required=True, help=help_text)

    opts = cli.parse_args()
    fetch_outcode_ids(opts.postcodes, opts.output)
|
|
|
|
|
|
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|