perfect-postcode/finder/rightmove.py
2026-03-15 21:22:28 +00:00

93 lines
2.5 KiB
Python

import logging
import time
import httpx
from constants import (
PAGE_SIZE,
DELAY_BETWEEN_PAGES,
SEARCH_URL,
TYPEAHEAD_URL,
)
from http_client import fetch_with_retry
from spatial import PostcodeSpatialIndex
from transform import transform_property
# Module-level logger; everything in this module logs under the name "rightmove".
log = logging.getLogger("rightmove")
# Outcode ID cache (Rightmove typeahead → internal ID).
# Keys are outcode strings exactly as passed to resolve_outcode_id(); values are
# the stringified IDs it resolved. Only successful lookups are stored, so an
# outcode that could not be resolved is looked up again on the next call.
outcode_cache: dict[str, str] = {}
def resolve_outcode_id(client: httpx.Client, outcode: str) -> str | None:
    """Resolve an outcode to Rightmove's internal ID via the typeahead API.

    Outcodes resolved earlier are answered from ``outcode_cache`` without a
    request. Otherwise the typeahead endpoint is queried and the first match
    whose ``type`` is ``"OUTCODE"`` and whose ``displayName`` equals
    ``outcode`` exactly is taken; its ``id`` is stringified, cached and
    returned.

    Args:
        client: HTTP client handed through to ``fetch_with_retry``.
        outcode: Outcode to look up; compared verbatim against ``displayName``.

    Returns:
        The ID as a string, or ``None`` when the fetch yields nothing or no
        match qualifies. Failed lookups are not cached.
    """
    try:
        return outcode_cache[outcode]
    except KeyError:
        pass  # not resolved yet; fall through to the typeahead request

    query = {"query": outcode, "limit": "10", "exclude": "STREET"}
    payload = fetch_with_retry(client, TYPEAHEAD_URL, query)
    if not payload:
        return None

    for candidate in payload.get("matches", []):
        if candidate.get("type") != "OUTCODE":
            continue
        if candidate.get("displayName") != outcode:
            continue
        resolved = str(candidate["id"])
        outcode_cache[outcode] = resolved
        return resolved

    log.debug("Outcode %s not found in typeahead results", outcode)
    return None
def _parse_result_count(raw: object) -> int | None:
    """Convert a ``resultCount`` value to an int; return ``None`` if unusable.

    Accepts a comma-grouped string such as ``"1,234"`` as well as a plain
    integer, so a different JSON type in the response does not raise.
    """
    try:
        return int(str(raw).replace(",", ""))
    except ValueError:
        return None


def search_outcode(
    client: httpx.Client,
    outcode_id: str,
    outcode: str,
    channel_cfg: dict,
    pc_index: PostcodeSpatialIndex,
) -> list[dict]:
    """Paginate through search results for one outcode+channel.

    Pages are requested starting at index 0 and advancing by ``PAGE_SIZE``,
    sleeping ``DELAY_BETWEEN_PAGES`` seconds between requests. Pagination
    stops when a fetch fails, a page contains no properties, the next index
    reaches the reported ``resultCount``, or ``resultCount`` cannot be parsed.
    In every case the properties collected so far are returned.

    Args:
        client: HTTP client handed through to ``fetch_with_retry``.
        outcode_id: Rightmove location ID, sent as ``OUTCODE^<id>``.
        outcode: Outcode string, passed to ``transform_property`` and used in
            log messages.
        channel_cfg: Mapping that must provide the ``"sortType"``,
            ``"channel"`` and ``"transactionType"`` request parameters.
        pc_index: Spatial index passed through to ``transform_property``.

    Returns:
        The transformed properties; entries for which ``transform_property``
        returned a falsy value are dropped.
    """
    properties = []
    index = 0
    while True:
        params = {
            "useLocationIdentifier": "true",
            "locationIdentifier": f"OUTCODE^{outcode_id}",
            "index": str(index),
            "sortType": channel_cfg["sortType"],
            "channel": channel_cfg["channel"],
            "transactionType": channel_cfg["transactionType"],
        }
        data = fetch_with_retry(client, SEARCH_URL, params)
        if not data:
            log.warning(
                "Failed to fetch index %d for %s/%s",
                index,
                outcode,
                channel_cfg["channel"],
            )
            break

        raw_props = data.get("properties", [])
        if not raw_props:
            break

        for prop in raw_props:
            transformed = transform_property(prop, outcode, pc_index)
            if transformed:
                properties.append(transformed)

        # Check if there are more pages. A missing count is treated as "0",
        # which ends pagination after this page.
        raw_count = data.get("resultCount", "0")
        result_count = _parse_result_count(raw_count)
        if result_count is None:
            # Keep what was already collected instead of raising and losing
            # it, mirroring the failed-fetch handling above.
            log.warning(
                "Unparseable resultCount %r at index %d for %s/%s",
                raw_count,
                index,
                outcode,
                channel_cfg["channel"],
            )
            break

        index += PAGE_SIZE
        if index >= result_count:
            break
        time.sleep(DELAY_BETWEEN_PAGES)
    return properties