perfect-postcode/finder/rightmove.py
Andras Schmelczer 6cc7288126
Some checks failed
CI / Check (push) Has been cancelled
Build and publish Docker image / build-and-push (push) Has been cancelled
All good
2026-05-18 21:20:10 +01:00

143 lines
4 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import logging
import time
import httpx
from constants import (
PAGE_SIZE,
DELAY_BETWEEN_PAGES,
SEARCH_URL,
TYPEAHEAD_URL,
)
from http_client import fetch_with_retry
from spatial import PostcodeSpatialIndex
from transform import transform_property
log = logging.getLogger("rightmove")
# Outcode ID cache (Rightmove typeahead → internal ID)
outcode_cache: dict[str, str] = {}
# Rightmove hard-caps pagination at index 1008 (42 pages × 24 results).
# Requesting index >= 1008 returns HTTP 400.
_MAX_INDEX = 1008
def resolve_outcode_id(client: httpx.Client, outcode: str) -> str | None:
"""Look up Rightmove's internal ID for an outcode via typeahead API."""
if outcode in outcode_cache:
return outcode_cache[outcode]
data = fetch_with_retry(
client, TYPEAHEAD_URL, {"query": outcode, "limit": "10", "exclude": "STREET"}
)
if not data:
return None
for match in data.get("matches", []):
if match.get("type") == "OUTCODE" and match.get("displayName") == outcode:
rid = str(match["id"])
outcode_cache[outcode] = rid
return rid
log.debug("Outcode %s not found in typeahead results", outcode)
return None
def _paginate(
client: httpx.Client,
outcode_id: str,
outcode: str,
channel_cfg: dict,
pc_index: PostcodeSpatialIndex,
max_properties: int | None = None,
) -> tuple[list[dict], int]:
"""Paginate through search results. Returns (properties, result_count)."""
properties = []
index = 0
result_count = 0
while True:
params = {
"useLocationIdentifier": "true",
"locationIdentifier": f"OUTCODE^{outcode_id}",
"index": str(index),
"sortType": channel_cfg["sortType"],
"channel": channel_cfg["channel"],
"transactionType": channel_cfg["transactionType"],
}
data = fetch_with_retry(client, SEARCH_URL, params)
if not data:
log.warning(
"Failed to fetch index %d for %s/%s",
index,
outcode,
channel_cfg["channel"],
)
break
raw_props = data.get("properties", [])
if not raw_props:
break
for prop in raw_props:
try:
transformed = transform_property(prop, outcode, pc_index)
except Exception as exc:
log.warning(
"Rightmove %s/%s property %s failed to transform: %s",
outcode,
channel_cfg["channel"],
prop.get("id", "?"),
exc,
)
continue
if transformed:
properties.append(transformed)
if max_properties is not None and len(properties) >= max_properties:
return properties, result_count
# Check if there are more pages
result_count_str = data.get("resultCount", "0")
result_count = int(result_count_str.replace(",", ""))
index += PAGE_SIZE
if index >= result_count:
break
if index >= _MAX_INDEX:
log.warning(
"%s/%s: %d results exceed Rightmove's %d-result page cap",
outcode,
channel_cfg["channel"],
result_count,
_MAX_INDEX,
)
break
time.sleep(DELAY_BETWEEN_PAGES)
return properties, result_count
def search_outcode(
client: httpx.Client,
outcode_id: str,
outcode: str,
channel_cfg: dict,
pc_index: PostcodeSpatialIndex,
max_properties: int | None = None,
) -> list[dict]:
"""Paginate through unfiltered sale results for one outcode+channel."""
properties, _ = _paginate(
client,
outcode_id,
outcode,
channel_cfg,
pc_index,
max_properties=max_properties,
)
if max_properties is not None and len(properties) >= max_properties:
return properties[:max_properties]
return properties