perfect-postcode/finder/rightmove.py
Andras Schmelczer 2f149503bb
Some checks failed
Build and publish Docker image / build-and-push (push) Failing after 7m0s
CI / Check (push) Failing after 7m9s
all is well
2026-05-17 17:20:19 +01:00

179 lines
5.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import logging
import time
import httpx
from constants import (
PAGE_SIZE,
DELAY_BETWEEN_PAGES,
SEARCH_URL,
TYPEAHEAD_URL,
)
from http_client import fetch_with_retry
from listing_filters import (
BUY_ALLOWED_BATHROOMS,
BUY_MAX_BEDROOMS,
BUY_MAX_FLOOR_AREA_SQFT,
BUY_MAX_PRICE,
BUY_MIN_BEDROOMS,
BUY_MIN_FLOOR_AREA_SQFT,
matches_strict_buy_listing_filter,
)
from spatial import PostcodeSpatialIndex
from transform import transform_property
log: logging.Logger = logging.getLogger("rightmove")

# Cache of outcode -> Rightmove internal location ID, filled from typeahead lookups.
outcode_cache: dict[str, str] = {}

# Rightmove hard-caps pagination at index 1008 (42 pages x 24 results).
# Requesting index >= 1008 returns HTTP 400.
_MAX_INDEX: int = 1008

# Query parameters sent with every buy search. All values are strings, and the
# bounds come from the BUY_* constants imported from listing_filters.
_BASE_BUY_SEARCH_PARAMS: dict[str, str] = {
    "propertyTypes": "flat",
    "minBedrooms": str(BUY_MIN_BEDROOMS),
    "maxBedrooms": str(BUY_MAX_BEDROOMS),
    "minBathrooms": str(min(BUY_ALLOWED_BATHROOMS)),
    "maxBathrooms": str(max(BUY_ALLOWED_BATHROOMS)),
    "minSize": str(BUY_MIN_FLOOR_AREA_SQFT),
    "maxSize": str(BUY_MAX_FLOOR_AREA_SQFT),
    # One below BUY_MAX_PRICE is sent as the upper bound.
    "maxPrice": str(BUY_MAX_PRICE - 1),
}
def _buy_search_params(extra_params: dict | None = None) -> dict:
    """Return a fresh copy of the base buy-search parameters.

    Entries in *extra_params* (when given and non-empty) are laid over the
    defaults, replacing any default with the same key. The module-level
    ``_BASE_BUY_SEARCH_PARAMS`` mapping itself is never modified.
    """
    merged = _BASE_BUY_SEARCH_PARAMS.copy()
    merged.update(extra_params or {})
    return merged
def resolve_outcode_id(client: httpx.Client, outcode: str) -> str | None:
    """Resolve *outcode* to Rightmove's internal location ID.

    Successful lookups are memoised in ``outcode_cache``, so each outcode
    triggers at most one successful typeahead request. Failed lookups are
    not cached and will be retried on the next call.

    Returns the ID as a string, or ``None`` when the request yields nothing
    or no match is an exact ``OUTCODE`` entry for *outcode*.
    """
    try:
        return outcode_cache[outcode]
    except KeyError:
        pass  # not resolved yet; fall through to the typeahead request

    query = {"query": outcode, "limit": "10", "exclude": "STREET"}
    response = fetch_with_retry(client, TYPEAHEAD_URL, query)
    if not response:
        return None

    for candidate in response.get("matches", []):
        is_exact_outcode = (
            candidate.get("type") == "OUTCODE"
            and candidate.get("displayName") == outcode
        )
        if not is_exact_outcode:
            continue
        resolved = str(candidate["id"])
        outcode_cache[outcode] = resolved
        return resolved

    log.debug("Outcode %s not found in typeahead results", outcode)
    return None
def _parse_result_count(raw: object) -> int:
    """Convert a ``resultCount`` payload value (e.g. ``"1,234"``) to an int.

    Accepts strings with thousands separators as well as plain numbers;
    ``None`` is treated as zero. Unparsable text still raises ``ValueError``.
    """
    if raw is None:
        return 0
    return int(str(raw).replace(",", ""))


def _paginate(
    client: httpx.Client,
    outcode_id: str,
    outcode: str,
    channel_cfg: dict,
    pc_index: PostcodeSpatialIndex,
    extra_params: dict | None = None,
    max_properties: int | None = None,
) -> tuple[list[dict], int]:
    """Page through search results for one outcode and channel.

    Each page is fetched with ``fetch_with_retry``. Every raw property is
    passed through ``transform_property`` and kept only if the transformed
    value is truthy and passes ``matches_strict_buy_listing_filter``.
    Properties that fail to transform are logged and skipped.

    Args:
        client: HTTP client handed to ``fetch_with_retry``.
        outcode_id: Rightmove location ID used in ``locationIdentifier``.
        outcode: Outcode string, used for transformation and log messages.
        channel_cfg: Must provide ``sortType``, ``channel`` and
            ``transactionType``.
        pc_index: Spatial index forwarded to ``transform_property``.
        extra_params: Optional query parameters laid over the base query.
        max_properties: Stop as soon as this many properties were collected.

    Returns:
        ``(properties, result_count)`` where ``result_count`` is the
        ``resultCount`` reported by the last non-empty page that was fetched
        (0 if there was none).

    Pagination stops on a failed fetch, an empty page, after the reported
    result count is reached, or at Rightmove's ``_MAX_INDEX`` cap.
    """
    properties: list[dict] = []
    index = 0
    result_count = 0

    while True:
        params = {
            "useLocationIdentifier": "true",
            "locationIdentifier": f"OUTCODE^{outcode_id}",
            "index": str(index),
            "sortType": channel_cfg["sortType"],
            "channel": channel_cfg["channel"],
            "transactionType": channel_cfg["transactionType"],
        }
        if extra_params:
            params.update(extra_params)

        data = fetch_with_retry(client, SEARCH_URL, params)
        if not data:
            log.warning(
                "Failed to fetch index %d for %s/%s",
                index,
                outcode,
                channel_cfg["channel"],
            )
            break

        raw_props = data.get("properties", [])
        if not raw_props:
            break

        # Read the count before touching the properties so that the early
        # max_properties return below reports this page's count rather than
        # a stale value from the previous page (or 0 on the first page).
        result_count = _parse_result_count(data.get("resultCount", "0"))

        for prop in raw_props:
            try:
                transformed = transform_property(prop, outcode, pc_index)
            except Exception as exc:
                # Best effort: one malformed listing must not abort the page.
                log.warning(
                    "Rightmove %s/%s property %s failed to transform: %s",
                    outcode,
                    channel_cfg["channel"],
                    prop.get("id", "?"),
                    exc,
                )
                continue
            if transformed and matches_strict_buy_listing_filter(transformed):
                properties.append(transformed)
                if max_properties is not None and len(properties) >= max_properties:
                    return properties, result_count

        # Check if there are more pages
        index += PAGE_SIZE
        if index >= result_count:
            break
        if index >= _MAX_INDEX:
            log.warning(
                "%s/%s: %d filtered results exceed Rightmove's %d-result page cap",
                outcode,
                channel_cfg["channel"],
                result_count,
                _MAX_INDEX,
            )
            break
        time.sleep(DELAY_BETWEEN_PAGES)

    return properties, result_count
def search_outcode(
    client: httpx.Client,
    outcode_id: str,
    outcode: str,
    channel_cfg: dict,
    pc_index: PostcodeSpatialIndex,
    max_properties: int | None = None,
) -> list[dict]:
    """Collect the transformed properties for one outcode and channel.

    The request carries the buy-search filters from ``_buy_search_params()``
    (property type, bedroom and bathroom range, floor-area range and maximum
    price), so Rightmove applies them server-side. When *max_properties* is
    given, at most that many properties are returned.
    """
    found, _total = _paginate(
        client=client,
        outcode_id=outcode_id,
        outcode=outcode,
        channel_cfg=channel_cfg,
        pc_index=pc_index,
        extra_params=_buy_search_params(),
        max_properties=max_properties,
    )
    if max_properties is None or len(found) < max_properties:
        return found
    return found[:max_properties]