all is well
Some checks failed
Build and publish Docker image / build-and-push (push) Failing after 7m0s
CI / Check (push) Failing after 7m9s

This commit is contained in:
Andras Schmelczer 2026-05-17 17:20:19 +01:00
parent eac1bd0d13
commit 2f149503bb
53 changed files with 1543 additions and 354 deletions

View file

@ -10,6 +10,15 @@ from constants import (
TYPEAHEAD_URL,
)
from http_client import fetch_with_retry
from listing_filters import (
BUY_ALLOWED_BATHROOMS,
BUY_MAX_BEDROOMS,
BUY_MAX_FLOOR_AREA_SQFT,
BUY_MAX_PRICE,
BUY_MIN_BEDROOMS,
BUY_MIN_FLOOR_AREA_SQFT,
matches_strict_buy_listing_filter,
)
from spatial import PostcodeSpatialIndex
from transform import transform_property
@ -22,12 +31,23 @@ outcode_cache: dict[str, str] = {}
# Requesting index >= 1008 returns HTTP 400.
_MAX_INDEX = 1008
# Property type filters for splitting overcapped searches. Each sub-query
# gets its own 1008 cap, so we can recover listings beyond the unfiltered limit.
_PROPERTY_TYPES = [
"detached", "semi-detached", "terraced", "flat",
"bungalow", "park-home", "land",
]
_BASE_BUY_SEARCH_PARAMS = {
"propertyTypes": "flat",
"minBedrooms": str(BUY_MIN_BEDROOMS),
"maxBedrooms": str(BUY_MAX_BEDROOMS),
"minBathrooms": str(min(BUY_ALLOWED_BATHROOMS)),
"maxBathrooms": str(max(BUY_ALLOWED_BATHROOMS)),
"minSize": str(BUY_MIN_FLOOR_AREA_SQFT),
"maxSize": str(BUY_MAX_FLOOR_AREA_SQFT),
"maxPrice": str(BUY_MAX_PRICE - 1),
}
def _buy_search_params(extra_params: dict | None = None) -> dict:
params = dict(_BASE_BUY_SEARCH_PARAMS)
if extra_params:
params.update(extra_params)
return params
def resolve_outcode_id(client: httpx.Client, outcode: str) -> str | None:
@ -92,8 +112,18 @@ def _paginate(
break
for prop in raw_props:
transformed = transform_property(prop, outcode, pc_index)
if transformed:
try:
transformed = transform_property(prop, outcode, pc_index)
except Exception as exc:
log.warning(
"Rightmove %s/%s property %s failed to transform: %s",
outcode,
channel_cfg["channel"],
prop.get("id", "?"),
exc,
)
continue
if transformed and matches_strict_buy_listing_filter(transformed):
properties.append(transformed)
if max_properties is not None and len(properties) >= max_properties:
return properties, result_count
@ -105,6 +135,15 @@ def _paginate(
if index >= result_count:
break
if index >= _MAX_INDEX:
log.warning(
"%s/%s: %d filtered results exceed Rightmove's %d-result page cap",
outcode,
channel_cfg["channel"],
result_count,
_MAX_INDEX,
)
break
time.sleep(DELAY_BETWEEN_PAGES)
@ -121,54 +160,20 @@ def search_outcode(
) -> list[dict]:
"""Paginate through search results for one outcode+channel. Returns transformed properties.
When the unfiltered result count exceeds 1008 (Rightmove's hard pagination cap),
re-queries per property type to recover listings beyond the cap.
Search requests set the supported Rightmove filters directly: flats,
2-5 bedrooms, 2-3 bathrooms, 969-1830 sq ft, and asking price below £1m.
"""
properties, result_count = _paginate(
client, outcode_id, outcode, channel_cfg, pc_index, max_properties=max_properties
properties, _ = _paginate(
client,
outcode_id,
outcode,
channel_cfg,
pc_index,
extra_params=_buy_search_params(),
max_properties=max_properties,
)
if max_properties is not None and len(properties) >= max_properties:
return properties[:max_properties]
if result_count <= _MAX_INDEX:
return properties
# Hit the 1008 cap — re-search per property type to get full coverage
ch = channel_cfg["channel"]
log.info(
"%s/%s: %d results exceed %d cap, splitting by property type",
outcode, ch, result_count, _MAX_INDEX,
)
all_by_id: dict[str, dict] = {p["id"]: p for p in properties}
for pt in _PROPERTY_TYPES:
pt_props, _ = _paginate(
client, outcode_id, outcode, channel_cfg, pc_index,
extra_params={"propertyTypes": pt},
max_properties=max_properties,
)
new = 0
for p in pt_props:
if p["id"] not in all_by_id:
all_by_id[p["id"]] = p
new += 1
if (
max_properties is not None
and len(all_by_id) >= max_properties
):
break
if new:
log.debug("%s/%s type=%s: +%d new properties", outcode, ch, pt, new)
if max_properties is not None and len(all_by_id) >= max_properties:
break
log.info(
"%s/%s: type split recovered %d%d properties",
outcode, ch, len(properties), len(all_by_id),
)
properties = list(all_by_id.values())
if max_properties is not None:
return properties[:max_properties]
return properties