all is well
This commit is contained in:
parent
eac1bd0d13
commit
2f149503bb
53 changed files with 1543 additions and 354 deletions
|
|
@ -10,6 +10,15 @@ from constants import (
|
|||
TYPEAHEAD_URL,
|
||||
)
|
||||
from http_client import fetch_with_retry
|
||||
from listing_filters import (
|
||||
BUY_ALLOWED_BATHROOMS,
|
||||
BUY_MAX_BEDROOMS,
|
||||
BUY_MAX_FLOOR_AREA_SQFT,
|
||||
BUY_MAX_PRICE,
|
||||
BUY_MIN_BEDROOMS,
|
||||
BUY_MIN_FLOOR_AREA_SQFT,
|
||||
matches_strict_buy_listing_filter,
|
||||
)
|
||||
from spatial import PostcodeSpatialIndex
|
||||
from transform import transform_property
|
||||
|
||||
|
|
@ -22,12 +31,23 @@ outcode_cache: dict[str, str] = {}
|
|||
# Requesting index >= 1008 returns HTTP 400.
|
||||
_MAX_INDEX = 1008
|
||||
|
||||
# Property type filters for splitting overcapped searches. Each sub-query
|
||||
# gets its own 1008 cap, so we can recover listings beyond the unfiltered limit.
|
||||
_PROPERTY_TYPES = [
|
||||
"detached", "semi-detached", "terraced", "flat",
|
||||
"bungalow", "park-home", "land",
|
||||
]
|
||||
# Baseline filter parameters for buy searches, derived from the shared
# listing-filter constants. Every value is stringified.
# NOTE(review): presumably these are sent as Rightmove query parameters --
# confirm against the request-building code.
_BASE_BUY_SEARCH_PARAMS = {
    "propertyTypes": "flat",
    "minBedrooms": str(BUY_MIN_BEDROOMS),
    "maxBedrooms": str(BUY_MAX_BEDROOMS),
    # The bathroom range is the envelope of the allowed bathroom counts.
    "minBathrooms": str(min(BUY_ALLOWED_BATHROOMS)),
    "maxBathrooms": str(max(BUY_ALLOWED_BATHROOMS)),
    "minSize": str(BUY_MIN_FLOOR_AREA_SQFT),
    "maxSize": str(BUY_MAX_FLOOR_AREA_SQFT),
    # One below BUY_MAX_PRICE -- presumably to make the price bound exclusive;
    # verify against how the search endpoint treats maxPrice.
    "maxPrice": str(BUY_MAX_PRICE - 1),
}
|
||||
|
||||
|
||||
def _buy_search_params(extra_params: dict | None = None) -> dict:
    """Return a fresh copy of the base buy-search parameters.

    Args:
        extra_params: Optional entries layered on top of the base
            parameters; an entry with the same key as a base entry
            replaces it. ``None`` or an empty value leaves the base
            parameters as they are.

    Returns:
        A new dict; ``_BASE_BUY_SEARCH_PARAMS`` itself is never mutated.
    """
    merged = _BASE_BUY_SEARCH_PARAMS.copy()
    # A falsy ``extra_params`` becomes an empty update, i.e. a no-op.
    merged.update(extra_params or ())
    return merged
|
||||
|
||||
|
||||
def resolve_outcode_id(client: httpx.Client, outcode: str) -> str | None:
|
||||
|
|
@ -92,8 +112,18 @@ def _paginate(
|
|||
break
|
||||
|
||||
for prop in raw_props:
|
||||
transformed = transform_property(prop, outcode, pc_index)
|
||||
if transformed:
|
||||
try:
|
||||
transformed = transform_property(prop, outcode, pc_index)
|
||||
except Exception as exc:
|
||||
log.warning(
|
||||
"Rightmove %s/%s property %s failed to transform: %s",
|
||||
outcode,
|
||||
channel_cfg["channel"],
|
||||
prop.get("id", "?"),
|
||||
exc,
|
||||
)
|
||||
continue
|
||||
if transformed and matches_strict_buy_listing_filter(transformed):
|
||||
properties.append(transformed)
|
||||
if max_properties is not None and len(properties) >= max_properties:
|
||||
return properties, result_count
|
||||
|
|
@ -105,6 +135,15 @@ def _paginate(
|
|||
|
||||
if index >= result_count:
|
||||
break
|
||||
if index >= _MAX_INDEX:
|
||||
log.warning(
|
||||
"%s/%s: %d filtered results exceed Rightmove's %d-result page cap",
|
||||
outcode,
|
||||
channel_cfg["channel"],
|
||||
result_count,
|
||||
_MAX_INDEX,
|
||||
)
|
||||
break
|
||||
|
||||
time.sleep(DELAY_BETWEEN_PAGES)
|
||||
|
||||
|
|
@ -121,54 +160,20 @@ def search_outcode(
|
|||
) -> list[dict]:
|
||||
"""Paginate through search results for one outcode+channel. Returns transformed properties.
|
||||
|
||||
When the unfiltered result count exceeds 1008 (Rightmove's hard pagination cap),
|
||||
re-queries per property type to recover listings beyond the cap.
|
||||
Search requests set the supported Rightmove filters directly: flats,
|
||||
2-5 bedrooms, 2-3 bathrooms, 969-1830 sq ft, and asking price below £1m.
|
||||
"""
|
||||
properties, result_count = _paginate(
|
||||
client, outcode_id, outcode, channel_cfg, pc_index, max_properties=max_properties
|
||||
properties, _ = _paginate(
|
||||
client,
|
||||
outcode_id,
|
||||
outcode,
|
||||
channel_cfg,
|
||||
pc_index,
|
||||
extra_params=_buy_search_params(),
|
||||
max_properties=max_properties,
|
||||
)
|
||||
|
||||
if max_properties is not None and len(properties) >= max_properties:
|
||||
return properties[:max_properties]
|
||||
|
||||
if result_count <= _MAX_INDEX:
|
||||
return properties
|
||||
|
||||
# Hit the 1008 cap — re-search per property type to get full coverage
|
||||
ch = channel_cfg["channel"]
|
||||
log.info(
|
||||
"%s/%s: %d results exceed %d cap, splitting by property type",
|
||||
outcode, ch, result_count, _MAX_INDEX,
|
||||
)
|
||||
|
||||
all_by_id: dict[str, dict] = {p["id"]: p for p in properties}
|
||||
|
||||
for pt in _PROPERTY_TYPES:
|
||||
pt_props, _ = _paginate(
|
||||
client, outcode_id, outcode, channel_cfg, pc_index,
|
||||
extra_params={"propertyTypes": pt},
|
||||
max_properties=max_properties,
|
||||
)
|
||||
new = 0
|
||||
for p in pt_props:
|
||||
if p["id"] not in all_by_id:
|
||||
all_by_id[p["id"]] = p
|
||||
new += 1
|
||||
if (
|
||||
max_properties is not None
|
||||
and len(all_by_id) >= max_properties
|
||||
):
|
||||
break
|
||||
if new:
|
||||
log.debug("%s/%s type=%s: +%d new properties", outcode, ch, pt, new)
|
||||
if max_properties is not None and len(all_by_id) >= max_properties:
|
||||
break
|
||||
|
||||
log.info(
|
||||
"%s/%s: type split recovered %d → %d properties",
|
||||
outcode, ch, len(properties), len(all_by_id),
|
||||
)
|
||||
properties = list(all_by_id.values())
|
||||
if max_properties is not None:
|
||||
return properties[:max_properties]
|
||||
return properties
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue