fmt
Some checks failed
CI / Check (push) Failing after 6m52s
Build and publish Docker image / build-and-push (push) Failing after 16m5s

This commit is contained in:
Andras Schmelczer 2026-05-17 19:48:55 +01:00
parent 2f149503bb
commit 6ea544a0f6
10 changed files with 144 additions and 60 deletions

View file

@ -33,6 +33,10 @@ class CookiesExpiredError(Exception):
"""Raised when home.co.uk returns 403, indicating cookies need refresh."""
class PaginationError(Exception):
    """Signal that paging through home.co.uk results could not be completed.

    Carries a human-readable message describing which outcode/page failed
    and why; it adds no behaviour on top of ``Exception``.
    """
# Channel mapping: internal name → URL path segment
HOMECOUK_URL_SEGMENT = "for-sale"
@ -171,6 +175,25 @@ def fetch_page(
return None
def _coerce_positive_int(value) -> int | None:
    """Parse *value* via ``parse_int_value`` and keep it only if it is positive.

    Returns:
        The parsed number, or ``None`` when ``parse_int_value`` yields ``None``
        or a result that is less than or equal to zero.
    """
    number = parse_int_value(value)
    # Zero and negatives are treated the same as "no value".
    is_usable = number is not None and not number <= 0
    return number if is_usable else None
def _property_identity(prop: dict, page: int, index: int) -> str:
for key in ("listing_id", "property_id", "id"):
value = prop.get(key)
if value:
return f"{key}:{value}"
return (
f"page:{page}:index:{index}:"
f"{prop.get('display_address') or prop.get('address') or ''}:"
f"{prop.get('price') or prop.get('latest_price') or ''}"
)
def parse_floor_area(description: str | None) -> float | None:
"""Try to extract floor area from description text like '789 sq.ft.' or '73 sq.m.'."""
if not description:
@ -363,6 +386,9 @@ def search_outcode(
url = f"{HOMECOUK_API_BASE}/{url_segment}/{outcode.lower()}/"
properties = []
page = 1
last_page: int | None = None
total_results: int | None = None
seen_ids: set[str] = set()
while True:
params = {
@ -379,12 +405,32 @@ def search_outcode(
data = fetch_page(client, url, params)
if not data:
break
raise PaginationError(f"home.co.uk {outcode} page {page} failed to load")
pagination = data.get("pagination", {}) or {}
if last_page is None:
last_page = _coerce_positive_int(pagination.get("last_page"))
if total_results is None:
total_results = _coerce_positive_int(pagination.get("total"))
raw_props = data.get("properties", [])
if not raw_props:
if total_results and page <= (last_page or page):
raise PaginationError(
f"home.co.uk {outcode} page {page} returned no properties "
f"before the advertised end"
)
break
page_ids = {
_property_identity(prop, page, idx) for idx, prop in enumerate(raw_props)
}
if page_ids and page_ids.issubset(seen_ids):
raise PaginationError(
f"home.co.uk {outcode} page {page} repeated previously seen results"
)
seen_ids.update(page_ids)
for prop in raw_props:
try:
transformed = transform_property(prop, pc_index)
@ -401,10 +447,12 @@ def search_outcode(
if max_properties is not None and len(properties) >= max_properties:
return properties
# Check pagination
pagination = data.get("pagination", {})
last_page = pagination.get("last_page", 1)
if page >= last_page:
if last_page is not None:
if page >= last_page:
break
elif total_results is not None and len(seen_ids) >= total_results:
break
elif len(raw_props) < HOMECOUK_PER_PAGE:
break
page += 1