fmt
This commit is contained in:
parent
2f149503bb
commit
6ea544a0f6
10 changed files with 144 additions and 60 deletions
|
|
@ -33,6 +33,10 @@ class CookiesExpiredError(Exception):
|
|||
"""Raised when home.co.uk returns 403, indicating cookies need refresh."""
|
||||
|
||||
|
||||
class PaginationError(Exception):
    """Signal that paging through home.co.uk search results was aborted.

    Raised by the search loop when a page fails to load, when a page comes
    back with no properties before the advertised end of the result set, or
    when a page contains only results that were already seen.
    """
|
||||
|
||||
|
||||
# Channel mapping: internal name → URL path segment
|
||||
HOMECOUK_URL_SEGMENT = "for-sale"
|
||||
|
||||
|
|
@ -171,6 +175,25 @@ def fetch_page(
|
|||
return None
|
||||
|
||||
|
||||
def _coerce_positive_int(value) -> int | None:
    """Return *value* parsed by ``parse_int_value`` if it is strictly positive.

    Args:
        value: Raw value to parse; handed unchanged to ``parse_int_value``.

    Returns:
        The parsed number when it is greater than zero, otherwise ``None``
        (also when ``parse_int_value`` itself yields ``None``).
    """
    number = parse_int_value(value)
    # Zero and negatives are treated the same as "not available".
    return None if (number is None or number <= 0) else number
|
||||
|
||||
|
||||
def _property_identity(prop: dict, page: int, index: int) -> str:
|
||||
for key in ("listing_id", "property_id", "id"):
|
||||
value = prop.get(key)
|
||||
if value:
|
||||
return f"{key}:{value}"
|
||||
return (
|
||||
f"page:{page}:index:{index}:"
|
||||
f"{prop.get('display_address') or prop.get('address') or ''}:"
|
||||
f"{prop.get('price') or prop.get('latest_price') or ''}"
|
||||
)
|
||||
|
||||
|
||||
def parse_floor_area(description: str | None) -> float | None:
|
||||
"""Try to extract floor area from description text like '789 sq.ft.' or '73 sq.m.'."""
|
||||
if not description:
|
||||
|
|
@ -363,6 +386,9 @@ def search_outcode(
|
|||
url = f"{HOMECOUK_API_BASE}/{url_segment}/{outcode.lower()}/"
|
||||
properties = []
|
||||
page = 1
|
||||
last_page: int | None = None
|
||||
total_results: int | None = None
|
||||
seen_ids: set[str] = set()
|
||||
|
||||
while True:
|
||||
params = {
|
||||
|
|
@ -379,12 +405,32 @@ def search_outcode(
|
|||
|
||||
data = fetch_page(client, url, params)
|
||||
if not data:
|
||||
break
|
||||
raise PaginationError(f"home.co.uk {outcode} page {page} failed to load")
|
||||
|
||||
pagination = data.get("pagination", {}) or {}
|
||||
if last_page is None:
|
||||
last_page = _coerce_positive_int(pagination.get("last_page"))
|
||||
if total_results is None:
|
||||
total_results = _coerce_positive_int(pagination.get("total"))
|
||||
|
||||
raw_props = data.get("properties", [])
|
||||
if not raw_props:
|
||||
if total_results and page <= (last_page or page):
|
||||
raise PaginationError(
|
||||
f"home.co.uk {outcode} page {page} returned no properties "
|
||||
f"before the advertised end"
|
||||
)
|
||||
break
|
||||
|
||||
page_ids = {
|
||||
_property_identity(prop, page, idx) for idx, prop in enumerate(raw_props)
|
||||
}
|
||||
if page_ids and page_ids.issubset(seen_ids):
|
||||
raise PaginationError(
|
||||
f"home.co.uk {outcode} page {page} repeated previously seen results"
|
||||
)
|
||||
seen_ids.update(page_ids)
|
||||
|
||||
for prop in raw_props:
|
||||
try:
|
||||
transformed = transform_property(prop, pc_index)
|
||||
|
|
@ -401,10 +447,12 @@ def search_outcode(
|
|||
if max_properties is not None and len(properties) >= max_properties:
|
||||
return properties
|
||||
|
||||
# Check pagination
|
||||
pagination = data.get("pagination", {})
|
||||
last_page = pagination.get("last_page", 1)
|
||||
if page >= last_page:
|
||||
if last_page is not None:
|
||||
if page >= last_page:
|
||||
break
|
||||
elif total_results is not None and len(seen_ids) >= total_results:
|
||||
break
|
||||
elif len(raw_props) < HOMECOUK_PER_PAGE:
|
||||
break
|
||||
|
||||
page += 1
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue