all is well
Some checks failed
Build and publish Docker image / build-and-push (push) Failing after 7m0s
CI / Check (push) Failing after 7m9s

This commit is contained in:
Andras Schmelczer 2026-05-17 17:20:19 +01:00
parent eac1bd0d13
commit 2f149503bb
53 changed files with 1543 additions and 354 deletions

View file

@@ -19,7 +19,12 @@ from constants import (
RETRY_BASE_DELAY,
)
from spatial import PostcodeSpatialIndex
from transform import normalize_postcode, normalize_sub_type, validate_floor_area
from transform import (
normalize_postcode,
normalize_sub_type,
parse_int_value,
validate_floor_area,
)
log = logging.getLogger("homecouk")
@@ -170,11 +175,19 @@ def parse_floor_area(description: str | None) -> float | None:
"""Try to extract floor area from description text like '789 sq.ft.' or '73 sq.m.'."""
if not description:
return None
m = re.search(r"([\d,]+(?:\.\d+)?)\s*sq\.?\s*ft", description, re.IGNORECASE)
m = re.search(
r"([\d,]+(?:\.\d+)?)\s*(?:sq\.?\s*ft|square\s+feet|ft(?:\^?2|²))",
description,
re.IGNORECASE,
)
if m:
sqft = float(m.group(1).replace(",", ""))
return validate_floor_area(round(sqft * 0.092903, 1))
m = re.search(r"([\d,]+(?:\.\d+)?)\s*sq\.?\s*m", description, re.IGNORECASE)
m = re.search(
r"([\d,]+(?:\.\d+)?)\s*(?:sq\.?\s*m|square\s+met(?:er|re)s?|m(?:\^?2|²))",
description,
re.IGNORECASE,
)
if m:
return validate_floor_area(round(float(m.group(1).replace(",", "")), 1))
return None
@@ -237,6 +250,15 @@ def map_property_type(raw_type: str | None) -> str:
# Home.co.uk uses types like "House", "Flat", "Apartment", "Detached", etc.
# Try common patterns
lower = raw_type.lower()
excluded_flat_like = (
"block of apartment",
"house of multiple occupation",
"private halls",
"retirement",
"serviced apartment",
)
if any(term in lower for term in excluded_flat_like):
return "Other"
if (
"flat" in lower
or "apartment" in lower
@@ -269,8 +291,10 @@ def transform_property(
log.debug("Coords outside England: lat=%.4f lng=%.4f — skipping", lat, lng)
return None
price = prop.get("price") or prop.get("latest_price")
if not price or int(price) <= 0:
price = parse_int_value(prop.get("price")) or parse_int_value(
prop.get("latest_price")
)
if not price or price <= 0:
return None
# Home.co.uk provides postcodes directly, but fall back to spatial index
@@ -281,10 +305,10 @@ def transform_property(
log.debug("No postcode for property at %.4f, %.4f — skipping", lat, lng)
return None
raw_beds = prop.get("bedrooms", 0) or 0
raw_baths = prop.get("bathrooms", 0) or 0
bedrooms = raw_beds if raw_beds <= MAX_BEDROOMS else 0
bathrooms = raw_baths if raw_baths <= MAX_BEDROOMS else 0
raw_beds = parse_int_value(prop.get("bedrooms")) or 0
raw_baths = parse_int_value(prop.get("bathrooms")) or 0
bedrooms = raw_beds if 0 <= raw_beds <= MAX_BEDROOMS else 0
bathrooms = raw_baths if 0 <= raw_baths <= MAX_BEDROOMS else 0
if raw_beds > MAX_BEDROOMS or raw_baths > MAX_BEDROOMS:
log.warning(
"home.co.uk %s: implausible beds=%d baths=%d (capped to 0)",
@@ -318,7 +342,7 @@ def transform_property(
"Leasehold/Freehold": parse_tenure(prop),
"Property type": map_property_type(listing_type),
"Property sub-type": normalize_sub_type(listing_type),
"price": int(price),
"price": price,
"price_frequency": "",
"Price qualifier": price_qualifier,
"Total floor area (sqm)": parse_floor_area(prop.get("description")),
@@ -362,7 +386,16 @@ def search_outcode(
break
for prop in raw_props:
transformed = transform_property(prop, pc_index)
try:
transformed = transform_property(prop, pc_index)
except Exception as exc:
log.warning(
"home.co.uk %s property %s failed to transform: %s",
outcode,
prop.get("listing_id") or prop.get("property_id") or "?",
exc,
)
continue
if transformed:
properties.append(transformed)
if max_properties is not None and len(properties) >= max_properties: