finder improvements

This commit is contained in:
Andras Schmelczer 2026-03-25 08:06:05 +00:00
parent 30055ab870
commit 3a3e249bdd
6 changed files with 225 additions and 39 deletions

View file

@ -15,6 +15,7 @@ from constants import (
HOMECOUK_API_BASE,
HOMECOUK_BASE,
HOMECOUK_PER_PAGE,
MAX_BEDROOMS,
PROPERTY_TYPE_MAP,
RETRY_BASE_DELAY,
)
@ -25,6 +26,7 @@ from metrics import (
homecouk_requests_total,
)
from spatial import PostcodeSpatialIndex
from transform import validate_floor_area
log = logging.getLogger("homecouk")
@ -216,10 +218,57 @@ def parse_floor_area(description: str | None) -> float | None:
m = re.search(r"([\d,]+(?:\.\d+)?)\s*sq\.?\s*ft", description, re.IGNORECASE)
if m:
sqft = float(m.group(1).replace(",", ""))
return round(sqft * 0.092903, 1)
return validate_floor_area(round(sqft * 0.092903, 1))
m = re.search(r"([\d,]+(?:\.\d+)?)\s*sq\.?\s*m", description, re.IGNORECASE)
if m:
return round(float(m.group(1).replace(",", "")), 1)
return validate_floor_area(round(float(m.group(1).replace(",", "")), 1))
return None
def parse_tenure(prop: dict) -> str | None:
"""Extract tenure from home.co.uk property data.
Checks multiple sources in priority order:
1. Dedicated 'tenure' or 'tenure_type' field in the API response
2. Free-text search in the description for 'freehold' / 'leasehold'
3. Free-text search in features lists
home.co.uk aggregates listings from estate agents, so tenure is often
embedded in the description text rather than a structured field.
"""
# 1. Check dedicated tenure fields (in case the API adds them)
for key in ("tenure", "tenure_type", "tenureType"):
val = prop.get(key)
if val and isinstance(val, str):
lower = val.lower().strip()
if "leasehold" in lower:
return "Leasehold"
if "freehold" in lower:
return "Freehold"
# 2. Check description text — estate agents often include tenure here
description = prop.get("description") or ""
if description:
lower_desc = description.lower()
if re.search(r"\bleasehold\b", lower_desc):
return "Leasehold"
if re.search(r"\bfreehold\b", lower_desc):
# Matches "Freehold" and "Share of Freehold" (both = freehold ownership)
return "Freehold"
# 3. Check features / key_features lists if present
for key in ("features", "key_features", "keyFeatures"):
features = prop.get(key)
if features and isinstance(features, list):
for feat in features:
if not isinstance(feat, str):
continue
lower_feat = feat.lower()
if "leasehold" in lower_feat:
return "Leasehold"
if "freehold" in lower_feat:
return "Freehold"
return None
@ -267,7 +316,7 @@ def transform_property(
return None
price = prop.get("price") or prop.get("latest_price")
if not price:
if not price or int(price) <= 0:
return None
# Home.co.uk provides postcodes directly, but fall back to spatial index
@ -278,8 +327,16 @@ def transform_property(
log.debug("No postcode for property at %.4f, %.4f — skipping", lat, lng)
return None
bedrooms = prop.get("bedrooms", 0) or 0
bathrooms = prop.get("bathrooms", 0) or 0
raw_beds = prop.get("bedrooms", 0) or 0
raw_baths = prop.get("bathrooms", 0) or 0
bedrooms = raw_beds if raw_beds <= MAX_BEDROOMS else 0
bathrooms = raw_baths if raw_baths <= MAX_BEDROOMS else 0
if raw_beds > MAX_BEDROOMS or raw_baths > MAX_BEDROOMS:
log.warning(
"home.co.uk %s: implausible beds=%d baths=%d (capped to 0)",
prop.get("listing_id") or prop.get("property_id") or "?",
raw_beds, raw_baths,
)
listing_type = prop.get("listing_property_type") or prop.get("property_type") or ""
address = prop.get("display_address") or prop.get("address") or ""
@ -304,7 +361,7 @@ def transform_property(
"lat": lat,
"Postcode": postcode,
"Address per Property Register": address,
"Leasehold/Freehold": None, # not available from home.co.uk
"Leasehold/Freehold": parse_tenure(prop),
"Property type": map_property_type(listing_type),
"Property sub-type": listing_type or "Unknown",
"price": int(price),