finder improvements
This commit is contained in:
parent
30055ab870
commit
3a3e249bdd
6 changed files with 225 additions and 39 deletions
|
|
@ -15,6 +15,7 @@ from constants import (
|
|||
HOMECOUK_API_BASE,
|
||||
HOMECOUK_BASE,
|
||||
HOMECOUK_PER_PAGE,
|
||||
MAX_BEDROOMS,
|
||||
PROPERTY_TYPE_MAP,
|
||||
RETRY_BASE_DELAY,
|
||||
)
|
||||
|
|
@ -25,6 +26,7 @@ from metrics import (
|
|||
homecouk_requests_total,
|
||||
)
|
||||
from spatial import PostcodeSpatialIndex
|
||||
from transform import validate_floor_area
|
||||
|
||||
log = logging.getLogger("homecouk")
|
||||
|
||||
|
|
@ -216,10 +218,57 @@ def parse_floor_area(description: str | None) -> float | None:
|
|||
m = re.search(r"([\d,]+(?:\.\d+)?)\s*sq\.?\s*ft", description, re.IGNORECASE)
|
||||
if m:
|
||||
sqft = float(m.group(1).replace(",", ""))
|
||||
return round(sqft * 0.092903, 1)
|
||||
return validate_floor_area(round(sqft * 0.092903, 1))
|
||||
m = re.search(r"([\d,]+(?:\.\d+)?)\s*sq\.?\s*m", description, re.IGNORECASE)
|
||||
if m:
|
||||
return round(float(m.group(1).replace(",", "")), 1)
|
||||
return validate_floor_area(round(float(m.group(1).replace(",", "")), 1))
|
||||
return None
|
||||
|
||||
|
||||
def parse_tenure(prop: dict) -> str | None:
|
||||
"""Extract tenure from home.co.uk property data.
|
||||
|
||||
Checks multiple sources in priority order:
|
||||
1. Dedicated 'tenure' or 'tenure_type' field in the API response
|
||||
2. Free-text search in the description for 'freehold' / 'leasehold'
|
||||
3. Free-text search in features lists
|
||||
|
||||
home.co.uk aggregates listings from estate agents, so tenure is often
|
||||
embedded in the description text rather than a structured field.
|
||||
"""
|
||||
# 1. Check dedicated tenure fields (in case the API adds them)
|
||||
for key in ("tenure", "tenure_type", "tenureType"):
|
||||
val = prop.get(key)
|
||||
if val and isinstance(val, str):
|
||||
lower = val.lower().strip()
|
||||
if "leasehold" in lower:
|
||||
return "Leasehold"
|
||||
if "freehold" in lower:
|
||||
return "Freehold"
|
||||
|
||||
# 2. Check description text — estate agents often include tenure here
|
||||
description = prop.get("description") or ""
|
||||
if description:
|
||||
lower_desc = description.lower()
|
||||
if re.search(r"\bleasehold\b", lower_desc):
|
||||
return "Leasehold"
|
||||
if re.search(r"\bfreehold\b", lower_desc):
|
||||
# Matches "Freehold" and "Share of Freehold" (both = freehold ownership)
|
||||
return "Freehold"
|
||||
|
||||
# 3. Check features / key_features lists if present
|
||||
for key in ("features", "key_features", "keyFeatures"):
|
||||
features = prop.get(key)
|
||||
if features and isinstance(features, list):
|
||||
for feat in features:
|
||||
if not isinstance(feat, str):
|
||||
continue
|
||||
lower_feat = feat.lower()
|
||||
if "leasehold" in lower_feat:
|
||||
return "Leasehold"
|
||||
if "freehold" in lower_feat:
|
||||
return "Freehold"
|
||||
|
||||
return None
|
||||
|
||||
|
||||
|
|
@ -267,7 +316,7 @@ def transform_property(
|
|||
return None
|
||||
|
||||
price = prop.get("price") or prop.get("latest_price")
|
||||
if not price:
|
||||
if not price or int(price) <= 0:
|
||||
return None
|
||||
|
||||
# Home.co.uk provides postcodes directly, but fall back to spatial index
|
||||
|
|
@ -278,8 +327,16 @@ def transform_property(
|
|||
log.debug("No postcode for property at %.4f, %.4f — skipping", lat, lng)
|
||||
return None
|
||||
|
||||
bedrooms = prop.get("bedrooms", 0) or 0
|
||||
bathrooms = prop.get("bathrooms", 0) or 0
|
||||
raw_beds = prop.get("bedrooms", 0) or 0
|
||||
raw_baths = prop.get("bathrooms", 0) or 0
|
||||
bedrooms = raw_beds if raw_beds <= MAX_BEDROOMS else 0
|
||||
bathrooms = raw_baths if raw_baths <= MAX_BEDROOMS else 0
|
||||
if raw_beds > MAX_BEDROOMS or raw_baths > MAX_BEDROOMS:
|
||||
log.warning(
|
||||
"home.co.uk %s: implausible beds=%d baths=%d (capped to 0)",
|
||||
prop.get("listing_id") or prop.get("property_id") or "?",
|
||||
raw_beds, raw_baths,
|
||||
)
|
||||
|
||||
listing_type = prop.get("listing_property_type") or prop.get("property_type") or ""
|
||||
address = prop.get("display_address") or prop.get("address") or ""
|
||||
|
|
@ -304,7 +361,7 @@ def transform_property(
|
|||
"lat": lat,
|
||||
"Postcode": postcode,
|
||||
"Address per Property Register": address,
|
||||
"Leasehold/Freehold": None, # not available from home.co.uk
|
||||
"Leasehold/Freehold": parse_tenure(prop),
|
||||
"Property type": map_property_type(listing_type),
|
||||
"Property sub-type": listing_type or "Unknown",
|
||||
"price": int(price),
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue