all is well
This commit is contained in:
parent
eac1bd0d13
commit
2f149503bb
53 changed files with 1543 additions and 354 deletions
|
|
@ -19,7 +19,12 @@ from constants import (
|
|||
RETRY_BASE_DELAY,
|
||||
)
|
||||
from spatial import PostcodeSpatialIndex
|
||||
from transform import normalize_postcode, normalize_sub_type, validate_floor_area
|
||||
from transform import (
|
||||
normalize_postcode,
|
||||
normalize_sub_type,
|
||||
parse_int_value,
|
||||
validate_floor_area,
|
||||
)
|
||||
|
||||
log = logging.getLogger("homecouk")
|
||||
|
||||
|
|
@ -170,11 +175,19 @@ def parse_floor_area(description: str | None) -> float | None:
|
|||
"""Try to extract floor area from description text like '789 sq.ft.' or '73 sq.m.'."""
|
||||
if not description:
|
||||
return None
|
||||
m = re.search(r"([\d,]+(?:\.\d+)?)\s*sq\.?\s*ft", description, re.IGNORECASE)
|
||||
m = re.search(
|
||||
r"([\d,]+(?:\.\d+)?)\s*(?:sq\.?\s*ft|square\s+feet|ft(?:\^?2|²))",
|
||||
description,
|
||||
re.IGNORECASE,
|
||||
)
|
||||
if m:
|
||||
sqft = float(m.group(1).replace(",", ""))
|
||||
return validate_floor_area(round(sqft * 0.092903, 1))
|
||||
m = re.search(r"([\d,]+(?:\.\d+)?)\s*sq\.?\s*m", description, re.IGNORECASE)
|
||||
m = re.search(
|
||||
r"([\d,]+(?:\.\d+)?)\s*(?:sq\.?\s*m|square\s+met(?:er|re)s?|m(?:\^?2|²))",
|
||||
description,
|
||||
re.IGNORECASE,
|
||||
)
|
||||
if m:
|
||||
return validate_floor_area(round(float(m.group(1).replace(",", "")), 1))
|
||||
return None
|
||||
|
|
@ -237,6 +250,15 @@ def map_property_type(raw_type: str | None) -> str:
|
|||
# Home.co.uk uses types like "House", "Flat", "Apartment", "Detached", etc.
|
||||
# Try common patterns
|
||||
lower = raw_type.lower()
|
||||
excluded_flat_like = (
|
||||
"block of apartment",
|
||||
"house of multiple occupation",
|
||||
"private halls",
|
||||
"retirement",
|
||||
"serviced apartment",
|
||||
)
|
||||
if any(term in lower for term in excluded_flat_like):
|
||||
return "Other"
|
||||
if (
|
||||
"flat" in lower
|
||||
or "apartment" in lower
|
||||
|
|
@ -269,8 +291,10 @@ def transform_property(
|
|||
log.debug("Coords outside England: lat=%.4f lng=%.4f — skipping", lat, lng)
|
||||
return None
|
||||
|
||||
price = prop.get("price") or prop.get("latest_price")
|
||||
if not price or int(price) <= 0:
|
||||
price = parse_int_value(prop.get("price")) or parse_int_value(
|
||||
prop.get("latest_price")
|
||||
)
|
||||
if not price or price <= 0:
|
||||
return None
|
||||
|
||||
# Home.co.uk provides postcodes directly, but fall back to spatial index
|
||||
|
|
@ -281,10 +305,10 @@ def transform_property(
|
|||
log.debug("No postcode for property at %.4f, %.4f — skipping", lat, lng)
|
||||
return None
|
||||
|
||||
raw_beds = prop.get("bedrooms", 0) or 0
|
||||
raw_baths = prop.get("bathrooms", 0) or 0
|
||||
bedrooms = raw_beds if raw_beds <= MAX_BEDROOMS else 0
|
||||
bathrooms = raw_baths if raw_baths <= MAX_BEDROOMS else 0
|
||||
raw_beds = parse_int_value(prop.get("bedrooms")) or 0
|
||||
raw_baths = parse_int_value(prop.get("bathrooms")) or 0
|
||||
bedrooms = raw_beds if 0 <= raw_beds <= MAX_BEDROOMS else 0
|
||||
bathrooms = raw_baths if 0 <= raw_baths <= MAX_BEDROOMS else 0
|
||||
if raw_beds > MAX_BEDROOMS or raw_baths > MAX_BEDROOMS:
|
||||
log.warning(
|
||||
"home.co.uk %s: implausible beds=%d baths=%d (capped to 0)",
|
||||
|
|
@ -318,7 +342,7 @@ def transform_property(
|
|||
"Leasehold/Freehold": parse_tenure(prop),
|
||||
"Property type": map_property_type(listing_type),
|
||||
"Property sub-type": normalize_sub_type(listing_type),
|
||||
"price": int(price),
|
||||
"price": price,
|
||||
"price_frequency": "",
|
||||
"Price qualifier": price_qualifier,
|
||||
"Total floor area (sqm)": parse_floor_area(prop.get("description")),
|
||||
|
|
@ -362,7 +386,16 @@ def search_outcode(
|
|||
break
|
||||
|
||||
for prop in raw_props:
|
||||
transformed = transform_property(prop, pc_index)
|
||||
try:
|
||||
transformed = transform_property(prop, pc_index)
|
||||
except Exception as exc:
|
||||
log.warning(
|
||||
"home.co.uk %s property %s failed to transform: %s",
|
||||
outcode,
|
||||
prop.get("listing_id") or prop.get("property_id") or "?",
|
||||
exc,
|
||||
)
|
||||
continue
|
||||
if transformed:
|
||||
properties.append(transformed)
|
||||
if max_properties is not None and len(properties) >= max_properties:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue