all is well
This commit is contained in:
parent
eac1bd0d13
commit
2f149503bb
53 changed files with 1543 additions and 354 deletions
|
|
@ -1,4 +1,5 @@
|
|||
import logging
|
||||
import math
|
||||
import re
|
||||
|
||||
from constants import MAX_BEDROOMS, PROPERTY_TYPE_MAP, RIGHTMOVE_BASE
|
||||
|
|
@ -29,17 +30,43 @@ def validate_floor_area(sqm: float | None) -> float | None:
|
|||
return sqm
|
||||
|
||||
|
||||
def parse_int_value(value) -> int | None:
|
||||
"""Parse an integer-like API value without truncating decimals."""
|
||||
if value is None or isinstance(value, bool):
|
||||
return None
|
||||
if isinstance(value, int):
|
||||
return value
|
||||
if isinstance(value, float):
|
||||
if not math.isfinite(value) or not value.is_integer():
|
||||
return None
|
||||
return int(value)
|
||||
if isinstance(value, str):
|
||||
cleaned = value.strip().replace(",", "").replace("£", "")
|
||||
if not re.fullmatch(r"\d+", cleaned):
|
||||
return None
|
||||
return int(cleaned)
|
||||
return None
|
||||
|
||||
|
||||
def parse_display_size(display_size: str | None) -> float | None:
|
||||
"""Parse displaySize like '499 sq. ft.' or '4,124 sq. ft.' to sqm."""
|
||||
if not display_size:
|
||||
return None
|
||||
# Try sq. ft. first
|
||||
m = re.search(r"([\d,]+(?:\.\d+)?)\s*sq\.?\s*ft", display_size, re.IGNORECASE)
|
||||
m = re.search(
|
||||
r"([\d,]+(?:\.\d+)?)\s*(?:sq\.?\s*ft|square\s+feet|ft(?:\^?2|²))",
|
||||
display_size,
|
||||
re.IGNORECASE,
|
||||
)
|
||||
if m:
|
||||
sqft = float(m.group(1).replace(",", ""))
|
||||
return validate_floor_area(round(sqft * 0.092903, 1))
|
||||
# Try sq. m.
|
||||
m = re.search(r"([\d,]+(?:\.\d+)?)\s*sq\.?\s*m", display_size, re.IGNORECASE)
|
||||
m = re.search(
|
||||
r"([\d,]+(?:\.\d+)?)\s*(?:sq\.?\s*m|square\s+met(?:er|re)s?|m(?:\^?2|²))",
|
||||
display_size,
|
||||
re.IGNORECASE,
|
||||
)
|
||||
if m:
|
||||
return validate_floor_area(round(float(m.group(1).replace(",", "")), 1))
|
||||
return None
|
||||
|
|
@ -86,7 +113,21 @@ def map_property_type(sub_type: str | None) -> str:
|
|||
return canonical
|
||||
# Keyword fallback for compound types not in the map
|
||||
lower = sub_type.lower()
|
||||
if "flat" in lower or "apartment" in lower or "maisonette" in lower or "studio" in lower:
|
||||
excluded_flat_like = (
|
||||
"block of apartment",
|
||||
"house of multiple occupation",
|
||||
"private halls",
|
||||
"retirement",
|
||||
"serviced apartment",
|
||||
)
|
||||
if any(term in lower for term in excluded_flat_like):
|
||||
return "Other"
|
||||
if (
|
||||
"flat" in lower
|
||||
or "apartment" in lower
|
||||
or "maisonette" in lower
|
||||
or "studio" in lower
|
||||
):
|
||||
return "Flats/Maisonettes"
|
||||
if "semi" in lower and "detach" in lower:
|
||||
return "Semi-Detached"
|
||||
|
|
@ -158,10 +199,10 @@ def transform_property(
|
|||
lat, lng = fix_coords(raw_lat, raw_lng)
|
||||
|
||||
price_obj = prop.get("price", {})
|
||||
amount = price_obj.get("amount")
|
||||
amount = parse_int_value(price_obj.get("amount"))
|
||||
if not amount:
|
||||
return None
|
||||
price = int(amount)
|
||||
price = amount
|
||||
if price <= 0:
|
||||
return None
|
||||
|
||||
|
|
@ -172,14 +213,23 @@ def transform_property(
|
|||
|
||||
# POA / Auction listings have unreliable prices — treat as no price
|
||||
pq_lower = price_qualifier.lower()
|
||||
if "poa" in pq_lower or "auction" in pq_lower:
|
||||
non_comparable_price_terms = (
|
||||
"poa",
|
||||
"auction",
|
||||
"shared ownership",
|
||||
"shared equity",
|
||||
"part buy",
|
||||
"part rent",
|
||||
"from",
|
||||
)
|
||||
if any(term in pq_lower for term in non_comparable_price_terms):
|
||||
return None
|
||||
|
||||
sub_type = prop.get("propertySubType", "")
|
||||
raw_beds = prop.get("bedrooms", 0) or 0
|
||||
raw_baths = prop.get("bathrooms", 0) or 0
|
||||
bedrooms = raw_beds if raw_beds <= MAX_BEDROOMS else 0
|
||||
bathrooms = raw_baths if raw_baths <= MAX_BEDROOMS else 0
|
||||
raw_beds = parse_int_value(prop.get("bedrooms")) or 0
|
||||
raw_baths = parse_int_value(prop.get("bathrooms")) or 0
|
||||
bedrooms = raw_beds if 0 <= raw_beds <= MAX_BEDROOMS else 0
|
||||
bathrooms = raw_baths if 0 <= raw_baths <= MAX_BEDROOMS else 0
|
||||
if raw_beds > MAX_BEDROOMS or raw_baths > MAX_BEDROOMS:
|
||||
log.warning(
|
||||
"Rightmove %s: implausible beds=%d baths=%d (capped to 0)",
|
||||
|
|
@ -197,8 +247,15 @@ def transform_property(
|
|||
log.debug("No England postcode for property at %.4f, %.4f — skipping", lat, lng)
|
||||
return None
|
||||
|
||||
property_url = prop.get("propertyUrl") or ""
|
||||
if not isinstance(property_url, str):
|
||||
property_url = ""
|
||||
listing_id = prop.get("id") or property_url
|
||||
if not listing_id:
|
||||
return None
|
||||
|
||||
return {
|
||||
"id": prop.get("id"),
|
||||
"id": listing_id,
|
||||
"Bedrooms": bedrooms,
|
||||
"Bathrooms": bathrooms,
|
||||
"Number of bedrooms & living rooms": bedrooms + bathrooms,
|
||||
|
|
@ -213,7 +270,7 @@ def transform_property(
|
|||
"price_frequency": "",
|
||||
"Price qualifier": price_qualifier,
|
||||
"Total floor area (sqm)": parse_display_size(prop.get("displaySize")),
|
||||
"Listing URL": RIGHTMOVE_BASE + prop.get("propertyUrl", ""),
|
||||
"Listing URL": RIGHTMOVE_BASE + property_url if property_url else "",
|
||||
"Listing features": key_features,
|
||||
"first_visible_date": prop.get("firstVisibleDate", ""),
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue