finder improvements

This commit is contained in:
Andras Schmelczer 2026-03-25 08:06:05 +00:00
parent 30055ab870
commit 3a3e249bdd
6 changed files with 225 additions and 39 deletions

View file

@ -1,12 +1,31 @@
import logging
import re
from constants import PROPERTY_TYPE_MAP, RIGHTMOVE_BASE
from constants import MAX_BEDROOMS, PROPERTY_TYPE_MAP, RIGHTMOVE_BASE
from spatial import PostcodeSpatialIndex
log = logging.getLogger("rightmove")
# Maximum plausible floor area for a residential property listing (sqm).
# ~21,500 sq ft — covers even the largest UK mansions.
MAX_FLOOR_AREA_SQM = 2000.0
def validate_floor_area(sqm: float | None) -> float | None:
"""Validate a floor area value. Returns None for nonsensical values.
Rejects zero/negative values and anything above MAX_FLOOR_AREA_SQM,
which catches parsing errors where prices or other large numbers are
mistakenly extracted as floor area from free-text descriptions or DOM text.
"""
if sqm is None:
return None
if sqm <= 0 or sqm > MAX_FLOOR_AREA_SQM:
return None
return sqm
def parse_display_size(display_size: str | None) -> float | None:
"""Parse displaySize like '499 sq. ft.' or '4,124 sq. ft.' to sqm."""
if not display_size:
@ -15,11 +34,11 @@ def parse_display_size(display_size: str | None) -> float | None:
m = re.search(r"([\d,]+(?:\.\d+)?)\s*sq\.?\s*ft", display_size, re.IGNORECASE)
if m:
sqft = float(m.group(1).replace(",", ""))
return round(sqft * 0.092903, 1)
return validate_floor_area(round(sqft * 0.092903, 1))
# Try sq. m.
m = re.search(r"([\d,]+(?:\.\d+)?)\s*sq\.?\s*m", display_size, re.IGNORECASE)
if m:
return round(float(m.group(1).replace(",", "")), 1)
return validate_floor_area(round(float(m.group(1).replace(",", "")), 1))
return None
@ -92,19 +111,34 @@ def transform_property(
price_obj = prop.get("price", {})
amount = price_obj.get("amount")
if amount is None:
if not amount:
return None
frequency = price_obj.get("frequency", "")
price = normalize_price(int(amount), frequency)
# Store raw price — normalization to monthly happens once in storage.py
price = int(amount)
if price <= 0:
return None
display_prices = price_obj.get("displayPrices", [])
price_qualifier = (
display_prices[0].get("displayPriceQualifier", "") if display_prices else ""
)
# POA / Auction listings have unreliable prices — treat as no price
pq_lower = price_qualifier.lower()
if "poa" in pq_lower or "auction" in pq_lower:
return None
sub_type = prop.get("propertySubType", "")
bedrooms = prop.get("bedrooms", 0) or 0
bathrooms = prop.get("bathrooms", 0) or 0
raw_beds = prop.get("bedrooms", 0) or 0
raw_baths = prop.get("bathrooms", 0) or 0
bedrooms = raw_beds if raw_beds <= MAX_BEDROOMS else 0
bathrooms = raw_baths if raw_baths <= MAX_BEDROOMS else 0
if raw_beds > MAX_BEDROOMS or raw_baths > MAX_BEDROOMS:
log.warning(
"Rightmove %s: implausible beds=%d baths=%d (capped to 0)",
prop.get("id", "?"), raw_beds, raw_baths,
)
key_features = [
kf.get("description", "")