finder improvements
This commit is contained in:
parent
30055ab870
commit
3a3e249bdd
6 changed files with 225 additions and 39 deletions
|
|
@ -1,12 +1,31 @@
|
|||
import logging
|
||||
import re
|
||||
|
||||
from constants import PROPERTY_TYPE_MAP, RIGHTMOVE_BASE
|
||||
from constants import MAX_BEDROOMS, PROPERTY_TYPE_MAP, RIGHTMOVE_BASE
|
||||
from spatial import PostcodeSpatialIndex
|
||||
|
||||
log = logging.getLogger("rightmove")
|
||||
|
||||
|
||||
# Maximum plausible floor area for a residential property listing (sqm).
|
||||
# ~21,500 sq ft — covers even the largest UK mansions.
|
||||
MAX_FLOOR_AREA_SQM = 2000.0
|
||||
|
||||
|
||||
def validate_floor_area(sqm: float | None) -> float | None:
|
||||
"""Validate a floor area value. Returns None for nonsensical values.
|
||||
|
||||
Rejects zero/negative values and anything above MAX_FLOOR_AREA_SQM,
|
||||
which catches parsing errors where prices or other large numbers are
|
||||
mistakenly extracted as floor area from free-text descriptions or DOM text.
|
||||
"""
|
||||
if sqm is None:
|
||||
return None
|
||||
if sqm <= 0 or sqm > MAX_FLOOR_AREA_SQM:
|
||||
return None
|
||||
return sqm
|
||||
|
||||
|
||||
def parse_display_size(display_size: str | None) -> float | None:
|
||||
"""Parse displaySize like '499 sq. ft.' or '4,124 sq. ft.' to sqm."""
|
||||
if not display_size:
|
||||
|
|
@ -15,11 +34,11 @@ def parse_display_size(display_size: str | None) -> float | None:
|
|||
m = re.search(r"([\d,]+(?:\.\d+)?)\s*sq\.?\s*ft", display_size, re.IGNORECASE)
|
||||
if m:
|
||||
sqft = float(m.group(1).replace(",", ""))
|
||||
return round(sqft * 0.092903, 1)
|
||||
return validate_floor_area(round(sqft * 0.092903, 1))
|
||||
# Try sq. m.
|
||||
m = re.search(r"([\d,]+(?:\.\d+)?)\s*sq\.?\s*m", display_size, re.IGNORECASE)
|
||||
if m:
|
||||
return round(float(m.group(1).replace(",", "")), 1)
|
||||
return validate_floor_area(round(float(m.group(1).replace(",", "")), 1))
|
||||
return None
|
||||
|
||||
|
||||
|
|
@ -92,19 +111,34 @@ def transform_property(
|
|||
|
||||
price_obj = prop.get("price", {})
|
||||
amount = price_obj.get("amount")
|
||||
if amount is None:
|
||||
if not amount:
|
||||
return None
|
||||
frequency = price_obj.get("frequency", "")
|
||||
price = normalize_price(int(amount), frequency)
|
||||
# Store raw price — normalization to monthly happens once in storage.py
|
||||
price = int(amount)
|
||||
if price <= 0:
|
||||
return None
|
||||
|
||||
display_prices = price_obj.get("displayPrices", [])
|
||||
price_qualifier = (
|
||||
display_prices[0].get("displayPriceQualifier", "") if display_prices else ""
|
||||
)
|
||||
|
||||
# POA / Auction listings have unreliable prices — treat as no price
|
||||
pq_lower = price_qualifier.lower()
|
||||
if "poa" in pq_lower or "auction" in pq_lower:
|
||||
return None
|
||||
|
||||
sub_type = prop.get("propertySubType", "")
|
||||
bedrooms = prop.get("bedrooms", 0) or 0
|
||||
bathrooms = prop.get("bathrooms", 0) or 0
|
||||
raw_beds = prop.get("bedrooms", 0) or 0
|
||||
raw_baths = prop.get("bathrooms", 0) or 0
|
||||
bedrooms = raw_beds if raw_beds <= MAX_BEDROOMS else 0
|
||||
bathrooms = raw_baths if raw_baths <= MAX_BEDROOMS else 0
|
||||
if raw_beds > MAX_BEDROOMS or raw_baths > MAX_BEDROOMS:
|
||||
log.warning(
|
||||
"Rightmove %s: implausible beds=%d baths=%d (capped to 0)",
|
||||
prop.get("id", "?"), raw_beds, raw_baths,
|
||||
)
|
||||
|
||||
key_features = [
|
||||
kf.get("description", "")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue