finder improvements

This commit is contained in:
Andras Schmelczer 2026-03-25 08:06:05 +00:00
parent 30055ab870
commit 3a3e249bdd
6 changed files with 225 additions and 39 deletions

View file

@@ -34,6 +34,7 @@ from playwright.sync_api import sync_playwright
from constants import (
DELAY_BETWEEN_PAGES,
MAX_BEDROOMS,
OPENRENT_BASE,
PROPERTY_TYPE_MAP,
RETRY_BASE_DELAY,
@@ -45,6 +46,7 @@ from metrics import (
openrent_requests_total,
)
from spatial import PostcodeSpatialIndex
from transform import validate_floor_area
log = logging.getLogger("openrent")
@@ -607,10 +609,10 @@ def parse_floor_area(description: str | None) -> float | None:
m = re.search(r"([\d,]+(?:\.\d+)?)\s*sq\.?\s*ft", description, re.IGNORECASE)
if m:
sqft = float(m.group(1).replace(",", ""))
return round(sqft * 0.092903, 1)
return validate_floor_area(round(sqft * 0.092903, 1))
m = re.search(r"([\d,]+(?:\.\d+)?)\s*sq\.?\s*m", description, re.IGNORECASE)
if m:
return round(float(m.group(1).replace(",", "")), 1)
return validate_floor_area(round(float(m.group(1).replace(",", "")), 1))
return None
@@ -651,7 +653,7 @@ def transform_property(
lat = detail.get("lat") or search_data.get("lat")
lng = detail.get("lng") or search_data.get("lng")
price = detail.get("price") or search_data.get("price")
if not price:
if not price or int(price) <= 0:
return None
frequency = search_data.get("frequency", "monthly")
@@ -701,8 +703,15 @@ def transform_property(
log.debug("No postcode for property — skipping")
return None
bedrooms = detail.get("bedrooms") or search_data.get("bedrooms", 0) or 0
bathrooms = detail.get("bathrooms") or search_data.get("bathrooms", 0) or 0
raw_beds = detail.get("bedrooms") or search_data.get("bedrooms", 0) or 0
raw_baths = detail.get("bathrooms") or search_data.get("bathrooms", 0) or 0
bedrooms = raw_beds if raw_beds <= MAX_BEDROOMS else 0
bathrooms = raw_baths if raw_baths <= MAX_BEDROOMS else 0
if raw_beds > MAX_BEDROOMS or raw_baths > MAX_BEDROOMS:
log.warning(
"OpenRent %s: implausible beds=%d baths=%d (capped to 0)",
search_data.get("id", "?"), raw_beds, raw_baths,
)
# Title: prefer detail page (has h1 with full title)
title = detail.get("title") or search_data.get("title", "")
@@ -746,6 +755,9 @@ def transform_property(
"lat": lat,
"Postcode": postcode,
"Address per Property Register": address,
# OpenRent is a rental-only platform — tenure (Freehold/Leasehold) is a
# property ownership concept that doesn't apply to rental listings. The
# landlord's tenure is not shown on OpenRent listing pages.
"Leasehold/Freehold": None,
"Property type": map_property_type(property_type),
"Property sub-type": property_type or "Unknown",