finder improvements
This commit is contained in:
parent
30055ab870
commit
3a3e249bdd
6 changed files with 225 additions and 39 deletions
|
|
@ -34,6 +34,7 @@ from playwright.sync_api import sync_playwright
|
|||
|
||||
from constants import (
|
||||
DELAY_BETWEEN_PAGES,
|
||||
MAX_BEDROOMS,
|
||||
OPENRENT_BASE,
|
||||
PROPERTY_TYPE_MAP,
|
||||
RETRY_BASE_DELAY,
|
||||
|
|
@ -45,6 +46,7 @@ from metrics import (
|
|||
openrent_requests_total,
|
||||
)
|
||||
from spatial import PostcodeSpatialIndex
|
||||
from transform import validate_floor_area
|
||||
|
||||
log = logging.getLogger("openrent")
|
||||
|
||||
|
|
@ -607,10 +609,10 @@ def parse_floor_area(description: str | None) -> float | None:
|
|||
m = re.search(r"([\d,]+(?:\.\d+)?)\s*sq\.?\s*ft", description, re.IGNORECASE)
|
||||
if m:
|
||||
sqft = float(m.group(1).replace(",", ""))
|
||||
return round(sqft * 0.092903, 1)
|
||||
return validate_floor_area(round(sqft * 0.092903, 1))
|
||||
m = re.search(r"([\d,]+(?:\.\d+)?)\s*sq\.?\s*m", description, re.IGNORECASE)
|
||||
if m:
|
||||
return round(float(m.group(1).replace(",", "")), 1)
|
||||
return validate_floor_area(round(float(m.group(1).replace(",", "")), 1))
|
||||
return None
|
||||
|
||||
|
||||
|
|
@ -651,7 +653,7 @@ def transform_property(
|
|||
lat = detail.get("lat") or search_data.get("lat")
|
||||
lng = detail.get("lng") or search_data.get("lng")
|
||||
price = detail.get("price") or search_data.get("price")
|
||||
if not price:
|
||||
if not price or int(price) <= 0:
|
||||
return None
|
||||
|
||||
frequency = search_data.get("frequency", "monthly")
|
||||
|
|
@ -701,8 +703,15 @@ def transform_property(
|
|||
log.debug("No postcode for property — skipping")
|
||||
return None
|
||||
|
||||
bedrooms = detail.get("bedrooms") or search_data.get("bedrooms", 0) or 0
|
||||
bathrooms = detail.get("bathrooms") or search_data.get("bathrooms", 0) or 0
|
||||
raw_beds = detail.get("bedrooms") or search_data.get("bedrooms", 0) or 0
|
||||
raw_baths = detail.get("bathrooms") or search_data.get("bathrooms", 0) or 0
|
||||
bedrooms = raw_beds if raw_beds <= MAX_BEDROOMS else 0
|
||||
bathrooms = raw_baths if raw_baths <= MAX_BEDROOMS else 0
|
||||
if raw_beds > MAX_BEDROOMS or raw_baths > MAX_BEDROOMS:
|
||||
log.warning(
|
||||
"OpenRent %s: implausible beds=%d baths=%d (capped to 0)",
|
||||
search_data.get("id", "?"), raw_beds, raw_baths,
|
||||
)
|
||||
|
||||
# Title: prefer detail page (has h1 with full title)
|
||||
title = detail.get("title") or search_data.get("title", "")
|
||||
|
|
@ -746,6 +755,9 @@ def transform_property(
|
|||
"lat": lat,
|
||||
"Postcode": postcode,
|
||||
"Address per Property Register": address,
|
||||
# OpenRent is a rental-only platform — tenure (Freehold/Leasehold) is a
|
||||
# property ownership concept that doesn't apply to rental listings. The
|
||||
# landlord's tenure is not shown on OpenRent listing pages.
|
||||
"Leasehold/Freehold": None,
|
||||
"Property type": map_property_type(property_type),
|
||||
"Property sub-type": property_type or "Unknown",
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue