Small fixes

This commit is contained in:
Andras Schmelczer 2026-03-28 09:29:56 +00:00
parent d93beb9201
commit 7591e5fc05
12 changed files with 198 additions and 14 deletions

View file

@@ -289,10 +289,15 @@ def _extract_beds_baths_from_features(
def _extract_postcode(text: str) -> str | None:
"""Extract full UK postcode from text like '2 Bed Flat, Pimlico, SW1V 2AA'."""
"""Extract full UK postcode from text like '2 Bed Flat, Pimlico, SW1V 2AA'.
Normalizes to include a space before the 3-char incode."""
match = re.search(r"([A-Z]{1,2}\d[A-Z0-9]?\s*\d[A-Z]{2})", text, re.IGNORECASE)
if match:
return match.group(1).upper().strip()
raw = match.group(1).upper().strip()
# Ensure space before incode (last 3 chars): "IP265AT" → "IP26 5AT"
if " " not in raw and len(raw) >= 5:
return raw[:-3] + " " + raw[-3:]
return raw
return None
@@ -635,6 +640,29 @@ def _resolve_outcode_postcodes(
return results
def _parse_or_date(date_str: str) -> str:
"""Parse OpenRent date strings to ISO format (YYYY-MM-DD).
Handles 'Today', 'Tomorrow', and 'DD Month, YYYY' formats."""
if not date_str:
return ""
stripped = date_str.strip()
lower = stripped.lower()
if lower == "today":
from datetime import datetime
return datetime.now().strftime("%Y-%m-%d")
if lower == "tomorrow":
from datetime import datetime, timedelta
return (datetime.now() + timedelta(days=1)).strftime("%Y-%m-%d")
# Try "DD Month, YYYY" format (e.g., "01 April, 2026")
from datetime import datetime
for fmt in ("%d %B, %Y", "%d %B %Y"):
try:
return datetime.strptime(stripped, fmt).strftime("%Y-%m-%d")
except ValueError:
continue
return date_str # Return as-is if unparseable
def transform_property(
search_data: dict,
detail_data: dict | None,
@@ -767,7 +795,7 @@ def transform_property(
"Total floor area (sqm)": parse_floor_area(description),
"Listing URL": listing_url,
"Listing features": [],
"first_visible_date": detail.get("available_date", ""),
"first_visible_date": _parse_or_date(detail.get("available_date", "")),
}