diff --git a/docker-compose.yml b/docker-compose.yml
index 0b0f525..af992c4 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -163,6 +163,7 @@ services:
# - ./finder:/app
# environment:
# FLARESOLVERR_URL: http://flaresolverr:8191
+ # RELOAD_URL: http://server:8001/api/reload
# depends_on:
# gluetun:
# condition: service_healthy
diff --git a/finder/constants.py b/finder/constants.py
index d9ab10c..821aae3 100644
--- a/finder/constants.py
+++ b/finder/constants.py
@@ -104,6 +104,18 @@ PROPERTY_TYPE_MAP = {
"Garages": "Other",
"Mews": "Terraced",
"Property": "Other",
+ "Flat Share": "Other",
+ "Block of Apartments": "Flats/Maisonettes",
+ "Private Halls": "Flats/Maisonettes",
+ "Terraced Bungalow": "Terraced",
+ "Equestrian Facility": "Other",
+ "Ground Maisonette": "Flats/Maisonettes",
+ "Country House": "Detached",
+ "Village House": "Detached",
+ "Farm Land": "Other",
+ "House Boat": "Other",
+ "Barn": "Other",
+ "Serviced Apartments": "Flats/Maisonettes",
# Lowercase variants (from home.co.uk / Rightmove APIs)
"house": "Detached",
"bungalow": "Other",
@@ -113,6 +125,19 @@ PROPERTY_TYPE_MAP = {
"not-specified": "Other",
"retirement-property": "Flats/Maisonettes",
"equestrian-facility": "Other",
+ "flat": "Flats/Maisonettes",
+ "detached": "Detached",
+ "semi-detached": "Semi-Detached",
+ "terraced": "Terraced",
+ "maisonette": "Flats/Maisonettes",
+ "apartment": "Flats/Maisonettes",
+ "studio": "Flats/Maisonettes",
+ "penthouse": "Flats/Maisonettes",
+ "cottage": "Other",
+ "chalet": "Other",
+ "farm_house": "Detached",
+ "country house": "Detached",
+ "village house": "Detached",
}
CHANNELS = [
diff --git a/finder/homecouk.py b/finder/homecouk.py
index f9e290f..bace56d 100644
--- a/finder/homecouk.py
+++ b/finder/homecouk.py
@@ -363,7 +363,7 @@ def transform_property(
"Address per Property Register": address,
"Leasehold/Freehold": parse_tenure(prop),
"Property type": map_property_type(listing_type),
- "Property sub-type": listing_type or "Unknown",
+ "Property sub-type": listing_type.title() if listing_type else "Unknown",
"price": int(price),
"price_frequency": "" if channel == "BUY" else "monthly",
"Price qualifier": price_qualifier,
diff --git a/finder/openrent.py b/finder/openrent.py
index ce27fe8..f08a3cd 100644
--- a/finder/openrent.py
+++ b/finder/openrent.py
@@ -289,10 +289,15 @@ def _extract_beds_baths_from_features(
def _extract_postcode(text: str) -> str | None:
- """Extract full UK postcode from text like '2 Bed Flat, Pimlico, SW1V 2AA'."""
+    """Extract full UK postcode from text like '2 Bed Flat, Pimlico, SW1V 2AA'.
+    The result is upper-cased with exactly one space before the 3-char incode,
+    whatever whitespace the text had there; returns None if nothing matches."""
match = re.search(r"([A-Z]{1,2}\d[A-Z0-9]?\s*\d[A-Z]{2})", text, re.IGNORECASE)
if match:
- return match.group(1).upper().strip()
+        # \s* in the pattern can capture tabs, NBSPs or several spaces, so
+        # strip all whitespace; the incode is then always the last 3 chars.
+        compact = "".join(match.group(1).split()).upper()
+        return f"{compact[:-3]} {compact[-3:]}"  # "IP265AT" → "IP26 5AT"
return None
@@ -635,6 +640,29 @@ def _resolve_outcode_postcodes(
return results
+def _parse_or_date(date_str: str) -> str:
+    """Parse an OpenRent date string into ISO format (YYYY-MM-DD).
+
+    Handles 'Today' / 'Tomorrow' (any case, relative to the local date) and
+    'DD Month, YYYY' / 'DD Month YYYY'. Empty input gives ""; anything
+    unparseable is returned unchanged for the caller to deal with.
+    """
+    from datetime import date, datetime, timedelta
+
+    if not date_str:
+        return ""
+    stripped = date_str.strip()
+    relative_days = {"today": 0, "tomorrow": 1}.get(stripped.lower())
+    if relative_days is not None:
+        return (date.today() + timedelta(days=relative_days)).isoformat()
+    for fmt in ("%d %B, %Y", "%d %B %Y"):
+        try:
+            return datetime.strptime(stripped, fmt).date().isoformat()
+        except ValueError:
+            continue
+    return date_str  # Return as-is if unparseable
+
+
def transform_property(
search_data: dict,
detail_data: dict | None,
@@ -767,7 +795,7 @@ def transform_property(
"Total floor area (sqm)": parse_floor_area(description),
"Listing URL": listing_url,
"Listing features": [],
- "first_visible_date": detail.get("available_date", ""),
+ "first_visible_date": _parse_or_date(detail.get("available_date", "")),
}
diff --git a/finder/storage.py b/finder/storage.py
index 4ab685f..487ee34 100644
--- a/finder/storage.py
+++ b/finder/storage.py
@@ -5,7 +5,7 @@ from pathlib import Path
import polars as pl
from constants import MAX_BEDROOMS, MAX_RENT_MONTHLY, MIN_RENT_MONTHLY
-from transform import normalize_price
+from transform import map_property_type, normalize_price
log = logging.getLogger("rightmove")
@@ -43,6 +43,19 @@ def write_parquet(properties: list[dict], path: Path, channel: str) -> None:
MAX_BEDROOMS,
)
+ # Re-derive Property type from Property sub-type using current PROPERTY_TYPE_MAP.
+ # This retroactively fixes data scraped with older versions of the type map.
+ remapped = 0
+ for p in properties:
+ sub_type = p.get("Property sub-type", "")
+ if sub_type and sub_type != "Unknown":
+ new_type = map_property_type(sub_type)
+ if new_type != p.get("Property type"):
+ p["Property type"] = new_type
+ remapped += 1
+ if remapped:
+ log.info("Re-mapped %d property types from sub-types", remapped)
+
# Parse first_visible_date to datetime
listing_dates = []
for p in properties:
@@ -56,7 +69,27 @@ def write_parquet(properties: list[dict], path: Path, channel: str) -> None:
dt = dt.astimezone(timezone.utc).replace(tzinfo=None)
listing_dates.append(dt)
except (ValueError, TypeError):
- listing_dates.append(None)
+ # Try additional date formats (OpenRent: "DD Month, YYYY", "Today")
+ parsed = None
+ stripped = fvd.strip()
+ lower = stripped.lower()
+ if lower == "today":
+ parsed = datetime.now().replace(
+ hour=0, minute=0, second=0, microsecond=0
+ )
+ elif lower == "tomorrow":
+ from datetime import timedelta
+ parsed = (
+ datetime.now() + timedelta(days=1)
+ ).replace(hour=0, minute=0, second=0, microsecond=0)
+ else:
+ for fmt in ("%d %B, %Y", "%d %B %Y"):
+ try:
+ parsed = datetime.strptime(stripped, fmt)
+ break
+ except ValueError:
+ continue
+ listing_dates.append(parsed)
else:
listing_dates.append(None)
diff --git a/finder/transform.py b/finder/transform.py
index 1027220..301e0e6 100644
--- a/finder/transform.py
+++ b/finder/transform.py
@@ -49,6 +49,22 @@ def map_property_type(sub_type: str | None) -> str:
canonical = PROPERTY_TYPE_MAP.get(sub_type)
if canonical:
return canonical
+ # Try title-case variant (e.g., "country house" → "Country House")
+ canonical = PROPERTY_TYPE_MAP.get(sub_type.title())
+ if canonical:
+ return canonical
+ # Keyword fallback for compound types not in the map
+ lower = sub_type.lower()
+ if "flat" in lower or "apartment" in lower or "maisonette" in lower or "studio" in lower:
+ return "Flats/Maisonettes"
+ if "semi" in lower and "detach" in lower:
+ return "Semi-Detached"
+ if "detach" in lower:
+ return "Detached"
+ if "terrace" in lower or "mews" in lower:
+ return "Terraced"
+ if "house" in lower or "cottage" in lower:
+ return "Detached"
log.warning("Unknown propertySubType: %r — mapping to Other", sub_type)
return "Other"
@@ -86,6 +102,15 @@ def fix_coords(lat: float, lng: float) -> tuple[float, float]:
return lat, lng
+def normalize_postcode(postcode: str) -> str:
+    """Upper-case a UK postcode, leaving one space before the 3-char incode.
+    E.g., 'SW1A1AA' and 'sw1a  1aa' → 'SW1A 1AA'; 'E1 4AB' stays unchanged."""
+    cleaned = " ".join(postcode.upper().split())  # trim + collapse whitespace runs
+    if " " in cleaned or len(cleaned) < 5:  # already separated, or too short to split
+        return cleaned
+    return cleaned[:-3] + " " + cleaned[-3:]
+
+
def normalize_price(amount: int, frequency: str) -> int:
"""Normalise price to monthly for rentals (weekly × 52/12, yearly ÷ 12)."""
if frequency == "weekly":
diff --git a/finder/uv.lock b/finder/uv.lock
index 26df5be..2cc4cec 100644
--- a/finder/uv.lock
+++ b/finder/uv.lock
@@ -301,6 +301,7 @@ dependencies = [
{ name = "fake-useragent" },
{ name = "flask" },
{ name = "httpx" },
+ { name = "lxml" },
{ name = "playwright" },
{ name = "playwright-stealth" },
{ name = "polars" },
@@ -315,6 +316,7 @@ requires-dist = [
{ name = "fake-useragent", specifier = ">=2.2.0" },
{ name = "flask" },
{ name = "httpx" },
+ { name = "lxml" },
{ name = "playwright", specifier = ">=1.58.0" },
{ name = "playwright-stealth", specifier = ">=2.0.2" },
{ name = "polars" },
diff --git a/finder/zoopla.py b/finder/zoopla.py
index 19d3b31..f610704 100644
--- a/finder/zoopla.py
+++ b/finder/zoopla.py
@@ -104,6 +104,22 @@ _EXTRACT_LISTINGS_JS = r"""() => {
if (ptMatch) property_type = ptMatch[1].trim();
else if (/\bstudio\s*(?:flat|apartment)?\s+(?:for\s+sale|to\s+(?:rent|let)|for\s+rent)/i.test(text)) property_type = 'Studio';
+ // Keyword fallback when regex doesn't match current DOM format
+ if (!property_type) {
+ const lower = text.toLowerCase();
+ if (/\bstudio\b/.test(lower)) property_type = 'Studio';
+ else if (/\bpenthouse\b/.test(lower)) property_type = 'Penthouse';
+ else if (/\bmaisonette\b/.test(lower)) property_type = 'Maisonette';
+ else if (/\bapartment\b/.test(lower)) property_type = 'Apartment';
+ else if (/\bflat\b/.test(lower)) property_type = 'Flat';
+ else if (/\bsemi[- ]?detached\b/.test(lower)) property_type = 'Semi-Detached';
+ else if (/\bdetached\b/.test(lower)) property_type = 'Detached';
+ else if (/\bterraced?\b/.test(lower)) property_type = 'Terraced';
+ else if (/\bbungalow\b/.test(lower)) property_type = 'Bungalow';
+ else if (/\bcottage\b/.test(lower)) property_type = 'Cottage';
+ else if (/\bhouse\b/.test(lower)) property_type = 'House';
+ }
+
results.push({
id, url: href.replace(window.location.origin, ''),
price: priceMatch ? parseInt(priceMatch[1].replace(/,/g, '')) : null,
@@ -172,6 +188,22 @@ _EXTRACT_LISTINGS_JS = r"""() => {
if (ptMatch2) property_type = ptMatch2[1].trim();
else if (/\bstudio\s*(?:flat|apartment)?\s+(?:for\s+sale|to\s+(?:rent|let)|for\s+rent)/i.test(text)) property_type = 'Studio';
+ // Keyword fallback when regex doesn't match current DOM format
+ if (!property_type) {
+ const lower = text.toLowerCase();
+ if (/\bstudio\b/.test(lower)) property_type = 'Studio';
+ else if (/\bpenthouse\b/.test(lower)) property_type = 'Penthouse';
+ else if (/\bmaisonette\b/.test(lower)) property_type = 'Maisonette';
+ else if (/\bapartment\b/.test(lower)) property_type = 'Apartment';
+ else if (/\bflat\b/.test(lower)) property_type = 'Flat';
+ else if (/\bsemi[- ]?detached\b/.test(lower)) property_type = 'Semi-Detached';
+ else if (/\bdetached\b/.test(lower)) property_type = 'Detached';
+ else if (/\bterraced?\b/.test(lower)) property_type = 'Terraced';
+ else if (/\bbungalow\b/.test(lower)) property_type = 'Bungalow';
+ else if (/\bcottage\b/.test(lower)) property_type = 'Cottage';
+ else if (/\bhouse\b/.test(lower)) property_type = 'House';
+ }
+
results.push({
id, url: href.replace(window.location.origin, ''),
price: priceMatch ? parseInt(priceMatch[1].replace(/,/g, '')) : null,
@@ -596,10 +628,15 @@ def _resolve_outcode_coords(
def _extract_postcode(text: str) -> str | None:
- """Extract a full UK postcode from text like 'Dollar Bay Place, Canary Wharf E14 9SS'."""
+    """Extract a full UK postcode from text like 'Dollar Bay Place, Canary Wharf E14 9SS'.
+    The result is upper-cased with exactly one space before the 3-char incode,
+    whatever whitespace the text had there; returns None if nothing matches."""
match = re.search(r"([A-Z]{1,2}\d[A-Z0-9]?\s*\d[A-Z]{2})", text, re.IGNORECASE)
if match:
- return match.group(1).upper().strip()
+        # \s* in the pattern can capture tabs, NBSPs or several spaces, so
+        # strip all whitespace; the incode is then always the last 3 chars.
+        compact = "".join(match.group(1).split()).upper()
+        return f"{compact[:-3]} {compact[-3:]}"  # "SW1A1AA" → "SW1A 1AA"
return None
@@ -651,13 +688,20 @@ def _detect_rent_frequency(price_text: str) -> str:
Zoopla price elements contain text like '£1,500 pcm', '£350 pw',
'£18,000 pa'. Defaults to 'monthly' if no frequency indicator found.
+
+ Checks monthly indicators (pcm) BEFORE weekly (pw) because Zoopla cards
+ often display both monthly and weekly prices in the same text. When the
+ JS extraction falls back to full card text, checking pcm first ensures
+ the captured monthly price gets the correct frequency label.
"""
lower = price_text.lower()
+ if "pcm" in lower or "per month" in lower or "per calendar month" in lower:
+ return "monthly"
if "pw" in lower or "per week" in lower or "/w" in lower:
return "weekly"
if "pa" in lower or "per annum" in lower or "/y" in lower or "per year" in lower:
return "yearly"
- # pcm, per month, /m, or no indicator — default monthly
+ # No indicator — default monthly (Zoopla standard)
return "monthly"
diff --git a/frontend/src/components/map/Filters.tsx b/frontend/src/components/map/Filters.tsx
index 9e49308..0c9ffed 100644
--- a/frontend/src/components/map/Filters.tsx
+++ b/frontend/src/components/map/Filters.tsx
@@ -11,6 +11,8 @@ import InfoPopup from '../ui/InfoPopup';
import { FeatureInfoPopup } from '../ui/FeatureInfoPopup';
import { FeatureActions } from '../ui/FeatureIcons';
import { FeatureLabel } from '../ui/FeatureLabel';
+import { getFeatureIcon } from '../../lib/feature-icons';
+import { getGroupIcon } from '../../lib/group-icons';
import AiFilterInput from './AiFilterInput';
import type { AiFilterErrorType } from '../../hooks/useAiFilters';
import FeatureBrowser from './FeatureBrowser';
@@ -552,6 +554,12 @@ export default memo(function Filters({
clampMax ? feature.max! : displayValue[1],
];
+ const mobileIconClass = 'w-4 h-4 text-teal-600 dark:text-teal-400 shrink-0';
+ const mobileIcon = getFeatureIcon(feature.name, mobileIconClass) || (() => {
+ const G = feature.group ? getGroupIcon(feature.group) : null;
+ return G ?