diff --git a/docker-compose.yml b/docker-compose.yml index 0b0f525..af992c4 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -163,6 +163,7 @@ services: # - ./finder:/app # environment: # FLARESOLVERR_URL: http://flaresolverr:8191 + # RELOAD_URL: http://server:8001/api/reload # depends_on: # gluetun: # condition: service_healthy diff --git a/finder/constants.py b/finder/constants.py index d9ab10c..821aae3 100644 --- a/finder/constants.py +++ b/finder/constants.py @@ -104,6 +104,18 @@ PROPERTY_TYPE_MAP = { "Garages": "Other", "Mews": "Terraced", "Property": "Other", + "Flat Share": "Other", + "Block of Apartments": "Flats/Maisonettes", + "Private Halls": "Flats/Maisonettes", + "Terraced Bungalow": "Terraced", + "Equestrian Facility": "Other", + "Ground Maisonette": "Flats/Maisonettes", + "Country House": "Detached", + "Village House": "Detached", + "Farm Land": "Other", + "House Boat": "Other", + "Barn": "Other", + "Serviced Apartments": "Flats/Maisonettes", # Lowercase variants (from home.co.uk / Rightmove APIs) "house": "Detached", "bungalow": "Other", @@ -113,6 +125,19 @@ PROPERTY_TYPE_MAP = { "not-specified": "Other", "retirement-property": "Flats/Maisonettes", "equestrian-facility": "Other", + "flat": "Flats/Maisonettes", + "detached": "Detached", + "semi-detached": "Semi-Detached", + "terraced": "Terraced", + "maisonette": "Flats/Maisonettes", + "apartment": "Flats/Maisonettes", + "studio": "Flats/Maisonettes", + "penthouse": "Flats/Maisonettes", + "cottage": "Other", + "chalet": "Other", + "farm_house": "Detached", + "country house": "Detached", + "village house": "Detached", } CHANNELS = [ diff --git a/finder/homecouk.py b/finder/homecouk.py index f9e290f..bace56d 100644 --- a/finder/homecouk.py +++ b/finder/homecouk.py @@ -363,7 +363,7 @@ def transform_property( "Address per Property Register": address, "Leasehold/Freehold": parse_tenure(prop), "Property type": map_property_type(listing_type), - "Property sub-type": listing_type or "Unknown", + "Property sub-type": listing_type.title() if listing_type else "Unknown", "price": int(price), "price_frequency": "" if channel == "BUY" else "monthly", "Price qualifier": price_qualifier, diff --git a/finder/openrent.py b/finder/openrent.py index ce27fe8..f08a3cd 100644 --- a/finder/openrent.py +++ b/finder/openrent.py @@ -289,10 +289,15 @@ def _extract_beds_baths_from_features( def _extract_postcode(text: str) -> str | None: - """Extract full UK postcode from text like '2 Bed Flat, Pimlico, SW1V 2AA'.""" + """Extract full UK postcode from text like '2 Bed Flat, Pimlico, SW1V 2AA'. + Normalizes to include a space before the 3-char incode.""" match = re.search(r"([A-Z]{1,2}\d[A-Z0-9]?\s*\d[A-Z]{2})", text, re.IGNORECASE) if match: - return match.group(1).upper().strip() + raw = match.group(1).upper().strip() + # Ensure space before incode (last 3 chars): "IP265AT" → "IP26 5AT" + if " " not in raw and len(raw) >= 5: + return raw[:-3] + " " + raw[-3:] + return raw return None @@ -635,6 +640,29 @@ def _resolve_outcode_postcodes( return results +def _parse_or_date(date_str: str) -> str: + """Parse OpenRent date strings to ISO format (YYYY-MM-DD). + Handles 'Today', 'Tomorrow', and 'DD Month, YYYY' formats.""" + if not date_str: + return "" + stripped = date_str.strip() + lower = stripped.lower() + if lower == "today": + from datetime import datetime + return datetime.now().strftime("%Y-%m-%d") + if lower == "tomorrow": + from datetime import datetime, timedelta + return (datetime.now() + timedelta(days=1)).strftime("%Y-%m-%d") + # Try "DD Month, YYYY" format (e.g., "01 April, 2026") + from datetime import datetime + for fmt in ("%d %B, %Y", "%d %B %Y"): + try: + return datetime.strptime(stripped, fmt).strftime("%Y-%m-%d") + except ValueError: + continue + return date_str # Return as-is if unparseable + + def transform_property( search_data: dict, detail_data: dict | None, @@ -767,7 +795,7 @@ def transform_property( "Total floor area (sqm)": parse_floor_area(description), "Listing URL": listing_url, "Listing features": [], - "first_visible_date": detail.get("available_date", ""), + "first_visible_date": _parse_or_date(detail.get("available_date", "")), } diff --git a/finder/storage.py b/finder/storage.py index 4ab685f..487ee34 100644 --- a/finder/storage.py +++ b/finder/storage.py @@ -5,7 +5,7 @@ from pathlib import Path import polars as pl from constants import MAX_BEDROOMS, MAX_RENT_MONTHLY, MIN_RENT_MONTHLY -from transform import normalize_price +from transform import map_property_type, normalize_price log = logging.getLogger("rightmove") @@ -43,6 +43,19 @@ def write_parquet(properties: list[dict], path: Path, channel: str) -> None: MAX_BEDROOMS, ) + # Re-derive Property type from Property sub-type using current PROPERTY_TYPE_MAP. + # This retroactively fixes data scraped with older versions of the type map. + remapped = 0 + for p in properties: + sub_type = p.get("Property sub-type", "") + if sub_type and sub_type != "Unknown": + new_type = map_property_type(sub_type) + if new_type != p.get("Property type"): + p["Property type"] = new_type + remapped += 1 + if remapped: + log.info("Re-mapped %d property types from sub-types", remapped) + # Parse first_visible_date to datetime listing_dates = [] for p in properties: @@ -56,7 +69,27 @@ def write_parquet(properties: list[dict], path: Path, channel: str) -> None: dt = dt.astimezone(timezone.utc).replace(tzinfo=None) listing_dates.append(dt) except (ValueError, TypeError): - listing_dates.append(None) + # Try additional date formats (OpenRent: "DD Month, YYYY", "Today") + parsed = None + stripped = fvd.strip() + lower = stripped.lower() + if lower == "today": + parsed = datetime.now().replace( + hour=0, minute=0, second=0, microsecond=0 + ) + elif lower == "tomorrow": + from datetime import timedelta + parsed = ( + datetime.now() + timedelta(days=1) + ).replace(hour=0, minute=0, second=0, microsecond=0) + else: + for fmt in ("%d %B, %Y", "%d %B %Y"): + try: + parsed = datetime.strptime(stripped, fmt) + break + except ValueError: + continue + listing_dates.append(parsed) else: listing_dates.append(None) diff --git a/finder/transform.py b/finder/transform.py index 1027220..301e0e6 100644 --- a/finder/transform.py +++ b/finder/transform.py @@ -49,6 +49,22 @@ def map_property_type(sub_type: str | None) -> str: canonical = PROPERTY_TYPE_MAP.get(sub_type) if canonical: return canonical + # Try title-case variant (e.g., "country house" → "Country House") + canonical = PROPERTY_TYPE_MAP.get(sub_type.title()) + if canonical: + return canonical + # Keyword fallback for compound types not in the map + lower = sub_type.lower() + if "flat" in lower or "apartment" in lower or "maisonette" in lower or "studio" in lower: + return "Flats/Maisonettes" + if "semi" in lower and "detach" in lower: + return "Semi-Detached" + if "detach" in lower: + return "Detached" + if "terrace" in lower or "mews" in lower: + return "Terraced" + if "house" in lower or "cottage" in lower: + return "Detached" log.warning("Unknown propertySubType: %r — mapping to Other", sub_type) return "Other" @@ -86,6 +102,15 @@ def fix_coords(lat: float, lng: float) -> tuple[float, float]: return lat, lng +def normalize_postcode(postcode: str) -> str: + """Ensure UK postcode has a space before the 3-char incode. + E.g., 'SW1A1AA' → 'SW1A 1AA', 'E1 4AB' unchanged.""" + postcode = postcode.strip().upper() + if " " in postcode or len(postcode) < 5: + return postcode + return postcode[:-3] + " " + postcode[-3:] + + def normalize_price(amount: int, frequency: str) -> int: """Normalise price to monthly for rentals (weekly × 52/12, yearly ÷ 12).""" if frequency == "weekly": diff --git a/finder/uv.lock b/finder/uv.lock index 26df5be..2cc4cec 100644 --- a/finder/uv.lock +++ b/finder/uv.lock @@ -301,6 +301,7 @@ dependencies = [ { name = "fake-useragent" }, { name = "flask" }, { name = "httpx" }, + { name = "lxml" }, { name = "playwright" }, { name = "playwright-stealth" }, { name = "polars" }, @@ -315,6 +316,7 @@ requires-dist = [ { name = "fake-useragent", specifier = ">=2.2.0" }, { name = "flask" }, { name = "httpx" }, + { name = "lxml" }, { name = "playwright", specifier = ">=1.58.0" }, { name = "playwright-stealth", specifier = ">=2.0.2" }, { name = "polars" }, diff --git a/finder/zoopla.py b/finder/zoopla.py index 19d3b31..f610704 100644 --- a/finder/zoopla.py +++ b/finder/zoopla.py @@ -104,6 +104,22 @@ _EXTRACT_LISTINGS_JS = r"""() => { if (ptMatch) property_type = ptMatch[1].trim(); else if (/\bstudio\s*(?:flat|apartment)?\s+(?:for\s+sale|to\s+(?:rent|let)|for\s+rent)/i.test(text)) property_type = 'Studio'; + // Keyword fallback when regex doesn't match current DOM format + if (!property_type) { + const lower = text.toLowerCase(); + if (/\bstudio\b/.test(lower)) property_type = 'Studio'; + else if (/\bpenthouse\b/.test(lower)) property_type = 'Penthouse'; + else if (/\bmaisonette\b/.test(lower)) property_type = 'Maisonette'; + else if (/\bapartment\b/.test(lower)) property_type = 'Apartment'; + else if (/\bflat\b/.test(lower)) property_type = 'Flat'; + else if (/\bsemi[- ]?detached\b/.test(lower)) property_type = 'Semi-Detached'; + else if (/\bdetached\b/.test(lower)) property_type = 'Detached'; + else if (/\bterraced?\b/.test(lower)) property_type = 'Terraced'; + else if (/\bbungalow\b/.test(lower)) property_type = 'Bungalow'; + else if (/\bcottage\b/.test(lower)) property_type = 'Cottage'; + else if (/\bhouse\b/.test(lower)) property_type = 'House'; + } + results.push({ id, url: href.replace(window.location.origin, ''), price: priceMatch ? parseInt(priceMatch[1].replace(/,/g, '')) : null, @@ -172,6 +188,22 @@ _EXTRACT_LISTINGS_JS = r"""() => { if (ptMatch2) property_type = ptMatch2[1].trim(); else if (/\bstudio\s*(?:flat|apartment)?\s+(?:for\s+sale|to\s+(?:rent|let)|for\s+rent)/i.test(text)) property_type = 'Studio'; + // Keyword fallback when regex doesn't match current DOM format + if (!property_type) { + const lower = text.toLowerCase(); + if (/\bstudio\b/.test(lower)) property_type = 'Studio'; + else if (/\bpenthouse\b/.test(lower)) property_type = 'Penthouse'; + else if (/\bmaisonette\b/.test(lower)) property_type = 'Maisonette'; + else if (/\bapartment\b/.test(lower)) property_type = 'Apartment'; + else if (/\bflat\b/.test(lower)) property_type = 'Flat'; + else if (/\bsemi[- ]?detached\b/.test(lower)) property_type = 'Semi-Detached'; + else if (/\bdetached\b/.test(lower)) property_type = 'Detached'; + else if (/\bterraced?\b/.test(lower)) property_type = 'Terraced'; + else if (/\bbungalow\b/.test(lower)) property_type = 'Bungalow'; + else if (/\bcottage\b/.test(lower)) property_type = 'Cottage'; + else if (/\bhouse\b/.test(lower)) property_type = 'House'; + } + results.push({ id, url: href.replace(window.location.origin, ''), price: priceMatch ? parseInt(priceMatch[1].replace(/,/g, '')) : null, @@ -596,10 +628,15 @@ def _resolve_outcode_coords( def _extract_postcode(text: str) -> str | None: - """Extract a full UK postcode from text like 'Dollar Bay Place, Canary Wharf E14 9SS'.""" + """Extract a full UK postcode from text like 'Dollar Bay Place, Canary Wharf E14 9SS'. + Normalizes to include a space before the 3-char incode.""" match = re.search(r"([A-Z]{1,2}\d[A-Z0-9]?\s*\d[A-Z]{2})", text, re.IGNORECASE) if match: - return match.group(1).upper().strip() + raw = match.group(1).upper().strip() + # Ensure space before incode (last 3 chars): "SW1A1AA" → "SW1A 1AA" + if " " not in raw and len(raw) >= 5: + return raw[:-3] + " " + raw[-3:] + return raw return None @@ -651,13 +688,20 @@ def _detect_rent_frequency(price_text: str) -> str: Zoopla price elements contain text like '£1,500 pcm', '£350 pw', '£18,000 pa'. Defaults to 'monthly' if no frequency indicator found. + + Checks monthly indicators (pcm) BEFORE weekly (pw) because Zoopla cards + often display both monthly and weekly prices in the same text. When the + JS extraction falls back to full card text, checking pcm first ensures + the captured monthly price gets the correct frequency label. """ lower = price_text.lower() + if "pcm" in lower or "per month" in lower or "per calendar month" in lower: + return "monthly" if "pw" in lower or "per week" in lower or "/w" in lower: return "weekly" if "pa" in lower or "per annum" in lower or "/y" in lower or "per year" in lower: return "yearly" - # pcm, per month, /m, or no indicator — default monthly + # No indicator — default monthly (Zoopla standard) return "monthly" diff --git a/frontend/src/components/map/Filters.tsx b/frontend/src/components/map/Filters.tsx index 9e49308..0c9ffed 100644 --- a/frontend/src/components/map/Filters.tsx +++ b/frontend/src/components/map/Filters.tsx @@ -11,6 +11,8 @@ import InfoPopup from '../ui/InfoPopup'; import { FeatureInfoPopup } from '../ui/FeatureInfoPopup'; import { FeatureActions } from '../ui/FeatureIcons'; import { FeatureLabel } from '../ui/FeatureLabel'; +import { getFeatureIcon } from '../../lib/feature-icons'; +import { getGroupIcon } from '../../lib/group-icons'; import AiFilterInput from './AiFilterInput'; import type { AiFilterErrorType } from '../../hooks/useAiFilters'; import FeatureBrowser from './FeatureBrowser'; @@ -552,6 +554,12 @@ export default memo(function Filters({ clampMax ? feature.max! : displayValue[1], ]; + const mobileIconClass = 'w-4 h-4 text-teal-600 dark:text-teal-400 shrink-0'; + const mobileIcon = getFeatureIcon(feature.name, mobileIconClass) || (() => { + const G = feature.group ? getGroupIcon(feature.group) : null; + return G ? : null; + })(); + return (
- +
-
+
+ {mobileIcon &&
{mobileIcon}
} +
onFilterChange(feature.name, v)} /> +
); diff --git a/frontend/src/components/map/MapPage.tsx b/frontend/src/components/map/MapPage.tsx index e77806a..74e68ae 100644 --- a/frontend/src/components/map/MapPage.tsx +++ b/frontend/src/components/map/MapPage.tsx @@ -338,6 +338,18 @@ export default function MapPage({ return () => document.removeEventListener('wheel', handleWheel); }, []); + // On mobile, push a guard history entry to absorb accidental back navigations + // (e.g. iOS Safari edge-swipe that CSS touch-action can't prevent) + useEffect(() => { + if (!isMobile) return; + window.history.pushState({ dashboardGuard: true }, ''); + const handlePopState = () => { + window.history.pushState({ dashboardGuard: true }, ''); + }; + window.addEventListener('popstate', handlePopState); + return () => window.removeEventListener('popstate', handlePopState); + }, [isMobile]); + const { handleHexagonClick } = selection; const handleMobileHexagonClick = useCallback( (id: string, isPostcode?: boolean, geometry?: PostcodeGeometry) => { @@ -611,7 +623,7 @@ export default function MapPage({ if (isMobile) { return ( -
+
{initialLoading && (
diff --git a/frontend/src/components/ui/FeatureLabel.tsx b/frontend/src/components/ui/FeatureLabel.tsx index 178fba3..c994479 100644 --- a/frontend/src/components/ui/FeatureLabel.tsx +++ b/frontend/src/components/ui/FeatureLabel.tsx @@ -15,6 +15,7 @@ interface FeatureLabelProps { className?: string; size?: 'xs' | 'sm'; description?: string; + hideIconOnMobile?: boolean; } export function FeatureLabel({ @@ -23,9 +24,11 @@ export function FeatureLabel({ className = '', size = 'xs', description, + hideIconOnMobile, }: FeatureLabelProps) { const textClass = size === 'sm' ? 'text-sm' : 'text-xs'; - const iconClass = 'w-3.5 h-3.5 text-teal-600 dark:text-teal-400 shrink-0'; + const mobileHide = hideIconOnMobile ? 'hidden md:block ' : ''; + const iconClass = `${mobileHide}w-3.5 h-3.5 text-teal-600 dark:text-teal-400 shrink-0`; const featureIcon = getFeatureIcon(feature.name, iconClass); const GroupIcon = !featureIcon && feature.group ? getGroupIcon(feature.group) : null; const modeTag = diff --git a/frontend/src/hooks/useHexagonSelection.ts b/frontend/src/hooks/useHexagonSelection.ts index 5a36932..a29976a 100644 --- a/frontend/src/hooks/useHexagonSelection.ts +++ b/frontend/src/hooks/useHexagonSelection.ts @@ -57,7 +57,7 @@ export function useHexagonSelection({ const filterStr = buildFilterString(filters, features); if (filterStr) params.append('filters', filterStr); if (fields) { - params.set('fields', fields.join(',')); + params.set('fields', fields.join(';;')); } if (journeyDest) { params.set('journey_mode', journeyDest.mode);