Small fixes

This commit is contained in:
Andras Schmelczer 2026-03-28 09:29:56 +00:00
parent d93beb9201
commit 7591e5fc05
12 changed files with 198 additions and 14 deletions

View file

@ -163,6 +163,7 @@ services:
# - ./finder:/app
# environment:
# FLARESOLVERR_URL: http://flaresolverr:8191
# RELOAD_URL: http://server:8001/api/reload
# depends_on:
# gluetun:
# condition: service_healthy

View file

@ -104,6 +104,18 @@ PROPERTY_TYPE_MAP = {
"Garages": "Other",
"Mews": "Terraced",
"Property": "Other",
"Flat Share": "Other",
"Block of Apartments": "Flats/Maisonettes",
"Private Halls": "Flats/Maisonettes",
"Terraced Bungalow": "Terraced",
"Equestrian Facility": "Other",
"Ground Maisonette": "Flats/Maisonettes",
"Country House": "Detached",
"Village House": "Detached",
"Farm Land": "Other",
"House Boat": "Other",
"Barn": "Other",
"Serviced Apartments": "Flats/Maisonettes",
# Lowercase variants (from home.co.uk / Rightmove APIs)
"house": "Detached",
"bungalow": "Other",
@ -113,6 +125,19 @@ PROPERTY_TYPE_MAP = {
"not-specified": "Other",
"retirement-property": "Flats/Maisonettes",
"equestrian-facility": "Other",
"flat": "Flats/Maisonettes",
"detached": "Detached",
"semi-detached": "Semi-Detached",
"terraced": "Terraced",
"maisonette": "Flats/Maisonettes",
"apartment": "Flats/Maisonettes",
"studio": "Flats/Maisonettes",
"penthouse": "Flats/Maisonettes",
"cottage": "Other",
"chalet": "Other",
"farm_house": "Detached",
"country house": "Detached",
"village house": "Detached",
}
CHANNELS = [

View file

@ -363,7 +363,7 @@ def transform_property(
"Address per Property Register": address,
"Leasehold/Freehold": parse_tenure(prop),
"Property type": map_property_type(listing_type),
"Property sub-type": listing_type or "Unknown",
"Property sub-type": listing_type.title() if listing_type else "Unknown",
"price": int(price),
"price_frequency": "" if channel == "BUY" else "monthly",
"Price qualifier": price_qualifier,

View file

@ -289,10 +289,15 @@ def _extract_beds_baths_from_features(
def _extract_postcode(text: str) -> str | None:
"""Extract full UK postcode from text like '2 Bed Flat, Pimlico, SW1V 2AA'."""
"""Extract full UK postcode from text like '2 Bed Flat, Pimlico, SW1V 2AA'.
Normalizes to include a space before the 3-char incode."""
match = re.search(r"([A-Z]{1,2}\d[A-Z0-9]?\s*\d[A-Z]{2})", text, re.IGNORECASE)
if match:
return match.group(1).upper().strip()
raw = match.group(1).upper().strip()
# Ensure space before incode (last 3 chars): "IP265AT" → "IP26 5AT"
if " " not in raw and len(raw) >= 5:
return raw[:-3] + " " + raw[-3:]
return raw
return None
@ -635,6 +640,29 @@ def _resolve_outcode_postcodes(
return results
def _parse_or_date(date_str: str) -> str:
"""Parse OpenRent date strings to ISO format (YYYY-MM-DD).
Handles 'Today', 'Tomorrow', and 'DD Month, YYYY' formats."""
if not date_str:
return ""
stripped = date_str.strip()
lower = stripped.lower()
if lower == "today":
from datetime import datetime
return datetime.now().strftime("%Y-%m-%d")
if lower == "tomorrow":
from datetime import datetime, timedelta
return (datetime.now() + timedelta(days=1)).strftime("%Y-%m-%d")
# Try "DD Month, YYYY" format (e.g., "01 April, 2026")
from datetime import datetime
for fmt in ("%d %B, %Y", "%d %B %Y"):
try:
return datetime.strptime(stripped, fmt).strftime("%Y-%m-%d")
except ValueError:
continue
return date_str # Return as-is if unparseable
def transform_property(
search_data: dict,
detail_data: dict | None,
@ -767,7 +795,7 @@ def transform_property(
"Total floor area (sqm)": parse_floor_area(description),
"Listing URL": listing_url,
"Listing features": [],
"first_visible_date": detail.get("available_date", ""),
"first_visible_date": _parse_or_date(detail.get("available_date", "")),
}

View file

@ -5,7 +5,7 @@ from pathlib import Path
import polars as pl
from constants import MAX_BEDROOMS, MAX_RENT_MONTHLY, MIN_RENT_MONTHLY
from transform import normalize_price
from transform import map_property_type, normalize_price
log = logging.getLogger("rightmove")
@ -43,6 +43,19 @@ def write_parquet(properties: list[dict], path: Path, channel: str) -> None:
MAX_BEDROOMS,
)
# Re-derive Property type from Property sub-type using current PROPERTY_TYPE_MAP.
# This retroactively fixes data scraped with older versions of the type map.
remapped = 0
for p in properties:
sub_type = p.get("Property sub-type", "")
if sub_type and sub_type != "Unknown":
new_type = map_property_type(sub_type)
if new_type != p.get("Property type"):
p["Property type"] = new_type
remapped += 1
if remapped:
log.info("Re-mapped %d property types from sub-types", remapped)
# Parse first_visible_date to datetime
listing_dates = []
for p in properties:
@ -56,7 +69,27 @@ def write_parquet(properties: list[dict], path: Path, channel: str) -> None:
dt = dt.astimezone(timezone.utc).replace(tzinfo=None)
listing_dates.append(dt)
except (ValueError, TypeError):
listing_dates.append(None)
# Try additional date formats (OpenRent: "DD Month, YYYY", "Today")
parsed = None
stripped = fvd.strip()
lower = stripped.lower()
if lower == "today":
parsed = datetime.now().replace(
hour=0, minute=0, second=0, microsecond=0
)
elif lower == "tomorrow":
from datetime import timedelta
parsed = (
datetime.now() + timedelta(days=1)
).replace(hour=0, minute=0, second=0, microsecond=0)
else:
for fmt in ("%d %B, %Y", "%d %B %Y"):
try:
parsed = datetime.strptime(stripped, fmt)
break
except ValueError:
continue
listing_dates.append(parsed)
else:
listing_dates.append(None)

View file

@ -49,6 +49,22 @@ def map_property_type(sub_type: str | None) -> str:
canonical = PROPERTY_TYPE_MAP.get(sub_type)
if canonical:
return canonical
# Try title-case variant (e.g., "country house" → "Country House")
canonical = PROPERTY_TYPE_MAP.get(sub_type.title())
if canonical:
return canonical
# Keyword fallback for compound types not in the map
lower = sub_type.lower()
if "flat" in lower or "apartment" in lower or "maisonette" in lower or "studio" in lower:
return "Flats/Maisonettes"
if "semi" in lower and "detach" in lower:
return "Semi-Detached"
if "detach" in lower:
return "Detached"
if "terrace" in lower or "mews" in lower:
return "Terraced"
if "house" in lower or "cottage" in lower:
return "Detached"
log.warning("Unknown propertySubType: %r — mapping to Other", sub_type)
return "Other"
@ -86,6 +102,15 @@ def fix_coords(lat: float, lng: float) -> tuple[float, float]:
return lat, lng
def normalize_postcode(postcode: str) -> str:
    """Ensure a UK postcode has a single space before the 3-char incode.

    E.g. 'SW1A1AA' → 'SW1A 1AA'; 'E1 4AB' is unchanged. The value is
    upper-cased and surrounding whitespace is removed.

    Args:
        postcode: Postcode text, with or without a separator.

    Returns:
        The normalised postcode. Input that already contains a separator is
        returned with each whitespace run collapsed to one space; unseparated
        input shorter than 5 characters is returned as-is (upper-cased),
        since it is too short to hold an outcode plus a 3-char incode.
    """
    parts = postcode.upper().split()
    if len(parts) > 1:
        # Already separated: keep the caller's split point, but collapse
        # tabs / repeated spaces so the result holds exactly one space.
        return " ".join(parts)
    compact = "".join(parts)
    if len(compact) < 5:
        return compact
    return f"{compact[:-3]} {compact[-3:]}"
def normalize_price(amount: int, frequency: str) -> int:
"""Normalise price to monthly for rentals (weekly × 52/12, yearly ÷ 12)."""
if frequency == "weekly":

2
finder/uv.lock generated
View file

@ -301,6 +301,7 @@ dependencies = [
{ name = "fake-useragent" },
{ name = "flask" },
{ name = "httpx" },
{ name = "lxml" },
{ name = "playwright" },
{ name = "playwright-stealth" },
{ name = "polars" },
@ -315,6 +316,7 @@ requires-dist = [
{ name = "fake-useragent", specifier = ">=2.2.0" },
{ name = "flask" },
{ name = "httpx" },
{ name = "lxml" },
{ name = "playwright", specifier = ">=1.58.0" },
{ name = "playwright-stealth", specifier = ">=2.0.2" },
{ name = "polars" },

View file

@ -104,6 +104,22 @@ _EXTRACT_LISTINGS_JS = r"""() => {
if (ptMatch) property_type = ptMatch[1].trim();
else if (/\bstudio\s*(?:flat|apartment)?\s+(?:for\s+sale|to\s+(?:rent|let)|for\s+rent)/i.test(text)) property_type = 'Studio';
// Keyword fallback when regex doesn't match current DOM format
if (!property_type) {
const lower = text.toLowerCase();
if (/\bstudio\b/.test(lower)) property_type = 'Studio';
else if (/\bpenthouse\b/.test(lower)) property_type = 'Penthouse';
else if (/\bmaisonette\b/.test(lower)) property_type = 'Maisonette';
else if (/\bapartment\b/.test(lower)) property_type = 'Apartment';
else if (/\bflat\b/.test(lower)) property_type = 'Flat';
else if (/\bsemi[- ]?detached\b/.test(lower)) property_type = 'Semi-Detached';
else if (/\bdetached\b/.test(lower)) property_type = 'Detached';
else if (/\bterraced?\b/.test(lower)) property_type = 'Terraced';
else if (/\bbungalow\b/.test(lower)) property_type = 'Bungalow';
else if (/\bcottage\b/.test(lower)) property_type = 'Cottage';
else if (/\bhouse\b/.test(lower)) property_type = 'House';
}
results.push({
id, url: href.replace(window.location.origin, ''),
price: priceMatch ? parseInt(priceMatch[1].replace(/,/g, '')) : null,
@ -172,6 +188,22 @@ _EXTRACT_LISTINGS_JS = r"""() => {
if (ptMatch2) property_type = ptMatch2[1].trim();
else if (/\bstudio\s*(?:flat|apartment)?\s+(?:for\s+sale|to\s+(?:rent|let)|for\s+rent)/i.test(text)) property_type = 'Studio';
// Keyword fallback when regex doesn't match current DOM format
if (!property_type) {
const lower = text.toLowerCase();
if (/\bstudio\b/.test(lower)) property_type = 'Studio';
else if (/\bpenthouse\b/.test(lower)) property_type = 'Penthouse';
else if (/\bmaisonette\b/.test(lower)) property_type = 'Maisonette';
else if (/\bapartment\b/.test(lower)) property_type = 'Apartment';
else if (/\bflat\b/.test(lower)) property_type = 'Flat';
else if (/\bsemi[- ]?detached\b/.test(lower)) property_type = 'Semi-Detached';
else if (/\bdetached\b/.test(lower)) property_type = 'Detached';
else if (/\bterraced?\b/.test(lower)) property_type = 'Terraced';
else if (/\bbungalow\b/.test(lower)) property_type = 'Bungalow';
else if (/\bcottage\b/.test(lower)) property_type = 'Cottage';
else if (/\bhouse\b/.test(lower)) property_type = 'House';
}
results.push({
id, url: href.replace(window.location.origin, ''),
price: priceMatch ? parseInt(priceMatch[1].replace(/,/g, '')) : null,
@ -596,10 +628,15 @@ def _resolve_outcode_coords(
def _extract_postcode(text: str) -> str | None:
"""Extract a full UK postcode from text like 'Dollar Bay Place, Canary Wharf E14 9SS'."""
"""Extract a full UK postcode from text like 'Dollar Bay Place, Canary Wharf E14 9SS'.
Normalizes to include a space before the 3-char incode."""
match = re.search(r"([A-Z]{1,2}\d[A-Z0-9]?\s*\d[A-Z]{2})", text, re.IGNORECASE)
if match:
return match.group(1).upper().strip()
raw = match.group(1).upper().strip()
# Ensure space before incode (last 3 chars): "SW1A1AA" → "SW1A 1AA"
if " " not in raw and len(raw) >= 5:
return raw[:-3] + " " + raw[-3:]
return raw
return None
@ -651,13 +688,20 @@ def _detect_rent_frequency(price_text: str) -> str:
Zoopla price elements contain text like '£1,500 pcm', '£350 pw',
'£18,000 pa'. Defaults to 'monthly' if no frequency indicator found.
Checks monthly indicators (pcm) BEFORE weekly (pw) because Zoopla cards
often display both monthly and weekly prices in the same text. When the
JS extraction falls back to full card text, checking pcm first ensures
the captured monthly price gets the correct frequency label.
"""
lower = price_text.lower()
if "pcm" in lower or "per month" in lower or "per calendar month" in lower:
return "monthly"
if "pw" in lower or "per week" in lower or "/w" in lower:
return "weekly"
if "pa" in lower or "per annum" in lower or "/y" in lower or "per year" in lower:
return "yearly"
# pcm, per month, /m, or no indicator — default monthly
# No indicator — default monthly (Zoopla standard)
return "monthly"

View file

@ -11,6 +11,8 @@ import InfoPopup from '../ui/InfoPopup';
import { FeatureInfoPopup } from '../ui/FeatureInfoPopup';
import { FeatureActions } from '../ui/FeatureIcons';
import { FeatureLabel } from '../ui/FeatureLabel';
import { getFeatureIcon } from '../../lib/feature-icons';
import { getGroupIcon } from '../../lib/group-icons';
import AiFilterInput from './AiFilterInput';
import type { AiFilterErrorType } from '../../hooks/useAiFilters';
import FeatureBrowser from './FeatureBrowser';
@ -552,6 +554,12 @@ export default memo(function Filters({
clampMax ? feature.max! : displayValue[1],
];
const mobileIconClass = 'w-4 h-4 text-teal-600 dark:text-teal-400 shrink-0';
const mobileIcon = getFeatureIcon(feature.name, mobileIconClass) || (() => {
const G = feature.group ? getGroupIcon(feature.group) : null;
return G ? <G className={mobileIconClass} /> : null;
})();
return (
<div
key={feature.name}
@ -559,7 +567,7 @@ export default memo(function Filters({
className={`space-y-0.5 px-2 py-1.5 rounded ${isActive ? 'ring-2 ring-teal-400 bg-teal-50 dark:bg-teal-900/30' : isPinned ? 'ring-2 ring-teal-400 bg-teal-50/50 dark:bg-teal-900/20' : ''}`}
>
<div className="flex items-center justify-between gap-1">
<FeatureLabel feature={feature} size="sm" className="min-w-0 shrink" />
<FeatureLabel feature={feature} size="sm" className="min-w-0 shrink" hideIconOnMobile />
<FeatureActions
feature={feature}
isPinned={isPinned}
@ -568,7 +576,9 @@ export default memo(function Filters({
onRemove={onRemoveFilter}
/>
</div>
<div>
<div className="flex md:block items-start gap-1.5">
{mobileIcon && <div className="md:hidden shrink-0 pt-0.5">{mobileIcon}</div>}
<div className="min-w-0 flex-1">
<Slider
min={scale ? 0 : feature.min!}
max={scale ? 100 : feature.max!}
@ -608,6 +618,7 @@ export default memo(function Filters({
/>
</div>
</div>
</div>
);
})}
</div>

View file

@ -338,6 +338,18 @@ export default function MapPage({
return () => document.removeEventListener('wheel', handleWheel);
}, []);
// On mobile, push a guard history entry to absorb accidental back navigations
// (e.g. iOS Safari edge-swipe that CSS touch-action can't prevent)
useEffect(() => {
if (!isMobile) return;
window.history.pushState({ dashboardGuard: true }, '');
const handlePopState = () => {
window.history.pushState({ dashboardGuard: true }, '');
};
window.addEventListener('popstate', handlePopState);
return () => window.removeEventListener('popstate', handlePopState);
}, [isMobile]);
const { handleHexagonClick } = selection;
const handleMobileHexagonClick = useCallback(
(id: string, isPostcode?: boolean, geometry?: PostcodeGeometry) => {
@ -611,7 +623,7 @@ export default function MapPage({
if (isMobile) {
return (
<div className="flex-1 flex flex-col overflow-hidden relative">
<div className="flex-1 flex flex-col overflow-hidden relative touch-pan-y">
{initialLoading && (
<div className="absolute inset-0 z-50 flex items-center justify-center bg-warm-50/80 dark:bg-navy-950/80 backdrop-blur-sm">
<div className="flex flex-col items-center gap-4">

View file

@ -15,6 +15,7 @@ interface FeatureLabelProps {
className?: string;
size?: 'xs' | 'sm';
description?: string;
hideIconOnMobile?: boolean;
}
export function FeatureLabel({
@ -23,9 +24,11 @@ export function FeatureLabel({
className = '',
size = 'xs',
description,
hideIconOnMobile,
}: FeatureLabelProps) {
const textClass = size === 'sm' ? 'text-sm' : 'text-xs';
const iconClass = 'w-3.5 h-3.5 text-teal-600 dark:text-teal-400 shrink-0';
const mobileHide = hideIconOnMobile ? 'hidden md:block ' : '';
const iconClass = `${mobileHide}w-3.5 h-3.5 text-teal-600 dark:text-teal-400 shrink-0`;
const featureIcon = getFeatureIcon(feature.name, iconClass);
const GroupIcon = !featureIcon && feature.group ? getGroupIcon(feature.group) : null;
const modeTag =

View file

@ -57,7 +57,7 @@ export function useHexagonSelection({
const filterStr = buildFilterString(filters, features);
if (filterStr) params.append('filters', filterStr);
if (fields) {
params.set('fields', fields.join(','));
params.set('fields', fields.join(';;'));
}
if (journeyDest) {
params.set('journey_mode', journeyDest.mode);