Small fixes
This commit is contained in:
parent
d93beb9201
commit
7591e5fc05
12 changed files with 198 additions and 14 deletions
|
|
@ -163,6 +163,7 @@ services:
|
|||
# - ./finder:/app
|
||||
# environment:
|
||||
# FLARESOLVERR_URL: http://flaresolverr:8191
|
||||
# RELOAD_URL: http://server:8001/api/reload
|
||||
# depends_on:
|
||||
# gluetun:
|
||||
# condition: service_healthy
|
||||
|
|
|
|||
|
|
@ -104,6 +104,18 @@ PROPERTY_TYPE_MAP = {
|
|||
"Garages": "Other",
|
||||
"Mews": "Terraced",
|
||||
"Property": "Other",
|
||||
"Flat Share": "Other",
|
||||
"Block of Apartments": "Flats/Maisonettes",
|
||||
"Private Halls": "Flats/Maisonettes",
|
||||
"Terraced Bungalow": "Terraced",
|
||||
"Equestrian Facility": "Other",
|
||||
"Ground Maisonette": "Flats/Maisonettes",
|
||||
"Country House": "Detached",
|
||||
"Village House": "Detached",
|
||||
"Farm Land": "Other",
|
||||
"House Boat": "Other",
|
||||
"Barn": "Other",
|
||||
"Serviced Apartments": "Flats/Maisonettes",
|
||||
# Lowercase variants (from home.co.uk / Rightmove APIs)
|
||||
"house": "Detached",
|
||||
"bungalow": "Other",
|
||||
|
|
@ -113,6 +125,19 @@ PROPERTY_TYPE_MAP = {
|
|||
"not-specified": "Other",
|
||||
"retirement-property": "Flats/Maisonettes",
|
||||
"equestrian-facility": "Other",
|
||||
"flat": "Flats/Maisonettes",
|
||||
"detached": "Detached",
|
||||
"semi-detached": "Semi-Detached",
|
||||
"terraced": "Terraced",
|
||||
"maisonette": "Flats/Maisonettes",
|
||||
"apartment": "Flats/Maisonettes",
|
||||
"studio": "Flats/Maisonettes",
|
||||
"penthouse": "Flats/Maisonettes",
|
||||
"cottage": "Other",
|
||||
"chalet": "Other",
|
||||
"farm_house": "Detached",
|
||||
"country house": "Detached",
|
||||
"village house": "Detached",
|
||||
}
|
||||
|
||||
CHANNELS = [
|
||||
|
|
|
|||
|
|
@ -363,7 +363,7 @@ def transform_property(
|
|||
"Address per Property Register": address,
|
||||
"Leasehold/Freehold": parse_tenure(prop),
|
||||
"Property type": map_property_type(listing_type),
|
||||
"Property sub-type": listing_type or "Unknown",
|
||||
"Property sub-type": listing_type.title() if listing_type else "Unknown",
|
||||
"price": int(price),
|
||||
"price_frequency": "" if channel == "BUY" else "monthly",
|
||||
"Price qualifier": price_qualifier,
|
||||
|
|
|
|||
|
|
@ -289,10 +289,15 @@ def _extract_beds_baths_from_features(
|
|||
|
||||
|
||||
def _extract_postcode(text: str) -> str | None:
|
||||
"""Extract full UK postcode from text like '2 Bed Flat, Pimlico, SW1V 2AA'."""
|
||||
"""Extract full UK postcode from text like '2 Bed Flat, Pimlico, SW1V 2AA'.
|
||||
Normalizes to include a space before the 3-char incode."""
|
||||
match = re.search(r"([A-Z]{1,2}\d[A-Z0-9]?\s*\d[A-Z]{2})", text, re.IGNORECASE)
|
||||
if match:
|
||||
return match.group(1).upper().strip()
|
||||
raw = match.group(1).upper().strip()
|
||||
# Ensure space before incode (last 3 chars): "IP265AT" → "IP26 5AT"
|
||||
if " " not in raw and len(raw) >= 5:
|
||||
return raw[:-3] + " " + raw[-3:]
|
||||
return raw
|
||||
return None
|
||||
|
||||
|
||||
|
|
@ -635,6 +640,29 @@ def _resolve_outcode_postcodes(
|
|||
return results
|
||||
|
||||
|
||||
def _parse_or_date(date_str: str) -> str:
|
||||
"""Parse OpenRent date strings to ISO format (YYYY-MM-DD).
|
||||
Handles 'Today', 'Tomorrow', and 'DD Month, YYYY' formats."""
|
||||
if not date_str:
|
||||
return ""
|
||||
stripped = date_str.strip()
|
||||
lower = stripped.lower()
|
||||
if lower == "today":
|
||||
from datetime import datetime
|
||||
return datetime.now().strftime("%Y-%m-%d")
|
||||
if lower == "tomorrow":
|
||||
from datetime import datetime, timedelta
|
||||
return (datetime.now() + timedelta(days=1)).strftime("%Y-%m-%d")
|
||||
# Try "DD Month, YYYY" format (e.g., "01 April, 2026")
|
||||
from datetime import datetime
|
||||
for fmt in ("%d %B, %Y", "%d %B %Y"):
|
||||
try:
|
||||
return datetime.strptime(stripped, fmt).strftime("%Y-%m-%d")
|
||||
except ValueError:
|
||||
continue
|
||||
return date_str # Return as-is if unparseable
|
||||
|
||||
|
||||
def transform_property(
|
||||
search_data: dict,
|
||||
detail_data: dict | None,
|
||||
|
|
@ -767,7 +795,7 @@ def transform_property(
|
|||
"Total floor area (sqm)": parse_floor_area(description),
|
||||
"Listing URL": listing_url,
|
||||
"Listing features": [],
|
||||
"first_visible_date": detail.get("available_date", ""),
|
||||
"first_visible_date": _parse_or_date(detail.get("available_date", "")),
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ from pathlib import Path
|
|||
import polars as pl
|
||||
|
||||
from constants import MAX_BEDROOMS, MAX_RENT_MONTHLY, MIN_RENT_MONTHLY
|
||||
from transform import normalize_price
|
||||
from transform import map_property_type, normalize_price
|
||||
|
||||
log = logging.getLogger("rightmove")
|
||||
|
||||
|
|
@ -43,6 +43,19 @@ def write_parquet(properties: list[dict], path: Path, channel: str) -> None:
|
|||
MAX_BEDROOMS,
|
||||
)
|
||||
|
||||
# Re-derive Property type from Property sub-type using current PROPERTY_TYPE_MAP.
|
||||
# This retroactively fixes data scraped with older versions of the type map.
|
||||
remapped = 0
|
||||
for p in properties:
|
||||
sub_type = p.get("Property sub-type", "")
|
||||
if sub_type and sub_type != "Unknown":
|
||||
new_type = map_property_type(sub_type)
|
||||
if new_type != p.get("Property type"):
|
||||
p["Property type"] = new_type
|
||||
remapped += 1
|
||||
if remapped:
|
||||
log.info("Re-mapped %d property types from sub-types", remapped)
|
||||
|
||||
# Parse first_visible_date to datetime
|
||||
listing_dates = []
|
||||
for p in properties:
|
||||
|
|
@ -56,7 +69,27 @@ def write_parquet(properties: list[dict], path: Path, channel: str) -> None:
|
|||
dt = dt.astimezone(timezone.utc).replace(tzinfo=None)
|
||||
listing_dates.append(dt)
|
||||
except (ValueError, TypeError):
|
||||
listing_dates.append(None)
|
||||
# Try additional date formats (OpenRent: "DD Month, YYYY", "Today")
|
||||
parsed = None
|
||||
stripped = fvd.strip()
|
||||
lower = stripped.lower()
|
||||
if lower == "today":
|
||||
parsed = datetime.now().replace(
|
||||
hour=0, minute=0, second=0, microsecond=0
|
||||
)
|
||||
elif lower == "tomorrow":
|
||||
from datetime import timedelta
|
||||
parsed = (
|
||||
datetime.now() + timedelta(days=1)
|
||||
).replace(hour=0, minute=0, second=0, microsecond=0)
|
||||
else:
|
||||
for fmt in ("%d %B, %Y", "%d %B %Y"):
|
||||
try:
|
||||
parsed = datetime.strptime(stripped, fmt)
|
||||
break
|
||||
except ValueError:
|
||||
continue
|
||||
listing_dates.append(parsed)
|
||||
else:
|
||||
listing_dates.append(None)
|
||||
|
||||
|
|
|
|||
|
|
@ -49,6 +49,22 @@ def map_property_type(sub_type: str | None) -> str:
|
|||
canonical = PROPERTY_TYPE_MAP.get(sub_type)
|
||||
if canonical:
|
||||
return canonical
|
||||
# Try title-case variant (e.g., "country house" → "Country House")
|
||||
canonical = PROPERTY_TYPE_MAP.get(sub_type.title())
|
||||
if canonical:
|
||||
return canonical
|
||||
# Keyword fallback for compound types not in the map
|
||||
lower = sub_type.lower()
|
||||
if "flat" in lower or "apartment" in lower or "maisonette" in lower or "studio" in lower:
|
||||
return "Flats/Maisonettes"
|
||||
if "semi" in lower and "detach" in lower:
|
||||
return "Semi-Detached"
|
||||
if "detach" in lower:
|
||||
return "Detached"
|
||||
if "terrace" in lower or "mews" in lower:
|
||||
return "Terraced"
|
||||
if "house" in lower or "cottage" in lower:
|
||||
return "Detached"
|
||||
log.warning("Unknown propertySubType: %r — mapping to Other", sub_type)
|
||||
return "Other"
|
||||
|
||||
|
|
@ -86,6 +102,15 @@ def fix_coords(lat: float, lng: float) -> tuple[float, float]:
|
|||
return lat, lng
|
||||
|
||||
|
||||
def normalize_postcode(postcode: str) -> str:
    """Ensure a UK postcode has a single space before the 3-char incode.

    E.g., 'SW1A1AA' → 'SW1A 1AA', 'E1 4AB' unchanged. Surrounding whitespace
    is removed, the result is upper-cased, and any internal whitespace run
    (several spaces, a tab, a non-breaking space) is collapsed to one space.

    Args:
        postcode: Raw postcode text.

    Returns:
        The normalised postcode. Values that already contain a separator, or
        that are shorter than 5 characters (too short to be a full postcode),
        are returned without inserting a space.
    """
    # split() with no argument splits on any whitespace and drops leading and
    # trailing runs, so this both strips the value and turns tabs, NBSPs and
    # repeated spaces into one plain space before the checks below.
    postcode = " ".join(postcode.split()).upper()
    if " " in postcode or len(postcode) < 5:
        return postcode
    return postcode[:-3] + " " + postcode[-3:]
|
||||
|
||||
|
||||
def normalize_price(amount: int, frequency: str) -> int:
|
||||
"""Normalise price to monthly for rentals (weekly × 52/12, yearly ÷ 12)."""
|
||||
if frequency == "weekly":
|
||||
|
|
|
|||
2
finder/uv.lock
generated
2
finder/uv.lock
generated
|
|
@ -301,6 +301,7 @@ dependencies = [
|
|||
{ name = "fake-useragent" },
|
||||
{ name = "flask" },
|
||||
{ name = "httpx" },
|
||||
{ name = "lxml" },
|
||||
{ name = "playwright" },
|
||||
{ name = "playwright-stealth" },
|
||||
{ name = "polars" },
|
||||
|
|
@ -315,6 +316,7 @@ requires-dist = [
|
|||
{ name = "fake-useragent", specifier = ">=2.2.0" },
|
||||
{ name = "flask" },
|
||||
{ name = "httpx" },
|
||||
{ name = "lxml" },
|
||||
{ name = "playwright", specifier = ">=1.58.0" },
|
||||
{ name = "playwright-stealth", specifier = ">=2.0.2" },
|
||||
{ name = "polars" },
|
||||
|
|
|
|||
|
|
@ -104,6 +104,22 @@ _EXTRACT_LISTINGS_JS = r"""() => {
|
|||
if (ptMatch) property_type = ptMatch[1].trim();
|
||||
else if (/\bstudio\s*(?:flat|apartment)?\s+(?:for\s+sale|to\s+(?:rent|let)|for\s+rent)/i.test(text)) property_type = 'Studio';
|
||||
|
||||
// Keyword fallback when regex doesn't match current DOM format
|
||||
if (!property_type) {
|
||||
const lower = text.toLowerCase();
|
||||
if (/\bstudio\b/.test(lower)) property_type = 'Studio';
|
||||
else if (/\bpenthouse\b/.test(lower)) property_type = 'Penthouse';
|
||||
else if (/\bmaisonette\b/.test(lower)) property_type = 'Maisonette';
|
||||
else if (/\bapartment\b/.test(lower)) property_type = 'Apartment';
|
||||
else if (/\bflat\b/.test(lower)) property_type = 'Flat';
|
||||
else if (/\bsemi[- ]?detached\b/.test(lower)) property_type = 'Semi-Detached';
|
||||
else if (/\bdetached\b/.test(lower)) property_type = 'Detached';
|
||||
else if (/\bterraced?\b/.test(lower)) property_type = 'Terraced';
|
||||
else if (/\bbungalow\b/.test(lower)) property_type = 'Bungalow';
|
||||
else if (/\bcottage\b/.test(lower)) property_type = 'Cottage';
|
||||
else if (/\bhouse\b/.test(lower)) property_type = 'House';
|
||||
}
|
||||
|
||||
results.push({
|
||||
id, url: href.replace(window.location.origin, ''),
|
||||
price: priceMatch ? parseInt(priceMatch[1].replace(/,/g, '')) : null,
|
||||
|
|
@ -172,6 +188,22 @@ _EXTRACT_LISTINGS_JS = r"""() => {
|
|||
if (ptMatch2) property_type = ptMatch2[1].trim();
|
||||
else if (/\bstudio\s*(?:flat|apartment)?\s+(?:for\s+sale|to\s+(?:rent|let)|for\s+rent)/i.test(text)) property_type = 'Studio';
|
||||
|
||||
// Keyword fallback when regex doesn't match current DOM format
|
||||
if (!property_type) {
|
||||
const lower = text.toLowerCase();
|
||||
if (/\bstudio\b/.test(lower)) property_type = 'Studio';
|
||||
else if (/\bpenthouse\b/.test(lower)) property_type = 'Penthouse';
|
||||
else if (/\bmaisonette\b/.test(lower)) property_type = 'Maisonette';
|
||||
else if (/\bapartment\b/.test(lower)) property_type = 'Apartment';
|
||||
else if (/\bflat\b/.test(lower)) property_type = 'Flat';
|
||||
else if (/\bsemi[- ]?detached\b/.test(lower)) property_type = 'Semi-Detached';
|
||||
else if (/\bdetached\b/.test(lower)) property_type = 'Detached';
|
||||
else if (/\bterraced?\b/.test(lower)) property_type = 'Terraced';
|
||||
else if (/\bbungalow\b/.test(lower)) property_type = 'Bungalow';
|
||||
else if (/\bcottage\b/.test(lower)) property_type = 'Cottage';
|
||||
else if (/\bhouse\b/.test(lower)) property_type = 'House';
|
||||
}
|
||||
|
||||
results.push({
|
||||
id, url: href.replace(window.location.origin, ''),
|
||||
price: priceMatch ? parseInt(priceMatch[1].replace(/,/g, '')) : null,
|
||||
|
|
@ -596,10 +628,15 @@ def _resolve_outcode_coords(
|
|||
|
||||
|
||||
def _extract_postcode(text: str) -> str | None:
|
||||
"""Extract a full UK postcode from text like 'Dollar Bay Place, Canary Wharf E14 9SS'."""
|
||||
"""Extract a full UK postcode from text like 'Dollar Bay Place, Canary Wharf E14 9SS'.
|
||||
Normalizes to include a space before the 3-char incode."""
|
||||
match = re.search(r"([A-Z]{1,2}\d[A-Z0-9]?\s*\d[A-Z]{2})", text, re.IGNORECASE)
|
||||
if match:
|
||||
return match.group(1).upper().strip()
|
||||
raw = match.group(1).upper().strip()
|
||||
# Ensure space before incode (last 3 chars): "SW1A1AA" → "SW1A 1AA"
|
||||
if " " not in raw and len(raw) >= 5:
|
||||
return raw[:-3] + " " + raw[-3:]
|
||||
return raw
|
||||
return None
|
||||
|
||||
|
||||
|
|
@ -651,13 +688,20 @@ def _detect_rent_frequency(price_text: str) -> str:
|
|||
|
||||
Zoopla price elements contain text like '£1,500 pcm', '£350 pw',
|
||||
'£18,000 pa'. Defaults to 'monthly' if no frequency indicator found.
|
||||
|
||||
Checks monthly indicators (pcm) BEFORE weekly (pw) because Zoopla cards
|
||||
often display both monthly and weekly prices in the same text. When the
|
||||
JS extraction falls back to full card text, checking pcm first ensures
|
||||
the captured monthly price gets the correct frequency label.
|
||||
"""
|
||||
lower = price_text.lower()
|
||||
if "pcm" in lower or "per month" in lower or "per calendar month" in lower:
|
||||
return "monthly"
|
||||
if "pw" in lower or "per week" in lower or "/w" in lower:
|
||||
return "weekly"
|
||||
if "pa" in lower or "per annum" in lower or "/y" in lower or "per year" in lower:
|
||||
return "yearly"
|
||||
# pcm, per month, /m, or no indicator — default monthly
|
||||
# No indicator — default monthly (Zoopla standard)
|
||||
return "monthly"
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -11,6 +11,8 @@ import InfoPopup from '../ui/InfoPopup';
|
|||
import { FeatureInfoPopup } from '../ui/FeatureInfoPopup';
|
||||
import { FeatureActions } from '../ui/FeatureIcons';
|
||||
import { FeatureLabel } from '../ui/FeatureLabel';
|
||||
import { getFeatureIcon } from '../../lib/feature-icons';
|
||||
import { getGroupIcon } from '../../lib/group-icons';
|
||||
import AiFilterInput from './AiFilterInput';
|
||||
import type { AiFilterErrorType } from '../../hooks/useAiFilters';
|
||||
import FeatureBrowser from './FeatureBrowser';
|
||||
|
|
@ -552,6 +554,12 @@ export default memo(function Filters({
|
|||
clampMax ? feature.max! : displayValue[1],
|
||||
];
|
||||
|
||||
const mobileIconClass = 'w-4 h-4 text-teal-600 dark:text-teal-400 shrink-0';
|
||||
const mobileIcon = getFeatureIcon(feature.name, mobileIconClass) || (() => {
|
||||
const G = feature.group ? getGroupIcon(feature.group) : null;
|
||||
return G ? <G className={mobileIconClass} /> : null;
|
||||
})();
|
||||
|
||||
return (
|
||||
<div
|
||||
key={feature.name}
|
||||
|
|
@ -559,7 +567,7 @@ export default memo(function Filters({
|
|||
className={`space-y-0.5 px-2 py-1.5 rounded ${isActive ? 'ring-2 ring-teal-400 bg-teal-50 dark:bg-teal-900/30' : isPinned ? 'ring-2 ring-teal-400 bg-teal-50/50 dark:bg-teal-900/20' : ''}`}
|
||||
>
|
||||
<div className="flex items-center justify-between gap-1">
|
||||
<FeatureLabel feature={feature} size="sm" className="min-w-0 shrink" />
|
||||
<FeatureLabel feature={feature} size="sm" className="min-w-0 shrink" hideIconOnMobile />
|
||||
<FeatureActions
|
||||
feature={feature}
|
||||
isPinned={isPinned}
|
||||
|
|
@ -568,7 +576,9 @@ export default memo(function Filters({
|
|||
onRemove={onRemoveFilter}
|
||||
/>
|
||||
</div>
|
||||
<div>
|
||||
<div className="flex md:block items-start gap-1.5">
|
||||
{mobileIcon && <div className="md:hidden shrink-0 pt-0.5">{mobileIcon}</div>}
|
||||
<div className="min-w-0 flex-1">
|
||||
<Slider
|
||||
min={scale ? 0 : feature.min!}
|
||||
max={scale ? 100 : feature.max!}
|
||||
|
|
@ -606,6 +616,7 @@ export default memo(function Filters({
|
|||
feature={feature}
|
||||
onValueChange={(v) => onFilterChange(feature.name, v)}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
|
|
|
|||
|
|
@ -338,6 +338,18 @@ export default function MapPage({
|
|||
return () => document.removeEventListener('wheel', handleWheel);
|
||||
}, []);
|
||||
|
||||
// On mobile, push a guard history entry to absorb accidental back navigations
|
||||
// (e.g. iOS Safari edge-swipe that CSS touch-action can't prevent)
|
||||
useEffect(() => {
|
||||
if (!isMobile) return;
|
||||
window.history.pushState({ dashboardGuard: true }, '');
|
||||
const handlePopState = () => {
|
||||
window.history.pushState({ dashboardGuard: true }, '');
|
||||
};
|
||||
window.addEventListener('popstate', handlePopState);
|
||||
return () => window.removeEventListener('popstate', handlePopState);
|
||||
}, [isMobile]);
|
||||
|
||||
const { handleHexagonClick } = selection;
|
||||
const handleMobileHexagonClick = useCallback(
|
||||
(id: string, isPostcode?: boolean, geometry?: PostcodeGeometry) => {
|
||||
|
|
@ -611,7 +623,7 @@ export default function MapPage({
|
|||
|
||||
if (isMobile) {
|
||||
return (
|
||||
<div className="flex-1 flex flex-col overflow-hidden relative">
|
||||
<div className="flex-1 flex flex-col overflow-hidden relative touch-pan-y">
|
||||
{initialLoading && (
|
||||
<div className="absolute inset-0 z-50 flex items-center justify-center bg-warm-50/80 dark:bg-navy-950/80 backdrop-blur-sm">
|
||||
<div className="flex flex-col items-center gap-4">
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ interface FeatureLabelProps {
|
|||
className?: string;
|
||||
size?: 'xs' | 'sm';
|
||||
description?: string;
|
||||
hideIconOnMobile?: boolean;
|
||||
}
|
||||
|
||||
export function FeatureLabel({
|
||||
|
|
@ -23,9 +24,11 @@ export function FeatureLabel({
|
|||
className = '',
|
||||
size = 'xs',
|
||||
description,
|
||||
hideIconOnMobile,
|
||||
}: FeatureLabelProps) {
|
||||
const textClass = size === 'sm' ? 'text-sm' : 'text-xs';
|
||||
const iconClass = 'w-3.5 h-3.5 text-teal-600 dark:text-teal-400 shrink-0';
|
||||
const mobileHide = hideIconOnMobile ? 'hidden md:block ' : '';
|
||||
const iconClass = `${mobileHide}w-3.5 h-3.5 text-teal-600 dark:text-teal-400 shrink-0`;
|
||||
const featureIcon = getFeatureIcon(feature.name, iconClass);
|
||||
const GroupIcon = !featureIcon && feature.group ? getGroupIcon(feature.group) : null;
|
||||
const modeTag =
|
||||
|
|
|
|||
|
|
@ -57,7 +57,7 @@ export function useHexagonSelection({
|
|||
const filterStr = buildFilterString(filters, features);
|
||||
if (filterStr) params.append('filters', filterStr);
|
||||
if (fields) {
|
||||
params.set('fields', fields.join(','));
|
||||
params.set('fields', fields.join(';;'));
|
||||
}
|
||||
if (journeyDest) {
|
||||
params.set('journey_mode', journeyDest.mode);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue