diff --git a/Pasted image 20260515211038.png b/Pasted image 20260515211038.png new file mode 100644 index 0000000..8b494dc Binary files /dev/null and b/Pasted image 20260515211038.png differ diff --git a/finder/constants.py b/finder/constants.py index 558e625..c456f62 100644 --- a/finder/constants.py +++ b/finder/constants.py @@ -82,7 +82,7 @@ PROPERTY_TYPE_MAP = { "Farm / Barn": "Other", "Farm House": "Other", "House": "Detached", - "House of Multiple Occupation": "Flats/Maisonettes", + "House of Multiple Occupation": "Other", "House Share": "Other", "Not Specified": "Other", "Chalet": "Other", @@ -90,15 +90,15 @@ PROPERTY_TYPE_MAP = { "Coach House": "Other", "Character Property": "Other", "Cluster House": "Other", - "Retirement Property": "Flats/Maisonettes", + "Retirement Property": "Other", "Parking": "Other", "Plot": "Other", "Garages": "Other", "Mews": "Terraced", "Property": "Other", "Flat Share": "Other", - "Block of Apartments": "Flats/Maisonettes", - "Private Halls": "Flats/Maisonettes", + "Block of Apartments": "Other", + "Private Halls": "Other", "Terraced Bungalow": "Terraced", "Equestrian Facility": "Other", "Ground Maisonette": "Flats/Maisonettes", @@ -107,13 +107,13 @@ PROPERTY_TYPE_MAP = { "Farm Land": "Other", "House Boat": "Other", "Barn": "Other", - "Serviced Apartments": "Flats/Maisonettes", + "Serviced Apartments": "Other", # Space-separated variants (from home.co.uk underscore/hyphen normalization) "Semi Detached": "Semi-Detached", "Semi Detached Bungalow": "Semi-Detached", "End Of Terrace": "Terraced", "End Terrace": "Terraced", - "Block Of Apartments": "Flats/Maisonettes", + "Block Of Apartments": "Other", # Lowercase variants (from home.co.uk / Rightmove APIs) "house": "Detached", "bungalow": "Other", @@ -121,7 +121,7 @@ PROPERTY_TYPE_MAP = { "land": "Other", "other": "Other", "not-specified": "Other", - "retirement-property": "Flats/Maisonettes", + "retirement-property": "Other", "equestrian-facility": "Other", "flat": "Flats/Maisonettes", "detached": "Detached", diff --git a/finder/homecouk.py b/finder/homecouk.py index 1e47d40..09a2401 100644 --- a/finder/homecouk.py +++ b/finder/homecouk.py @@ -19,7 +19,12 @@ from constants import ( RETRY_BASE_DELAY, ) from spatial import PostcodeSpatialIndex -from transform import normalize_postcode, normalize_sub_type, validate_floor_area +from transform import ( + normalize_postcode, + normalize_sub_type, + parse_int_value, + validate_floor_area, +) log = logging.getLogger("homecouk") @@ -170,11 +175,19 @@ def parse_floor_area(description: str | None) -> float | None: """Try to extract floor area from description text like '789 sq.ft.' or '73 sq.m.'.""" if not description: return None - m = re.search(r"([\d,]+(?:\.\d+)?)\s*sq\.?\s*ft", description, re.IGNORECASE) + m = re.search( + r"([\d,]+(?:\.\d+)?)\s*(?:sq\.?\s*ft|square\s+feet|ft(?:\^?2|²))", + description, + re.IGNORECASE, + ) if m: sqft = float(m.group(1).replace(",", "")) return validate_floor_area(round(sqft * 0.092903, 1)) - m = re.search(r"([\d,]+(?:\.\d+)?)\s*sq\.?\s*m", description, re.IGNORECASE) + m = re.search( + r"([\d,]+(?:\.\d+)?)\s*(?:sq\.?\s*m|square\s+met(?:er|re)s?|m(?:\^?2|²))", + description, + re.IGNORECASE, + ) if m: return validate_floor_area(round(float(m.group(1).replace(",", "")), 1)) return None @@ -237,6 +250,15 @@ def map_property_type(raw_type: str | None) -> str: # Home.co.uk uses types like "House", "Flat", "Apartment", "Detached", etc. # Try common patterns lower = raw_type.lower() + excluded_flat_like = ( + "block of apartment", + "house of multiple occupation", + "private halls", + "retirement", + "serviced apartment", + ) + if any(term in lower for term in excluded_flat_like): + return "Other" if ( "flat" in lower or "apartment" in lower @@ -269,8 +291,10 @@ def transform_property( log.debug("Coords outside England: lat=%.4f lng=%.4f — skipping", lat, lng) return None - price = prop.get("price") or prop.get("latest_price") - if not price or int(price) <= 0: + price = parse_int_value(prop.get("price")) or parse_int_value( + prop.get("latest_price") + ) + if not price or price <= 0: return None # Home.co.uk provides postcodes directly, but fall back to spatial index @@ -281,10 +305,10 @@ def transform_property( log.debug("No postcode for property at %.4f, %.4f — skipping", lat, lng) return None - raw_beds = prop.get("bedrooms", 0) or 0 - raw_baths = prop.get("bathrooms", 0) or 0 - bedrooms = raw_beds if raw_beds <= MAX_BEDROOMS else 0 - bathrooms = raw_baths if raw_baths <= MAX_BEDROOMS else 0 + raw_beds = parse_int_value(prop.get("bedrooms")) or 0 + raw_baths = parse_int_value(prop.get("bathrooms")) or 0 + bedrooms = raw_beds if 0 <= raw_beds <= MAX_BEDROOMS else 0 + bathrooms = raw_baths if 0 <= raw_baths <= MAX_BEDROOMS else 0 if raw_beds > MAX_BEDROOMS or raw_baths > MAX_BEDROOMS: log.warning( "home.co.uk %s: implausible beds=%d baths=%d (capped to 0)", @@ -318,7 +342,7 @@ def transform_property( "Leasehold/Freehold": parse_tenure(prop), "Property type": map_property_type(listing_type), "Property sub-type": normalize_sub_type(listing_type), - "price": int(price), + "price": price, "price_frequency": "", "Price qualifier": price_qualifier, "Total floor area (sqm)": parse_floor_area(prop.get("description")), @@ -362,7 +386,16 @@ def search_outcode( break for prop in raw_props: - transformed = transform_property(prop, pc_index) + try: + transformed = transform_property(prop, pc_index) + except Exception as exc: + log.warning( + "home.co.uk %s property %s failed to transform: %s", + outcode, + prop.get("listing_id") or prop.get("property_id") or "?", + exc, + ) + continue if transformed: properties.append(transformed) if max_properties is not None and len(properties) >= max_properties: diff --git a/finder/listing_filters.py b/finder/listing_filters.py new file mode 100644 index 0000000..f1eca04 --- /dev/null +++ b/finder/listing_filters.py @@ -0,0 +1,63 @@ +"""Shared target filters for manual buy-listing scrapes.""" + +import math +from typing import Any + +BUY_MAX_PRICE = 1_000_000 +BUY_MIN_BEDROOMS = 2 +BUY_MAX_BEDROOMS = 5 +BUY_ALLOWED_BATHROOMS = frozenset({2, 3}) +BUY_MIN_FLOOR_AREA_SQM = 90.0 +BUY_MAX_FLOOR_AREA_SQM = 170.0 +BUY_PROPERTY_TYPES = frozenset({"Flats/Maisonettes"}) + +BUY_MIN_FLOOR_AREA_SQFT = round(BUY_MIN_FLOOR_AREA_SQM / 0.092903) +BUY_MAX_FLOOR_AREA_SQFT = round(BUY_MAX_FLOOR_AREA_SQM / 0.092903) + + +def _number(value: Any) -> float | None: + if value is None: + return None + try: + number = float(value) + except (TypeError, ValueError): + return None + if not math.isfinite(number): + return None + return number + + +def _int(value: Any) -> int | None: + number = _number(value) + if number is None or not number.is_integer(): + return None + return int(number) + + +def matches_strict_buy_listing_filter(prop: dict) -> bool: + """Exact filter used to guard scraped/output datasets.""" + if "price" in prop: + price = _number(prop.get("price")) + else: + price = _number(prop.get("Asking price")) + if price is None or price <= 0 or price >= BUY_MAX_PRICE: + return False + + bedrooms = _int(prop.get("Bedrooms")) + if bedrooms is None or ( + bedrooms < BUY_MIN_BEDROOMS or bedrooms > BUY_MAX_BEDROOMS + ): + return False + + property_type = prop.get("Property type") + if property_type not in BUY_PROPERTY_TYPES: + return False + + bathrooms = _int(prop.get("Bathrooms")) + if bathrooms not in BUY_ALLOWED_BATHROOMS: + return False + + floor_area = _number(prop.get("Total floor area (sqm)")) + if floor_area is None: + return False + return BUY_MIN_FLOOR_AREA_SQM <= floor_area <= BUY_MAX_FLOOR_AREA_SQM diff --git a/finder/rightmove.py b/finder/rightmove.py index 3c831a5..0a3d7a2 100644 --- a/finder/rightmove.py +++ b/finder/rightmove.py @@ -10,6 +10,15 @@ from constants import ( TYPEAHEAD_URL, ) from http_client import fetch_with_retry +from listing_filters import ( + BUY_ALLOWED_BATHROOMS, + BUY_MAX_BEDROOMS, + BUY_MAX_FLOOR_AREA_SQFT, + BUY_MAX_PRICE, + BUY_MIN_BEDROOMS, + BUY_MIN_FLOOR_AREA_SQFT, + matches_strict_buy_listing_filter, +) from spatial import PostcodeSpatialIndex from transform import transform_property @@ -22,12 +31,23 @@ outcode_cache: dict[str, str] = {} # Requesting index >= 1008 returns HTTP 400. _MAX_INDEX = 1008 -# Property type filters for splitting overcapped searches. Each sub-query -# gets its own 1008 cap, so we can recover listings beyond the unfiltered limit. -_PROPERTY_TYPES = [ - "detached", "semi-detached", "terraced", "flat", - "bungalow", "park-home", "land", -] +_BASE_BUY_SEARCH_PARAMS = { + "propertyTypes": "flat", + "minBedrooms": str(BUY_MIN_BEDROOMS), + "maxBedrooms": str(BUY_MAX_BEDROOMS), + "minBathrooms": str(min(BUY_ALLOWED_BATHROOMS)), + "maxBathrooms": str(max(BUY_ALLOWED_BATHROOMS)), + "minSize": str(BUY_MIN_FLOOR_AREA_SQFT), + "maxSize": str(BUY_MAX_FLOOR_AREA_SQFT), + "maxPrice": str(BUY_MAX_PRICE - 1), +} + + +def _buy_search_params(extra_params: dict | None = None) -> dict: + params = dict(_BASE_BUY_SEARCH_PARAMS) + if extra_params: + params.update(extra_params) + return params def resolve_outcode_id(client: httpx.Client, outcode: str) -> str | None: @@ -92,8 +112,18 @@ def _paginate( break for prop in raw_props: - transformed = transform_property(prop, outcode, pc_index) - if transformed: + try: + transformed = transform_property(prop, outcode, pc_index) + except Exception as exc: + log.warning( + "Rightmove %s/%s property %s failed to transform: %s", + outcode, + channel_cfg["channel"], + prop.get("id", "?"), + exc, + ) + continue + if transformed and matches_strict_buy_listing_filter(transformed): properties.append(transformed) if max_properties is not None and len(properties) >= max_properties: return properties, result_count @@ -105,6 +135,15 @@ def _paginate( if index >= result_count: break + if index >= _MAX_INDEX: + log.warning( + "%s/%s: %d filtered results exceed Rightmove's %d-result page cap", + outcode, + channel_cfg["channel"], + result_count, + _MAX_INDEX, + ) + break time.sleep(DELAY_BETWEEN_PAGES) @@ -121,54 +160,20 @@ def search_outcode( ) -> list[dict]: """Paginate through search results for one outcode+channel. Returns transformed properties. - When the unfiltered result count exceeds 1008 (Rightmove's hard pagination cap), - re-queries per property type to recover listings beyond the cap. + Search requests set the supported Rightmove filters directly: flats, + 2-5 bedrooms, 2-3 bathrooms, 969-1830 sq ft, and asking price below £1m. """ - properties, result_count = _paginate( - client, outcode_id, outcode, channel_cfg, pc_index, max_properties=max_properties + properties, _ = _paginate( + client, + outcode_id, + outcode, + channel_cfg, + pc_index, + extra_params=_buy_search_params(), + max_properties=max_properties, ) if max_properties is not None and len(properties) >= max_properties: return properties[:max_properties] - if result_count <= _MAX_INDEX: - return properties - - # Hit the 1008 cap — re-search per property type to get full coverage - ch = channel_cfg["channel"] - log.info( - "%s/%s: %d results exceed %d cap, splitting by property type", - outcode, ch, result_count, _MAX_INDEX, - ) - - all_by_id: dict[str, dict] = {p["id"]: p for p in properties} - - for pt in _PROPERTY_TYPES: - pt_props, _ = _paginate( - client, outcode_id, outcode, channel_cfg, pc_index, - extra_params={"propertyTypes": pt}, - max_properties=max_properties, - ) - new = 0 - for p in pt_props: - if p["id"] not in all_by_id: - all_by_id[p["id"]] = p - new += 1 - if ( - max_properties is not None - and len(all_by_id) >= max_properties - ): - break - if new: - log.debug("%s/%s type=%s: +%d new properties", outcode, ch, pt, new) - if max_properties is not None and len(all_by_id) >= max_properties: - break - - log.info( - "%s/%s: type split recovered %d → %d properties", - outcode, ch, len(properties), len(all_by_id), - ) - properties = list(all_by_id.values()) - if max_properties is not None: - return properties[:max_properties] return properties diff --git a/finder/scraper.py b/finder/scraper.py index 6eb6eee..14bbd87 100644 --- a/finder/scraper.py +++ b/finder/scraper.py @@ -19,6 +19,7 @@ from homecouk import load_cookies as load_homecouk_cookies from homecouk import make_client as make_homecouk_client from homecouk import search_outcode as homecouk_search_outcode from http_client import make_client +from listing_filters import matches_strict_buy_listing_filter from rightmove import resolve_outcode_id from rightmove import search_outcode as rightmove_search_outcode from spatial import PostcodeSpatialIndex @@ -181,11 +182,11 @@ def _source_names(sources: str | Iterable[str] | None) -> list[str]: requested = [str(source).strip().lower() for source in sources] requested = [source for source in requested if source] - if "all" in requested: - return list(SOURCE_ORDER) - unknown = sorted(set(requested) - set(SOURCE_ORDER)) + unknown = sorted(set(requested) - set(SOURCE_ORDER) - {"all"}) if unknown: raise ValueError(f"Unknown source(s): {', '.join(unknown)}") + if "all" in requested: + return list(SOURCE_ORDER) return [source for source in SOURCE_ORDER if source in requested] @@ -196,19 +197,28 @@ def _dedup_key(prop: dict) -> tuple: def _merge_properties(source_results: dict[str, list[dict]]) -> tuple[list[dict], dict, int]: merged: dict[str, dict] = {} seen_keys: set[tuple] = set() + seen_ids: set[str] = set() counts = {source: 0 for source in SOURCE_ORDER} deduped = 0 for source in SOURCE_ORDER: for prop in source_results.get(source, []): prop_id = prop.get("id") - key = _dedup_key(prop) - if (prop_id is not None and prop_id in merged) or key in seen_keys: - deduped += 1 - continue - storage_key = prop_id if prop_id is not None else f"{source}:{len(merged)}" + if prop_id is not None: + prop_id = str(prop_id) + if prop_id in seen_ids: + deduped += 1 + continue + seen_ids.add(prop_id) + storage_key = prop_id + else: + key = _dedup_key(prop) + if key in seen_keys: + deduped += 1 + continue + seen_keys.add(key) + storage_key = f"{source}:{len(merged)}" merged[storage_key] = prop - seen_keys.add(key) counts[source] += 1 return list(merged.values()), counts, deduped @@ -241,13 +251,22 @@ def _store_properties( if remaining == 0: return 0 - eligible = [prop for prop in props if _property_is_londonish(prop)] - dropped = len(props) - len(eligible) - if dropped: + londonish = [prop for prop in props if _property_is_londonish(prop)] + dropped_outside_area = len(props) - len(londonish) + if dropped_outside_area: log.debug( "%s dropped %d properties outside the Greater London-ish postcode filter", source, - dropped, + dropped_outside_area, + ) + + eligible = [prop for prop in londonish if matches_strict_buy_listing_filter(prop)] + dropped_non_matching = len(londonish) - len(eligible) + if dropped_non_matching: + log.debug( + "%s dropped %d properties outside the strict buy-listing filters", + source, + dropped_non_matching, ) selected = eligible if remaining is None else eligible[:remaining] @@ -367,20 +386,16 @@ def _scrape_homecouk( log.info("home.co.uk cap reached") return - remaining = _source_remaining( - results, "homecouk", max_properties_per_source - ) - if remaining == 0: - log.info("home.co.uk cap reached") - return - for attempt in range(2): try: + # home.co.uk cannot express the full filter set at source. + # Fetch the outcode page set first; _store_properties applies + # the strict filter and source cap after transformation. props = homecouk_search_outcode( client, outcode, pc_index, - max_properties=remaining, + max_properties=None, ) added = _store_properties( results, @@ -442,19 +457,17 @@ def _scrape_zoopla( log.info("Zoopla cap reached") return - remaining = _source_remaining(results, "zoopla", max_properties_per_source) - if remaining == 0: - log.info("Zoopla cap reached") - return - for attempt in range(2): try: + # Zoopla source-side filters are unverified here. Fetch the + # outcode page set first; _store_properties applies the + # strict filter and source cap after transformation. props, _ = zoopla_search_outcode( page, outcode, pc_index, pc_coords, - max_properties=remaining, + max_properties=None, ) added = _store_properties( results, @@ -506,9 +519,6 @@ def run_scrape( output_base = Path(output_dir) if output_dir is not None else DATA_DIR output_base.mkdir(parents=True, exist_ok=True) - if "zoopla" in selected_sources and pc_coords is None: - pc_coords = build_postcode_coords() - errors: list[str] = [] results = {source: [] for source in SOURCE_ORDER} started_at = time.time() @@ -539,7 +549,8 @@ def run_scrape( ) if "zoopla" in selected_sources: - assert pc_coords is not None + if pc_coords is None: + pc_coords = build_postcode_coords() _scrape_zoopla( selected_outcodes, pc_index, @@ -551,19 +562,36 @@ def run_scrape( merged, source_counts, deduped = _merge_properties(results) output_path = output_base / "online_listings_buy.parquet" - write_parquet(merged, output_path) + if merged: + write_parquet(merged, output_path) + else: + if output_path.exists(): + output_path.unlink() + log.warning("No strict properties to write to %s", output_path) + + filtered = [prop for prop in merged if matches_strict_buy_listing_filter(prop)] + filtered_output_path = output_base / "online_listings_buy_filtered.parquet" + if filtered: + write_parquet(filtered, filtered_output_path) + else: + if filtered_output_path.exists(): + filtered_output_path.unlink() + log.warning("No strict-filtered properties to write to %s", filtered_output_path) counts = { "total": len(merged), + "filtered_total": len(filtered), "deduped": deduped, "sources": source_counts, } + source_summary = " ".join( + f"{source}:{source_counts[source]}" for source in SOURCE_ORDER + ) log.info( - "Sale scrape complete: %d unique (rightmove:%d homecouk:%d zoopla:%d deduped:%d)", + "Sale scrape complete: %d unique, %d strict-filtered (%s deduped:%d)", len(merged), - source_counts["rightmove"], - source_counts["homecouk"], - source_counts["zoopla"], + len(filtered), + source_summary, deduped, ) @@ -575,6 +603,7 @@ def run_scrape( }, "counts": counts, "path": str(output_path), + "filtered_path": str(filtered_output_path), "errors": errors, "elapsed_seconds": round(time.time() - started_at, 3), } diff --git a/finder/storage.py b/finder/storage.py index 605c39f..3d21083 100644 --- a/finder/storage.py +++ b/finder/storage.py @@ -45,9 +45,10 @@ def write_parquet(properties: list[dict], path: Path) -> None: remapped = 0 for p in properties: sub_type = p.get("Property sub-type", "") - if sub_type and sub_type != "Unknown": + current_type = p.get("Property type") + if sub_type and sub_type != "Unknown" and current_type in (None, "", "Other"): new_type = map_property_type(sub_type) - if new_type != p.get("Property type"): + if new_type != current_type: p["Property type"] = new_type remapped += 1 if remapped: diff --git a/finder/transform.py b/finder/transform.py index 4066ed2..a55fdad 100644 --- a/finder/transform.py +++ b/finder/transform.py @@ -1,4 +1,5 @@ import logging +import math import re from constants import MAX_BEDROOMS, PROPERTY_TYPE_MAP, RIGHTMOVE_BASE @@ -29,17 +30,43 @@ def validate_floor_area(sqm: float | None) -> float | None: return sqm +def parse_int_value(value) -> int | None: + """Parse an integer-like API value without truncating decimals.""" + if value is None or isinstance(value, bool): + return None + if isinstance(value, int): + return value + if isinstance(value, float): + if not math.isfinite(value) or not value.is_integer(): + return None + return int(value) + if isinstance(value, str): + cleaned = value.strip().replace(",", "").replace("£", "") + if not re.fullmatch(r"\d+", cleaned): + return None + return int(cleaned) + return None + + def parse_display_size(display_size: str | None) -> float | None: """Parse displaySize like '499 sq. ft.' or '4,124 sq. ft.' to sqm.""" if not display_size: return None # Try sq. ft. first - m = re.search(r"([\d,]+(?:\.\d+)?)\s*sq\.?\s*ft", display_size, re.IGNORECASE) + m = re.search( + r"([\d,]+(?:\.\d+)?)\s*(?:sq\.?\s*ft|square\s+feet|ft(?:\^?2|²))", + display_size, + re.IGNORECASE, + ) if m: sqft = float(m.group(1).replace(",", "")) return validate_floor_area(round(sqft * 0.092903, 1)) # Try sq. m. - m = re.search(r"([\d,]+(?:\.\d+)?)\s*sq\.?\s*m", display_size, re.IGNORECASE) + m = re.search( + r"([\d,]+(?:\.\d+)?)\s*(?:sq\.?\s*m|square\s+met(?:er|re)s?|m(?:\^?2|²))", + display_size, + re.IGNORECASE, + ) if m: return validate_floor_area(round(float(m.group(1).replace(",", "")), 1)) return None @@ -86,7 +113,21 @@ def map_property_type(sub_type: str | None) -> str: return canonical # Keyword fallback for compound types not in the map lower = sub_type.lower() - if "flat" in lower or "apartment" in lower or "maisonette" in lower or "studio" in lower: + excluded_flat_like = ( + "block of apartment", + "house of multiple occupation", + "private halls", + "retirement", + "serviced apartment", + ) + if any(term in lower for term in excluded_flat_like): + return "Other" + if ( + "flat" in lower + or "apartment" in lower + or "maisonette" in lower + or "studio" in lower + ): return "Flats/Maisonettes" if "semi" in lower and "detach" in lower: return "Semi-Detached" @@ -158,10 +199,10 @@ def transform_property( lat, lng = fix_coords(raw_lat, raw_lng) price_obj = prop.get("price", {}) - amount = price_obj.get("amount") + amount = parse_int_value(price_obj.get("amount")) if not amount: return None - price = int(amount) + price = amount if price <= 0: return None @@ -172,14 +213,23 @@ def transform_property( # POA / Auction listings have unreliable prices — treat as no price pq_lower = price_qualifier.lower() - if "poa" in pq_lower or "auction" in pq_lower: + non_comparable_price_terms = ( + "poa", + "auction", + "shared ownership", + "shared equity", + "part buy", + "part rent", + "from", + ) + if any(term in pq_lower for term in non_comparable_price_terms): return None sub_type = prop.get("propertySubType", "") - raw_beds = prop.get("bedrooms", 0) or 0 - raw_baths = prop.get("bathrooms", 0) or 0 - bedrooms = raw_beds if raw_beds <= MAX_BEDROOMS else 0 - bathrooms = raw_baths if raw_baths <= MAX_BEDROOMS else 0 + raw_beds = parse_int_value(prop.get("bedrooms")) or 0 + raw_baths = parse_int_value(prop.get("bathrooms")) or 0 + bedrooms = raw_beds if 0 <= raw_beds <= MAX_BEDROOMS else 0 + bathrooms = raw_baths if 0 <= raw_baths <= MAX_BEDROOMS else 0 if raw_beds > MAX_BEDROOMS or raw_baths > MAX_BEDROOMS: log.warning( "Rightmove %s: implausible beds=%d baths=%d (capped to 0)", @@ -197,8 +247,15 @@ def transform_property( log.debug("No England postcode for property at %.4f, %.4f — skipping", lat, lng) return None + property_url = prop.get("propertyUrl") or "" + if not isinstance(property_url, str): + property_url = "" + listing_id = prop.get("id") or property_url + if not listing_id: + return None + return { - "id": prop.get("id"), + "id": listing_id, "Bedrooms": bedrooms, "Bathrooms": bathrooms, "Number of bedrooms & living rooms": bedrooms + bathrooms, @@ -213,7 +270,7 @@ def transform_property( "price_frequency": "", "Price qualifier": price_qualifier, "Total floor area (sqm)": parse_display_size(prop.get("displaySize")), - "Listing URL": RIGHTMOVE_BASE + prop.get("propertyUrl", ""), + "Listing URL": RIGHTMOVE_BASE + property_url if property_url else "", "Listing features": key_features, "first_visible_date": prop.get("firstVisibleDate", ""), } diff --git a/finder/zoopla.py b/finder/zoopla.py index cb3e9b4..dcd70ee 100644 --- a/finder/zoopla.py +++ b/finder/zoopla.py @@ -24,7 +24,7 @@ import time from constants import DELAY_BETWEEN_PAGES, MAX_BEDROOMS, PROPERTY_TYPE_MAP, ZOOPLA_BASE from spatial import PostcodeSpatialIndex -from transform import normalize_sub_type, validate_floor_area +from transform import normalize_sub_type, parse_int_value, validate_floor_area log = logging.getLogger("zoopla") @@ -106,7 +106,8 @@ _EXTRACT_LISTINGS_JS = r"""() => { const bedsMatch = text.match(/(\d+)\s*beds?/i); const bathsMatch = text.match(/(\d+)\s*baths?/i); const recMatch = text.match(/(\d+)\s*reception/i); - const areaMatch = text.match(/([\d,]+)\s*sq\.?\s*ft/i); + const areaSqftMatch = text.match(/([\d,]+(?:\.\d+)?)\s*(?:sq\.?\s*ft|square\s+feet|ft(?:\^?2|²))/i); + const areaSqmMatch = text.match(/([\d,]+(?:\.\d+)?)\s*(?:sq\.?\s*m|square\s+met(?:er|re)s?|m(?:\^?2|²))/i); let tenure = ''; if (/leasehold/i.test(text)) tenure = 'Leasehold'; @@ -141,7 +142,8 @@ _EXTRACT_LISTINGS_JS = r"""() => { beds: bedsMatch && parseInt(bedsMatch[1]) <= 20 ? parseInt(bedsMatch[1]) : null, baths: bathsMatch && parseInt(bathsMatch[1]) <= 20 ? parseInt(bathsMatch[1]) : null, receptions: recMatch && parseInt(recMatch[1]) <= 20 ? parseInt(recMatch[1]) : null, - floor_area_sqft: areaMatch ? parseInt(areaMatch[1].replace(/,/g, '')) : null, + floor_area_sqft: areaSqftMatch ? parseInt(areaSqftMatch[1].replace(/,/g, '')) : null, + floor_area_sqm: areaSqmMatch ? parseFloat(areaSqmMatch[1].replace(/,/g, '')) : null, address, tenure, property_type, }); } @@ -181,7 +183,8 @@ _EXTRACT_LISTINGS_JS = r"""() => { const bedsMatch = text.match(/(\d+)\s*beds?/i); const bathsMatch = text.match(/(\d+)\s*baths?/i); const recMatch = text.match(/(\d+)\s*reception/i); - const areaMatch = text.match(/([\d,]+)\s*sq\.?\s*ft/i); + const areaSqftMatch = text.match(/([\d,]+(?:\.\d+)?)\s*(?:sq\.?\s*ft|square\s+feet|ft(?:\^?2|²))/i); + const areaSqmMatch = text.match(/([\d,]+(?:\.\d+)?)\s*(?:sq\.?\s*m|square\s+met(?:er|re)s?|m(?:\^?2|²))/i); let address = ''; for (const line of lines) { @@ -225,7 +228,8 @@ _EXTRACT_LISTINGS_JS = r"""() => { beds: bedsMatch && parseInt(bedsMatch[1]) <= 20 ? parseInt(bedsMatch[1]) : null, baths: bathsMatch && parseInt(bathsMatch[1]) <= 20 ? parseInt(bathsMatch[1]) : null, receptions: recMatch && parseInt(recMatch[1]) <= 20 ? parseInt(recMatch[1]) : null, - floor_area_sqft: areaMatch ? parseInt(areaMatch[1].replace(/,/g, '')) : null, + floor_area_sqft: areaSqftMatch ? parseInt(areaSqftMatch[1].replace(/,/g, '')) : null, + floor_area_sqm: areaSqmMatch ? parseFloat(areaSqmMatch[1].replace(/,/g, '')) : null, address, tenure, property_type, }); } @@ -611,7 +615,22 @@ def _map_property_type(raw_type: str | None) -> str: return canonical # Keyword fallback lower = raw_type.lower() - if "flat" in lower or "apartment" in lower or "maisonette" in lower or "studio" in lower or "penthouse" in lower: + excluded_flat_like = ( + "block of apartment", + "house of multiple occupation", + "private halls", + "retirement", + "serviced apartment", + ) + if any(term in lower for term in excluded_flat_like): + return "Other" + if ( + "flat" in lower + or "apartment" in lower + or "maisonette" in lower + or "studio" in lower + or "penthouse" in lower + ): return "Flats/Maisonettes" if "semi" in lower and "detach" in lower: return "Semi-Detached" @@ -634,8 +653,8 @@ def transform_property( Zoopla search cards do not include coordinates, so we resolve lat/lng from postcodes extracted from the address text.""" - price = raw.get("price") - if not price or int(price) <= 0: + price = parse_int_value(raw.get("price")) + if not price or price <= 0: return None address = raw.get("address", "") @@ -670,10 +689,10 @@ def transform_property( if not (49 <= lat <= 56 and -7 <= lng <= 2): return None - raw_beds = raw.get("beds") or 0 - raw_baths = raw.get("baths") or 0 - bedrooms = raw_beds if raw_beds <= MAX_BEDROOMS else 0 - bathrooms = raw_baths if raw_baths <= MAX_BEDROOMS else 0 + raw_beds = parse_int_value(raw.get("beds")) or 0 + raw_baths = parse_int_value(raw.get("baths")) or 0 + bedrooms = raw_beds if 0 <= raw_beds <= MAX_BEDROOMS else 0 + bathrooms = raw_baths if 0 <= raw_baths <= MAX_BEDROOMS else 0 if raw_beds > MAX_BEDROOMS or raw_baths > MAX_BEDROOMS: log.warning( "Zoopla %s: implausible beds=%d baths=%d (capped to 0)", @@ -683,9 +702,13 @@ def transform_property( # Floor area: convert sq ft to sq m floor_area_sqm = None - sqft = raw.get("floor_area_sqft") - if sqft: - floor_area_sqm = validate_floor_area(round(sqft * 0.092903, 1)) + raw_sqm = raw.get("floor_area_sqm") + if raw_sqm: + floor_area_sqm = validate_floor_area(round(float(raw_sqm), 1)) + else: + sqft = raw.get("floor_area_sqft") + if sqft: + floor_area_sqm = validate_floor_area(round(float(sqft) * 0.092903, 1)) listing_id = raw.get("id", "") listing_url = raw.get("url", "") @@ -704,7 +727,7 @@ def transform_property( "Leasehold/Freehold": raw.get("tenure") or None, "Property type": _map_property_type(raw.get("property_type")), "Property sub-type": normalize_sub_type(raw.get("property_type")), - "price": int(price), + "price": price, "price_frequency": "", "Price qualifier": "", "Total floor area (sqm)": floor_area_sqm, @@ -760,7 +783,18 @@ def search_outcode( properties = [] dropped = 0 for raw in raw_listings: - transformed = transform_property(raw, pc_index, pc_coords, search_outcode=outcode) + try: + transformed = transform_property( + raw, pc_index, pc_coords, search_outcode=outcode + ) + except Exception as exc: + log.warning( + "Zoopla %s property %s failed to transform: %s", + outcode, + raw.get("id", "?"), + exc, + ) + transformed = None if transformed: properties.append(transformed) else: diff --git a/frontend/Pasted image 20260515211038.png b/frontend/Pasted image 20260515211038.png new file mode 100644 index 0000000..8b494dc Binary files /dev/null and b/frontend/Pasted image 20260515211038.png differ diff --git a/frontend/src/components/map/AreaPane.tsx b/frontend/src/components/map/AreaPane.tsx index 8ded744..03e23e3 100644 --- a/frontend/src/components/map/AreaPane.tsx +++ b/frontend/src/components/map/AreaPane.tsx @@ -1,14 +1,16 @@ -import { useMemo, useState } from 'react'; +import { useMemo, useState, type MutableRefObject, type ReactNode } from 'react'; import { useTranslation } from 'react-i18next'; import { ts } from '../../i18n/server'; import type { FeatureFilters, + FeatureGroup, FeatureMeta, FilterExclusion, HexagonStatsResponse, } from '../../types'; import { travelFieldKey, type TravelTimeEntry } from '../../hooks/useTravelTime'; import type { HexagonLocation } from '../../lib/external-search'; +import { formatStationDistance, type NearbyStation } from '../../lib/nearby-stations'; import { formatValue, formatFilterValue, @@ -16,19 +18,22 @@ import { roundedPercentages, } from '../../lib/format'; import { groupFeaturesByCategory } from '../../lib/features'; +import { getPoiCategoryLogoUrl } from '../../lib/map-utils'; import { PARTY_FEATURE_COLORS, STACKED_GROUPS, STACKED_ENUM_GROUPS, STACKED_SEGMENT_COLORS, } from '../../lib/consts'; +import { useNearbyStations } from '../../hooks/useNearbyStations'; +import { useRetainedScrollTop } from '../../hooks/useRetainedScrollTop'; import { DualHistogram, LoadingSkeleton } from './DualHistogram'; import EnumBarChart from './EnumBarChart'; import StackedBarChart from './StackedBarChart'; import StackedEnumChart from './StackedEnumChart'; import PriceHistoryChart from './PriceHistoryChart'; import ExternalSearchLinks from './ExternalSearchLinks'; -import { InfoIcon } from '../ui/icons'; +import { InfoIcon, TransitIcon } from '../ui/icons'; import { CollapsibleGroupHeader } from '../ui/CollapsibleGroupHeader'; import { FeatureInfoPopup } from '../ui/FeatureInfoPopup'; import { EmptyState } from '../ui/EmptyState'; @@ -54,6 +59,9 @@ interface AreaPaneProps { shareCode?: string; isGroupExpanded: (name: string) => boolean; onToggleGroup: (name: string) => void; + scrollTopRef?: MutableRefObject; + scrollRestoreKey?: string | null; + scrollSaveDisabled?: boolean; } function normalizePercentageSegments(segments: T[]): T[] { @@ -75,6 +83,136 @@ function filterValueFormat(feature?: FeatureMeta) { }; } +const STATION_GROUP_NAME = 'Transport'; +const STATION_GROUP_NAMES = new Set([STATION_GROUP_NAME, 'Public Transport']); + +function MetricTextLabel({ children }: { children: ReactNode }) { + return ( + + {children} + + ); +} + +function MetricFeatureLabel({ + feature, + onShowInfo, + label, + aboutLabel, +}: { + feature: FeatureMeta; + onShowInfo: (feature: FeatureMeta) => void; + label?: string; + aboutLabel: string; +}) { + return ( +
+ {label ?? ts(feature.name)} + {feature.detail && ( + + )} +
+ ); +} + +function MetricRow({ + label, + chart, + value, + valueTitle, + className = '', +}: { + label: ReactNode; + chart?: ReactNode; + value?: ReactNode; + valueTitle?: string; + className?: string; +}) { + return ( +
+
{label}
+
{chart}
+
+ {value} +
+
+ ); +} + +function NearbyStationsCard({ location }: { location: HexagonLocation }) { + const { t } = useTranslation(); + const origin = useMemo( + () => ({ lat: location.lat, lon: location.lon }), + [location.lat, location.lon] + ); + const { stations, loading } = useNearbyStations(origin); + + return ( +
+
+ + {t('areaPane.closestStations')} + {loading && ( + + )} +
+ {stations.length > 0 ? ( +
    + {stations.map((station) => ( + + ))} +
+ ) : ( +
+ {loading ? t('common.loading') : t('areaPane.noNearbyStations')} +
+ )} +
+ ); +} + +function NearbyStationRow({ station }: { station: NearbyStation }) { + const icon = getPoiCategoryLogoUrl(station.category, station.icon_category); + + return ( +
  • + {icon ? ( + + ) : ( + + )} +
    +
    + {station.name} +
    +
    {ts(station.category)}
    +
    + + {formatStationDistance(station.distanceKm)} + +
  • + ); +} + export default function AreaPane({ stats, globalFeatures, @@ -91,6 +229,9 @@ export default function AreaPane({ shareCode, isGroupExpanded, onToggleGroup, + scrollTopRef, + scrollRestoreKey, + scrollSaveDisabled, }: AreaPaneProps) { const { t } = useTranslation(); const propertyCount = stats?.count; @@ -99,7 +240,19 @@ export default function AreaPane({ const filteredStatsEmpty = filtersActive && statsUseFilters && stats?.count === 0; const showFlipToggleCallout = filteredStatsEmpty && unfilteredCount !== 0; const featureGroups = useMemo(() => groupFeaturesByCategory(globalFeatures), [globalFeatures]); + const displayFeatureGroups = useMemo(() => { + if (!hexagonLocation || featureGroups.some((group) => STATION_GROUP_NAMES.has(group.name))) { + return featureGroups; + } + + return [{ name: STATION_GROUP_NAME, features: [] }, ...featureGroups]; + }, [featureGroups, hexagonLocation]); const [infoFeature, setInfoFeature] = useState(null); + const { scrollRef, onScroll } = useRetainedScrollTop({ + restoreKey: scrollRestoreKey ?? hexagonId, + scrollTopRef, + suspendSave: scrollSaveDisabled ?? (loading && stats == null), + }); const numericByName = useMemo(() => { if (!stats) return new Map(); @@ -164,7 +317,7 @@ export default function AreaPane({ <>
    -
    +
    @@ -300,20 +453,22 @@ export default function AreaPane({ {stats.price_history && (() => { const uniqueYears = new Set(stats.price_history.map((p) => Math.floor(p.year))); - return uniqueYears.size > 1; - })() && ( -
    - - {t('areaPane.priceHistory')} - - -
    - )} - {featureGroups.map((group) => { + return uniqueYears.size > 1 ? ( +
    + + {t('areaPane.priceHistory')} + + +
    + ) : null; + })()} + {displayFeatureGroups.map((group) => { + const showNearbyStations = + hexagonLocation != null && STATION_GROUP_NAMES.has(group.name); const hasData = group.features.some( (feature) => numericByName.has(feature.name) || enumByName.has(feature.name) ); - if (!hasData) return null; + if (!hasData && !showNearbyStations) return null; const stackedCharts = STACKED_GROUPS[group.name]; const stackedEnumCharts = STACKED_ENUM_GROUPS[group.name]; @@ -332,10 +487,11 @@ export default function AreaPane({ name={group.name} expanded={expanded} onToggle={() => onToggleGroup(group.name)} - className="px-3 py-2.5 text-sm font-bold text-warm-500 bg-warm-50 dark:bg-warm-900 dark:text-warm-400 sticky top-0 z-10 hover:bg-warm-100 dark:hover:bg-warm-800" + className="area-pane-group-header sticky top-0 z-10 bg-white px-3 pb-1.5 pt-4 text-[11px] font-bold uppercase tracking-wide text-warm-500 hover:bg-warm-50 dark:bg-navy-950 dark:text-warm-400 dark:hover:bg-navy-900" /> {expanded && ( -
    +
    + {showNearbyStations && } {stackedCharts?.map((chart) => { const segments = chart.components .map((name) => ({ @@ -445,21 +601,17 @@ export default function AreaPane({ : undefined; return ( -
    -
    - - - {formatValue(numericStats.mean, feature)} - -
    - {numericStats.histogram && + } + chart={ + numericStats.histogram && (globalHistogram ? ( ) : ( - ))} -
    + )) + } + value={formatValue(numericStats.mean, feature)} + valueTitle={ + globalMean != null + ? `${t('areaPane.nationalAvg')}: ${formatValue(globalMean)}` + : undefined + } + /> ); } diff --git a/frontend/src/components/map/DualHistogram.test.ts b/frontend/src/components/map/DualHistogram.test.ts new file mode 100644 index 0000000..92aac74 --- /dev/null +++ b/frontend/src/components/map/DualHistogram.test.ts @@ -0,0 +1,26 @@ +import { describe, expect, it } from 'vitest'; + +import { compactHistogramLabel } from './DualHistogram'; + +describe('compactHistogramLabel', () => { + it('rounds low-cardinality count labels to integers', () => { + const fmt = (value: number) => value.toFixed(2); + const labels = [0, 0.99, 2.98, 4.96, 5.95].map((center, index) => + compactHistogramLabel(index, 5, 0, 5.95, center, fmt, true) + ); + + expect(labels).toEqual(['0', '1', '3', '5', '6+']); + }); + + it('labels the first integer count bucket as zero when it means below one', () => { + const fmt = (value: number) => value.toFixed(2); + + expect(compactHistogramLabel(0, 5, 0.99, 5.95, 0.99, fmt, true)).toBe('0'); + }); + + it('keeps fractional labels when integer labels are not requested', () => { + const fmt = (value: number) => value.toFixed(2); + + expect(compactHistogramLabel(1, 5, 0, 5.95, 0.99, fmt, false)).toBe('0.99'); + }); +}); diff --git a/frontend/src/components/map/DualHistogram.tsx b/frontend/src/components/map/DualHistogram.tsx index 76a8ecf..bd2670f 100644 --- a/frontend/src/components/map/DualHistogram.tsx +++ b/frontend/src/components/map/DualHistogram.tsx @@ -30,6 +30,42 @@ function pickTicks(min: number, max: number, count: number): number[] { return ticks; } +function isLowCardinalityHistogram(counts: number[], p1: number, p99: number): boolean { + return counts.length > 0 && counts.length <= 10 && p99 > p1 && p99 - p1 <= 10; +} + +export function compactHistogramLabel( + index: number, + barCount: number, + p1: number, + p99: number, + center: number, + formatLabel: (value: number) => string, + integerLabels = false +): string { + const formatAxisValue = (value: number) => + integerLabels ? Math.round(value).toLocaleString() : formatLabel(value); + + if (barCount <= 1) return formatAxisValue(center); + + const middleBins = barCount - 2; + if (index === 0) { + if (!integerLabels) return `<${formatLabel(p1)}`; + const firstBoundary = Math.ceil(p1); + return firstBoundary <= 1 ? '0' : `<${firstBoundary.toLocaleString()}`; + } + if (index === barCount - 1) { + if (!integerLabels) return `${formatLabel(p99)}+`; + return `${Math.ceil(p99).toLocaleString()}+`; + } + + const middleWidth = middleBins > 0 ? (p99 - p1) / middleBins : 0; + if (Math.abs(middleWidth - 1) < 0.001) { + return formatAxisValue(p1 + index - 1); + } + return formatAxisValue(center); +} + export function DualHistogram({ localCounts, globalCounts, @@ -38,6 +74,8 @@ export function DualHistogram({ globalMean, meanLabel, formatLabel, + compact = false, + integerAxisLabels = false, }: { localCounts: number[]; globalCounts: number[]; @@ -46,9 +84,15 @@ export function DualHistogram({ globalMean?: number; meanLabel?: string; formatLabel?: (value: number) => string; + compact?: boolean; + integerAxisLabels?: boolean; }) { const { t } = useTranslation(); - const targetBars = 25; + const showCompactAxisLabels = + compact && + isLowCardinalityHistogram(localCounts, p1, p99) && + isLowCardinalityHistogram(globalCounts, p1, p99); + const targetBars = compact ? (showCompactAxisLabels ? localCounts.length : 16) : 25; const localBars = downsampleBars(localCounts, targetBars); const globalBars = downsampleBars(globalCounts, targetBars); @@ -59,6 +103,8 @@ export function DualHistogram({ const fmt = formatLabel ?? ((v: number) => (Number.isInteger(v) ? v.toLocaleString() : v.toFixed(1))); + if (barCount === 0) return null; + // Compute center value for each bar. // Bar 0 = low outlier, bars 1..n-2 = middle (p1 to p99), bar n-1 = high outlier. const middleBins = Math.max(barCount - 2, 0); @@ -97,6 +143,60 @@ export function DualHistogram({ ? { right: 0 } : { left: '50%', transform: 'translateX(-50%)' }; + if (compact) { + const axisLabels = showCompactAxisLabels + ? barCenters.map((center, index) => + compactHistogramLabel(index, barCount, p1, p99, center, fmt, integerAxisLabels) + ) + : []; + const chartTitle = [ + `${fmt(p1)} - ${fmt(p99)}`, + globalMean != null ? `${meanLabel ?? t('areaPane.nationalAvg')}: ${fmt(globalMean)}` : null, + ] + .filter(Boolean) + .join('\n'); + + return ( +
    +
    + {Array.from({ length: barCount }).map((_, index) => { + const globalHeight = (globalBars[index] / globalMax) * 100; + const localHeight = (localBars[index] / localMax) * 100; + return ( +
    +
    0 ? 8 : 0)}%` }} + /> + {localBars[index] > 0 && ( +
    + )} +
    + ); + })} +
    + {showCompactAxisLabels && ( +
    + {axisLabels.map((label, index) => ( + + {label} + + ))} +
    + )} +
    + ); + } + return (
    @@ -152,35 +252,29 @@ export function DualHistogram({ function SkeletonHistogram() { return ( -
    -
    -
    -
    -
    -
    - {Array.from({ length: 15 }).map((_, i) => ( +
    +
    +
    + {Array.from({ length: 12 }).map((_, i) => (
    ))}
    -
    -
    -
    -
    +
    ); } export function LoadingSkeleton() { return ( -
    +
    {[0, 1, 2].map((groupIdx) => (
    -
    -
    +
    +
    {Array.from({ length: groupIdx === 0 ? 3 : 2 }).map((_, i) => ( ))} diff --git a/frontend/src/components/map/EnumBarChart.tsx b/frontend/src/components/map/EnumBarChart.tsx index e8177e6..b98ef9e 100644 --- a/frontend/src/components/map/EnumBarChart.tsx +++ b/frontend/src/components/map/EnumBarChart.tsx @@ -1,16 +1,34 @@ import { ts } from '../../i18n/server'; import { getEnumValueColor } from '../../lib/consts'; +function shortenAxisLabel(label: string, total: number): string { + if (label.length <= 3) return label; + const parts = label.split(/[\s/&-]+/).filter(Boolean); + if (parts.length > 1) { + return parts + .map((part) => Array.from(part)[0]) + .join('') + .slice(0, 3); + } + return Array.from(label) + .slice(0, total <= 5 ? 3 : 2) + .join(''); +} + export default function EnumBarChart({ counts, globalCounts, featureName, + compact = false, }: { counts: Record; globalCounts?: Record; featureName: string; + compact?: boolean; }) { const entries = Object.entries(counts).sort(([, countA], [, countB]) => countB - countA); + if (entries.length === 0) return null; + const localTotal = entries.reduce((sum, [, c]) => sum + c, 0); // When global counts are available, normalize both to percentages for comparison @@ -28,6 +46,71 @@ export default function EnumBarChart({ // Fallback to raw count scaling when no global data const maxCount = Math.max(...entries.map(([, count]) => count), 1); + if (compact) { + const title = entries + .map(([label, count]) => { + const localPct = localTotal > 0 ? (count / localTotal) * 100 : 0; + const globalPct = + hasGlobal && globalTotal > 0 ? ((globalCounts[label] ?? 0) / globalTotal) * 100 : null; + return `${ts(label)}: ${count.toLocaleString()} (${localPct.toFixed(1)}%)${ + globalPct != null ? ` / ${globalPct.toFixed(1)}%` : '' + }`; + }) + .join('\n'); + + return ( +
    +
    + {entries.map(([label, count]) => { + const localPct = localTotal > 0 ? count / localTotal : 0; + const globalPct = hasGlobal ? (globalCounts[label] ?? 0) / globalTotal : 0; + const localHeight = hasGlobal + ? maxPct > 0 + ? (localPct / maxPct) * 100 + : 0 + : (count / maxCount) * 100; + const globalHeight = hasGlobal && maxPct > 0 ? (globalPct / maxPct) * 100 : 0; + const color = getEnumValueColor(featureName, label); + + return ( +
    + {hasGlobal && ( +
    0 ? 8 : 0)}%` }} + /> + )} + {count > 0 && ( +
    + )} +
    + ); + })} +
    +
    + {entries.map(([label]) => { + const translated = ts(label); + return ( + + {shortenAxisLabel(translated, entries.length)} + + ); + })} +
    +
    + ); + } + return (
    {entries.map(([label, count]) => { diff --git a/frontend/src/components/map/HistogramLegend.tsx b/frontend/src/components/map/HistogramLegend.tsx index 874615b..a15433d 100644 --- a/frontend/src/components/map/HistogramLegend.tsx +++ b/frontend/src/components/map/HistogramLegend.tsx @@ -3,35 +3,18 @@ import { useTranslation } from 'react-i18next'; export default function HistogramLegend() { const { t } = useTranslation(); return ( -
    -
    -
    -
    - - - {t('histogramLegend.tealBars')} - {' '} - {t('histogramLegend.tealBarsDesc')} - -
    -
    -
    - - - {t('histogramLegend.greyBars')} - {' '} - {t('histogramLegend.greyBarsDesc')} - -
    -
    -
    - - - {t('histogramLegend.dashedLine')} - {' '} - {t('histogramLegend.dashedLineDesc')} - -
    +
    +
    +
    + + {t('histogramLegend.tealBars')} + +
    +
    +
    + + {t('histogramLegend.greyBars')} +
    ); diff --git a/frontend/src/components/map/MapPage.tsx b/frontend/src/components/map/MapPage.tsx index 2cb7d35..cc32247 100644 --- a/frontend/src/components/map/MapPage.tsx +++ b/frontend/src/components/map/MapPage.tsx @@ -147,6 +147,8 @@ export default function MapPage({ const pendingCurrentLocationFlyToRef = useRef<{ lat: number; lng: number } | null>(null); const pendingLocationSearchFlyToRef = useRef(null); const mobileDrawerPanelRectRef = useRef(null); + const areaPaneScrollTopRef = useRef(0); + const propertiesPaneScrollTopRef = useRef(0); const getMobileMapFlyToOptions = useCallback((): MapFlyToOptions | undefined => { if (!isMobile) return undefined; @@ -558,6 +560,11 @@ export default function MapPage({ shareCode={shareCode} isGroupExpanded={isAreaGroupExpanded} onToggleGroup={toggleAreaGroup} + scrollTopRef={areaPaneScrollTopRef} + scrollRestoreKey={ + selectedHexagon ? `${selectedHexagon.type}:${selectedHexagon.id}` : null + } + scrollSaveDisabled={loadingAreaStats && areaStats == null} /> ); @@ -570,6 +577,11 @@ export default function MapPage({ loading={loadingProperties} hexagonId={selectedHexagon?.id || null} onLoadMore={handleLoadMoreProperties} + scrollTopRef={propertiesPaneScrollTopRef} + scrollRestoreKey={ + selectedHexagon ? `${selectedHexagon.type}:${selectedHexagon.id}` : null + } + scrollSaveDisabled={loadingProperties && properties.length === 0} /> ); diff --git a/frontend/src/components/map/POIPane.tsx b/frontend/src/components/map/POIPane.tsx index 366c9a2..472aa03 100644 --- a/frontend/src/components/map/POIPane.tsx +++ b/frontend/src/components/map/POIPane.tsx @@ -3,7 +3,7 @@ import { useTranslation } from 'react-i18next'; import { ts } from '../../i18n/server'; import { useCollapsibleGroups } from '../../hooks/useCollapsibleGroups'; import { trackEvent } from '../../lib/analytics'; -import { POI_CATEGORY_LOGOS } from '../../lib/consts'; +import { getPoiCategoryLogoUrl } from '../../lib/map-utils'; import type { POICategoryGroup } from '../../types'; import InfoPopup from '../ui/InfoPopup'; import { SearchInput } from '../ui/SearchInput'; @@ -188,7 +188,7 @@ export default function POIPane({
    {group.categories.map((category) => { - const logo = POI_CATEGORY_LOGOS[category]; + const logo = getPoiCategoryLogoUrl(category); return ( void; onNavigateToSource?: (slug: string) => void; + scrollTopRef?: MutableRefObject; + scrollRestoreKey?: string | null; + scrollSaveDisabled?: boolean; } export function PropertiesPane({ @@ -26,10 +30,18 @@ export function PropertiesPane({ hexagonId, onLoadMore, onNavigateToSource, + scrollTopRef, + scrollRestoreKey, + scrollSaveDisabled, }: PropertiesPaneProps) { const { t } = useTranslation(); const [search, setSearch] = useState(''); const [showInfo, setShowInfo] = useState(false); + const { scrollRef, onScroll } = useRetainedScrollTop({ + restoreKey: scrollRestoreKey ?? hexagonId, + scrollTopRef, + suspendSave: scrollSaveDisabled ?? (loading && properties.length === 0), + }); useEffect(() => { setSearch(''); @@ -60,7 +72,7 @@ export function PropertiesPane({ return (
    0} /> -
    +
    {showInfo && ( ; + compact?: boolean; } /** Strip common suffixes/prefixes to produce short legend labels */ @@ -28,7 +29,27 @@ function shortenLabel(name: string): string { .trim(); } -export default function StackedBarChart({ segments, total, colorMap }: StackedBarChartProps) { +function shortenAxisLabel(name: string, total: number): string { + const label = shortenLabel(name); + if (label.length <= 3) return label; + const parts = label.split(/[\s/&-]+/).filter(Boolean); + if (parts.length > 1) { + return parts + .map((part) => Array.from(part)[0]) + .join('') + .slice(0, 3); + } + return Array.from(label) + .slice(0, total <= 5 ? 3 : 2) + .join(''); +} + +export default function StackedBarChart({ + segments, + total, + colorMap, + compact = false, +}: StackedBarChartProps) { const { t } = useTranslation(); const sortedSegments = useMemo(() => [...segments].sort((a, b) => b.value - a.value), [segments]); const roundedPcts = useMemo( @@ -55,6 +76,53 @@ export default function StackedBarChart({ segments, total, colorMap }: StackedBa return color; }; + if (compact) { + const maxValue = Math.max(...sortedSegments.map((segment) => segment.value), 1); + const showAxisLabels = sortedSegments.length <= 8; + const title = sortedSegments + .map((segment, i) => { + const label = shortenLabel(ts(segment.name)); + return `${label}: ${formatValue(segment.value)} (${roundedPcts[i].toFixed(1)}%)`; + }) + .join('\n'); + + return ( +
    +
    + {sortedSegments.map((segment) => { + const height = (segment.value / maxValue) * 100; + return ( +
    + ); + })} +
    + {showAxisLabels && ( +
    + {sortedSegments.map((segment) => { + const label = shortenLabel(ts(segment.name)); + return ( + + {shortenAxisLabel(label, sortedSegments.length)} + + ); + })} +
    + )} +
    + ); + } + return (
    {/* Stacked bar */} diff --git a/frontend/src/components/map/StackedEnumChart.tsx b/frontend/src/components/map/StackedEnumChart.tsx index 33f2775..229091b 100644 --- a/frontend/src/components/map/StackedEnumChart.tsx +++ b/frontend/src/components/map/StackedEnumChart.tsx @@ -7,6 +7,7 @@ interface StackedEnumChartProps { components: { label: string; stats: EnumFeatureStats }[]; valueOrder: string[]; valueColors: string[]; + compact?: boolean; } /** Strip common suffixes to produce short row labels */ @@ -14,10 +15,24 @@ function shortenLabel(name: string): string { return name.replace(/ risk$/, ''); } +function shortenAxisLabel(name: string): string { + const label = shortenLabel(name); + if (label.length <= 3) return label; + const parts = label.split(/[\s/&-]+/).filter(Boolean); + if (parts.length > 1) { + return parts + .map((part) => Array.from(part)[0]) + .join('') + .slice(0, 3); + } + return Array.from(label).slice(0, 3).join(''); +} + export default function StackedEnumChart({ components, valueOrder, valueColors, + compact = false, }: StackedEnumChartProps) { const { t } = useTranslation(); const visibleRows = components.filter(({ stats }) => { @@ -35,6 +50,63 @@ export default function StackedEnumChart({ ); } + if (compact) { + return ( +
    + {visibleRows.map(({ label, stats }) => { + const counts = valueOrder.map((value) => stats.counts[value] ?? 0); + const total = counts.reduce((a, b) => a + b, 0); + const roundedPcts = roundedPercentages(counts, total, 0); + const title = valueOrder + .map((value, i) => `${ts(value)}: ${counts[i]} (${roundedPcts[i]}%)`) + .join('\n'); + + return ( +
    + + {shortenLabel(ts(label))} + +
    + {valueOrder.map((value, i) => { + const count = counts[i]; + const pct = (count / total) * 100; + if (pct < 0.5) return null; + return ( +
    + ); + })} +
    +
    + ); + })} +
    + {valueOrder.map((value) => ( + + {shortenAxisLabel(ts(value))} + + ))} +
    +
    + ); + } + return (
    {visibleRows.map(({ label, stats }) => { diff --git a/frontend/src/components/map/filters/ActiveFiltersPanel.tsx b/frontend/src/components/map/filters/ActiveFiltersPanel.tsx index b7bb163..4b6736d 100644 --- a/frontend/src/components/map/filters/ActiveFiltersPanel.tsx +++ b/frontend/src/components/map/filters/ActiveFiltersPanel.tsx @@ -110,7 +110,7 @@ export function ActiveFiltersPanel({ > {(!collapsed || !isLicensed) && (
    -
    - {!collapsed && ( + {!collapsed && ( +
    - )} - {!isLicensed && ( -
    -

    - {t('filters.upgradePrompt')} -

    -

    - {t('filters.oneTimeLifetime')} -

    - - - - - - - -
    - )} -
    +
    + )} + {!isLicensed && ( +
    +

    + {t('filters.upgradePrompt')} +

    +

    + {t('filters.oneTimeLifetime')} +

    + + + + + + + +
    + )}
    )}
    diff --git a/frontend/src/components/ui/InfoPopup.tsx b/frontend/src/components/ui/InfoPopup.tsx index 87f4edf..adbddaf 100644 --- a/frontend/src/components/ui/InfoPopup.tsx +++ b/frontend/src/components/ui/InfoPopup.tsx @@ -1,5 +1,7 @@ -import { useRef, useCallback, useEffect, useId, type ReactNode } from 'react'; +import { useCallback, useEffect, useId, type ReactNode } from 'react'; +import { createPortal } from 'react-dom'; import { useClickOutside } from '../../hooks/useClickOutside'; +import { useModalA11y } from '../../hooks/useModalA11y'; import { CloseIcon } from './icons'; import { IconButton } from './IconButton'; @@ -11,8 +13,7 @@ interface InfoPopupProps { } export default function InfoPopup({ title, children, onClose, sourceLink }: InfoPopupProps) { - const popupRef = useRef(null); - const previouslyFocusedRef = useRef(null); + const popupRef = useModalA11y(); const titleId = useId(); const handleClose = useCallback(() => { @@ -29,20 +30,9 @@ export default function InfoPopup({ title, children, onClose, sourceLink }: Info return () => document.removeEventListener('keydown', handleKeyDown); }, [onClose]); - useEffect(() => { - previouslyFocusedRef.current = document.activeElement as HTMLElement | null; - const firstFocusable = popupRef.current?.querySelector( - 'button, [href], input, select, textarea, [tabindex]:not([tabindex="-1"])' - ); - (firstFocusable ?? popupRef.current)?.focus(); - return () => { - previouslyFocusedRef.current?.focus?.(); - }; - }, []); - - return ( + const popup = (
    ); + + if (typeof document === 'undefined') return popup; + + return createPortal(popup, document.body); } diff --git a/frontend/src/components/ui/MobileMenu.tsx b/frontend/src/components/ui/MobileMenu.tsx index 2964505..33415d8 100644 --- a/frontend/src/components/ui/MobileMenu.tsx +++ b/frontend/src/components/ui/MobileMenu.tsx @@ -160,7 +160,7 @@ export default function MobileMenu({
    {/* Menu panel */}
    -
    +
    {t('mobileMenu.menu')}