Compare commits

...

3 commits

Author SHA1 Message Date
d93beb9201 Small fixes
Some checks failed
CI / Python (lint + test) (push) Failing after 1m42s
CI / Frontend (lint + typecheck) (push) Failing after 1m45s
CI / Rust (lint + test) (push) Successful in 4m45s
Build and publish Docker image / build-and-push (push) Failing after 6m21s
2026-03-26 07:55:13 +00:00
d56b5dedff Bump memory 2026-03-26 07:54:43 +00:00
3adbaf435d Fix scrape 2026-03-26 07:54:39 +00:00
10 changed files with 228 additions and 71 deletions

View file

@ -18,6 +18,17 @@ log = logging.getLogger("rightmove")
# Outcode ID cache (Rightmove typeahead → internal ID) # Outcode ID cache (Rightmove typeahead → internal ID)
outcode_cache: dict[str, str] = {} outcode_cache: dict[str, str] = {}
# Rightmove hard-caps pagination at index 1008 (42 pages × 24 results).
# Requesting index >= 1008 returns HTTP 400.
_MAX_INDEX = 1008
# Property type filters for splitting overcapped searches. Each sub-query
# gets its own 1008 cap, so we can recover listings beyond the unfiltered limit.
_PROPERTY_TYPES = [
"detached", "semi-detached", "terraced", "flat",
"bungalow", "park-home", "land",
]
def resolve_outcode_id(client: httpx.Client, outcode: str) -> str | None: def resolve_outcode_id(client: httpx.Client, outcode: str) -> str | None:
"""Look up Rightmove's internal ID for an outcode via typeahead API.""" """Look up Rightmove's internal ID for an outcode via typeahead API."""
@ -40,16 +51,18 @@ def resolve_outcode_id(client: httpx.Client, outcode: str) -> str | None:
return None return None
def search_outcode( def _paginate(
client: httpx.Client, client: httpx.Client,
outcode_id: str, outcode_id: str,
outcode: str, outcode: str,
channel_cfg: dict, channel_cfg: dict,
pc_index: PostcodeSpatialIndex, pc_index: PostcodeSpatialIndex,
) -> list[dict]: extra_params: dict | None = None,
"""Paginate through search results for one outcode+channel. Returns transformed properties.""" ) -> tuple[list[dict], int]:
"""Paginate through search results. Returns (properties, result_count)."""
properties = [] properties = []
index = 0 index = 0
result_count = 0
while True: while True:
params = { params = {
@ -60,6 +73,8 @@ def search_outcode(
"channel": channel_cfg["channel"], "channel": channel_cfg["channel"],
"transactionType": channel_cfg["transactionType"], "transactionType": channel_cfg["transactionType"],
} }
if extra_params:
params.update(extra_params)
data = fetch_with_retry(client, SEARCH_URL, params) data = fetch_with_retry(client, SEARCH_URL, params)
if not data: if not data:
@ -90,4 +105,52 @@ def search_outcode(
time.sleep(DELAY_BETWEEN_PAGES) time.sleep(DELAY_BETWEEN_PAGES)
return properties return properties, result_count
def search_outcode(
    client: httpx.Client,
    outcode_id: str,
    outcode: str,
    channel_cfg: dict,
    pc_index: PostcodeSpatialIndex,
) -> list[dict]:
    """Paginate through search results for one outcode+channel. Returns transformed properties.

    When the unfiltered result count exceeds 1008 (Rightmove's hard pagination cap),
    re-queries per property type to recover listings beyond the cap.

    Args:
        client: HTTP client handed through to ``_paginate``.
        outcode_id: Rightmove's internal ID for the outcode.
        outcode: The outcode itself; used here only for log messages.
        channel_cfg: Channel configuration; ``channel_cfg["channel"]`` is read
            for logging and the whole dict is passed on to ``_paginate``.
        pc_index: Postcode spatial index handed through to ``_paginate``.

    Returns:
        The properties from the unfiltered search, plus any additional
        properties found by the per-type searches. Properties are de-duplicated
        on their ``"id"`` key; the first occurrence seen is the one kept.
    """
    properties, result_count = _paginate(
        client, outcode_id, outcode, channel_cfg, pc_index
    )
    if result_count <= _MAX_INDEX:
        return properties

    # Hit the 1008 cap — re-search per property type to get full coverage
    ch = channel_cfg["channel"]
    log.info(
        "%s/%s: %d results exceed %d cap, splitting by property type",
        outcode, ch, result_count, _MAX_INDEX,
    )

    all_by_id: dict[str, dict] = {p["id"]: p for p in properties}
    for pt in _PROPERTY_TYPES:
        pt_props, pt_count = _paginate(
            client, outcode_id, outcode, channel_cfg, pc_index,
            extra_params={"propertyTypes": pt},
        )
        # A single property type can itself exceed the cap; nothing further is
        # split here, so make the truncation visible instead of silent.
        if pt_count > _MAX_INDEX:
            log.warning(
                "%s/%s type=%s: %d results still exceed %d cap, some listings will be missed",
                outcode, ch, pt, pt_count, _MAX_INDEX,
            )

        before = len(all_by_id)
        for p in pt_props:
            # setdefault keeps the entry already stored for this id, if any.
            all_by_id.setdefault(p["id"], p)
        new = len(all_by_id) - before
        if new:
            log.debug("%s/%s type=%s: +%d new properties", outcode, ch, pt, new)

    # Counts are "before split -> after split"; the separator keeps the two
    # numbers from running together in the log line.
    log.info(
        "%s/%s: type split recovered %d -> %d properties",
        outcode, ch, len(properties), len(all_by_id),
    )
    return list(all_by_id.values())

View file

@ -39,7 +39,7 @@ class TurnstileError(Exception):
# Maximum search result pages to scrape per outcode (25 listings/page) # Maximum search result pages to scrape per outcode (25 listings/page)
MAX_PAGES_PER_OUTCODE = 10 MAX_PAGES_PER_OUTCODE = 40
# JavaScript to extract listings from the rendered DOM. # JavaScript to extract listings from the rendered DOM.
# Uses data-testid attributes as primary selectors (stable across deployments), # Uses data-testid attributes as primary selectors (stable across deployments),
@ -98,6 +98,12 @@ _EXTRACT_LISTINGS_JS = r"""() => {
if (/leasehold/i.test(text)) tenure = 'Leasehold'; if (/leasehold/i.test(text)) tenure = 'Leasehold';
else if (/freehold/i.test(text)) tenure = 'Freehold'; else if (/freehold/i.test(text)) tenure = 'Freehold';
// Extract property type (e.g., "2 bed flat for sale" "flat")
let property_type = '';
const ptMatch = text.match(/\d+\s*(?:beds?|bedrooms?)\s+([\w\s-]+?)\s+(?:for\s+sale|to\s+(?:rent|let)|for\s+rent)/i);
if (ptMatch) property_type = ptMatch[1].trim();
else if (/\bstudio\s*(?:flat|apartment)?\s+(?:for\s+sale|to\s+(?:rent|let)|for\s+rent)/i.test(text)) property_type = 'Studio';
results.push({ results.push({
id, url: href.replace(window.location.origin, ''), id, url: href.replace(window.location.origin, ''),
price: priceMatch ? parseInt(priceMatch[1].replace(/,/g, '')) : null, price: priceMatch ? parseInt(priceMatch[1].replace(/,/g, '')) : null,
@ -106,7 +112,7 @@ _EXTRACT_LISTINGS_JS = r"""() => {
baths: bathsMatch && parseInt(bathsMatch[1]) <= 20 ? parseInt(bathsMatch[1]) : null, baths: bathsMatch && parseInt(bathsMatch[1]) <= 20 ? parseInt(bathsMatch[1]) : null,
receptions: recMatch && parseInt(recMatch[1]) <= 20 ? parseInt(recMatch[1]) : null, receptions: recMatch && parseInt(recMatch[1]) <= 20 ? parseInt(recMatch[1]) : null,
floor_area_sqft: areaMatch ? parseInt(areaMatch[1].replace(/,/g, '')) : null, floor_area_sqft: areaMatch ? parseInt(areaMatch[1].replace(/,/g, '')) : null,
address, tenure, address, tenure, property_type,
}); });
} }
@ -160,6 +166,12 @@ _EXTRACT_LISTINGS_JS = r"""() => {
if (/leasehold/i.test(text)) tenure = 'Leasehold'; if (/leasehold/i.test(text)) tenure = 'Leasehold';
else if (/freehold/i.test(text)) tenure = 'Freehold'; else if (/freehold/i.test(text)) tenure = 'Freehold';
// Extract property type
let property_type = '';
const ptMatch2 = text.match(/\d+\s*(?:beds?|bedrooms?)\s+([\w\s-]+?)\s+(?:for\s+sale|to\s+(?:rent|let)|for\s+rent)/i);
if (ptMatch2) property_type = ptMatch2[1].trim();
else if (/\bstudio\s*(?:flat|apartment)?\s+(?:for\s+sale|to\s+(?:rent|let)|for\s+rent)/i.test(text)) property_type = 'Studio';
results.push({ results.push({
id, url: href.replace(window.location.origin, ''), id, url: href.replace(window.location.origin, ''),
price: priceMatch ? parseInt(priceMatch[1].replace(/,/g, '')) : null, price: priceMatch ? parseInt(priceMatch[1].replace(/,/g, '')) : null,
@ -168,7 +180,7 @@ _EXTRACT_LISTINGS_JS = r"""() => {
baths: bathsMatch && parseInt(bathsMatch[1]) <= 20 ? parseInt(bathsMatch[1]) : null, baths: bathsMatch && parseInt(bathsMatch[1]) <= 20 ? parseInt(bathsMatch[1]) : null,
receptions: recMatch && parseInt(recMatch[1]) <= 20 ? parseInt(recMatch[1]) : null, receptions: recMatch && parseInt(recMatch[1]) <= 20 ? parseInt(recMatch[1]) : null,
floor_area_sqft: areaMatch ? parseInt(areaMatch[1].replace(/,/g, '')) : null, floor_area_sqft: areaMatch ? parseInt(areaMatch[1].replace(/,/g, '')) : null,
address, tenure, address, tenure, property_type,
}); });
} }
} }
@ -557,6 +569,32 @@ def _paginate(page, total_results: int, channel: str) -> list[dict]:
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Cached outcode → (postcode, lat, lng) lookups to avoid repeated O(n) scans
# over 2.26M postcodes. Populated lazily on first lookup per outcode.
_outcode_coords_cache: dict[str, tuple[str, float, float] | None] = {}
def _resolve_outcode_coords(
outcode: str, pc_coords: dict[str, tuple[float, float]]
) -> tuple[str, float, float] | None:
"""Find first postcode + coords for an outcode. Result is cached."""
if outcode in _outcode_coords_cache:
return _outcode_coords_cache[outcode]
prefix = outcode + " "
for pcd, (lat, lng) in pc_coords.items():
if pcd.startswith(prefix) or (
len(outcode) >= 4
and pcd.startswith(outcode)
and len(pcd) > len(outcode)
):
_outcode_coords_cache[outcode] = (pcd, lat, lng)
return (pcd, lat, lng)
_outcode_coords_cache[outcode] = None
return None
def _extract_postcode(text: str) -> str | None: def _extract_postcode(text: str) -> str | None:
"""Extract a full UK postcode from text like 'Dollar Bay Place, Canary Wharf E14 9SS'.""" """Extract a full UK postcode from text like 'Dollar Bay Place, Canary Wharf E14 9SS'."""
match = re.search(r"([A-Z]{1,2}\d[A-Z0-9]?\s*\d[A-Z]{2})", text, re.IGNORECASE) match = re.search(r"([A-Z]{1,2}\d[A-Z0-9]?\s*\d[A-Z]{2})", text, re.IGNORECASE)
@ -585,11 +623,17 @@ def _map_property_type(raw_type: str | None) -> str:
"""Map Zoopla property type text to canonical type.""" """Map Zoopla property type text to canonical type."""
if not raw_type: if not raw_type:
return "Other" return "Other"
# Exact match (handles Rightmove-style capitalised values)
canonical = PROPERTY_TYPE_MAP.get(raw_type) canonical = PROPERTY_TYPE_MAP.get(raw_type)
if canonical: if canonical:
return canonical return canonical
# Title-case match (handles regex-extracted lowercase like "town house" → "Town House")
canonical = PROPERTY_TYPE_MAP.get(raw_type.title())
if canonical:
return canonical
# Keyword fallback
lower = raw_type.lower() lower = raw_type.lower()
if "flat" in lower or "apartment" in lower or "maisonette" in lower or "studio" in lower: if "flat" in lower or "apartment" in lower or "maisonette" in lower or "studio" in lower or "penthouse" in lower:
return "Flats/Maisonettes" return "Flats/Maisonettes"
if "detached" in lower and "semi" not in lower: if "detached" in lower and "semi" not in lower:
return "Detached" return "Detached"
@ -622,6 +666,7 @@ def transform_property(
channel: str, channel: str,
pc_index: PostcodeSpatialIndex, pc_index: PostcodeSpatialIndex,
pc_coords: dict[str, tuple[float, float]], pc_coords: dict[str, tuple[float, float]],
search_outcode: str | None = None,
) -> dict | None: ) -> dict | None:
"""Transform a raw Zoopla listing dict into the standard output schema. """Transform a raw Zoopla listing dict into the standard output schema.
@ -643,22 +688,18 @@ def transform_property(
lat, lng = coords lat, lng = coords
if lat is None: if lat is None:
# Try outcode-level fallback # Try outcode-level fallback from address text
outcode = _extract_outcode(address) addr_outcode = _extract_outcode(address)
if outcode: if addr_outcode:
# ONSPD 7-char format: 4-char outcodes have no space before incode result = _resolve_outcode_coords(addr_outcode, pc_coords)
# (e.g., "BH191AB"), while shorter outcodes do (e.g., "E14 5AB"). if result:
# Check both formats to handle all outcode lengths. postcode, lat, lng = result
prefix = outcode + " "
for pcd, coords in pc_coords.items(): # Final fallback: use the outcode we know we're searching
if pcd.startswith(prefix) or ( if lat is None and search_outcode:
len(outcode) >= 4 result = _resolve_outcode_coords(search_outcode, pc_coords)
and pcd.startswith(outcode) if result:
and len(pcd) > len(outcode) postcode, lat, lng = result
):
postcode = pcd
lat, lng = coords
break
if lat is None or lng is None or not postcode: if lat is None or lng is None or not postcode:
return None return None
@ -706,8 +747,8 @@ def transform_property(
"Postcode": postcode, "Postcode": postcode,
"Address per Property Register": address, "Address per Property Register": address,
"Leasehold/Freehold": raw.get("tenure") or None, "Leasehold/Freehold": raw.get("tenure") or None,
"Property type": "Other", # Not reliably extractable from Zoopla search cards "Property type": _map_property_type(raw.get("property_type")),
"Property sub-type": "", "Property sub-type": raw.get("property_type") or "",
"price": int(price), "price": int(price),
"price_frequency": frequency, "price_frequency": frequency,
"Price qualifier": "", "Price qualifier": "",
@ -774,7 +815,7 @@ def search_outcode(
properties = [] properties = []
dropped = 0 dropped = 0
for raw in raw_listings: for raw in raw_listings:
transformed = transform_property(raw, channel, pc_index, pc_coords) transformed = transform_property(raw, channel, pc_index, pc_coords, search_outcode=outcode)
if transformed: if transformed:
properties.append(transformed) properties.append(transformed)
zoopla_properties_scraped.labels(channel=channel_label).inc() zoopla_properties_scraped.labels(channel=channel_label).inc()

View file

@ -1,4 +1,4 @@
import { useState, useEffect, useCallback, useMemo } from 'react'; import { useState, useEffect, useCallback, useMemo, useRef } from 'react';
import MapPage, { type ExportState } from './components/map/MapPage'; import MapPage, { type ExportState } from './components/map/MapPage';
import PricingPage from './components/pricing/PricingPage'; import PricingPage from './components/pricing/PricingPage';
import HomePage from './components/home/HomePage'; import HomePage from './components/home/HomePage';
@ -67,9 +67,14 @@ function pathToPage(pathname: string): { page: Page; inviteCode?: string } | nul
export default function App() { export default function App() {
const urlState = useMemo(() => parseUrlState(), []); const urlState = useMemo(() => parseUrlState(), []);
const [mapUrlState, setMapUrlState] = useState(urlState);
const dashboardSearchRef = useRef(
window.location.pathname === '/dashboard' ? window.location.search : ''
);
const activePageRef = useRef<Page>('home');
const initialViewState = useMemo( const initialViewState = useMemo(
() => urlState.viewState || INITIAL_VIEW_STATE, () => mapUrlState.viewState || INITIAL_VIEW_STATE,
[urlState.viewState] [mapUrlState.viewState]
); );
const isScreenshotMode = useMemo(() => { const isScreenshotMode = useMemo(() => {
@ -179,17 +184,30 @@ export default function App() {
const navigateTo = useCallback( const navigateTo = useCallback(
(page: Page, hash?: string, infoFeature?: string) => { (page: Page, hash?: string, infoFeature?: string) => {
// Save dashboard search params before navigating away
if (activePageRef.current === 'dashboard') {
dashboardSearchRef.current = window.location.search;
}
if (infoFeature) { if (infoFeature) {
window.history.replaceState({ ...window.history.state, infoFeature }, ''); window.history.replaceState({ ...window.history.state, infoFeature }, '');
} }
const path = pageToPath(page, inviteCode ?? undefined); const path = pageToPath(page, inviteCode ?? undefined);
const url = hash ? `${path}#${hash}` : path; // Restore dashboard search params when navigating back
const search = page === 'dashboard' ? dashboardSearchRef.current : '';
const url = hash ? `${path}${search}#${hash}` : `${path}${search}`;
window.history.pushState({ page }, '', url); window.history.pushState({ page }, '', url);
if (page === 'dashboard') {
setMapUrlState(parseUrlState());
}
setActivePage(page); setActivePage(page);
}, },
[inviteCode] [inviteCode]
); );
useEffect(() => {
activePageRef.current = activePage;
}, [activePage]);
useEffect(() => { useEffect(() => {
if (!window.history.state?.page) { if (!window.history.state?.page) {
window.history.replaceState( window.history.replaceState(
@ -199,17 +217,24 @@ export default function App() {
); );
} }
const handlePopState = (e: PopStateEvent) => { const handlePopState = (e: PopStateEvent) => {
let page: Page;
if (e.state?.page) { if (e.state?.page) {
setActivePage(e.state.page); page = e.state.page;
setActivePage(page);
if (e.state.infoFeature) { if (e.state.infoFeature) {
setPendingInfoFeature(e.state.infoFeature); setPendingInfoFeature(e.state.infoFeature);
} }
} else { } else {
// Fall back to deriving page from pathname // Fall back to deriving page from pathname
const parsed = pathToPage(window.location.pathname); const parsed = pathToPage(window.location.pathname);
setActivePage(parsed?.page || 'home'); page = parsed?.page || 'home';
setActivePage(page);
if (parsed?.inviteCode) setInviteCode(parsed.inviteCode); if (parsed?.inviteCode) setInviteCode(parsed.inviteCode);
} }
// Re-parse URL state when returning to dashboard via back/forward
if (page === 'dashboard') {
setMapUrlState(parseUrlState());
}
}; };
window.addEventListener('popstate', handlePopState); window.addEventListener('popstate', handlePopState);
return () => window.removeEventListener('popstate', handlePopState); return () => window.removeEventListener('popstate', handlePopState);
@ -367,10 +392,10 @@ export default function App() {
<MapPage <MapPage
features={features} features={features}
poiCategoryGroups={poiCategoryGroups} poiCategoryGroups={poiCategoryGroups}
initialFilters={urlState.filters || { 'Listing status': ['Historical sale'] }} initialFilters={mapUrlState.filters || { 'Listing status': ['Historical sale'] }}
initialViewState={initialViewState} initialViewState={initialViewState}
initialPOICategories={urlState.poiCategories || new Set()} initialPOICategories={mapUrlState.poiCategories || new Set()}
initialTab={urlState.tab || 'area'} initialTab={mapUrlState.tab || 'area'}
initialLoading={initialLoading} initialLoading={initialLoading}
theme={theme} theme={theme}
pendingInfoFeature={pendingInfoFeature} pendingInfoFeature={pendingInfoFeature}
@ -378,8 +403,8 @@ export default function App() {
onNavigateTo={navigateTo} onNavigateTo={navigateTo}
onExportStateChange={setExportState} onExportStateChange={setExportState}
isMobile={isMobile} isMobile={isMobile}
initialTravelTime={urlState.travelTime} initialTravelTime={mapUrlState.travelTime}
initialPostcode={urlState.postcode} initialPostcode={mapUrlState.postcode}
user={user} user={user}
onLoginClick={() => { onLoginClick={() => {
setAuthModalTab('login'); setAuthModalTab('login');

View file

@ -567,6 +567,7 @@ export default function MapPage({
selectedCategories={selectedPOICategories} selectedCategories={selectedPOICategories}
onCategoriesChange={setSelectedPOICategories} onCategoriesChange={setSelectedPOICategories}
poiCount={pois.length} poiCount={pois.length}
onClose={() => setPoiPaneOpen(false)}
/> />
); );

View file

@ -6,7 +6,7 @@ import InfoPopup from '../ui/InfoPopup';
import { SearchInput } from '../ui/SearchInput'; import { SearchInput } from '../ui/SearchInput';
import { PillToggle } from '../ui/PillToggle'; import { PillToggle } from '../ui/PillToggle';
import { PillGroup } from '../ui/PillGroup'; import { PillGroup } from '../ui/PillGroup';
import { InfoIcon, ChevronIcon } from '../ui/icons'; import { InfoIcon, ChevronIcon, CloseIcon } from '../ui/icons';
import { IconButton } from '../ui/IconButton'; import { IconButton } from '../ui/IconButton';
interface POIPaneProps { interface POIPaneProps {
@ -15,6 +15,7 @@ interface POIPaneProps {
onCategoriesChange: (categories: Set<string>) => void; onCategoriesChange: (categories: Set<string>) => void;
poiCount: number; poiCount: number;
onNavigateToSource?: (slug: string) => void; onNavigateToSource?: (slug: string) => void;
onClose?: () => void;
} }
export default function POIPane({ export default function POIPane({
@ -23,6 +24,7 @@ export default function POIPane({
onCategoriesChange, onCategoriesChange,
poiCount: _poiCount, poiCount: _poiCount,
onNavigateToSource, onNavigateToSource,
onClose,
}: POIPaneProps) { }: POIPaneProps) {
const [searchTerm, setSearchTerm] = useState(''); const [searchTerm, setSearchTerm] = useState('');
const [isGroupExpanded, toggleCollapse] = useCollapsibleGroups(); const [isGroupExpanded, toggleCollapse] = useCollapsibleGroups();
@ -96,7 +98,7 @@ export default function POIPane({
<IconButton onClick={() => setShowInfo(true)} title="Data source info"> <IconButton onClick={() => setShowInfo(true)} title="Data source info">
<InfoIcon /> <InfoIcon />
</IconButton> </IconButton>
<div className="flex gap-1 ml-auto"> <div className="flex gap-1 ml-auto items-center">
<button <button
onClick={selectAll} onClick={selectAll}
className="px-2 py-0.5 text-xs rounded border border-warm-300 dark:border-warm-700 text-warm-600 dark:text-warm-400 hover:bg-warm-50 dark:hover:bg-warm-700" className="px-2 py-0.5 text-xs rounded border border-warm-300 dark:border-warm-700 text-warm-600 dark:text-warm-400 hover:bg-warm-50 dark:hover:bg-warm-700"
@ -109,6 +111,15 @@ export default function POIPane({
> >
None None
</button> </button>
{onClose && (
<button
onClick={onClose}
className="ml-1 p-0.5 text-warm-400 hover:text-warm-700 dark:hover:text-warm-300"
title="Close"
>
<CloseIcon className="w-4 h-4" />
</button>
)}
</div> </div>
</div> </div>

View file

@ -316,10 +316,12 @@ export function useDeckLayers({
number, number,
]; ];
} }
const ttMin = (d[`min_${vf}`] as number) ?? ttVal;
const ttMax = (d[`max_${vf}`] as number) ?? ttVal;
return getFeatureFillColor( return getFeatureFillColor(
ttVal as number, ttVal as number,
ttVal as number, ttMin as number,
ttVal as number, ttMax as number,
clr, clr,
fr, fr,
0, 0,
@ -417,10 +419,12 @@ export function useDeckLayers({
number, number,
]; ];
} }
const ttMin = (d[`min_${vf}`] as number) ?? ttVal;
const ttMax = (d[`max_${vf}`] as number) ?? ttVal;
return getFeatureFillColor( return getFeatureFillColor(
ttVal as number, ttVal as number,
ttVal as number, ttMin as number,
ttVal as number, ttMax as number,
clr, clr,
fr, fr,
0, 0,

View file

@ -7,22 +7,23 @@ import subprocess
import sys import sys
import tarfile import tarfile
import urllib.request import urllib.request
from datetime import datetime, timedelta from datetime import UTC, datetime, timedelta
from io import BytesIO from io import BytesIO
from pathlib import Path from pathlib import Path
PROTOMAPS_BASE = "https://build.protomaps.com" PROTOMAPS_BASE = "https://build.protomaps.com"
UK_BBOX = "-10.5,49,5,61" UK_BBOX = "-10.5,49,5,61"
MAX_AGE_DAYS = 14 MAX_AGE_DAYS = 14
USER_AGENT = "property-map-tiles/1.0"
def find_latest_build() -> str: def find_latest_build() -> str:
"""Find the most recent available Protomaps daily build.""" """Find the most recent available Protomaps daily build."""
today = datetime.utcnow().date() today = datetime.now(UTC).date()
for i in range(MAX_AGE_DAYS): for i in range(MAX_AGE_DAYS):
d = today - timedelta(days=i) d = today - timedelta(days=i)
url = f"{PROTOMAPS_BASE}/{d:%Y%m%d}.pmtiles" url = f"{PROTOMAPS_BASE}/{d:%Y%m%d}.pmtiles"
req = urllib.request.Request(url, method="HEAD") req = urllib.request.Request(url, method="HEAD", headers={"User-Agent": USER_AGENT})
try: try:
urllib.request.urlopen(req) urllib.request.urlopen(req)
print(f"Found build: {d:%Y%m%d}") print(f"Found build: {d:%Y%m%d}")

View file

@ -22,8 +22,8 @@ set -euo pipefail
# --demo only compute Bank + TCR, transit only (quick test) # --demo only compute Bank + TCR, transit only (quick test)
# --- Defaults --- # --- Defaults ---
THREADS=16 THREADS=12
HEAP=16g HEAP=24g
NETWORK_DIR=property-data/r5-network NETWORK_DIR=property-data/r5-network
OUTPUT_BASE=property-data/travel-times OUTPUT_BASE=property-data/travel-times
R5_DIR=r5-java R5_DIR=r5-java

View file

@ -175,8 +175,7 @@ fn execute_destination_search(state: &AppState, query: &str, mode: &str) -> Valu
.find_map(|(idx, name_lower)| { .find_map(|(idx, name_lower)| {
let words_match = query_words.iter().all(|word| name_lower.contains(word)); let words_match = query_words.iter().all(|word| name_lower.contains(word));
let slug = slugify(&pd.name[idx]); let slug = slugify(&pd.name[idx]);
let slug_match = let slug_match = slug.contains(&query_slug) || query_slug.contains(&slug);
slug.contains(&query_slug) || query_slug.contains(&slug);
if (words_match || slug_match) && pd.type_rank[idx] == 0 { if (words_match || slug_match) && pd.type_rank[idx] == 0 {
Some(pd.name[idx].as_str()) Some(pd.name[idx].as_str())
} else { } else {
@ -704,7 +703,7 @@ fn count_matching_rows(
let (pc_interner, pc_keys) = state.data.postcode_parts(); let (pc_interner, pc_keys) = state.data.postcode_parts();
let mut count = 0usize; let mut count = 0usize;
for row in 0..num_rows { for (row, pc_key) in pc_keys.iter().enumerate().take(num_rows) {
if !row_passes_filters( if !row_passes_filters(
row, row,
&parsed_filters, &parsed_filters,
@ -716,12 +715,11 @@ fn count_matching_rows(
} }
if has_travel { if has_travel {
let postcode = pc_interner.resolve(&pc_keys[row]); let postcode = pc_interner.resolve(pc_key);
let mut passes_travel = true; let mut passes_travel = true;
for (data, fmin, fmax) in &travel_data { for (data, fmin, fmax) in &travel_data {
let pass = if let Some(mins) = data.get(postcode).map(|r| r.minutes as f32) { let pass = if let Some(mins) = data.get(postcode).map(|r| r.minutes as f32) {
fmin.map_or(true, |min| mins >= min) fmin.is_none_or(|min| mins >= min) && fmax.is_none_or(|max| mins <= max)
&& fmax.map_or(true, |max| mins <= max)
} else { } else {
false // no travel data → postcode not reachable false // no travel data → postcode not reachable
}; };
@ -880,7 +878,12 @@ pub async fn post_ai_filters(
let fn_args = fc.get("args").cloned().unwrap_or(json!({})); let fn_args = fc.get("args").cloned().unwrap_or(json!({}));
tool_call_count += 1; tool_call_count += 1;
info!(function = fn_name, round = round, tool_call = tool_call_count, "AI called tool"); info!(
function = fn_name,
round = round,
tool_call = tool_call_count,
"AI called tool"
);
if tool_call_count > MAX_TOOL_CALLS { if tool_call_count > MAX_TOOL_CALLS {
warn!("Tool call budget exhausted, forcing text output"); warn!("Tool call budget exhausted, forcing text output");
@ -929,9 +932,15 @@ pub async fn post_ai_filters(
if text.is_empty() { if text.is_empty() {
retry_count += 1; retry_count += 1;
warn!("Gemini returned empty text content (round {}, retry {})", round, retry_count); warn!(
"Gemini returned empty text content (round {}, retry {})",
round, retry_count
);
if retry_count > MAX_RETRIES { if retry_count > MAX_RETRIES {
return Err((StatusCode::BAD_GATEWAY, "AI returned empty responses".into())); return Err((
StatusCode::BAD_GATEWAY,
"AI returned empty responses".into(),
));
} }
contents.push(candidate.clone()); contents.push(candidate.clone());
contents.push(json!({ contents.push(json!({
@ -988,7 +997,11 @@ pub async fn post_ai_filters(
// Count matching properties and refine if too restrictive // Count matching properties and refine if too restrictive
let match_count = count_matching_rows(&state, &filters, &travel_time_filters); let match_count = count_matching_rows(&state, &filters, &travel_time_filters);
info!(match_count = match_count, round = round, "AI filter match count"); info!(
match_count = match_count,
round = round,
"AI filter match count"
);
if match_count == 0 { if match_count == 0 {
refinement_attempts += 1; refinement_attempts += 1;
@ -1008,7 +1021,10 @@ pub async fn post_ai_filters(
let notes = if notes.is_empty() { let notes = if notes.is_empty() {
"No properties match these filters. Try relaxing some constraints.".to_string() "No properties match these filters. Try relaxing some constraints.".to_string()
} else { } else {
format!("{}. No properties match — try relaxing some constraints.", notes) format!(
"{}. No properties match — try relaxing some constraints.",
notes
)
}; };
return Ok(Json(AiFiltersResponse { return Ok(Json(AiFiltersResponse {
@ -1193,8 +1209,7 @@ fn validate_and_convert(raw: &Value, features: &FeaturesResponse, listing_type:
} => { } => {
// Only include features valid for the chosen listing mode // Only include features valid for the chosen listing mode
if modes.is_empty() || modes.contains(&listing_type) { if modes.is_empty() || modes.contains(&listing_type) {
numeric_features numeric_features.insert(name, (*min, *max, histogram.min, histogram.max));
.insert(name, (*min, *max, histogram.min, histogram.max));
} }
} }
FeatureInfo::Enum { name, values, .. } => { FeatureInfo::Enum { name, values, .. } => {
@ -1217,11 +1232,10 @@ fn validate_and_convert(raw: &Value, features: &FeaturesResponse, listing_type:
Some(name) => name, Some(name) => name,
None => continue, None => continue,
}; };
let (slider_min, slider_max, data_min, data_max) = let (slider_min, slider_max, data_min, data_max) = match numeric_features.get(name) {
match numeric_features.get(name) { Some(range) => *range,
Some(range) => *range, None => continue,
None => continue, };
};
let bound = match item.get("bound").and_then(|val| val.as_str()) { let bound = match item.get("bound").and_then(|val| val.as_str()) {
Some(b) => b, Some(b) => b,
None => continue, None => continue,

View file

@ -140,10 +140,7 @@ pub async fn get_short_url(
match params { match params {
Some(params) => { Some(params) => {
let redirect_url = format!("/dashboard?{params}"); let redirect_url = format!("/dashboard?{params}");
let og_image_url = format!( let og_image_url = format!("{}/api/screenshot?og=1&{params}", state.public_url);
"{}/api/screenshot?og=1&{params}",
state.public_url
);
let og_url = format!("{}/s/{code}", state.public_url); let og_url = format!("{}/s/{code}", state.public_url);
let og_title = "Perfect Postcode \u{2014} Every neighbourhood in England"; let og_title = "Perfect Postcode \u{2014} Every neighbourhood in England";
let og_description = "Explore property prices, energy ratings, crime stats, school ratings, and more across England on one interactive map."; let og_description = "Explore property prices, energy ratings, crime stats, school ratings, and more across England on one interactive map.";