good changes
This commit is contained in:
parent
160283f1a1
commit
c997ea46a5
26 changed files with 991 additions and 288 deletions
|
|
@ -44,11 +44,13 @@ PC_BOUNDARIES := $(MANUAL_DATA)/postcode_boundaries
|
|||
TRANSIT_DIR := $(DATA_DIR)/transit
|
||||
TRANSIT_STAMP := $(TRANSIT_DIR)/.done
|
||||
GREENSPACE := $(DATA_DIR)/greenspace_water.parquet
|
||||
OS_GREENSPACE := $(DATA_DIR)/os_greenspace.parquet
|
||||
PBF := $(DATA_DIR)/england-latest.osm.pbf
|
||||
PLACES := $(DATA_DIR)/places.parquet
|
||||
LISTINGS_BUY := $(DATA_DIR)/online_listings_buy.parquet
|
||||
LISTINGS_RENT := $(DATA_DIR)/online_listings_rent.parquet
|
||||
LSOA_POP := $(DATA_DIR)/lsoa_population.parquet
|
||||
MEDIAN_AGE := $(DATA_DIR)/median_age.parquet
|
||||
ENGLAND_BOUNDARY := $(DATA_DIR)/england_boundary.geojson
|
||||
RM_OUTCODES := frontend/src/lib/rightmove-outcodes.json
|
||||
|
||||
|
|
@ -62,10 +64,10 @@ PMTILES_VERSION := 1.22.3
|
|||
.PHONY: prepare merge tiles \
|
||||
download-arcgis download-price-paid download-deprivation download-ethnicity \
|
||||
download-naptan download-pois download-ofsted download-broadband download-rental-prices \
|
||||
download-postcodes download-geosure download-noise download-inspire \
|
||||
download-oa-boundaries download-uprn-lookup download-transit-network download-greenspace download-pbf download-places download-lsoa-population download-england-boundary download-rightmove-outcodes \
|
||||
download-postcodes download-noise download-inspire \
|
||||
download-oa-boundaries download-uprn-lookup download-transit-network download-greenspace download-os-greenspace download-pbf download-places download-lsoa-population download-median-age download-england-boundary download-rightmove-outcodes \
|
||||
transform-pois transform-epc-pp transform-crime transform-poi-proximity \
|
||||
transform-school-proximity transform-geosure transform-postcode-boundaries \
|
||||
transform-school-proximity transform-postcode-boundaries \
|
||||
generate-postcode-boundaries
|
||||
|
||||
prepare: $(PRICES_STAMP)
|
||||
|
|
@ -80,7 +82,6 @@ download-pois: $(POIS_RAW)
|
|||
download-ofsted: $(OFSTED)
|
||||
download-broadband: $(BROADBAND)
|
||||
download-postcodes: $(POSTCODES)
|
||||
download-geosure: $(GEOSURE_STAMP)
|
||||
download-rental-prices: $(RENTAL)
|
||||
download-noise: $(NOISE)
|
||||
download-inspire: $(INSPIRE_STAMP)
|
||||
|
|
@ -88,9 +89,11 @@ download-oa-boundaries: $(OA_BOUNDARIES)
|
|||
download-uprn-lookup: $(UPRN_LOOKUP)
|
||||
download-transit-network: $(TRANSIT_STAMP)
|
||||
download-greenspace: $(GREENSPACE)
|
||||
download-os-greenspace: $(OS_GREENSPACE)
|
||||
download-pbf: $(PBF)
|
||||
download-places: $(PLACES)
|
||||
download-lsoa-population: $(LSOA_POP)
|
||||
download-median-age: $(MEDIAN_AGE)
|
||||
download-england-boundary: $(ENGLAND_BOUNDARY)
|
||||
download-rightmove-outcodes: $(RM_OUTCODES)
|
||||
transform-pois: $(POIS_FILTERED)
|
||||
|
|
@ -98,7 +101,6 @@ transform-epc-pp: $(EPC_PP)
|
|||
transform-crime: $(CRIME)
|
||||
transform-poi-proximity: $(POI_PROXIMITY)
|
||||
transform-school-proximity: $(SCHOOL_PROX)
|
||||
transform-geosure: $(GEOSURE)
|
||||
transform-postcode-boundaries: $(PC_BOUNDARIES)
|
||||
generate-postcode-boundaries: $(OA_BOUNDARIES) $(INSPIRE_STAMP) $(UPRN_LOOKUP)
|
||||
uv run python -m pipeline.transform.postcode_boundaries \
|
||||
|
|
@ -155,10 +157,6 @@ $(BROADBAND):
|
|||
$(POSTCODES):
|
||||
uv run python -m pipeline.download.postcodes --output $@
|
||||
|
||||
$(GEOSURE_STAMP):
|
||||
uv run python -m pipeline.download.geosure --output $(GEOSURE_DIR)
|
||||
@touch $@
|
||||
|
||||
$(NOISE): $(ARCGIS)
|
||||
uv run python -m pipeline.download.noise --arcgis $(ARCGIS) --output $@
|
||||
|
||||
|
|
@ -182,12 +180,19 @@ $(RENTAL):
|
|||
$(GREENSPACE): $(PBF)
|
||||
uv run python -m pipeline.download.greenspace_water --output $@ --pbf $(PBF)
|
||||
|
||||
$(OS_GREENSPACE):
|
||||
uv run python -m pipeline.download.os_greenspace --output $@
|
||||
|
||||
$(PLACES): $(PBF) $(ENGLAND_BOUNDARY)
|
||||
uv run python -m pipeline.download.places --output $@ --pbf $(PBF) --boundary $(ENGLAND_BOUNDARY)
|
||||
|
||||
$(LSOA_POP):
|
||||
uv run python -m pipeline.download.lsoa_population --output $@
|
||||
|
||||
|
||||
$(MEDIAN_AGE):
|
||||
uv run python -m pipeline.download.median_age --output $@
|
||||
|
||||
$(ENGLAND_BOUNDARY):
|
||||
uv run python -m pipeline.download.england_boundary --output $@
|
||||
|
||||
|
|
@ -213,15 +218,12 @@ $(CRIME):
|
|||
fi
|
||||
uv run python -m pipeline.transform.crime --input $(CRIME_DIR) --output $@
|
||||
|
||||
$(POI_PROXIMITY): $(ARCGIS) $(POIS_FILTERED)
|
||||
uv run python -m pipeline.transform.poi_proximity --arcgis $(ARCGIS) --pois $(POIS_FILTERED) --output $@
|
||||
$(POI_PROXIMITY): $(ARCGIS) $(POIS_FILTERED) $(OS_GREENSPACE)
|
||||
uv run python -m pipeline.transform.poi_proximity --arcgis $(ARCGIS) --pois $(POIS_FILTERED) --greenspace $(OS_GREENSPACE) --output $@
|
||||
|
||||
$(SCHOOL_PROX): $(OFSTED) $(ARCGIS)
|
||||
uv run python -m pipeline.transform.school_proximity --ofsted $(OFSTED) --arcgis $(ARCGIS) --output $@
|
||||
|
||||
$(GEOSURE): $(GEOSURE_STAMP) $(ARCGIS)
|
||||
uv run python -m pipeline.transform.transform_geosure --geosure $(GEOSURE_DIR) --arcgis $(ARCGIS) --output $@
|
||||
|
||||
# Postcode boundaries require manual generation — fail with instructions
|
||||
$(PC_BOUNDARIES):
|
||||
@echo ""
|
||||
|
|
@ -240,7 +242,7 @@ $(PC_BOUNDARIES):
|
|||
# ── Final merge → postcode.parquet + properties.parquet ──────────────────────
|
||||
|
||||
$(MERGE_STAMP): $(EPC_PP) $(ARCGIS) $(IOD) $(POI_PROXIMITY) \
|
||||
$(ETHNICITY) $(CRIME) $(NOISE) $(SCHOOL_PROX) $(BROADBAND) $(GEOSURE) $(RENTAL) $(LSOA_POP)
|
||||
$(ETHNICITY) $(CRIME) $(NOISE) $(SCHOOL_PROX) $(BROADBAND) $(RENTAL) $(LSOA_POP) $(MEDIAN_AGE)
|
||||
uv run python -m pipeline.transform.merge \
|
||||
--epc-pp $(EPC_PP) \
|
||||
--arcgis $(ARCGIS) \
|
||||
|
|
@ -251,9 +253,9 @@ $(MERGE_STAMP): $(EPC_PP) $(ARCGIS) $(IOD) $(POI_PROXIMITY) \
|
|||
--noise $(NOISE) \
|
||||
--school-proximity $(SCHOOL_PROX) \
|
||||
--broadband $(BROADBAND) \
|
||||
--geosure $(GEOSURE) \
|
||||
--rental-prices $(RENTAL) \
|
||||
--lsoa-population $(LSOA_POP) \
|
||||
--median-age $(MEDIAN_AGE) \
|
||||
--output-postcodes $(POSTCODES_PQ) \
|
||||
--output-properties $(PROPERTIES_PQ)
|
||||
@touch $@
|
||||
|
|
|
|||
|
|
@ -69,6 +69,14 @@ const DATA_SOURCES = [
|
|||
url: 'https://download.geofabrik.de/europe/great-britain-latest.osm.pbf',
|
||||
license: 'Open Data Commons Open Database License (ODbL)',
|
||||
},
|
||||
{
|
||||
id: 'os-open-greenspace',
|
||||
name: 'OS Open Greenspace',
|
||||
origin: 'Ordnance Survey',
|
||||
use: 'Authoritative green space boundaries for Great Britain, including public parks, gardens, playing fields, and play spaces. Polygon centroids are used for park proximity counts and distance-to-nearest-park calculations.',
|
||||
url: 'https://osdatahub.os.uk/downloads/open/OpenGreenspace',
|
||||
license: 'Open Government Licence v3.0',
|
||||
},
|
||||
{
|
||||
id: 'naptan',
|
||||
name: 'NaPTAN (Public Transport Stops)',
|
||||
|
|
@ -101,14 +109,6 @@ const DATA_SOURCES = [
|
|||
url: 'https://www.ofcom.org.uk/phones-and-broadband/coverage-and-speeds/connected-nations-20252/data-downloads-2025',
|
||||
license: 'Open Government Licence v3.0',
|
||||
},
|
||||
{
|
||||
id: 'geosure',
|
||||
name: 'GeoSure Ground Stability',
|
||||
origin: 'Ordnance Survey',
|
||||
use: 'Ground stability hazard ratings on a 5km hex grid covering Great Britain. Six risk categories (collapsible deposits, compressible ground, landslides, running sand, shrink-swell, and soluble rocks) rated Low, Moderate, or Significant. Spatial-joined to postcodes via centroid intersection.',
|
||||
url: 'https://osdatahub.os.uk/downloads/open/GeoSure',
|
||||
license: 'Open Government Licence v3.0',
|
||||
},
|
||||
{
|
||||
id: 'council-tax',
|
||||
name: 'Council Tax Levels 2025-26',
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ const LOADING_MESSAGES = [
|
|||
'Analysing your query...',
|
||||
'Searching for destinations...',
|
||||
'Generating filters...',
|
||||
'Refining results...',
|
||||
];
|
||||
|
||||
/** Cycle through loading messages to show progress. */
|
||||
|
|
@ -28,9 +29,11 @@ function useLoadingMessage(loading: boolean): string {
|
|||
// Advance the message at 1.5s, 3.5s and 5.5s
|
||||
timerRef.current = setTimeout(() => setIndex(1), 1500);
|
||||
const t2 = setTimeout(() => setIndex(2), 3500);
|
||||
const t3 = setTimeout(() => setIndex(3), 5500);
|
||||
return () => {
|
||||
clearTimeout(timerRef.current);
|
||||
clearTimeout(t2);
|
||||
clearTimeout(t3);
|
||||
};
|
||||
}, [loading]);
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,4 @@
|
|||
import { useMemo, useState } from 'react';
|
||||
import { useCollapsibleGroups } from '../../hooks/useCollapsibleGroups';
|
||||
import type {
|
||||
FeatureFilters,
|
||||
FeatureMeta,
|
||||
|
|
@ -38,6 +37,8 @@ interface AreaPaneProps {
|
|||
filters: FeatureFilters;
|
||||
onNavigateToSource?: (slug: string, featureName: string) => void;
|
||||
travelTimeEntries?: TravelTimeEntry[];
|
||||
isGroupExpanded: (name: string) => boolean;
|
||||
onToggleGroup: (name: string) => void;
|
||||
}
|
||||
|
||||
export default function AreaPane({
|
||||
|
|
@ -52,11 +53,12 @@ export default function AreaPane({
|
|||
filters,
|
||||
onNavigateToSource,
|
||||
travelTimeEntries,
|
||||
isGroupExpanded,
|
||||
onToggleGroup,
|
||||
}: AreaPaneProps) {
|
||||
const propertyCount = isPostcode && postcodeData ? postcodeData.properties.count : stats?.count;
|
||||
const featureGroups = useMemo(() => groupFeaturesByCategory(globalFeatures), [globalFeatures]);
|
||||
const [infoFeature, setInfoFeature] = useState<FeatureMeta | null>(null);
|
||||
const [collapsedGroups, toggleGroup] = useCollapsibleGroups();
|
||||
|
||||
const numericByName = useMemo(() => {
|
||||
if (!stats) return new Map();
|
||||
|
|
@ -165,17 +167,17 @@ export default function AreaPane({
|
|||
) ?? []
|
||||
);
|
||||
|
||||
const isExpanded = !collapsedGroups.has(group.name);
|
||||
const expanded = isGroupExpanded(group.name);
|
||||
|
||||
return (
|
||||
<div key={group.name}>
|
||||
<CollapsibleGroupHeader
|
||||
name={group.name}
|
||||
expanded={isExpanded}
|
||||
onToggle={() => toggleGroup(group.name)}
|
||||
expanded={expanded}
|
||||
onToggle={() => onToggleGroup(group.name)}
|
||||
className="px-3 py-2.5 text-sm font-bold text-warm-500 bg-warm-50 dark:bg-warm-900 dark:text-warm-400 sticky top-0 z-10 hover:bg-warm-100 dark:hover:bg-warm-800"
|
||||
/>
|
||||
{isExpanded && (
|
||||
{expanded && (
|
||||
<div className="px-3 py-2 space-y-3">
|
||||
{stackedCharts
|
||||
? stackedCharts.map((chart) => {
|
||||
|
|
|
|||
|
|
@ -54,7 +54,7 @@ export default function FeatureBrowser({
|
|||
const [search, setSearch] = useState('');
|
||||
const [infoFeature, setInfoFeature] = useState<FeatureMeta | null>(null);
|
||||
const [travelInfoMode, setTravelInfoMode] = useState<TransportMode | null>(null);
|
||||
const [expandedGroups, toggleGroup] = useCollapsibleGroups();
|
||||
const [isGroupExpanded, toggleGroup] = useCollapsibleGroups(true);
|
||||
const availableTravelModes = useTravelModes();
|
||||
|
||||
useEffect(() => {
|
||||
|
|
@ -106,7 +106,7 @@ export default function FeatureBrowser({
|
|||
</div>
|
||||
<div className="md:min-h-0 md:flex-1 md:overflow-y-auto flex flex-col">
|
||||
{mergedGrouped.map((group) => {
|
||||
const isExpanded = isSearching || expandedGroups.has(group.name);
|
||||
const isExpanded = isSearching || isGroupExpanded(group.name);
|
||||
return (
|
||||
<div key={group.name} className="shrink-0">
|
||||
<CollapsibleGroupHeader
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ import { usePOIData } from '../../hooks/usePOIData';
|
|||
import { useFilters } from '../../hooks/useFilters';
|
||||
import { useHexagonSelection } from '../../hooks/useHexagonSelection';
|
||||
import { usePaneResize } from '../../hooks/usePaneResize';
|
||||
import { useCollapsibleGroups } from '../../hooks/useCollapsibleGroups';
|
||||
import { useAiFilters } from '../../hooks/useAiFilters';
|
||||
import { useUrlSync } from '../../hooks/useUrlSync';
|
||||
import { useTutorial } from '../../hooks/useTutorial';
|
||||
|
|
@ -274,6 +275,7 @@ export default function MapPage({
|
|||
}, []);
|
||||
|
||||
const pois = usePOIData(mapData.bounds, selectedPOICategories);
|
||||
const [isAreaGroupExpanded, toggleAreaGroup] = useCollapsibleGroups(true);
|
||||
|
||||
useUrlSync(
|
||||
mapData.currentView,
|
||||
|
|
@ -528,6 +530,8 @@ export default function MapPage({
|
|||
hexagonLocation={hexagonLocation}
|
||||
filters={filters}
|
||||
travelTimeEntries={travelTime.activeEntries}
|
||||
isGroupExpanded={isAreaGroupExpanded}
|
||||
onToggleGroup={toggleAreaGroup}
|
||||
/>
|
||||
);
|
||||
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@ export default function POIPane({
|
|||
onNavigateToSource,
|
||||
}: POIPaneProps) {
|
||||
const [searchTerm, setSearchTerm] = useState('');
|
||||
const [collapsedGroups, toggleCollapse] = useCollapsibleGroups();
|
||||
const [isGroupExpanded, toggleCollapse] = useCollapsibleGroups();
|
||||
const [showInfo, setShowInfo] = useState(false);
|
||||
|
||||
const allCategories = groups.flatMap((g) => g.categories);
|
||||
|
|
@ -150,7 +150,7 @@ export default function POIPane({
|
|||
const groupSelected = group.categories.filter((c) => selectedCategories.has(c)).length;
|
||||
const allInGroupSelected = groupSelected === group.categories.length;
|
||||
const someInGroupSelected = groupSelected > 0 && !allInGroupSelected;
|
||||
const isCollapsed = collapsedGroups.has(group.name) && !searchTerm;
|
||||
const isCollapsed = !isGroupExpanded(group.name) && !searchTerm;
|
||||
|
||||
return (
|
||||
<div key={group.name}>
|
||||
|
|
|
|||
|
|
@ -169,15 +169,6 @@ export default function Header({
|
|||
Pricing
|
||||
</a>
|
||||
)}
|
||||
{user && (
|
||||
<a
|
||||
href={PAGE_PATHS.saved}
|
||||
className={tabClass('saved')}
|
||||
onClick={(e) => navLink('saved', e)}
|
||||
>
|
||||
Saved
|
||||
</a>
|
||||
)}
|
||||
</nav>
|
||||
)}
|
||||
</div>
|
||||
|
|
@ -187,20 +178,6 @@ export default function Header({
|
|||
{/* Desktop-only dashboard actions */}
|
||||
{!isMobile && activePage === 'dashboard' && (
|
||||
<>
|
||||
{onSaveSearch && (
|
||||
<button
|
||||
onClick={onSaveSearch}
|
||||
disabled={savingSearch}
|
||||
className="flex items-center gap-1.5 px-3 py-1.5 rounded bg-navy-800 hover:bg-navy-700 transition-colors text-sm disabled:opacity-50 disabled:cursor-wait"
|
||||
>
|
||||
{savingSearch ? (
|
||||
<SpinnerIcon className="w-4 h-4 animate-spin" />
|
||||
) : (
|
||||
<BookmarkIcon className="w-4 h-4" />
|
||||
)}
|
||||
Save
|
||||
</button>
|
||||
)}
|
||||
<button
|
||||
onClick={handleShare}
|
||||
disabled={sharing}
|
||||
|
|
@ -232,8 +209,31 @@ export default function Header({
|
|||
<DownloadIcon className="w-4 h-4" />
|
||||
{exporting ? 'Exporting...' : 'Export'}
|
||||
</button>
|
||||
{onSaveSearch && (
|
||||
<button
|
||||
onClick={onSaveSearch}
|
||||
disabled={savingSearch}
|
||||
className="flex items-center gap-1.5 px-3 py-1.5 rounded bg-navy-800 hover:bg-navy-700 transition-colors text-sm disabled:opacity-50 disabled:cursor-wait"
|
||||
>
|
||||
{savingSearch ? (
|
||||
<SpinnerIcon className="w-4 h-4 animate-spin" />
|
||||
) : (
|
||||
<BookmarkIcon className="w-4 h-4" />
|
||||
)}
|
||||
Save
|
||||
</button>
|
||||
)}
|
||||
</>
|
||||
)}
|
||||
{!isMobile && user && (
|
||||
<a
|
||||
href={PAGE_PATHS.saved}
|
||||
className={tabClass('saved')}
|
||||
onClick={(e) => navLink('saved', e)}
|
||||
>
|
||||
Saved
|
||||
</a>
|
||||
)}
|
||||
|
||||
{/* Desktop-only auth */}
|
||||
{!isMobile && (
|
||||
|
|
|
|||
|
|
@ -90,28 +90,10 @@ export default function MobileMenu({
|
|||
mobileNavItem('pricing', 'Pricing')}
|
||||
{user && mobileNavItem('invites', 'Invite Friends')}
|
||||
{user && mobileNavItem('account', 'Account')}
|
||||
{user && mobileNavItem('saved', 'Saved')}
|
||||
|
||||
{/* Dashboard actions */}
|
||||
{activePage === 'dashboard' && (
|
||||
<div className="mt-3 pt-3 border-t border-navy-700 flex flex-col gap-1">
|
||||
{onSaveSearch && (
|
||||
<button
|
||||
onClick={() => {
|
||||
onSaveSearch();
|
||||
onClose();
|
||||
}}
|
||||
disabled={savingSearch}
|
||||
className="w-full flex items-center gap-2 px-4 py-3 text-base text-warm-300 hover:bg-navy-800 hover:text-white rounded disabled:opacity-50"
|
||||
>
|
||||
{savingSearch ? (
|
||||
<SpinnerIcon className="w-5 h-5 animate-spin" />
|
||||
) : (
|
||||
<BookmarkIcon className="w-5 h-5" />
|
||||
)}
|
||||
Save
|
||||
</button>
|
||||
)}
|
||||
<button
|
||||
onClick={() => {
|
||||
onShare();
|
||||
|
|
@ -133,8 +115,27 @@ export default function MobileMenu({
|
|||
<DownloadIcon className="w-5 h-5" />
|
||||
{exporting ? 'Exporting...' : 'Export'}
|
||||
</button>
|
||||
{onSaveSearch && (
|
||||
<button
|
||||
onClick={() => {
|
||||
onSaveSearch();
|
||||
onClose();
|
||||
}}
|
||||
disabled={savingSearch}
|
||||
className="w-full flex items-center gap-2 px-4 py-3 text-base text-warm-300 hover:bg-navy-800 hover:text-white rounded disabled:opacity-50"
|
||||
>
|
||||
{savingSearch ? (
|
||||
<SpinnerIcon className="w-5 h-5 animate-spin" />
|
||||
) : (
|
||||
<BookmarkIcon className="w-5 h-5" />
|
||||
)}
|
||||
Save
|
||||
</button>
|
||||
)}
|
||||
{user && mobileNavItem('saved', 'Saved')}
|
||||
</div>
|
||||
)}
|
||||
{activePage !== 'dashboard' && user && mobileNavItem('saved', 'Saved')}
|
||||
</nav>
|
||||
|
||||
{/* Theme toggle + Auth section at bottom */}
|
||||
|
|
|
|||
|
|
@ -19,6 +19,8 @@ export interface AiFiltersResult {
|
|||
summary: string;
|
||||
/** The listing mode used (historical/buy/rent) */
|
||||
listingType: string;
|
||||
/** Number of properties matching the proposed filters (excludes travel time) */
|
||||
matchCount: number;
|
||||
}
|
||||
|
||||
export type AiFilterErrorType = 'auth' | 'limit' | 'error';
|
||||
|
|
@ -43,7 +45,11 @@ interface UseAiFiltersResult {
|
|||
}
|
||||
|
||||
/** Build a human-readable summary of the AI result. */
|
||||
function buildSummary(filters: FeatureFilters, travelTimeFilters: AiTravelTimeFilter[]): string {
|
||||
function buildSummary(
|
||||
filters: FeatureFilters,
|
||||
travelTimeFilters: AiTravelTimeFilter[],
|
||||
matchCount: number
|
||||
): string {
|
||||
const parts: string[] = [];
|
||||
|
||||
for (const [name, value] of Object.entries(filters)) {
|
||||
|
|
@ -63,7 +69,8 @@ function buildSummary(filters: FeatureFilters, travelTimeFilters: AiTravelTimeFi
|
|||
}
|
||||
|
||||
if (parts.length === 0) return 'No filters set';
|
||||
return `Set ${parts.length} filter${parts.length > 1 ? 's' : ''}: ${parts.join(', ')}`;
|
||||
const countStr = matchCount.toLocaleString();
|
||||
return `${countStr} properties match · Set ${parts.length} filter${parts.length > 1 ? 's' : ''}: ${parts.join(', ')}`;
|
||||
}
|
||||
|
||||
export function useAiFilters(): UseAiFiltersResult {
|
||||
|
|
@ -137,13 +144,15 @@ export function useAiFilters(): UseAiFiltersResult {
|
|||
})
|
||||
);
|
||||
const filters = json.filters as FeatureFilters;
|
||||
const summaryText = buildSummary(filters, travelTimeFilters);
|
||||
const matchCount: number = json.match_count ?? 0;
|
||||
const summaryText = buildSummary(filters, travelTimeFilters, matchCount);
|
||||
const result: AiFiltersResult = {
|
||||
filters,
|
||||
travelTimeFilters,
|
||||
notes: json.notes || '',
|
||||
summary: summaryText,
|
||||
listingType: json.listing_type || 'historical',
|
||||
matchCount,
|
||||
};
|
||||
setNotes(result.notes || null);
|
||||
setSummary(summaryText);
|
||||
|
|
|
|||
|
|
@ -1,14 +1,24 @@
|
|||
import { useState, useCallback } from 'react';
|
||||
|
||||
export function useCollapsibleGroups(): [
|
||||
Set<string>,
|
||||
/**
|
||||
* Manages collapsible group state.
|
||||
* @param defaultCollapsed When true, groups start collapsed (tracks expanded groups).
|
||||
* When false (default), groups start expanded (tracks collapsed groups).
|
||||
*/
|
||||
export function useCollapsibleGroups(defaultCollapsed = false): [
|
||||
(name: string) => boolean,
|
||||
(name: string) => void,
|
||||
(name: string) => void,
|
||||
] {
|
||||
const [collapsed, setCollapsed] = useState<Set<string>>(new Set());
|
||||
const [toggled, setToggled] = useState<Set<string>>(new Set());
|
||||
|
||||
const isExpanded = useCallback(
|
||||
(name: string) => (defaultCollapsed ? toggled.has(name) : !toggled.has(name)),
|
||||
[toggled, defaultCollapsed]
|
||||
);
|
||||
|
||||
const toggle = useCallback((name: string) => {
|
||||
setCollapsed((prev) => {
|
||||
setToggled((prev) => {
|
||||
const next = new Set(prev);
|
||||
if (next.has(name)) next.delete(name);
|
||||
else next.add(name);
|
||||
|
|
@ -16,14 +26,24 @@ export function useCollapsibleGroups(): [
|
|||
});
|
||||
}, []);
|
||||
|
||||
const expand = useCallback((name: string) => {
|
||||
setCollapsed((prev) => {
|
||||
if (!prev.has(name)) return prev;
|
||||
const next = new Set(prev);
|
||||
next.delete(name);
|
||||
return next;
|
||||
});
|
||||
}, []);
|
||||
const expand = useCallback(
|
||||
(name: string) => {
|
||||
setToggled((prev) => {
|
||||
if (defaultCollapsed) {
|
||||
if (prev.has(name)) return prev;
|
||||
const next = new Set(prev);
|
||||
next.add(name);
|
||||
return next;
|
||||
} else {
|
||||
if (!prev.has(name)) return prev;
|
||||
const next = new Set(prev);
|
||||
next.delete(name);
|
||||
return next;
|
||||
}
|
||||
});
|
||||
},
|
||||
[defaultCollapsed]
|
||||
);
|
||||
|
||||
return [collapsed, toggle, expand];
|
||||
return [isExpanded, toggle, expand];
|
||||
}
|
||||
|
|
|
|||
|
|
@ -321,7 +321,7 @@ export function useDeckLayers({
|
|||
ttVal as number,
|
||||
ttVal as number,
|
||||
clr,
|
||||
null,
|
||||
fr,
|
||||
0,
|
||||
densityGradientRef.current,
|
||||
dark,
|
||||
|
|
@ -422,7 +422,7 @@ export function useDeckLayers({
|
|||
ttVal as number,
|
||||
ttVal as number,
|
||||
clr,
|
||||
null,
|
||||
fr,
|
||||
0,
|
||||
densityGradientRef.current,
|
||||
dark,
|
||||
|
|
|
|||
|
|
@ -119,11 +119,14 @@ export function useMapData({
|
|||
const boundsStr = `${bounds.south},${bounds.west},${bounds.north},${bounds.east}`;
|
||||
const isTravelTimeDrag = activeFeature.startsWith('tt_');
|
||||
const dragTravelParam = isTravelTimeDrag ? buildTravelParam(activeFeature) : travelParam;
|
||||
// Travel time fields are computed from the travel param, not regular feature columns.
|
||||
// Sending a tt_* name as fields would cause a 400 (unknown field). Use empty string instead.
|
||||
const fieldsParam = isTravelTimeDrag ? '' : activeFeature;
|
||||
|
||||
if (usePostcodeView) {
|
||||
const params = new URLSearchParams({ bounds: boundsStr });
|
||||
if (filtersStr) params.set('filters', filtersStr);
|
||||
params.set('fields', activeFeature);
|
||||
params.set('fields', fieldsParam);
|
||||
if (dragTravelParam) params.set('travel', dragTravelParam);
|
||||
|
||||
fetch(apiUrl('postcodes', params), authHeaders({ signal: dragAbortRef.current.signal }))
|
||||
|
|
@ -140,7 +143,7 @@ export function useMapData({
|
|||
bounds: boundsStr,
|
||||
});
|
||||
if (filtersStr) params.set('filters', filtersStr);
|
||||
params.set('fields', activeFeature);
|
||||
params.set('fields', fieldsParam);
|
||||
if (dragTravelParam) params.set('travel', dragTravelParam);
|
||||
|
||||
fetch(apiUrl('hexagons', params), authHeaders({ signal: dragAbortRef.current.signal }))
|
||||
|
|
|
|||
|
|
@ -189,22 +189,7 @@ export const STACKED_ENUM_GROUPS: Record<
|
|||
valueColors: ['#3b82f6', '#f59e0b'],
|
||||
},
|
||||
],
|
||||
Environment: [
|
||||
{
|
||||
label: 'Ground Risk',
|
||||
feature: 'Environmental risk',
|
||||
components: [
|
||||
'Collapsible deposits risk',
|
||||
'Compressible ground risk',
|
||||
'Landslide risk',
|
||||
'Running sand risk',
|
||||
'Shrink-swell risk',
|
||||
'Soluble rocks risk',
|
||||
],
|
||||
valueOrder: ['Low', 'Moderate', 'Significant'],
|
||||
valueColors: ['#22c55e', '#eab308', '#ef4444'],
|
||||
},
|
||||
],
|
||||
Environment: [],
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -443,52 +443,6 @@ const FEATURE_ICON_PATHS: Record<string, ReactNode> = {
|
|||
<line x1="12" y1="20" x2="12.01" y2="20" />
|
||||
</>
|
||||
),
|
||||
'Environmental risk': (
|
||||
<>
|
||||
<path d="M10.29 3.86L1.82 18a2 2 0 001.71 3h16.94a2 2 0 001.71-3L13.71 3.86a2 2 0 00-3.42 0z" />
|
||||
<line x1="12" y1="9" x2="12" y2="13" />
|
||||
<line x1="12" y1="17" x2="12.01" y2="17" />
|
||||
</>
|
||||
),
|
||||
'Collapsible deposits risk': (
|
||||
<>
|
||||
<polyline points="12 2 2 7 12 12 22 7 12 2" />
|
||||
<polyline points="2 17 12 22 22 17" />
|
||||
<polyline points="2 12 12 17 22 12" />
|
||||
</>
|
||||
),
|
||||
'Compressible ground risk': (
|
||||
<>
|
||||
<line x1="12" y1="2" x2="12" y2="22" />
|
||||
<polyline points="16 6 12 2 8 6" />
|
||||
<polyline points="16 18 12 22 8 18" />
|
||||
<line x1="4" y1="12" x2="20" y2="12" />
|
||||
</>
|
||||
),
|
||||
'Landslide risk': (
|
||||
<>
|
||||
<path d="M8 3l4 8 5-5 5 15H2L8 3z" />
|
||||
</>
|
||||
),
|
||||
'Running sand risk': (
|
||||
<>
|
||||
<path d="M2 6c2-1 4-1 6 0s4 1 6 0 4-1 6 0" />
|
||||
<path d="M2 12c2-1 4-1 6 0s4 1 6 0 4-1 6 0" />
|
||||
<path d="M2 18c2-1 4-1 6 0s4 1 6 0 4-1 6 0" />
|
||||
</>
|
||||
),
|
||||
'Shrink-swell risk': (
|
||||
<>
|
||||
<line x1="2" y1="12" x2="22" y2="12" />
|
||||
<polyline points="6 8 2 12 6 16" />
|
||||
<polyline points="18 8 22 12 18 16" />
|
||||
</>
|
||||
),
|
||||
'Soluble rocks risk': (
|
||||
<>
|
||||
<path d="M12 2.69l5.66 5.66a8 8 0 11-11.31 0z" />
|
||||
</>
|
||||
),
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -186,5 +186,13 @@ export function summarizeParams(queryString: string): string {
|
|||
}
|
||||
}
|
||||
|
||||
const ttParams = params.getAll('tt');
|
||||
if (ttParams.length > 0) {
|
||||
const count = ttParams.filter(Boolean).length;
|
||||
if (count > 0) {
|
||||
parts.push(`${count} travel time ${count === 1 ? 'destination' : 'destinations'}`);
|
||||
}
|
||||
}
|
||||
|
||||
return parts.length > 0 ? parts.join(' + ') : 'No filters';
|
||||
}
|
||||
|
|
|
|||
269
pipeline/check_travel_times.py
Normal file
269
pipeline/check_travel_times.py
Normal file
|
|
@ -0,0 +1,269 @@
|
|||
"""Find corrupted and duplicate travel-time parquet files.
|
||||
|
||||
A travel-time parquet file is considered corrupted when the R5 routing
|
||||
computation failed or was interrupted, leaving either zero rows or only
|
||||
the origin postcode. We detect this by comparing each file's row count
|
||||
against a per-mode threshold derived from the 5th-percentile of all files
|
||||
in that mode. Files at or below 1 row are always flagged.
|
||||
|
||||
Duplicates arise when places.parquet is rebuilt between R5 runs — each
|
||||
place gets a new numeric index prefix, so the skip-completed logic
|
||||
doesn't recognize previous results. --dedup keeps only the largest
|
||||
file per slug and removes the rest.
|
||||
|
||||
Usage:
|
||||
uv run python pipeline/check_travel_times.py [--travel-times property-data/travel-times]
|
||||
[--threshold-pct 5]
|
||||
[--delete]
|
||||
[--dedup]
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
import polars as pl
|
||||
|
||||
|
||||
@dataclass
class BadFile:
    """A travel-time parquet file flagged as corrupted or as a duplicate."""

    # Name of the mode subdirectory the file lives in.
    mode: str
    # File name inside the mode directory, including the ".parquet" extension.
    filename: str
    # File name minus ".parquet" and minus any leading all-digit index prefix.
    slug: str
    # Row count of the file; -1 when the file could not be read.
    rows: int
|
||||
|
||||
|
||||
def scan_mode(mode_dir: Path, mode: str) -> tuple[list[tuple[str, str, int]], int]:
    """Count the rows of every ``.parquet`` file directly inside ``mode_dir``.

    Args:
        mode_dir: Directory holding one mode's parquet files.
        mode: Mode name; used only to label error messages.

    Returns:
        ``(entries, errors)``. ``entries`` is a list of
        ``(filename, slug, row_count)`` tuples ordered by filename, and
        ``errors`` is the number of files that could not be read. Unreadable
        files are still listed, with a row count of ``-1``.
    """
    suffix = ".parquet"
    results: list[tuple[str, str, int]] = []
    failed = 0

    for name in sorted(os.listdir(mode_dir)):
        if not name.endswith(suffix):
            continue

        # Drop the extension, then a leading all-digit index if there is one
        # (e.g. "000699-london-bridge" -> "london-bridge").
        slug = name.removesuffix(suffix)
        head, dash, tail = slug.partition("-")
        if dash and head.isdigit():
            slug = tail

        target = mode_dir / name
        try:
            count = pl.scan_parquet(target).select(pl.len()).collect().item()
        except Exception as exc:
            # Best effort: report the failure and keep scanning the rest.
            print(f" ERROR reading {mode}/{name}: {exc}", file=sys.stderr)
            failed += 1
            count = -1

        results.append((name, slug, count))

    return results, failed
|
||||
|
||||
|
||||
def percentile(values: list[int], pct: float) -> float:
    """Return the ``pct``-th percentile of ``values`` by linear interpolation.

    The input does not need to be sorted; a sorted copy is made internally.

    Args:
        values: Sample values.
        pct: Percentile to compute, expressed on a 0-100 scale.

    Returns:
        The interpolated percentile, or ``0.0`` when ``values`` is empty.
    """
    if not values:
        return 0.0

    ordered = sorted(values)
    last = len(ordered) - 1

    # Fractional rank of the requested percentile within the sorted sample.
    position = (pct / 100) * last
    below = int(position)
    above = below + 1 if below < last else last
    weight = position - below

    return ordered[below] + weight * (ordered[above] - ordered[below])
|
||||
|
||||
|
||||
def find_bad_files(
    base_dir: Path, threshold_pct: float
) -> tuple[list[BadFile], dict[str, dict]]:
    """Scan every mode directory and flag suspiciously small or unreadable files.

    For each subdirectory of ``base_dir`` a row-count threshold is computed as
    ``max(1, int(percentile(row_counts, threshold_pct)))``. Every file whose
    row count is at or below that threshold is flagged. Unreadable files carry
    a row count of ``-1`` and are therefore always flagged.

    Args:
        base_dir: Directory containing one subdirectory per mode.
        threshold_pct: Percentile (0-100) of readable row counts used as the
            per-mode threshold.

    Returns:
        ``(bad, stats)``. ``bad`` lists every flagged file. ``stats`` maps each
        mode that has at least one parquet file to a dict with the keys
        ``total``, ``errors``, ``bad``, ``threshold``, ``p5``, ``median``,
        ``min`` and ``max``. For a mode in which no file could be read, the
        percentile figures and ``min``/``max`` are reported as 0.
    """
    bad: list[BadFile] = []
    stats: dict[str, dict] = {}

    modes = sorted(d for d in os.listdir(base_dir) if (base_dir / d).is_dir())

    for mode in modes:
        entries, errors = scan_mode(base_dir / mode, mode)
        if not entries:
            continue

        # Unreadable files (row count -1) are kept out of the statistics.
        row_counts = [r for _, _, r in entries if r >= 0]

        # A mode in which *every* file is unreadable used to be skipped here,
        # so its files were never reported or deleted. percentile() returns
        # 0.0 for an empty list, which gives threshold 1 and flags them all.
        p5 = percentile(row_counts, threshold_pct)
        median = percentile(row_counts, 50)

        # Threshold: max of 1 and the chosen percentile. This always catches
        # files with 0-1 rows even when the percentile itself is 0.
        threshold = max(1, int(p5))

        mode_bad = [
            BadFile(mode=mode, filename=filename, slug=slug, rows=rows)
            for filename, slug, rows in entries
            if rows <= threshold
        ]
        bad.extend(mode_bad)

        stats[mode] = {
            "total": len(entries),
            "errors": errors,
            "bad": len(mode_bad),
            "threshold": threshold,
            "p5": p5,
            "median": median,
            # default=0 keeps the summary printable when nothing was readable.
            "min": min(row_counts, default=0),
            "max": max(row_counts, default=0),
        }

    return bad, stats
|
||||
|
||||
|
||||
def find_duplicates(base_dir: Path) -> tuple[list[BadFile], dict[str, dict]]:
    """Find files that share a slug within a mode; all but the largest are removable.

    Args:
        base_dir: Directory containing one subdirectory per mode.

    Returns:
        ``(dupes, stats)``. ``dupes`` lists, for every slug that occurs more
        than once in a mode, each file except the one with the most rows.
        ``stats`` maps each mode that has at least one parquet file to a dict
        with the keys ``total``, ``unique_slugs``, ``duped_slugs`` and
        ``removable``.
    """
    removable: list[BadFile] = []
    summary: dict[str, dict] = {}

    mode_names = sorted(
        name for name in os.listdir(base_dir) if (base_dir / name).is_dir()
    )

    for mode in mode_names:
        entries, _ = scan_mode(base_dir / mode, mode)
        if not entries:
            continue

        # Bucket the files of this mode by slug.
        by_slug: dict[str, list[tuple[str, int]]] = {}
        for filename, slug, rows in entries:
            by_slug.setdefault(slug, []).append((filename, rows))

        already_found = len(removable)
        duped_slugs = 0
        for slug, group in by_slug.items():
            if len(group) < 2:
                continue
            duped_slugs += 1
            # Largest row count first; everything after the first is surplus.
            ranked = sorted(group, key=lambda item: item[1], reverse=True)
            removable.extend(
                BadFile(mode=mode, filename=name, slug=slug, rows=count)
                for name, count in ranked[1:]
            )

        summary[mode] = {
            "total": len(entries),
            "unique_slugs": len(by_slug),
            "duped_slugs": duped_slugs,
            "removable": len(removable) - already_found,
        }

    return removable, summary
|
||||
|
||||
|
||||
def main() -> None:
    """Command-line entry point: report suspect and duplicate travel-time files.

    Prints a per-mode summary from ``find_bad_files``, lists the flagged files
    (deleting them when ``--delete`` is given), then reports duplicates from
    ``find_duplicates`` (removing them when ``--dedup`` is given). Exits with
    status 1 when ``--travel-times`` is not a directory.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument(
        "--travel-times",
        type=Path,
        default=Path("property-data/travel-times"),
        help="Path to travel-times directory",
    )
    cli.add_argument(
        "--threshold-pct",
        type=float,
        default=5,
        help="Percentile below which files are flagged (default: 5th)",
    )
    cli.add_argument(
        "--delete",
        action="store_true",
        help="Delete corrupted files (so R5 will recompute them)",
    )
    cli.add_argument(
        "--dedup",
        action="store_true",
        help="Remove duplicate files (keep largest per slug)",
    )
    args = cli.parse_args()
    root = args.travel_times

    if not root.is_dir():
        print(f"Error: {root} is not a directory", file=sys.stderr)
        sys.exit(1)

    # --- Corruption check ---
    bad_files, mode_stats = find_bad_files(root, args.threshold_pct)

    print("=== Per-mode summary ===\n")
    print(f"{'Mode':<10} {'Total':>6} {'Bad':>5} {'Threshold':>10} {'Median':>8} {'Range':>20}")
    print("-" * 65)
    for mode, info in sorted(mode_stats.items()):
        span = f"{info['min']:,}–{info['max']:,}"
        print(
            f"{mode:<10} {info['total']:>6} {info['bad']:>5} {info['threshold']:>10,} "
            f"{info['median']:>8,.0f} {span:>20}"
        )

    if not bad_files:
        print("\nNo corrupted files found.")
    else:
        print(f"\n=== Corrupted files ({len(bad_files)} total) ===\n")
        last_mode = ""
        for item in sorted(bad_files, key=lambda b: (b.mode, b.rows, b.slug)):
            # Emit a sub-heading whenever the mode changes.
            if item.mode != last_mode:
                last_mode = item.mode
                print(f"\n {last_mode}/")
            # A negative row count marks a file that could not be read.
            status = f"{item.rows} rows" if item.rows >= 0 else "UNREADABLE"
            print(f" {item.filename} ({status})")

        if args.delete:
            print(f"\nDeleting {len(bad_files)} corrupted files...")
            deleted = _delete_files(root, bad_files)
            print(f"Deleted {deleted}/{len(bad_files)} files.")
        else:
            print(f"\nRun with --delete to remove these files so R5 can recompute them.")

    # --- Dedup check ---
    dupe_files, dupe_stats = find_duplicates(root)

    total_removable = sum(info["removable"] for info in dupe_stats.values())
    if total_removable <= 0:
        print("\nNo duplicates found.")
        return

    print(f"\n=== Duplicates ({total_removable} removable files) ===\n")
    print(f"{'Mode':<10} {'Total':>6} {'Unique':>7} {'Duped slugs':>12} {'Removable':>10}")
    print("-" * 50)
    for mode, info in sorted(dupe_stats.items()):
        if info["removable"] > 0:
            print(
                f"{mode:<10} {info['total']:>6} {info['unique_slugs']:>7} "
                f"{info['duped_slugs']:>12} {info['removable']:>10}"
            )

    if not args.dedup:
        print("\nRun with --dedup to remove duplicates (keeps largest per slug).")
        return

    # Exclude files already deleted by --delete
    already_gone = {(b.mode, b.filename) for b in bad_files} if args.delete else set()
    pending = [d for d in dupe_files if (d.mode, d.filename) not in already_gone]
    print(f"\nRemoving {len(pending)} duplicate files (keeping largest per slug)...")
    deleted = _delete_files(root, pending)
    print(f"Deleted {deleted}/{len(pending)} files.")
|
||||
|
||||
|
||||
def _delete_files(base_dir: Path, files: list[BadFile]) -> int:
    """Unlink each listed file under ``base_dir / mode`` and count the successes.

    A failure on one file is reported on stderr and does not stop the loop.

    Returns:
        The number of files that were actually removed.
    """
    removed = 0
    for entry in files:
        target = base_dir / entry.mode / entry.filename
        try:
            target.unlink()
        except OSError as exc:
            print(f" Failed to delete {target}: {exc}", file=sys.stderr)
        else:
            removed += 1
    return removed


if __name__ == "__main__":
    main()
|
||||
|
|
@ -1,44 +0,0 @@
|
|||
"""Download OS GeoSure ground stability data (5km hex grid).
|
||||
|
||||
Downloads the GB-Hex-5km-GeoSure dataset from Ordnance Survey as an ESRI
|
||||
Shapefile and extracts it.
|
||||
|
||||
Source: https://osdatahub.os.uk/downloads/open/GeoSure
|
||||
License: Open Government Licence v3.0
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
from pipeline.utils import download, extract_zip
|
||||
|
||||
URL = "https://api.os.uk/downloads/v1/products/GB-Hex-5km-GeoSure/downloads?area=GB&format=ESRI%C2%AE+Shapefile&redirect"
|
||||
|
||||
|
||||
def main() -> None:
    """Fetch the GeoSure shapefile archive and unpack it into ``--output``.

    The zip is downloaded into a temporary directory, extracted into the
    output directory, and the ``.shp`` files found there are listed.
    """
    parser = argparse.ArgumentParser(
        description="Download OS GeoSure ground stability data"
    )
    parser.add_argument(
        "--output",
        type=Path,
        required=True,
        help="Output directory for extracted shapefile",
    )
    out_dir = parser.parse_args().output

    # The archive only needs to live until extraction has finished.
    with tempfile.TemporaryDirectory() as scratch:
        archive = Path(scratch) / "geosure.zip"
        download(URL, archive, timeout=300)
        extract_zip(archive, out_dir)

    extracted = list(out_dir.rglob("*.shp"))
    print(f"Extracted {len(extracted)} shapefiles to {out_dir}")
    for shapefile_path in extracted:
        print(f" {shapefile_path.relative_to(out_dir)}")


if __name__ == "__main__":
    main()
|
||||
140
pipeline/download/median_age.py
Normal file
140
pipeline/download/median_age.py
Normal file
|
|
@ -0,0 +1,140 @@
|
|||
"""Download Census 2021 median age by LSOA.
|
||||
|
||||
Downloads five-year age band counts (TS007A) from the NOMIS API, then computes
|
||||
the median age per LSOA using linear interpolation within the median class.
|
||||
|
||||
Source: NOMIS (ONS Census 2021 — TS007A dataset, NM_2020_1)
|
||||
License: Open Government Licence v3.0
|
||||
"""
|
||||
|
||||
import argparse
|
||||
from io import BytesIO
|
||||
from pathlib import Path
|
||||
|
||||
import httpx
|
||||
import polars as pl
|
||||
|
||||
# NOMIS API: Census 2021 TS007A (age by five-year bands) by LSOA 2021 (TYPE151)
|
||||
# c2021_age_19=1..18 selects 18 five-year bands (excluding 0 = Total)
|
||||
# measures=20100 selects absolute count
|
||||
BASE_URL = "https://www.nomisweb.co.uk/api/v01/dataset/NM_2020_1.data.csv?date=latest&geography=TYPE151&c2021_age_19=1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18&measures=20100&select=GEOGRAPHY_CODE,C2021_AGE_19_NAME,OBS_VALUE"
|
||||
PAGE_SIZE = 25000
|
||||
|
||||
# Five-year age bands in order, as (lower bound, width) pairs for interpolation.
# The last band (85+) is open-ended — we treat it as 85-89 for median purposes.
AGE_BANDS = [
    (0, 5),   # Aged 0 to 4 years
    (5, 5),   # Aged 5 to 9 years
    (10, 5),  # Aged 10 to 14 years
    (15, 5),  # Aged 15 to 19 years
    (20, 5),  # Aged 20 to 24 years
    (25, 5),  # Aged 25 to 29 years
    (30, 5),  # Aged 30 to 34 years
    (35, 5),  # Aged 35 to 39 years
    (40, 5),  # Aged 40 to 44 years
    (45, 5),  # Aged 45 to 49 years
    (50, 5),  # Aged 50 to 54 years
    (55, 5),  # Aged 55 to 59 years
    (60, 5),  # Aged 60 to 64 years
    (65, 5),  # Aged 65 to 69 years
    (70, 5),  # Aged 70 to 74 years
    (75, 5),  # Aged 75 to 79 years
    (80, 5),  # Aged 80 to 84 years
    (85, 5),  # Aged 85 years and over
]


def compute_median_age(counts: list[int]) -> float:
    """Estimate the median age from five-year band counts.

    The median band is the first band at which the running total reaches half
    of the population; the result is linearly interpolated inside that band
    using its ``(lower bound, width)`` entry in ``AGE_BANDS``.

    Args:
        counts: Population per band, in the same order as ``AGE_BANDS``.
            ``None`` entries (a band with no observation) count as zero.

    Returns:
        The interpolated median age, or NaN when the total population is zero.
    """
    # A missing band must not break sum() or the running total below.
    cleaned = [0 if value is None else value for value in counts]

    total = sum(cleaned)
    if total == 0:
        return float("nan")

    half = total / 2
    cumulative = 0
    for i, count in enumerate(cleaned):
        if cumulative + count >= half:
            lower_bound, width = AGE_BANDS[i]
            # Linear interpolation within the median band
            return lower_bound + ((half - cumulative) / count) * width
        cumulative += count

    return float("nan")
|
||||
|
||||
|
||||
def download_and_convert(output_path: Path) -> None:
    """Download TS007A age-band counts from NOMIS and write per-LSOA median ages.

    Pages through the NOMIS CSV endpoint in steps of ``PAGE_SIZE`` records,
    keeps geography codes starting with "E", pivots to one row per LSOA,
    computes each median with ``compute_median_age`` and writes a
    zstd-compressed parquet with columns ``lsoa21`` and ``median_age``
    (Float32, rounded to one decimal).

    Args:
        output_path: Destination parquet file; parent directories are created.

    Raises:
        httpx.HTTPStatusError: if a page request returns an error status.
        RuntimeError: if the API returns no rows at all.
        ValueError: if the number of age-band columns differs from ``AGE_BANDS``.
    """
    print("Downloading Census 2021 age by five-year bands from NOMIS...")
    frames = []
    offset = 0
    # One client for all pages so the connection is reused between requests.
    with httpx.Client(follow_redirects=True, timeout=120) as client:
        while True:
            url = f"{BASE_URL}&recordoffset={offset}"
            response = client.get(url)
            response.raise_for_status()
            if len(response.content) == 0:
                break
            chunk = pl.read_csv(BytesIO(response.content))
            if chunk.height == 0:
                break
            frames.append(chunk)
            print(f" Fetched {chunk.height} rows (offset={offset})")
            # A short page means this was the last one.
            if chunk.height < PAGE_SIZE:
                break
            offset += PAGE_SIZE

    if not frames:
        # pl.concat([]) would fail with a far less helpful message.
        raise RuntimeError("NOMIS returned no rows for the TS007A age-band query")

    df = pl.concat(frames)
    print(f"Total rows: {df.height}")

    # Filter to England only
    df = df.filter(pl.col("GEOGRAPHY_CODE").str.starts_with("E"))

    # Pivot: one row per LSOA, columns = age band names, values = counts
    pivoted = df.pivot(
        on="C2021_AGE_19_NAME",
        index="GEOGRAPHY_CODE",
        values="OBS_VALUE",
    )

    # Extract age band columns in order and compute median
    # NOMIS returns band names like "Aged 0 to 4 years", "Aged 85 years and over"
    band_cols = [c for c in pivoted.columns if c != "GEOGRAPHY_CODE"]
    # Sort by the lower bound of each band (the second word of the name)
    band_cols.sort(key=lambda c: int(c.split()[1]))

    if len(band_cols) != len(AGE_BANDS):
        # Counts are matched to AGE_BANDS by position, so a different number of
        # bands would silently interpolate with the wrong bounds.
        raise ValueError(
            f"Expected {len(AGE_BANDS)} age bands from NOMIS, got {len(band_cols)}: {band_cols}"
        )

    print(f"Age bands found: {len(band_cols)}")
    print(f" First: {band_cols[0]}")
    print(f" Last: {band_cols[-1]}")

    # Compute median age per LSOA
    medians = []
    for row in pivoted.select("GEOGRAPHY_CODE", *band_cols).to_dicts():
        # The pivot yields None where an LSOA has no row for a band; count it as 0.
        counts = [row[col] or 0 for col in band_cols]
        median = compute_median_age(counts)
        medians.append({"lsoa21": row["GEOGRAPHY_CODE"], "median_age": round(median, 1)})

    result = pl.DataFrame(medians).with_columns(
        pl.col("median_age").cast(pl.Float32),
    )

    print(f"England LSOAs: {result.height}")
    print(f"Median age range: {result['median_age'].min()} - {result['median_age'].max()}")
    print(f"Mean of medians: {result['median_age'].mean():.1f}")

    output_path.parent.mkdir(parents=True, exist_ok=True)
    result.write_parquet(output_path, compression="zstd")
    print(f"Saved to {output_path}")
|
||||
|
||||
|
||||
def main() -> None:
    """Parse ``--output`` from the command line and run the median-age download."""
    cli = argparse.ArgumentParser(
        description="Download Census 2021 median age by LSOA"
    )
    cli.add_argument(
        "--output", type=Path, required=True, help="Output parquet file path"
    )
    destination = cli.parse_args().output
    download_and_convert(destination)


if __name__ == "__main__":
    main()
|
||||
127
pipeline/download/os_greenspace.py
Normal file
127
pipeline/download/os_greenspace.py
Normal file
|
|
@ -0,0 +1,127 @@
|
|||
"""Download OS Open Greenspace and extract site centroids.
|
||||
|
||||
Downloads the OS Open Greenspace dataset as ESRI Shapefile, computes
|
||||
WGS84 centroids for each greenspace site polygon, and outputs a parquet
|
||||
with lat/lng/category columns compatible with the POI proximity pipeline.
|
||||
|
||||
Source: https://osdatahub.os.uk/downloads/open/OpenGreenspace
|
||||
License: Open Government Licence v3.0
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import polars as pl
|
||||
import shapefile as shp
|
||||
from pyproj import Transformer
|
||||
from shapely.geometry import shape as to_shapely
|
||||
|
||||
from pipeline.utils.download import download, extract_zip
|
||||
|
||||
URL = "https://api.os.uk/downloads/v1/products/OpenGreenspace/downloads?area=GB&format=ESRI%C2%AE+Shapefile&redirect"
|
||||
|
||||
_to_wgs84 = Transformer.from_crs("EPSG:27700", "EPSG:4326", always_xy=True)
|
||||
|
||||
|
||||
def download_greenspace(output: Path) -> None:
    """Download OS Open Greenspace and write site centroids to a parquet file.

    The shapefile archive is downloaded and extracted into a temporary
    directory. For every site record the polygon centroid is computed and
    passed through the module-level ``_to_wgs84`` transformer
    (EPSG:27700 -> EPSG:4326), and one row of ``lat`` / ``lng`` /
    ``category`` / ``name`` is collected. Sites whose geometry is empty,
    invalid or cannot be processed are skipped and counted.

    Args:
        output: Destination parquet file; parent directories are created.

    Raises:
        FileNotFoundError: if the archive contains no usable site shapefile.
        ValueError: if the shapefile has no field whose name contains "funct".
    """
    output.parent.mkdir(parents=True, exist_ok=True)

    lats = []
    lngs = []
    categories = []
    names = []
    skipped = 0

    with tempfile.TemporaryDirectory() as cache_dir:
        zip_path = Path(cache_dir) / "greenspace.zip"
        extract_dir = Path(cache_dir) / "extracted"

        download(URL, zip_path, timeout=300)
        extract_zip(zip_path, extract_dir)

        # Find the GreenspaceSite shapefile (not the AccessPoint one)
        shp_files = list(extract_dir.rglob("*GreenspaceSite*.shp"))
        if not shp_files:
            shp_files = [
                f
                for f in extract_dir.rglob("*.shp")
                if "AccessPoint" not in f.name
            ]
        if not shp_files:
            raise FileNotFoundError(
                "No GreenspaceSite shapefile found in download"
            )

        print(f"Reading {shp_files[0].name}...")
        # Context manager closes the .shp/.shx/.dbf handles before the
        # temporary directory is removed.
        with shp.Reader(str(shp_files[0]), encoding="latin-1") as reader:
            field_names = [f[0] for f in reader.fields[1:]]  # skip deletion flag

            # Find the "function" field (greenspace type)
            func_idx = next(
                (i for i, name in enumerate(field_names) if "funct" in name.lower()),
                None,
            )
            if func_idx is None:
                raise ValueError(
                    f"No 'function' field found. Available: {field_names}"
                )

            # Find a name field if available
            name_idx = next(
                (i for i, name in enumerate(field_names) if "distname" in name.lower()),
                None,
            )

            # Stream records one at a time instead of loading them all at once.
            for sr in reader.iterShapeRecords():
                func = sr.record[func_idx]
                site_name = sr.record[name_idx] if name_idx is not None else ""

                try:
                    geom = to_shapely(sr.shape.__geo_interface__)
                    if geom.is_empty or not geom.is_valid:
                        skipped += 1
                        continue
                    centroid = geom.centroid
                    # _to_wgs84 is built with always_xy=True, so it returns (lng, lat).
                    lng, lat = _to_wgs84.transform(centroid.x, centroid.y)
                except Exception:
                    # Best effort: one bad record must not abort the whole import,
                    # but the drop is counted so it is visible in the output.
                    skipped += 1
                    continue

                lats.append(lat)
                lngs.append(lng)
                categories.append(func)
                names.append(site_name or "")

    if skipped:
        print(f"Skipped {skipped:,} sites with empty, invalid or unreadable geometry")

    df = pl.DataFrame(
        {
            "lat": np.array(lats, dtype=np.float64),
            "lng": np.array(lngs, dtype=np.float64),
            "category": categories,
            "name": names,
        }
    )

    df.write_parquet(output)
    size_mb = output.stat().st_size / (1024 * 1024)
    print(f"Wrote {output} ({size_mb:.1f} MB, {len(df):,} greenspace sites)")

    counts = df.group_by("category").len().sort("len", descending=True)
    for row in counts.iter_rows(named=True):
        print(f" {row['category']}: {row['len']:,}")
|
||||
|
||||
|
||||
def main() -> None:
    """Parse ``--output`` from the command line and run the greenspace download."""
    cli = argparse.ArgumentParser(
        description="Download OS Open Greenspace site centroids"
    )
    cli.add_argument(
        "--output", type=Path, required=True, help="Output parquet file path"
    )
    destination = cli.parse_args().output
    download_greenspace(destination)


if __name__ == "__main__":
    main()
|
||||
|
|
@ -13,7 +13,6 @@ from pipeline.utils.poi_counts import count_pois_per_postcode, min_distance_per_
|
|||
POI_GROUPS_2KM = {
|
||||
"restaurants": ["Restaurant", "Fast Food"],
|
||||
"groceries": ["Greengrocer", "Supermarket", "Convenience Store"],
|
||||
"parks": ["Park"],
|
||||
}
|
||||
|
||||
# Train/tube stations counted at 1km radius
|
||||
|
|
@ -21,11 +20,18 @@ TRAIN_TUBE_GROUP = {
|
|||
"train_tube": ["Metro or Tram stop", "Rail station"],
|
||||
}
|
||||
|
||||
# Groups for which to compute distance to nearest POI
|
||||
# Groups for which to compute distance to nearest POI (from filtered POIs)
|
||||
DISTANCE_GROUPS = {
|
||||
"train_tube": ["Metro or Tram stop", "Rail station"],
|
||||
}
|
||||
|
||||
# OS Open Greenspace function types used for park counts and distance calculation.
|
||||
# Uses the authoritative OS dataset instead of OSM point POIs for better coverage
|
||||
# of green spaces that are only mapped as polygons in OSM.
|
||||
GREENSPACE_PARK_FUNCTIONS = {
|
||||
"parks": ["Public Park Or Garden", "Playing Field", "Play Space"],
|
||||
}
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
|
|
@ -37,6 +43,12 @@ def main():
|
|||
parser.add_argument(
|
||||
"--pois", type=Path, required=True, help="Filtered POIs parquet"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--greenspace",
|
||||
type=Path,
|
||||
required=True,
|
||||
help="OS Open Greenspace centroids parquet",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output", type=Path, required=True, help="Output parquet path"
|
||||
)
|
||||
|
|
@ -60,11 +72,25 @@ def main():
|
|||
postcodes, pois, groups=TRAIN_TUBE_GROUP, radius_km=1
|
||||
)
|
||||
|
||||
# Distance to nearest train/tube station
|
||||
# Distance to nearest train/tube station (from filtered POIs)
|
||||
distances = min_distance_per_postcode(postcodes, pois, groups=DISTANCE_GROUPS)
|
||||
|
||||
# Park counts and distances from OS Open Greenspace
|
||||
greenspace = pl.read_parquet(args.greenspace)
|
||||
park_counts_2km = count_pois_per_postcode(
|
||||
postcodes, greenspace, groups=GREENSPACE_PARK_FUNCTIONS, radius_km=2
|
||||
)
|
||||
park_distances = min_distance_per_postcode(
|
||||
postcodes, greenspace, groups=GREENSPACE_PARK_FUNCTIONS
|
||||
)
|
||||
|
||||
# Join all results on postcode
|
||||
result = counts_2km.join(counts_1km, on="postcode").join(distances, on="postcode")
|
||||
result = (
|
||||
counts_2km.join(counts_1km, on="postcode")
|
||||
.join(distances, on="postcode")
|
||||
.join(park_counts_2km, on="postcode")
|
||||
.join(park_distances, on="postcode")
|
||||
)
|
||||
|
||||
result.write_parquet(args.output)
|
||||
size_mb = args.output.stat().st_size / (1024 * 1024)
|
||||
|
|
|
|||
|
|
@ -3,6 +3,8 @@ package propertymap;
|
|||
import com.conveyal.r5.transit.TransportNetwork;
|
||||
import org.duckdb.DuckDBConnection;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.DirectoryStream;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
|
|
@ -132,11 +134,15 @@ public class App {
|
|||
Path modeDir = outDir.resolve(mode);
|
||||
Files.createDirectories(modeDir);
|
||||
|
||||
// Scan existing slugs once (O(directory)) instead of per-origin stat calls.
|
||||
// This matches by slug regardless of numeric prefix, so re-indexed places.parquet
|
||||
// won't cause duplicate computation.
|
||||
Set<String> existingSlugs = skipCompleted ? scanExistingSlugs(modeDir) : Set.of();
|
||||
|
||||
List<Integer> remaining = new ArrayList<>();
|
||||
for (int idx : originIndices) {
|
||||
if (skipCompleted) {
|
||||
Path f = modeDir.resolve(originFilename(idx, originNames[idx]));
|
||||
if (Files.exists(f) && Files.size(f) > 0) continue;
|
||||
if (skipCompleted && existingSlugs.contains(slugFromName(originNames[idx]))) {
|
||||
continue;
|
||||
}
|
||||
remaining.add(idx);
|
||||
}
|
||||
|
|
@ -255,10 +261,38 @@ public class App {
|
|||
|
||||
/** Build a filename from index + place name (index prefix prevents collisions after sanitization). */
|
||||
private static String originFilename(int index, String name) {
|
||||
String safe = name.toLowerCase()
|
||||
return String.format("%06d-%s.parquet", index, slugFromName(name));
|
||||
}
|
||||
|
||||
/** Slugify a place name: lowercase, strip non-alphanumeric (except spaces/hyphens), collapse whitespace. */
|
||||
private static String slugFromName(String name) {
|
||||
return name.toLowerCase()
|
||||
.replaceAll("[^a-z0-9 -]", "")
|
||||
.replaceAll("\\s+", "-");
|
||||
return String.format("%06d-%s.parquet", index, safe);
|
||||
}
|
||||
|
||||
/**
|
||||
* Scan a mode directory for existing non-empty parquet files, returning the set of slugs
|
||||
* (filenames with numeric prefix stripped). This allows resume to work across places.parquet
|
||||
* rebuilds where indices change but slugs stay the same.
|
||||
*/
|
||||
private static Set<String> scanExistingSlugs(Path modeDir) throws IOException {
|
||||
Set<String> slugs = new HashSet<>();
|
||||
if (!Files.isDirectory(modeDir)) return slugs;
|
||||
try (DirectoryStream<Path> stream = Files.newDirectoryStream(modeDir, "*.parquet")) {
|
||||
for (Path p : stream) {
|
||||
if (Files.size(p) > 0) {
|
||||
String stem = p.getFileName().toString().replace(".parquet", "");
|
||||
int dash = stem.indexOf('-');
|
||||
if (dash > 0 && stem.substring(0, dash).chars().allMatch(Character::isDigit)) {
|
||||
slugs.add(stem.substring(dash + 1));
|
||||
} else {
|
||||
slugs.add(stem);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return slugs;
|
||||
}
|
||||
|
||||
private static String requiredArg(String[] args, String name) {
|
||||
|
|
|
|||
|
|
@ -46,6 +46,12 @@ export class ScreenshotCache {
|
|||
normalized.poi = pois.join(',');
|
||||
}
|
||||
|
||||
// Sort travel time entries
|
||||
const tt = params.getAll('tt').sort();
|
||||
if (tt.length > 0) {
|
||||
normalized.tt = tt.join(',');
|
||||
}
|
||||
|
||||
if (params.get('tab')) {
|
||||
normalized.tab = params.get('tab')!;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -41,8 +41,8 @@ app.get('/screenshot', async (req, res) => {
|
|||
qs.set(key, val);
|
||||
}
|
||||
}
|
||||
// Repeated params: filter, poi
|
||||
for (const key of ['filter', 'poi']) {
|
||||
// Repeated params: filter, poi, tt (travel time)
|
||||
for (const key of ['filter', 'poi', 'tt']) {
|
||||
const val = req.query[key];
|
||||
if (typeof val === 'string' && val) {
|
||||
qs.append(key, val);
|
||||
|
|
|
|||
|
|
@ -68,9 +68,9 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
features: &[
|
||||
FeatureConfig {
|
||||
name: "Last known price",
|
||||
bounds: Bounds::Percentile {
|
||||
low: 0.0,
|
||||
high: 98.0,
|
||||
bounds: Bounds::Fixed {
|
||||
min: 0.0,
|
||||
max: 2_500_000.0,
|
||||
},
|
||||
step: 10000.0,
|
||||
description: "Most recent sale price from the Land Registry",
|
||||
|
|
@ -79,15 +79,15 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
prefix: "£",
|
||||
suffix: "",
|
||||
raw: false,
|
||||
absolute: false,
|
||||
absolute: true,
|
||||
modes: &["historical"],
|
||||
linked: "",
|
||||
},
|
||||
FeatureConfig {
|
||||
name: "Estimated current price",
|
||||
bounds: Bounds::Percentile {
|
||||
low: 0.0,
|
||||
high: 98.0,
|
||||
bounds: Bounds::Fixed {
|
||||
min: 0.0,
|
||||
max: 2_500_000.0,
|
||||
},
|
||||
step: 10000.0,
|
||||
description: "Inflation-adjusted estimate of the current property value",
|
||||
|
|
@ -96,7 +96,7 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
prefix: "£",
|
||||
suffix: "",
|
||||
raw: false,
|
||||
absolute: false,
|
||||
absolute: true,
|
||||
modes: &["historical"],
|
||||
linked: "Asking price",
|
||||
},
|
||||
|
|
@ -252,9 +252,9 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
},
|
||||
FeatureConfig {
|
||||
name: "Asking price",
|
||||
bounds: Bounds::Percentile {
|
||||
low: 0.0,
|
||||
high: 98.0,
|
||||
bounds: Bounds::Fixed {
|
||||
min: 0.0,
|
||||
max: 2_500_000.0,
|
||||
},
|
||||
step: 10000.0,
|
||||
description: "Listed asking price for properties currently for sale",
|
||||
|
|
@ -263,7 +263,7 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
prefix: "£",
|
||||
suffix: "",
|
||||
raw: false,
|
||||
absolute: false,
|
||||
absolute: true,
|
||||
modes: &["buy"],
|
||||
linked: "Estimated current price",
|
||||
},
|
||||
|
|
@ -949,6 +949,23 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
modes: &[],
|
||||
linked: "",
|
||||
},
|
||||
FeatureConfig {
|
||||
name: "Median age",
|
||||
bounds: Bounds::Percentile {
|
||||
low: 2.0,
|
||||
high: 98.0,
|
||||
},
|
||||
step: 0.5,
|
||||
description: "Median age of the local population",
|
||||
detail: "From the 2021 Census (TS007A). Median age of usual residents in the LSOA, computed by linear interpolation from five-year age band counts. Areas with younger populations tend to be urban, university towns, or have more families; older medians are typical in rural and coastal areas.",
|
||||
source: "census-2021",
|
||||
prefix: "",
|
||||
suffix: " years",
|
||||
raw: false,
|
||||
absolute: false,
|
||||
modes: &[],
|
||||
linked: "",
|
||||
},
|
||||
],
|
||||
},
|
||||
FeatureGroup {
|
||||
|
|
@ -996,8 +1013,8 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
},
|
||||
step: 1.0,
|
||||
description: "Number of parks and green spaces within 2km",
|
||||
detail: "Count of parks, gardens, nature reserves, and other green spaces within a 2km radius of the property's postcode centroid. Derived from OpenStreetMap POI data.",
|
||||
source: "osm-pois",
|
||||
detail: "Count of public parks, gardens, playing fields, and play spaces within a 2km radius of the property's postcode centroid. Derived from the OS Open Greenspace dataset (Ordnance Survey), which provides authoritative polygon boundaries for green spaces across Great Britain.",
|
||||
source: "os-open-greenspace",
|
||||
prefix: "",
|
||||
suffix: "",
|
||||
raw: false,
|
||||
|
|
@ -1005,6 +1022,23 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
modes: &[],
|
||||
linked: "",
|
||||
},
|
||||
FeatureConfig {
|
||||
name: "Distance to nearest park (km)",
|
||||
bounds: Bounds::Percentile {
|
||||
low: 2.0,
|
||||
high: 98.0,
|
||||
},
|
||||
step: 0.1,
|
||||
description: "Distance to the closest park or green space",
|
||||
detail: "Straight-line distance in kilometres from the property's postcode centroid to the nearest public park, garden, playing field, or play space. Derived from the OS Open Greenspace dataset (Ordnance Survey), which provides authoritative polygon boundaries for green spaces across Great Britain.",
|
||||
source: "os-open-greenspace",
|
||||
prefix: "",
|
||||
suffix: " km",
|
||||
raw: false,
|
||||
absolute: false,
|
||||
modes: &[],
|
||||
linked: "",
|
||||
},
|
||||
],
|
||||
},
|
||||
FeatureGroup {
|
||||
|
|
@ -1056,6 +1090,13 @@ pub static ENUM_FEATURE_GROUPS: &[EnumFeatureGroup] = &[
|
|||
detail: "From HM Land Registry Price Paid data and EPC certificates. Detached, Semi-Detached, Terraced (includes all terrace sub-types), Flats/Maisonettes, or Other (bungalows, park homes, etc.).",
|
||||
source: "price-paid",
|
||||
},
|
||||
EnumFeatureConfig {
|
||||
name: "Former council house",
|
||||
order: Some(&["Yes", "No"]),
|
||||
description: "Whether the property was ever recorded as social housing",
|
||||
detail: "Derived from the TENURE field in Energy Performance Certificate data. If any EPC certificate for this property recorded the tenure as social rental, it indicates the property was council or housing-association stock at the time of that inspection. Properties that were later sold (e.g. via Right to Buy) retain this flag.",
|
||||
source: "epc",
|
||||
},
|
||||
EnumFeatureConfig {
|
||||
name: "Current energy rating",
|
||||
order: Some(&["A", "B", "C", "D", "E", "F", "G"]),
|
||||
|
|
@ -1082,55 +1123,6 @@ pub static ENUM_FEATURE_GROUPS: &[EnumFeatureGroup] = &[
|
|||
detail: "Maximum available fixed broadband download speed in Megabits per second, from Ofcom's Connected Nations 2025 report. Measured at Output Area level and represents the maximum speed available from any provider, not actual achieved speeds. Tiers: 10 = basic, 30 = superfast (SFBB), 100 = ultrafast 100Mbit, 300 = ultrafast (UFBB), 1000 = gigabit.",
|
||||
source: "broadband",
|
||||
},
|
||||
EnumFeatureConfig {
|
||||
name: "Environmental risk",
|
||||
order: Some(&["Low", "Moderate", "Significant"]),
|
||||
description: "Highest ground stability risk across all six hazard types",
|
||||
detail: "Overall ground stability risk for the area, taken as the maximum across all six GeoSure hazard categories (collapsible deposits, compressible ground, landslides, running sand, shrink-swell, and soluble rocks). From Ordnance Survey GeoSure data on a 5km hex grid.",
|
||||
source: "geosure",
|
||||
},
|
||||
EnumFeatureConfig {
|
||||
name: "Collapsible deposits risk",
|
||||
order: Some(&["Low", "Moderate", "Significant"]),
|
||||
description: "Risk of ground collapse from natural underground cavities",
|
||||
detail: "From OS GeoSure. Indicates the likelihood of ground collapse due to natural cavities formed by dissolution of soluble rocks or the collapse of old mines and natural pipes. Rated on a 5km hex grid across Great Britain.",
|
||||
source: "geosure",
|
||||
},
|
||||
EnumFeatureConfig {
|
||||
name: "Compressible ground risk",
|
||||
order: Some(&["Low", "Moderate", "Significant"]),
|
||||
description: "Risk of ground compression causing subsidence",
|
||||
detail: "From OS GeoSure. Indicates the potential for ground to compress under loading, which can cause gradual settlement or subsidence of buildings and infrastructure. Typically associated with soft clay, silt, or peat deposits.",
|
||||
source: "geosure",
|
||||
},
|
||||
EnumFeatureConfig {
|
||||
name: "Landslide risk",
|
||||
order: Some(&["Low", "Moderate", "Significant"]),
|
||||
description: "Risk of landslide or slope instability",
|
||||
detail: "From OS GeoSure. Indicates the susceptibility of the ground to landslides and slope instability. Based on slope angle, geology, and historical landslide records.",
|
||||
source: "geosure",
|
||||
},
|
||||
EnumFeatureConfig {
|
||||
name: "Running sand risk",
|
||||
order: Some(&["Low", "Moderate", "Significant"]),
|
||||
description: "Risk of sand becoming fluid when saturated",
|
||||
detail: "From OS GeoSure. Indicates the potential for fine-grained sand to behave like a fluid when saturated with water, which can affect excavations and foundations.",
|
||||
source: "geosure",
|
||||
},
|
||||
EnumFeatureConfig {
|
||||
name: "Shrink-swell risk",
|
||||
order: Some(&["Low", "Moderate", "Significant"]),
|
||||
description: "Risk of clay shrinking and swelling with moisture changes",
|
||||
detail: "From OS GeoSure. Indicates the potential for clay-rich soils to shrink when dry and swell when wet, causing ground movement that can damage buildings and infrastructure. One of the most common causes of subsidence in the UK.",
|
||||
source: "geosure",
|
||||
},
|
||||
EnumFeatureConfig {
|
||||
name: "Soluble rocks risk",
|
||||
order: Some(&["Low", "Moderate", "Significant"]),
|
||||
description: "Risk of sinkholes from dissolution of soluble rocks",
|
||||
detail: "From OS GeoSure. Indicates the potential for soluble rocks (limestone, chalk, gypsum) to dissolve, creating underground voids that can lead to sinkholes and ground subsidence.",
|
||||
source: "geosure",
|
||||
},
|
||||
],
|
||||
},
|
||||
];
|
||||
|
|
|
|||
|
|
@ -12,6 +12,8 @@ use tracing::{info, warn};
|
|||
use crate::auth::OptionalUser;
|
||||
use crate::consts::{AI_FILTERS_MAX_TOKENS, AI_FILTERS_TEMPERATURE, AI_FILTERS_WEEKLY_TOKEN_LIMIT};
|
||||
use crate::data::slugify;
|
||||
use crate::data::travel_time::TravelData;
|
||||
use crate::parsing::{parse_filters, row_passes_filters};
|
||||
use crate::pocketbase::{get_superuser_token, log_ai_query};
|
||||
use crate::routes::{FeatureInfo, FeaturesResponse};
|
||||
use crate::state::{AppState, SharedState};
|
||||
|
|
@ -62,6 +64,8 @@ pub struct AiFiltersResponse {
|
|||
notes: String,
|
||||
/// The listing mode used for this response (historical/buy/rent)
|
||||
listing_type: String,
|
||||
/// Number of properties matching the proposed filters (excludes travel time)
|
||||
match_count: usize,
|
||||
}
|
||||
|
||||
/// Strip markdown code fences (```json ... ``` or ``` ... ```) from LLM output.
|
||||
|
|
@ -556,6 +560,117 @@ async fn update_ai_usage(state: &AppState, user_id: &str, tokens_used: u64, week
|
|||
}
|
||||
}
|
||||
|
||||
/// Convert validated filter JSON back to the `;;`-separated filter string format
|
||||
/// that `parse_filters` expects.
|
||||
///
|
||||
/// Numeric: `{"name": [min, max]}` → `name:min:max`
|
||||
/// Enum: `{"name": ["val1", "val2"]}` → `name:val1|val2`
|
||||
fn filters_to_filter_string(filters: &Value) -> String {
|
||||
let obj = match filters.as_object() {
|
||||
Some(obj) => obj,
|
||||
None => return String::new(),
|
||||
};
|
||||
|
||||
let mut parts = Vec::new();
|
||||
for (name, value) in obj {
|
||||
if let Some(arr) = value.as_array() {
|
||||
if arr.len() == 2 && arr[0].is_number() && arr[1].is_number() {
|
||||
let min = arr[0].as_f64().unwrap_or(0.0);
|
||||
let max = arr[1].as_f64().unwrap_or(0.0);
|
||||
parts.push(format!("{name}:{min}:{max}"));
|
||||
} else if !arr.is_empty() && arr[0].is_string() {
|
||||
let values: Vec<&str> = arr.iter().filter_map(|v| v.as_str()).collect();
|
||||
if !values.is_empty() {
|
||||
parts.push(format!("{name}:{}", values.join("|")));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
parts.join(";;")
|
||||
}
|
||||
|
||||
/// Count how many rows in the property dataset pass the given property filters
|
||||
/// AND travel time filters. Travel time data is loaded from the TravelTimeStore
|
||||
/// and checked per-postcode (same logic as hexagons.rs).
|
||||
fn count_matching_rows(
|
||||
state: &AppState,
|
||||
filters: &Value,
|
||||
travel_time_filters: &[TravelTimeFilter],
|
||||
) -> usize {
|
||||
let filter_str = filters_to_filter_string(filters);
|
||||
|
||||
let quant = state.data.quant_ref();
|
||||
let (parsed_filters, parsed_enum_filters) = if filter_str.is_empty() {
|
||||
(Vec::new(), Vec::new())
|
||||
} else {
|
||||
match parse_filters(
|
||||
Some(&filter_str),
|
||||
&state.feature_name_to_index,
|
||||
&state.data.enum_values,
|
||||
&quant,
|
||||
) {
|
||||
Ok(f) => f,
|
||||
Err(err) => {
|
||||
warn!("Failed to parse filters for match count: {err}");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Load travel time data for each filter entry
|
||||
let travel_data: Vec<(TravelData, Option<f32>, Option<f32>)> = travel_time_filters
|
||||
.iter()
|
||||
.filter_map(|ttf| {
|
||||
let data = state.travel_time_store.get(&ttf.mode, &ttf.slug).ok()?;
|
||||
Some((data, ttf.min, ttf.max))
|
||||
})
|
||||
.collect();
|
||||
let has_travel = !travel_data.is_empty();
|
||||
|
||||
let feature_data = &state.data.feature_data;
|
||||
let num_features = state.data.num_features;
|
||||
let num_rows = state.data.lat.len();
|
||||
let (pc_interner, pc_keys) = state.data.postcode_parts();
|
||||
|
||||
let mut count = 0usize;
|
||||
for row in 0..num_rows {
|
||||
if !row_passes_filters(
|
||||
row,
|
||||
&parsed_filters,
|
||||
&parsed_enum_filters,
|
||||
feature_data,
|
||||
num_features,
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if has_travel {
|
||||
let postcode = pc_interner.resolve(&pc_keys[row]);
|
||||
let mut passes_travel = true;
|
||||
for (data, fmin, fmax) in &travel_data {
|
||||
let pass = if let Some(mins) = data.get(postcode).map(|r| r.minutes as f32) {
|
||||
fmin.map_or(true, |min| mins >= min)
|
||||
&& fmax.map_or(true, |max| mins <= max)
|
||||
} else {
|
||||
false // no travel data → postcode not reachable
|
||||
};
|
||||
if !pass {
|
||||
passes_travel = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if !passes_travel {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
count += 1;
|
||||
}
|
||||
|
||||
count
|
||||
}
|
||||
|
||||
/// Maximum number of round trips (function calls + retries) before giving up.
|
||||
const MAX_TOOL_ROUNDS: usize = 5;
|
||||
|
||||
|
|
@ -631,6 +746,7 @@ pub async fn post_ai_filters(
|
|||
})];
|
||||
|
||||
let mut total_tokens_accumulated: u64 = 0;
|
||||
let mut refinement_attempts = 0u32;
|
||||
|
||||
// Function calling loop: model may call search_destinations, we execute and feed back
|
||||
for round in 0..MAX_TOOL_ROUNDS {
|
||||
|
|
@ -776,6 +892,42 @@ pub async fn post_ai_filters(
|
|||
map.insert("Listing status".to_string(), json!([listing_value]));
|
||||
}
|
||||
|
||||
// Count matching properties and refine if too restrictive
|
||||
let match_count = count_matching_rows(&state, &filters, &travel_time_filters);
|
||||
info!(match_count = match_count, round = round, "AI filter match count");
|
||||
|
||||
if match_count == 0 {
|
||||
refinement_attempts += 1;
|
||||
let total_rows = state.data.lat.len();
|
||||
info!(
|
||||
attempt = refinement_attempts,
|
||||
"0 matches out of {total_rows} — asking AI to relax filters"
|
||||
);
|
||||
let feedback = match refinement_attempts {
|
||||
1 => format!(
|
||||
"Your proposed filters matched 0 properties out of {total_rows} total. \
|
||||
The combination is too restrictive. Please widen some numeric ranges \
|
||||
or add more enum values while keeping the user's intent. \
|
||||
Output the adjusted JSON."
|
||||
),
|
||||
2 => format!(
|
||||
"Still 0 matches out of {total_rows}. Please widen ranges further. \
|
||||
Output the adjusted JSON."
|
||||
),
|
||||
_ => format!(
|
||||
"Still 0 matches out of {total_rows}. Please remove additional filters \
|
||||
until some properties match, keeping the user's core priority. \
|
||||
Output the adjusted JSON."
|
||||
),
|
||||
};
|
||||
contents.push(candidate.clone());
|
||||
contents.push(json!({
|
||||
"role": "user",
|
||||
"parts": [{ "text": feedback }]
|
||||
}));
|
||||
continue;
|
||||
}
|
||||
|
||||
// Update usage with total accumulated tokens
|
||||
let new_total = tokens_used + total_tokens_accumulated;
|
||||
update_ai_usage(&state, &user.id, new_total, current_week).await;
|
||||
|
|
@ -810,6 +962,7 @@ pub async fn post_ai_filters(
|
|||
travel_time_filters,
|
||||
notes,
|
||||
listing_type: listing_type.to_string(),
|
||||
match_count,
|
||||
}));
|
||||
}
|
||||
|
||||
|
|
@ -902,8 +1055,10 @@ fn validate_travel_time_filters(raw: &Value, state: &AppState) -> Vec<TravelTime
|
|||
fn validate_and_convert(raw: &Value, features: &FeaturesResponse, listing_type: &str) -> Value {
|
||||
let mut result = serde_json::Map::new();
|
||||
|
||||
// Build lookup maps from feature metadata, filtering by listing mode
|
||||
let mut numeric_features: rustc_hash::FxHashMap<&str, (f32, f32)> =
|
||||
// Build lookup maps from feature metadata, filtering by listing mode.
|
||||
// Store both slider bounds (min/max from percentiles) and true data bounds
|
||||
// (histogram.min/max) so one-sided AI filters use the full data range.
|
||||
let mut numeric_features: rustc_hash::FxHashMap<&str, (f32, f32, f32, f32)> =
|
||||
rustc_hash::FxHashMap::default();
|
||||
let mut enum_features: rustc_hash::FxHashMap<&str, &[String]> =
|
||||
rustc_hash::FxHashMap::default();
|
||||
|
|
@ -915,12 +1070,14 @@ fn validate_and_convert(raw: &Value, features: &FeaturesResponse, listing_type:
|
|||
name,
|
||||
min,
|
||||
max,
|
||||
histogram,
|
||||
modes,
|
||||
..
|
||||
} => {
|
||||
// Only include features valid for the chosen listing mode
|
||||
if modes.is_empty() || modes.contains(&listing_type) {
|
||||
numeric_features.insert(name, (*min, *max));
|
||||
numeric_features
|
||||
.insert(name, (*min, *max, histogram.min, histogram.max));
|
||||
}
|
||||
}
|
||||
FeatureInfo::Enum { name, values, .. } => {
|
||||
|
|
@ -933,32 +1090,37 @@ fn validate_and_convert(raw: &Value, features: &FeaturesResponse, listing_type:
|
|||
}
|
||||
}
|
||||
|
||||
// Process numeric filters — each sets one bound (min or max)
|
||||
// Process numeric filters — each sets one bound (min or max).
|
||||
// The unset side uses the true data min/max (from histogram), not
|
||||
// the slider bounds (percentile-based), so a "max" filter for crime
|
||||
// produces [0, value] rather than [2nd-percentile, value].
|
||||
if let Some(arr) = raw.get("numeric_filters").and_then(|val| val.as_array()) {
|
||||
for item in arr {
|
||||
let name = match item.get("name").and_then(|val| val.as_str()) {
|
||||
Some(name) => name,
|
||||
None => continue,
|
||||
};
|
||||
let (feat_min, feat_max) = match numeric_features.get(name) {
|
||||
Some(range) => *range,
|
||||
None => continue,
|
||||
};
|
||||
let (slider_min, slider_max, data_min, data_max) =
|
||||
match numeric_features.get(name) {
|
||||
Some(range) => *range,
|
||||
None => continue,
|
||||
};
|
||||
let bound = match item.get("bound").and_then(|val| val.as_str()) {
|
||||
Some(b) => b,
|
||||
None => continue,
|
||||
};
|
||||
// Clamp value to true data range (not slider range)
|
||||
let value = match item.get("value").and_then(|val| val.as_f64()) {
|
||||
Some(v) => v.max(feat_min as f64).min(feat_max as f64) as f32,
|
||||
Some(v) => v.max(data_min as f64).min(data_max as f64) as f32,
|
||||
None => continue,
|
||||
};
|
||||
let (filter_min, filter_max) = match bound {
|
||||
"min" => (value, feat_max),
|
||||
"max" => (feat_min, value),
|
||||
"min" => (value, data_max),
|
||||
"max" => (data_min, value),
|
||||
_ => continue,
|
||||
};
|
||||
// Only include if range is narrower than full range
|
||||
if filter_min > feat_min || filter_max < feat_max {
|
||||
// Only include if range is narrower than full slider range
|
||||
if filter_min > slider_min || filter_max < slider_max {
|
||||
result.insert(name.to_string(), json!([filter_min, filter_max]));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue