Checkpoint all changes

This commit is contained in:
Andras Schmelczer 2026-02-01 19:30:33 +00:00
parent 65877acf95
commit 66c2a25457
28 changed files with 3035 additions and 621 deletions

View file

@ -8,7 +8,7 @@ pub const SERVER_ADDRESS: &str = "0.0.0.0:8001";
pub const BOUNDS_QUANTIZATION: f64 = 0.01;
pub const BOUNDS_BUFFER_PERCENT: f64 = 0.1;
pub const POSTCODE_MIN_RESOLUTION: u8 = 11;
pub const MAX_POIS_PER_REQUEST: usize = 5000;
pub const MAX_POIS_PER_REQUEST: usize = 2500;
pub const DEFAULT_PROPERTIES_LIMIT: usize = 100;
pub const MAX_PROPERTIES_LIMIT: usize = 500;
pub const ENUM_NULL: u8 = 255;

676
server-rs/src/features.rs Normal file
View file

@ -0,0 +1,676 @@
//! Static feature configuration. Every numeric and enum column in wide.parquet
//! must be declared here. Unknown columns cause a startup panic.
/// How the slider range of a numeric feature is determined.
///
/// Derives the common value-type traits so a bounds configuration can be
/// logged, compared in tests, and copied freely (both variants only hold
/// `f64` values).
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Bounds {
    /// Fixed min/max values for the slider
    Fixed { min: f64, max: f64 },
    /// Compute percentile from data at startup
    ///
    /// NOTE(review): `low`/`high` look like percentile ranks on a 0-100 scale
    /// (the feature table in this file uses values such as 2.0 and 98.0) —
    /// confirm against the code that evaluates them.
    Percentile { low: f64, high: f64 },
}
/// Static description of one numeric feature.
///
/// Instances are declared in the `FEATURE_GROUPS` table in this file; every
/// field is `'static` data baked into the binary.
pub struct FeatureConfig {
/// Must match parquet column name exactly (also used as display label)
pub name: &'static str,
/// How the slider range is determined: fixed values or percentiles computed
/// from the data (see `Bounds`).
pub bounds: Bounds,
/// Slider step size. Controls the granularity of the range slider in the UI.
pub step: f64,
/// Short one-line description shown in the filter sidebar
pub description: &'static str,
/// Longer description explaining methodology, data source, and caveats
pub detail: &'static str,
/// Data source slug for linking to /data-sources#<slug>
pub source: &'static str,
}
/// A named group of numeric features (for example "Property" or "Transport").
pub struct FeatureGroup {
/// Name of the group.
pub name: &'static str,
/// Numeric features that belong to this group, in declaration order.
pub features: &'static [FeatureConfig],
}
/// Static description of one enum (categorical) feature.
///
/// Instances are declared in the `ENUM_FEATURE_GROUPS` table in this file.
/// Derives the common value-type traits so a configuration can be logged,
/// compared in tests, and copied freely (all fields are `'static` references).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct EnumFeatureConfig {
    /// Name of the enum feature; `order_for` looks configurations up by this
    /// name.
    pub name: &'static str,
    /// If set, values are presented in this order instead of alphabetical.
    /// Values not listed are appended alphabetically after the ordered ones.
    pub order: Option<&'static [&'static str]>,
    /// Short one-line description shown in the filter sidebar
    pub description: &'static str,
    /// Longer description explaining methodology, data source, and caveats
    pub detail: &'static str,
    /// Data source slug for linking to /data-sources#<slug>
    pub source: &'static str,
}
/// A named group of enum (categorical) features.
pub struct EnumFeatureGroup {
/// Name of the group.
pub name: &'static str,
/// Enum features that belong to this group, in declaration order.
pub features: &'static [EnumFeatureConfig],
}
/// Columns in parquet that are neither numeric features nor enum features.
/// These are silently skipped during schema validation.
///
/// NOTE(review): entries are presumably compared against parquet column names
/// by exact string equality, so spelling and capitalisation matter — confirm
/// in the schema validation code.
pub const IGNORED_COLUMNS: &[&str] = &[
"lat",
"lon",
"Address per Property Register",
"Address per EPC",
"Postcode",
"historical_prices",
"Is construction date approximate",
];
/// Table of all numeric features, organised into named groups.
///
/// The order of groups, and of features within a group, is the order returned
/// by `all_numeric_feature_names`. `bounds_for` looks features up in this table
/// by `name`; if two entries shared a name, only the first would be found.
pub static FEATURE_GROUPS: &[FeatureGroup] = &[
FeatureGroup {
name: "Property",
features: &[
FeatureConfig {
name: "Last known price",
bounds: Bounds::Fixed {
min: 0.0,
max: 2_000_000.0,
},
step: 10000.0,
description: "Most recent sale price from the Land Registry",
detail: "The last recorded sale price for this property from HM Land Registry Price Paid data. Covers residential sales in England and Wales. May be years old if the property hasn't sold recently.",
source: "price-paid",
},
FeatureConfig {
name: "Price per sqm",
bounds: Bounds::Percentile {
low: 0.0,
high: 98.0,
},
step: 100.0,
description: "Sale price divided by total floor area",
detail: "Calculated by dividing the last known sale price by the total floor area from the EPC certificate. Useful for comparing value across different-sized properties. Only available where both price and floor area data exist.",
source: "price-paid",
},
FeatureConfig {
name: "Total floor area (sqm)",
bounds: Bounds::Percentile {
low: 0.0,
high: 98.0,
},
step: 1.0,
description: "Internal floor area from the EPC survey",
detail: "Total useful floor area in square metres as measured during the Energy Performance Certificate assessment. Includes all habitable rooms but excludes garages, outbuildings, and external areas.",
source: "epc",
},
FeatureConfig {
name: "Number of bedrooms & living rooms",
bounds: Bounds::Fixed {
min: 1.0,
max: 10.0,
},
step: 1.0,
description: "Count of habitable rooms from the EPC survey",
detail: "Total number of habitable rooms (bedrooms plus living rooms) as recorded in the Energy Performance Certificate. Kitchens and bathrooms are typically excluded unless they are large enough to count as habitable rooms.",
source: "epc",
},
FeatureConfig {
name: "Approximate construction age",
// NOTE(review): the description below says this value is a construction
// year, yet the fixed slider range starts at 0.0 — confirm that a 0..2026
// range is intended rather than a narrower span of years.
bounds: Bounds::Fixed {
min: 0.0,
max: 2026.0,
},
step: 1.0,
description: "Estimated year of construction from the EPC",
detail: "The approximate year of construction as recorded in the Energy Performance Certificate. Derived from the construction age band (e.g. '1930-1949') by taking the midpoint. May be approximate, especially for older buildings.",
source: "epc",
},
],
},
FeatureGroup {
name: "Transport",
features: &[
// NOTE(review): the first three names in this group are snake_case
// identifiers, while `FeatureConfig::name` is documented as also being the
// display label — confirm these are meant to be shown to users as-is.
FeatureConfig {
name: "public_transport_easy_minutes",
bounds: Bounds::Fixed {
min: 0.0,
max: 180.0,
},
step: 2.0,
description: "Quickest public transport journey to central London (easy route)",
detail: "Journey time in minutes by public transport to central London destinations, using TfL's Journey Planner API. The 'easy' route minimises changes and walking. Calculated for weekday morning commute times.",
source: "tfl-journey-times",
},
FeatureConfig {
name: "public_transport_quick_minutes",
bounds: Bounds::Fixed {
min: 0.0,
max: 180.0,
},
step: 2.0,
description: "Fastest public transport journey to central London",
detail: "Journey time in minutes by public transport to central London destinations, using TfL's Journey Planner API. The 'quick' route optimises for shortest total time regardless of changes. Calculated for weekday morning commute times.",
source: "tfl-journey-times",
},
FeatureConfig {
name: "cycling_minutes",
bounds: Bounds::Fixed {
min: 0.0,
max: 180.0,
},
step: 1.0,
description: "Cycling time to central London via TfL routing",
detail: "Cycling journey time in minutes to central London destinations, as calculated by the TfL Journey Planner API. Uses TfL's default cycling speed and route preferences.",
source: "tfl-journey-times",
},
FeatureConfig {
name: "Public transport within 2km",
bounds: Bounds::Percentile {
low: 5.0,
high: 95.0,
},
step: 1.0,
description: "Number of public transport stops within 2km",
detail: "Count of bus stops, rail stations, tube stations, tram stops, and other public transport access points within a 2km radius of the property's postcode. Derived from the NaPTAN (National Public Transport Access Nodes) dataset.",
source: "naptan",
},
],
},
FeatureGroup {
name: "Education",
features: &[
FeatureConfig {
name: "Education, Skills and Training Score",
bounds: Bounds::Percentile {
low: 2.0,
high: 98.0,
},
step: 0.1,
description: "IoD education deprivation score for the local area",
detail: "From the English Indices of Deprivation. Measures deprivation in education, skills and training in the local area (LSOA). Higher scores indicate greater deprivation. Combines children/young people sub-domain (school attainment, entry to higher education) and adult skills sub-domain (adult qualifications, English language proficiency).",
source: "iod",
},
FeatureConfig {
name: "Good+ primary schools within 5km",
bounds: Bounds::Fixed {
min: 0.0,
max: 30.0,
},
step: 1.0,
description: "Primary schools rated Good or Outstanding by Ofsted nearby",
detail: "Number of state-funded primary schools within 5km that have a current Ofsted rating of Good or Outstanding. Based on the latest inspection outcomes dataset. Schools that have not yet been inspected are excluded.",
source: "ofsted",
},
FeatureConfig {
name: "Good+ secondary schools within 5km",
bounds: Bounds::Fixed {
min: 0.0,
max: 15.0,
},
step: 1.0,
description: "Secondary schools rated Good or Outstanding by Ofsted nearby",
detail: "Number of state-funded secondary schools within 5km that have a current Ofsted rating of Good or Outstanding. Based on the latest inspection outcomes dataset. Schools that have not yet been inspected are excluded.",
source: "ofsted",
},
],
},
FeatureGroup {
name: "Deprivation",
features: &[
FeatureConfig {
name: "Index of Multiple Deprivation (IMD) Score",
bounds: Bounds::Percentile {
low: 2.0,
high: 98.0,
},
step: 0.1,
description: "Overall deprivation score combining all domains",
detail: "The Index of Multiple Deprivation is the official measure of relative deprivation in England. It combines seven weighted domains: Income (22.5%), Employment (22.5%), Education (13.5%), Health (13.5%), Crime (9.3%), Barriers to Housing & Services (9.3%), and Living Environment (9.3%). Higher scores indicate greater deprivation. Measured at LSOA level (~1,500 people).",
source: "iod",
},
FeatureConfig {
name: "Income Score (rate)",
bounds: Bounds::Fixed { min: 0.0, max: 0.6 },
step: 0.01,
description: "Proportion of the population experiencing income deprivation",
detail: "From the English Indices of Deprivation. The proportion of the local population experiencing deprivation relating to low income. Includes people on Income Support, income-based Jobseeker's Allowance, income-based Employment and Support Allowance, Pension Credit, Working Tax Credit and Child Tax Credit, Universal Credit, and asylum seekers.",
source: "iod",
},
FeatureConfig {
name: "Employment Score (rate)",
bounds: Bounds::Fixed { min: 0.0, max: 0.4 },
step: 0.01,
description: "Proportion of the working-age population involuntarily excluded from work",
detail: "From the English Indices of Deprivation. The proportion of the working-age population involuntarily excluded from the labour market. Includes claimants of Jobseeker's Allowance, Employment and Support Allowance, Incapacity Benefit, Severe Disablement Allowance, Carer's Allowance, and relevant Universal Credit claimants.",
source: "iod",
},
FeatureConfig {
name: "Health Deprivation and Disability Score",
bounds: Bounds::Percentile {
low: 2.0,
high: 98.0,
},
step: 0.1,
description: "Risk of premature death and quality of life impairment",
detail: "From the English Indices of Deprivation. Measures the risk of premature death and impairment of quality of life through poor physical or mental health. Derived from years of potential life lost, comparative illness and disability ratio, acute morbidity, and mood and anxiety disorders.",
source: "iod",
},
FeatureConfig {
name: "Crime Score",
bounds: Bounds::Percentile {
low: 2.0,
high: 98.0,
},
step: 0.1,
description: "IoD crime deprivation score measuring personal risk",
detail: "From the English Indices of Deprivation. Measures the risk of personal and material victimisation at local level. Derived from recorded rates of violence, burglary, theft, and criminal damage. Higher scores indicate higher crime-related deprivation.",
source: "iod",
},
FeatureConfig {
name: "Living Environment Score",
bounds: Bounds::Percentile {
low: 2.0,
high: 98.0,
},
step: 0.1,
description: "Quality of the local indoor and outdoor environment",
detail: "From the English Indices of Deprivation. Measures deprivation in the quality of the local environment. Combines the Indoors sub-domain (housing quality, central heating, housing conditions) and Outdoors sub-domain (air quality, road traffic accidents). Higher scores indicate poorer living environments.",
source: "iod",
},
FeatureConfig {
name: "Indoors Sub-domain Score",
bounds: Bounds::Percentile {
low: 2.0,
high: 98.0,
},
step: 0.1,
description: "Housing quality and conditions in the local area",
detail: "From the English Indices of Deprivation, Living Environment domain. Measures the quality of housing stock: houses without central heating, housing in poor condition, and houses failing Decent Homes standards. Higher scores indicate worse housing conditions.",
source: "iod",
},
FeatureConfig {
name: "Outdoors Sub-domain Score",
bounds: Bounds::Percentile {
low: 2.0,
high: 98.0,
},
step: 0.1,
description: "Air quality and road safety in the local area",
detail: "From the English Indices of Deprivation, Living Environment domain. Measures the outdoor living environment quality through air quality indicators and road traffic accident casualties involving pedestrians and cyclists. Higher scores indicate poorer outdoor environments.",
source: "iod",
},
],
},
FeatureGroup {
name: "Crime",
features: &[
FeatureConfig {
name: "Anti-social behaviour (avg/yr)",
bounds: Bounds::Percentile {
low: 2.0,
high: 98.0,
},
step: 1.0,
description: "Average yearly anti-social behaviour incidents in the area",
detail: "Average number of anti-social behaviour incidents per year in the LSOA, from police.uk street-level crime data (2023-2025). Includes nuisance, environmental, and personal anti-social behaviour.",
source: "crime",
},
FeatureConfig {
name: "Violence and sexual offences (avg/yr)",
bounds: Bounds::Percentile {
low: 2.0,
high: 98.0,
},
step: 1.0,
description: "Average yearly violent and sexual offences in the area",
detail: "Average number of violence and sexual offences per year in the LSOA, from police.uk street-level crime data (2023-2025). Includes assault, harassment, and sexual offences.",
source: "crime",
},
FeatureConfig {
name: "Criminal damage and arson (avg/yr)",
bounds: Bounds::Percentile {
low: 2.0,
high: 98.0,
},
step: 1.0,
description: "Average yearly criminal damage and arson in the area",
detail: "Average number of criminal damage and arson incidents per year in the LSOA, from police.uk street-level crime data (2023-2025).",
source: "crime",
},
FeatureConfig {
name: "Burglary (avg/yr)",
bounds: Bounds::Percentile {
low: 2.0,
high: 98.0,
},
step: 1.0,
description: "Average yearly burglary offences in the area",
detail: "Average number of burglary offences per year in the LSOA, from police.uk street-level crime data (2023-2025). Includes residential and commercial burglary.",
source: "crime",
},
FeatureConfig {
name: "Vehicle crime (avg/yr)",
bounds: Bounds::Percentile {
low: 2.0,
high: 98.0,
},
step: 1.0,
description: "Average yearly vehicle crime in the area",
detail: "Average number of vehicle crime incidents per year in the LSOA, from police.uk street-level crime data (2023-2025). Includes theft of and from vehicles.",
source: "crime",
},
FeatureConfig {
name: "Robbery (avg/yr)",
bounds: Bounds::Percentile {
low: 2.0,
high: 98.0,
},
step: 1.0,
description: "Average yearly robbery offences in the area",
detail: "Average number of robbery offences per year in the LSOA, from police.uk street-level crime data (2023-2025). Robbery involves theft with force or threat of force.",
source: "crime",
},
FeatureConfig {
name: "Other theft (avg/yr)",
bounds: Bounds::Percentile {
low: 2.0,
high: 98.0,
},
step: 1.0,
description: "Average yearly other theft offences in the area",
detail: "Average number of 'other theft' offences per year in the LSOA, from police.uk street-level crime data (2023-2025). Includes theft not classified under burglary, vehicle crime, shoplifting, or bicycle theft.",
source: "crime",
},
FeatureConfig {
name: "Shoplifting (avg/yr)",
bounds: Bounds::Percentile {
low: 2.0,
high: 98.0,
},
step: 1.0,
description: "Average yearly shoplifting offences in the area",
detail: "Average number of shoplifting offences per year in the LSOA, from police.uk street-level crime data (2023-2025).",
source: "crime",
},
FeatureConfig {
name: "Drugs (avg/yr)",
bounds: Bounds::Percentile {
low: 2.0,
high: 98.0,
},
step: 1.0,
description: "Average yearly drug offences in the area",
detail: "Average number of drug offences per year in the LSOA, from police.uk street-level crime data (2023-2025). Includes possession and trafficking offences.",
source: "crime",
},
FeatureConfig {
name: "Possession of weapons (avg/yr)",
bounds: Bounds::Percentile {
low: 2.0,
high: 98.0,
},
step: 1.0,
description: "Average yearly weapons possession offences in the area",
detail: "Average number of possession of weapons offences per year in the LSOA, from police.uk street-level crime data (2023-2025).",
source: "crime",
},
FeatureConfig {
name: "Public order (avg/yr)",
bounds: Bounds::Percentile {
low: 2.0,
high: 98.0,
},
step: 1.0,
description: "Average yearly public order offences in the area",
detail: "Average number of public order offences per year in the LSOA, from police.uk street-level crime data (2023-2025). Includes causing fear, alarm, or distress.",
source: "crime",
},
FeatureConfig {
name: "Bicycle theft (avg/yr)",
bounds: Bounds::Percentile {
low: 2.0,
high: 98.0,
},
step: 1.0,
description: "Average yearly bicycle theft in the area",
detail: "Average number of bicycle theft offences per year in the LSOA, from police.uk street-level crime data (2023-2025).",
source: "crime",
},
FeatureConfig {
name: "Theft from the person (avg/yr)",
bounds: Bounds::Percentile {
low: 2.0,
high: 98.0,
},
step: 1.0,
description: "Average yearly theft from the person in the area",
detail: "Average number of theft from the person offences per year in the LSOA, from police.uk street-level crime data (2023-2025). Includes pickpocketing and bag snatching without force.",
source: "crime",
},
FeatureConfig {
name: "Other crime (avg/yr)",
bounds: Bounds::Percentile {
low: 2.0,
high: 98.0,
},
step: 1.0,
description: "Average yearly other crime in the area",
detail: "Average number of other crime offences per year in the LSOA, from police.uk street-level crime data (2023-2025). A catch-all category for offences not classified elsewhere.",
source: "crime",
},
FeatureConfig {
name: "Serious crime (avg/yr)",
bounds: Bounds::Percentile {
low: 2.0,
high: 98.0,
},
step: 1.0,
description: "Aggregate of serious crime categories per year",
detail: "Sum of violence, robbery, burglary, and weapons possession per year in the LSOA, from police.uk street-level crime data (2023-2025). Provides a single serious crime metric.",
source: "crime",
},
FeatureConfig {
name: "Minor crime (avg/yr)",
bounds: Bounds::Percentile {
low: 2.0,
high: 98.0,
},
step: 1.0,
description: "Aggregate of minor crime categories per year",
detail: "Sum of anti-social behaviour, shoplifting, bicycle theft, and other lower-severity crime per year in the LSOA, from police.uk street-level crime data (2023-2025). Provides a single minor crime metric.",
source: "crime",
},
],
},
FeatureGroup {
name: "Demographics",
features: &[
FeatureConfig {
name: "% White",
bounds: Bounds::Fixed {
min: 0.0,
max: 100.0,
},
step: 1.0,
description: "Percentage of population identifying as White",
detail: "From the 2021 Census. Percentage of the local authority population identifying as White (English, Welsh, Scottish, Northern Irish, British, Irish, Gypsy or Irish Traveller, Roma, or any other White background).",
source: "ethnicity",
},
FeatureConfig {
name: "% Asian",
bounds: Bounds::Fixed {
min: 0.0,
max: 100.0,
},
step: 1.0,
description: "Percentage of population identifying as Asian",
detail: "From the 2021 Census. Percentage of the local authority population identifying as Asian or Asian British (Indian, Pakistani, Bangladeshi, Chinese, or any other Asian background).",
source: "ethnicity",
},
FeatureConfig {
name: "% Black",
bounds: Bounds::Fixed {
min: 0.0,
max: 100.0,
},
step: 1.0,
description: "Percentage of population identifying as Black",
detail: "From the 2021 Census. Percentage of the local authority population identifying as Black, Black British, Caribbean, or African.",
source: "ethnicity",
},
FeatureConfig {
name: "% Mixed",
bounds: Bounds::Fixed {
min: 0.0,
max: 100.0,
},
step: 1.0,
description: "Percentage of population identifying as Mixed or Multiple ethnic groups",
detail: "From the 2021 Census. Percentage of the local authority population identifying as Mixed or Multiple ethnic groups (White and Black Caribbean, White and Black African, White and Asian, or any other Mixed or Multiple background).",
source: "ethnicity",
},
FeatureConfig {
name: "% Other",
bounds: Bounds::Fixed {
min: 0.0,
max: 100.0,
},
step: 1.0,
description: "Percentage of population identifying as Other ethnic group",
detail: "From the 2021 Census. Percentage of the local authority population identifying as Other ethnic group (Arab or any other ethnic group not covered by the main categories).",
source: "ethnicity",
},
],
},
FeatureGroup {
name: "Amenities",
features: &[
FeatureConfig {
name: "Restaurants within 2km",
bounds: Bounds::Percentile {
low: 5.0,
high: 95.0,
},
step: 1.0,
description: "Number of restaurants and cafes within 2km",
detail: "Count of restaurants, cafes, and food establishments within a 2km radius of the property's postcode centroid. Derived from OpenStreetMap POI data using haversine distance calculation with a 0.05° spatial grid for candidate reduction.",
source: "osm-pois",
},
FeatureConfig {
name: "Groceries within 2km",
bounds: Bounds::Percentile {
low: 5.0,
high: 95.0,
},
step: 1.0,
description: "Number of grocery shops and supermarkets within 2km",
detail: "Count of supermarkets, convenience stores, and other grocery shops within a 2km radius of the property's postcode centroid. Derived from OpenStreetMap POI data.",
source: "osm-pois",
},
FeatureConfig {
name: "Parks within 2km",
bounds: Bounds::Percentile {
low: 5.0,
high: 95.0,
},
step: 1.0,
description: "Number of parks and green spaces within 2km",
detail: "Count of parks, gardens, nature reserves, and other green spaces within a 2km radius of the property's postcode centroid. Derived from OpenStreetMap POI data.",
source: "osm-pois",
},
],
},
FeatureGroup {
name: "Environment",
features: &[
FeatureConfig {
name: "Noise (dB)",
bounds: Bounds::Fixed {
min: 50.0,
max: 80.0,
},
step: 1.0,
description: "Road noise level at the postcode in decibels (Lden)",
detail: "Road noise level in decibels (Lden — day-evening-night 24-hour weighted average) from Defra's Strategic Noise Mapping Round 4 (2022). Modelled at 4m above ground on a 10m grid. Sampled at postcode centroids via WCS GeoTIFF tiles. Values above ~55 dB are generally considered noticeable; above ~70 dB can affect health.",
source: "noise",
},
FeatureConfig {
name: "Max available download speed (Mbps)",
bounds: Bounds::Percentile {
low: 5.0,
high: 95.0,
},
step: 10.0,
description: "Maximum broadband download speed available at the postcode",
detail: "Maximum available fixed broadband download speed in Megabits per second, from Ofcom's Connected Nations 2025 report. Measured at Output Area level and represents the maximum speed available from any provider, not actual achieved speeds.",
source: "broadband",
},
],
},
];
/// Table of all enum (categorical) features, organised into named groups.
///
/// The order of groups, and of features within a group, is the order returned
/// by `all_enum_feature_names`. `order_for` looks features up in this table by
/// `name` and returns the `order` of the first match.
pub static ENUM_FEATURE_GROUPS: &[EnumFeatureGroup] = &[EnumFeatureGroup {
name: "Property",
features: &[
EnumFeatureConfig {
// NOTE(review): "Leashold" looks like a misspelling of "Leasehold".
// Presumably this string has to match the parquet column name, so check
// how the column is actually spelled in the data before changing it here.
name: "Leashold/Freehold",
order: Some(&["Freehold", "Leasehold"]),
description: "Whether the property is leasehold or freehold",
detail: "From HM Land Registry Price Paid data. Freehold means you own the building and the land it stands on. Leasehold means you own the building but not the land — you have a lease from the freeholder for a set number of years.",
source: "price-paid",
},
EnumFeatureConfig {
name: "Current energy rating",
order: Some(&["A", "B", "C", "D", "E", "F", "G"]),
description: "Current EPC energy efficiency rating (A-G)",
detail: "The current energy efficiency rating from the Energy Performance Certificate, graded A (most efficient) to G (least efficient). Based on the energy costs per square metre of floor area for heating, hot water, lighting, and ventilation.",
source: "epc",
},
EnumFeatureConfig {
name: "Potential energy rating",
order: Some(&["A", "B", "C", "D", "E", "F", "G"]),
description: "Achievable EPC rating after recommended improvements",
detail: "The potential energy efficiency rating that could be achieved if all cost-effective improvements recommended in the EPC were carried out. Graded A (most efficient) to G (least efficient).",
source: "epc",
},
EnumFeatureConfig {
name: "Property type",
order: Some(&["Detached", "Semi-Detached", "Terraced", "Flat"]),
description: "Type of property: detached, semi-detached, terraced, or flat",
detail: "From HM Land Registry Price Paid data. The broad property type classification: Detached, Semi-Detached, Terraced, or Flat/Maisonette.",
source: "price-paid",
},
EnumFeatureConfig {
name: "Property type/built form",
// No explicit order: per the `order` field's documentation, values are
// then presented alphabetically.
order: None,
description: "Detailed property type and built form from the EPC",
detail: "A more detailed classification from the Energy Performance Certificate combining property type and built form. Examples include 'Semi-Detached House', 'Mid-Terrace House', 'Ground-Floor Flat', 'Detached Bungalow', etc.",
source: "epc",
},
],
}];
/// Collects the name of every numeric feature into a single flat list.
///
/// Names are emitted group by group in the order of `FEATURE_GROUPS`, and
/// within each group in declaration order.
pub fn all_numeric_feature_names() -> Vec<&'static str> {
    let mut names = Vec::new();
    for group in FEATURE_GROUPS {
        for config in group.features {
            names.push(config.name);
        }
    }
    names
}
/// Collects the name of every enum feature into a single flat list.
///
/// Names are emitted group by group in the order of `ENUM_FEATURE_GROUPS`, and
/// within each group in declaration order.
pub fn all_enum_feature_names() -> Vec<&'static str> {
    let mut names = Vec::new();
    for group in ENUM_FEATURE_GROUPS {
        names.extend(group.features.iter().map(|config| config.name));
    }
    names
}
/// Returns the configured value order of the enum feature called `name`.
///
/// Yields `None` both when no enum feature has that name and when the first
/// feature with that name declares no explicit order.
pub fn order_for(name: &str) -> Option<&'static [&'static str]> {
    for group in ENUM_FEATURE_GROUPS {
        for config in group.features {
            if config.name == name {
                // The first match decides the result, even if its order is `None`.
                return config.order;
            }
        }
    }
    None
}
/// Returns the `Bounds` configured for the numeric feature called `name`.
///
/// Yields `None` when no numeric feature has that name; when several share the
/// name, the first one in `FEATURE_GROUPS` order wins.
pub fn bounds_for(name: &str) -> Option<&'static Bounds> {
    for group in FEATURE_GROUPS {
        for config in group.features {
            if config.name == name {
                return Some(&config.bounds);
            }
        }
    }
    None
}

View file

@ -1,3 +1,4 @@
use crate::consts::ENUM_NULL;
use crate::data::EnumFeatureData;
pub struct ParsedFilter {
@ -22,12 +23,12 @@ pub fn parse_filters(
let mut numeric = Vec::new();
let mut enums = Vec::new();
let s = match filter_str.filter(|s| !s.is_empty()) {
Some(s) => s,
let input = match filter_str.filter(|text| !text.is_empty()) {
Some(text) => text,
None => return (numeric, enums),
};
for entry in s.split(',') {
for entry in input.split(',') {
let parts: Vec<&str> = entry.splitn(2, ':').collect();
if parts.len() != 2 {
continue;
@ -35,13 +36,13 @@ pub fn parse_filters(
let name = parts[0].trim();
let rest = parts[1].trim();
if let Some(enum_idx) = enum_features.iter().position(|ef| ef.name == name) {
let ef = &enum_features[enum_idx];
if let Some(enum_idx) = enum_features.iter().position(|enum_feat| enum_feat.name == name) {
let enum_feat = &enum_features[enum_idx];
let allowed: Vec<u8> = rest
.split('|')
.filter_map(|v| {
let v = v.trim();
ef.values.iter().position(|ev| ev == v).map(|i| i as u8)
.filter_map(|value| {
let value = value.trim();
enum_feat.values.iter().position(|existing| existing == value).map(|position| position as u8)
})
.collect();
enums.push(ParsedEnumFilter { enum_idx, allowed });
@ -51,14 +52,14 @@ pub fn parse_filters(
continue;
}
let min = match num_parts[0].trim().parse::<f64>() {
Ok(v) => v,
Ok(value) => value,
Err(_) => continue,
};
let max = match num_parts[1].trim().parse::<f64>() {
Ok(v) => v,
Ok(value) => value,
Err(_) => continue,
};
if let Some(feat_idx) = feature_names.iter().position(|n| n == name) {
if let Some(feat_idx) = feature_names.iter().position(|feat_name| feat_name == name) {
numeric.push(ParsedFilter { feat_idx, min, max });
}
}
@ -75,11 +76,11 @@ pub fn row_passes_filters(
num_features: usize,
enum_features: &[EnumFeatureData],
) -> bool {
filters.iter().all(|f| {
let v = feature_data[row * num_features + f.feat_idx];
v.is_finite() && v >= f.min && v <= f.max
}) && enum_filters.iter().all(|ef| {
let v = enum_features[ef.enum_idx].data[row];
v != 255 && ef.allowed.contains(&v)
filters.iter().all(|filter| {
let value = feature_data[row * num_features + filter.feat_idx];
value.is_finite() && value >= filter.min && value <= filter.max
}) && enum_filters.iter().all(|enum_filter| {
let value = enum_features[enum_filter.enum_idx].data[row];
value != ENUM_NULL && enum_filter.allowed.contains(&value)
})
}

View file

@ -19,18 +19,18 @@ impl GridIndex {
let mut min_lon = f64::INFINITY;
let mut max_lon = f64::NEG_INFINITY;
for i in 0..lat.len() {
if lat[i] < min_lat {
min_lat = lat[i];
for index in 0..lat.len() {
if lat[index] < min_lat {
min_lat = lat[index];
}
if lat[i] > max_lat {
max_lat = lat[i];
if lat[index] > max_lat {
max_lat = lat[index];
}
if lon[i] < min_lon {
min_lon = lon[i];
if lon[index] < min_lon {
min_lon = lon[index];
}
if lon[i] > max_lon {
max_lon = lon[i];
if lon[index] > max_lon {
max_lon = lon[index];
}
}
@ -52,11 +52,11 @@ impl GridIndex {
let mut cells: Vec<Vec<u32>> = vec![Vec::new(); rows * cols];
for i in 0..lat.len() {
let grid_row = ((lat[i] - min_lat) / cell_size) as usize;
let grid_col = ((lon[i] - min_lon) / cell_size) as usize;
let idx = grid_row * cols + grid_col;
cells[idx].push(i as u32);
for index in 0..lat.len() {
let grid_row = ((lat[index] - min_lat) / cell_size) as usize;
let grid_col = ((lon[index] - min_lon) / cell_size) as usize;
let cell_index = grid_row * cols + grid_col;
cells[cell_index].push(index as u32);
}
tracing::debug!("Grid index built");
@ -96,7 +96,7 @@ impl GridIndex {
west: f64,
north: f64,
east: f64,
mut f: impl FnMut(u32),
mut callback: impl FnMut(u32),
) {
let Some((row_min, row_max, col_min, col_max)) =
self.clamp_bounds(south, west, north, east)
@ -108,7 +108,7 @@ impl GridIndex {
let row_start = row * self.cols;
for col in col_min..=col_max {
for &row_idx in &self.cells[row_start + col] {
f(row_idx);
callback(row_idx);
}
}
}

View file

@ -1,15 +1,20 @@
mod consts;
mod data;
mod features;
mod filter;
mod index;
mod grid_index;
mod routes;
mod state;
#[cfg(test)]
mod tests;
use std::path::PathBuf;
use std::sync::Arc;
use anyhow::{bail, Context};
use axum::routing::get;
use axum::Router;
use clap::Parser;
use tower_http::compression::CompressionLayer;
use tower_http::cors::{Any, CorsLayer};
use tower_http::services::ServeDir;
@ -19,8 +24,24 @@ use tracing_subscriber::EnvFilter;
use state::AppState;
// Command-line arguments of the server, parsed in `main` via `Cli::parse()`.
//
// The `///` comments on the fields below are picked up by clap as the
// `--help` text of each option, so editing them changes user-facing output.
// `data` and `pois` are plain `PathBuf`s (required options); `dist` is an
// `Option` and may be omitted.
#[derive(Parser)]
#[command(name = "narrowit", about = "Narrowit property map server")]
struct Cli {
/// Path to the wide property parquet file
#[arg(long)]
data: PathBuf,
/// Path to the POI parquet file
#[arg(long)]
pois: PathBuf,
/// Path to the frontend dist directory
#[arg(long)]
dist: Option<PathBuf>,
}
#[tokio::main]
async fn main() {
async fn main() -> anyhow::Result<()> {
tracing_subscriber::fmt()
.with_env_filter(
EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info")),
@ -28,18 +49,18 @@ async fn main() {
.with_ansi(true)
.init();
let parquet_path = PathBuf::from(
std::env::args()
.nth(1)
.unwrap_or_else(|| "data_sources/processed/wide.parquet".to_string()),
);
let cli = Cli::parse();
let parquet_path = &cli.data;
if !parquet_path.exists() {
tracing::error!("Parquet file not found: {}", parquet_path.display());
std::process::exit(1);
bail!(
"Property parquet file not found: {}",
parquet_path.display()
);
}
info!("Loading property data from {}", parquet_path.display());
let property_data = data::PropertyData::load(&parquet_path);
let property_data = data::PropertyData::load(parquet_path)?;
info!(
rows = property_data.lat.len(),
features = property_data.num_features,
@ -48,32 +69,90 @@ async fn main() {
);
info!("Building spatial grid index (0.01° cells)");
let grid = index::GridIndex::build(&property_data.lat, &property_data.lon, 0.01);
let grid = grid_index::GridIndex::build(&property_data.lat, &property_data.lon, 0.01);
info!("Precomputing H3 cells for resolutions {}-{}", consts::H3_PRECOMPUTE_MIN, consts::H3_PRECOMPUTE_MAX);
let h3_cells = data::precompute_h3(&property_data.lat, &property_data.lon);
info!(
"Precomputing H3 cells for resolutions {}-{}",
consts::H3_PRECOMPUTE_MIN,
consts::H3_PRECOMPUTE_MAX
);
let h3_cells = data::precompute_h3(&property_data.lat, &property_data.lon)?;
let poi_path = PathBuf::from("/volumes/syncthing/Projects/property-map/data/filtered_uk_pois.parquet");
let poi_path = cli.pois;
let poi_data = if poi_path.exists() {
info!("Loading POI data from {}", poi_path.display());
let pd = data::POIData::load(&poi_path);
info!(pois = pd.lat.len(), "POI data loaded");
pd
} else {
tracing::warn!("POI file not found: {}. POI endpoints will be unavailable.", poi_path.display());
data::POIData {
id: Vec::new(),
name: Vec::new(),
category: Vec::new(),
lat: Vec::new(),
lng: Vec::new(),
emoji: Vec::new(),
}
};
if !poi_path.exists() {
bail!("POI parquet file not found: {}", poi_path.display());
}
info!("Loading POI data from {}", poi_path.display());
let poi_data = data::POIData::load(&poi_path)?;
info!(pois = poi_data.lat.len(), "POI data loaded");
info!("Building POI spatial grid index");
let poi_grid = index::GridIndex::build(&poi_data.lat, &poi_data.lng, 0.01);
let poi_grid = grid_index::GridIndex::build(&poi_data.lat, &poi_data.lng, 0.01);
let min_keys: Vec<String> = property_data
.feature_names
.iter()
.map(|name| format!("min_{}", name))
.collect();
let max_keys: Vec<String> = property_data
.feature_names
.iter()
.map(|name| format!("max_{}", name))
.collect();
let enum_min_keys: Vec<String> = property_data
.enum_features
.iter()
.map(|enum_feature| format!("min_{}", enum_feature.name))
.collect();
let enum_max_keys: Vec<String> = property_data
.enum_features
.iter()
.map(|enum_feature| format!("max_{}", enum_feature.name))
.collect();
// Precompute POI category groups
let poi_category_groups = {
let mut group_cats: std::collections::HashMap<String, std::collections::HashSet<String>> =
std::collections::HashMap::new();
for (category, group) in poi_data.category.iter().zip(poi_data.group.iter()) {
group_cats
.entry(group.clone())
.or_default()
.insert(category.clone());
}
// Validate that data groups match the hardcoded order exactly
let expected: std::collections::HashSet<&str> =
consts::POI_GROUP_ORDER.iter().copied().collect();
let actual: std::collections::HashSet<&str> =
group_cats.keys().map(|key| key.as_str()).collect();
let missing_from_data: Vec<&&str> = expected.difference(&actual).collect();
let missing_from_order: Vec<&&str> = actual.difference(&expected).collect();
if !missing_from_data.is_empty() || !missing_from_order.is_empty() {
bail!(
"POI group mismatch!\n In POI_GROUP_ORDER but not in data: {:?}\n In data but not in POI_GROUP_ORDER: {:?}",
missing_from_data, missing_from_order
);
}
consts::POI_GROUP_ORDER.iter().map(|group_name| group_name.to_string()).collect::<Vec<_>>()
.into_iter()
.map(|name| {
let mut categories: Vec<String> =
group_cats.remove(&name).context("POI group validated but missing from map")?.into_iter().collect();
categories.sort();
Ok(state::POICategoryGroup { name, categories })
})
.collect::<anyhow::Result<Vec<_>>>()?
};
// Precompute enum name → index map
let enum_name_to_idx: rustc_hash::FxHashMap<String, usize> = property_data
.enum_features
.iter()
.enumerate()
.map(|(index, enum_feature)| (enum_feature.name.clone(), index))
.collect();
let state = Arc::new(AppState {
data: property_data,
@ -81,6 +160,12 @@ async fn main() {
h3_cells,
poi_data,
poi_grid,
min_keys,
max_keys,
enum_min_keys,
enum_max_keys,
poi_category_groups,
enum_name_to_idx,
});
let cors = CorsLayer::new()
@ -93,6 +178,7 @@ async fn main() {
let state_pois = state.clone();
let state_poi_categories = state.clone();
let state_hexagon_properties = state.clone();
let state_hexagon_stats = state.clone();
let api = Router::new()
.route(
@ -116,9 +202,23 @@ async fn main() {
get(move |query| {
routes::get_hexagon_properties(state_hexagon_properties.clone(), query)
}),
)
.route(
"/api/hexagon-stats",
get(move |query| routes::get_hexagon_stats(state_hexagon_stats.clone(), query)),
);
let frontend_dist = PathBuf::from("frontend/dist");
let frontend_dist = cli.dist.unwrap_or_else(|| {
// Check next to the binary first, then fall back to working directory
if let Ok(executable) = std::env::current_exe() {
let executable_dir = executable.parent().unwrap_or_else(|| std::path::Path::new("."));
let dist_next_to_binary = executable_dir.join("dist");
if dist_next_to_binary.exists() {
return dist_next_to_binary;
}
}
PathBuf::from("frontend/dist")
});
let app = if frontend_dist.exists() {
api.fallback_service(ServeDir::new(frontend_dist))
} else {
@ -127,12 +227,16 @@ async fn main() {
let app = app
.layer(cors)
.layer(CompressionLayer::new().gzip(true))
.layer(CompressionLayer::new().zstd(true).gzip(true))
.layer(TraceLayer::new_for_http());
let addr = "0.0.0.0:8001";
let addr = consts::SERVER_ADDRESS;
let listener = tokio::net::TcpListener::bind(addr)
.await
.with_context(|| format!("Failed to bind to {addr}"))?;
info!("Server listening on {}", addr);
let listener = tokio::net::TcpListener::bind(addr).await.unwrap();
axum::serve(listener, app).await.unwrap();
axum::serve(listener, app)
.await
.context("Server error")?;
Ok(())
}

View file

@ -5,6 +5,7 @@ use serde::Serialize;
use tracing::info;
use crate::data::Histogram;
use crate::features::{ENUM_FEATURE_GROUPS, FEATURE_GROUPS};
use crate::state::AppState;
#[derive(Serialize)]
@ -13,75 +14,123 @@ pub enum FeatureInfo {
#[serde(rename = "numeric")]
Numeric {
name: String,
label: String,
min: f64,
max: f64,
step: f64,
histogram: Histogram,
description: &'static str,
detail: &'static str,
source: &'static str,
},
#[serde(rename = "enum")]
Enum {
name: String,
label: String,
values: Vec<String>,
description: &'static str,
detail: &'static str,
source: &'static str,
},
}
#[derive(Serialize)]
pub struct FeaturesResponse {
pub struct FeatureGroupResponse {
name: String,
features: Vec<FeatureInfo>,
}
fn snake_to_label(name: &str) -> String {
// If name contains '/' or uppercase, assume it's already human-readable
if name.contains('/') || name.chars().any(|c| c.is_uppercase()) {
return name.to_string();
}
name.split('_')
.map(|word| {
let mut chars = word.chars();
match chars.next() {
None => String::new(),
Some(c) => {
let mut s = c.to_uppercase().to_string();
s.extend(chars);
s
}
}
})
.collect::<Vec<_>>()
.join(" ")
#[derive(Serialize)]
pub struct FeaturesResponse {
groups: Vec<FeatureGroupResponse>,
}
pub async fn get_features(state: Arc<AppState>) -> Json<FeaturesResponse> {
let mut features: Vec<FeatureInfo> = state
.data
.feature_names
.iter()
.enumerate()
.map(|(i, name): (usize, &String)| {
let stats = &state.data.feature_stats[i];
FeatureInfo::Numeric {
name: name.clone(),
label: snake_to_label(name),
min: stats.p_low,
max: stats.p_high,
histogram: stats.histogram.clone(),
}
})
.collect();
for ef in &state.data.enum_features {
features.push(FeatureInfo::Enum {
name: ef.name.clone(),
label: snake_to_label(&ef.name),
values: ef.values.clone(),
});
// Collect all group names in order, merging numeric and enum groups with the same name
let mut group_names: Vec<&str> = Vec::new();
for feature_group in FEATURE_GROUPS {
if !group_names.contains(&feature_group.name) {
group_names.push(feature_group.name);
}
}
for enum_group in ENUM_FEATURE_GROUPS {
if !group_names.contains(&enum_group.name) {
group_names.push(enum_group.name);
}
}
let mut groups: Vec<FeatureGroupResponse> = Vec::new();
for &group_name in &group_names {
let mut features: Vec<FeatureInfo> = Vec::new();
// Add numeric features for this group
for feature_group in FEATURE_GROUPS {
if feature_group.name == group_name {
for feature_config in feature_group.features {
if let Some(feat_idx) =
state.data.feature_names.iter().position(|feat_name| feat_name == feature_config.name)
{
let stats = &state.data.feature_stats[feat_idx];
features.push(FeatureInfo::Numeric {
name: feature_config.name.to_string(),
min: stats.slider_min,
max: stats.slider_max,
step: feature_config.step,
histogram: stats.histogram.clone(),
description: feature_config.description,
detail: feature_config.detail,
source: feature_config.source,
});
}
}
}
}
// Add enum features for this group
for enum_group in ENUM_FEATURE_GROUPS {
if enum_group.name == group_name {
for enum_config in enum_group.features {
if let Some(enum_feature) = state
.data
.enum_features
.iter()
.find(|enum_feat| enum_feat.name == enum_config.name)
{
features.push(FeatureInfo::Enum {
name: enum_config.name.to_string(),
values: enum_feature.values.clone(),
description: enum_config.description,
detail: enum_config.detail,
source: enum_config.source,
});
}
}
}
}
if !features.is_empty() {
groups.push(FeatureGroupResponse {
name: group_name.to_string(),
features,
});
}
}
let num_numeric: usize = groups
.iter()
.flat_map(|group| &group.features)
.filter(|feature| matches!(feature, FeatureInfo::Numeric { .. }))
.count();
let num_enum: usize = groups
.iter()
.flat_map(|group| &group.features)
.filter(|feature| matches!(feature, FeatureInfo::Enum { .. }))
.count();
info!(
numeric = features.iter().filter(|f| matches!(f, FeatureInfo::Numeric { .. })).count(),
enums = features.iter().filter(|f| matches!(f, FeatureInfo::Enum { .. })).count(),
numeric = num_numeric,
enums = num_enum,
groups = groups.len(),
"GET /api/features"
);
Json(FeaturesResponse { features })
Json(FeaturesResponse { groups })
}

View file

@ -0,0 +1,251 @@
use std::fmt::Write;
use std::str::FromStr;
use std::sync::Arc;
use axum::extract::Query;
use axum::http::StatusCode;
use axum::response::IntoResponse;
use serde::Deserialize;
use tracing::{info, warn};
use crate::consts::{ENUM_NULL, HISTOGRAM_BINS};
use crate::filter::{parse_filters, row_passes_filters};
use crate::state::AppState;
use super::parse::h3_cell_bounds;
/// Query-string parameters accepted by `get_hexagon_stats`.
#[derive(Deserialize)]
pub struct HexagonStatsParams {
/// H3 cell index in string form; the handler parses it with `h3o::CellIndex::from_str`.
pub h3: String,
/// H3 resolution; the handler uses it as an index into `AppState::h3_cells`.
pub resolution: u8,
/// Optional raw filter expression, handed unchanged to `parse_filters`.
pub filters: Option<String>,
}
/// Handler for `GET /api/hexagon-stats`: per-feature statistics for the rows inside one H3 cell.
///
/// Responds with a hand-built JSON body of the form
/// `{"count":N,"numeric_features":[...],"enum_features":[...]}`.
///
/// # Errors
/// * `400 Bad Request` when `h3` does not parse as a cell index, or when `resolution`
///   has no (non-empty) entry in `state.h3_cells`.
/// * `500 Internal Server Error` when the blocking worker task fails to join.
pub async fn get_hexagon_stats(
state: Arc<AppState>,
Query(params): Query<HexagonStatsParams>,
) -> Result<impl IntoResponse, (StatusCode, String)> {
let cell = h3o::CellIndex::from_str(&params.h3).map_err(|error| {
warn!(h3 = %params.h3, error = %error, "Invalid H3 cell index");
(StatusCode::BAD_REQUEST, format!("Invalid H3 cell: {}", error))
})?;
let cell_u64: u64 = cell.into();
// The resolution doubles as an index into the per-resolution cell table;
// an empty entry is rejected the same way as an out-of-range one.
let resolution = params.resolution as usize;
if resolution >= state.h3_cells.len() || state.h3_cells[resolution].is_empty() {
warn!(
resolution,
"Invalid or non-precomputed resolution for hexagon-stats"
);
return Err((
StatusCode::BAD_REQUEST,
"Invalid or non-precomputed resolution".to_string(),
));
}
// Owned copies so the request can still be logged from inside the blocking closure.
let h3_str = params.h3.clone();
let filters_str = params.filters.clone();
let (parsed_filters, parsed_enum_filters) = parse_filters(
params.filters.as_deref(),
&state.data.feature_names,
&state.data.enum_features,
);
let num_filters = parsed_filters.len() + parsed_enum_filters.len();
// The scan and aggregation are synchronous work, so they run on the blocking pool.
let result = tokio::task::spawn_blocking(move || {
let start_time = std::time::Instant::now();
let h3_data = &state.h3_cells[resolution];
let num_features = state.data.num_features;
let feature_data = &state.data.feature_data;
let enum_features = &state.data.enum_features;
// Bounding box of the cell (padded by 0.001) narrows the grid scan; rows are then
// matched exactly against the requested cell id below.
let (min_lat, min_lon, max_lat, max_lon) = h3_cell_bounds(cell, 0.001);
// Collect matching rows
let mut matching_rows: Vec<usize> = Vec::new();
state
.grid
.for_each_in_bounds(min_lat, min_lon, max_lat, max_lon, |row_idx| {
let row = row_idx as usize;
if h3_data[row] == cell_u64
&& row_passes_filters(
row,
&parsed_filters,
&parsed_enum_filters,
feature_data,
num_features,
enum_features,
)
{
matching_rows.push(row);
}
});
let total_count = matching_rows.len();
// Build JSON directly via string buffer
let mut output = String::with_capacity(4096);
output.push_str("{\"count\":");
write!(output, "{}", total_count).unwrap();
// Numeric features: compute count, min, max, sum, histogram using global bin edges
output.push_str(",\"numeric_features\":[");
let mut first_numeric = true;
for (feature_index, feature_name) in state.data.feature_names.iter().enumerate() {
let global_stats = &state.data.feature_stats[feature_index];
let histogram_min = global_stats.histogram.min;
let histogram_max = global_stats.histogram.max;
let bin_width = global_stats.histogram.bin_width;
let mut count = 0usize;
let mut min_value = f64::INFINITY;
let mut max_value = f64::NEG_INFINITY;
let mut sum = 0.0f64;
let mut bins = vec![0u64; HISTOGRAM_BINS];
for &row in &matching_rows {
// Feature values are read from a flat buffer at row * num_features + feature_index.
let value = feature_data[row * num_features + feature_index];
// Non-finite values (NaN / infinities) are excluded from every statistic.
if value.is_finite() {
count += 1;
if value < min_value {
min_value = value;
}
if value > max_value {
max_value = value;
}
sum += value;
// Bin into histogram using global edges
if bin_width > 0.0 {
let bin_index =
((value - histogram_min) / bin_width).floor() as isize;
// Values outside the global range are folded into the first / last bin.
let clamped_index = bin_index.max(0).min((HISTOGRAM_BINS - 1) as isize) as usize;
bins[clamped_index] += 1;
}
}
}
// Features with no finite value in this cell are omitted from the response.
if count == 0 {
continue;
}
if !first_numeric {
output.push(',');
}
first_numeric = false;
let mean = sum / count as f64;
output.push_str("{\"name\":");
write_json_string(&mut output, feature_name);
write!(output, ",\"count\":{}", count).unwrap();
write!(output, ",\"min\":{}", format_f64(min_value)).unwrap();
write!(output, ",\"max\":{}", format_f64(max_value)).unwrap();
write!(output, ",\"mean\":{}", format_f64(mean)).unwrap();
output.push_str(",\"histogram\":{\"min\":");
write!(output, "{}", format_f64(histogram_min)).unwrap();
output.push_str(",\"max\":");
write!(output, "{}", format_f64(histogram_max)).unwrap();
output.push_str(",\"bin_width\":");
write!(output, "{}", format_f64(bin_width)).unwrap();
output.push_str(",\"counts\":[");
for (bin_index, &bin_count) in bins.iter().enumerate() {
if bin_index > 0 {
output.push(',');
}
write!(output, "{}", bin_count).unwrap();
}
output.push_str("]}}")
}
// Enum features: count per value
output.push_str("],\"enum_features\":[");
let mut first_enum = true;
for enum_feature in enum_features {
// Resolve the feature's position via the name → index map; unknown names are skipped.
let enum_index = match state.enum_name_to_idx.get(&enum_feature.name) {
Some(&index) => index,
None => continue,
};
let enum_data = &state.data.enum_features[enum_index];
let mut value_counts = vec![0u64; enum_data.values.len()];
for &row in &matching_rows {
let value = enum_data.data[row];
// ENUM_NULL marks a missing value; indices beyond the value table are ignored too.
if value != ENUM_NULL && (value as usize) < value_counts.len() {
value_counts[value as usize] += 1;
}
}
// Only include if there are any non-zero counts
let has_values = value_counts.iter().any(|&count| count > 0);
if !has_values {
continue;
}
if !first_enum {
output.push(',');
}
first_enum = false;
output.push_str("{\"name\":");
write_json_string(&mut output, &enum_feature.name);
output.push_str(",\"counts\":{");
let mut first_value = true;
// Emit an object keyed by value label, leaving out labels that never occurred.
for (value_index, &count) in value_counts.iter().enumerate() {
if count == 0 {
continue;
}
if !first_value {
output.push(',');
}
first_value = false;
write_json_string(&mut output, &enum_data.values[value_index]);
write!(output, ":{}", count).unwrap();
}
output.push_str("}}");
}
output.push_str("]}");
let elapsed = start_time.elapsed();
info!(
h3 = %h3_str,
resolution,
total_count,
filters = num_filters,
filters_raw = filters_str.as_deref().unwrap_or("-"),
ms = format_args!("{:.1}", elapsed.as_secs_f64() * 1000.0),
"GET /api/hexagon-stats"
);
output
})
.await
// A join error (the blocking task panicked or was cancelled) becomes a 500.
.map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error.to_string()))?;
Ok((
[(axum::http::header::CONTENT_TYPE, "application/json")],
result,
))
}
/// Appends `value` to `output` as a double-quoted JSON string literal.
///
/// `"` and `\` are backslash-escaped, `\n`, `\r` and `\t` use their short
/// escapes, and every other control character below U+0020 is written as a
/// `\u00XX` escape. JSON forbids raw control characters inside strings, so
/// without the last rule a stray control byte in a name would make the whole
/// response unparseable.
fn write_json_string(output: &mut String, value: &str) {
    output.push('"');
    for character in value.chars() {
        match character {
            '"' => output.push_str("\\\""),
            '\\' => output.push_str("\\\\"),
            '\n' => output.push_str("\\n"),
            '\r' => output.push_str("\\r"),
            '\t' => output.push_str("\\t"),
            control if control < '\x20' => {
                // Formatting into a `String` cannot fail, so the result is ignored.
                let _ = write!(output, "\\u{:04x}", control as u32);
            }
            other => output.push(other),
        }
    }
    output.push('"');
}
/// Renders `value` as a JSON number token.
///
/// Whole numbers with magnitude below 1e15 are printed with one decimal place
/// (`3.0`); everything else uses the default `f64` formatting. NaN and the
/// infinities have no JSON representation, so they are rendered as `null`
/// rather than `NaN` / `inf`, which would make the response invalid JSON.
fn format_f64(value: f64) -> String {
    if !value.is_finite() {
        return "null".to_string();
    }
    if value.fract() == 0.0 && value.abs() < 1e15 {
        format!("{:.1}", value)
    } else {
        value.to_string()
    }
}

View file

@ -1,4 +1,4 @@
use std::fmt::Write;
use std::fmt::{self, Write};
use std::sync::Arc;
use axum::extract::Query;
@ -8,11 +8,29 @@ use rustc_hash::FxHashMap;
use serde::Deserialize;
use tracing::{info, warn};
use crate::consts::{H3_PRECOMPUTE_MAX, H3_PRECOMPUTE_MIN};
use crate::consts::{
BOUNDS_BUFFER_PERCENT, BOUNDS_QUANTIZATION, ENUM_NULL, H3_PRECOMPUTE_MAX, H3_PRECOMPUTE_MIN,
POSTCODE_MIN_RESOLUTION,
};
use crate::filter::parse_filters;
use crate::state::AppState;
const BOUNDS_BUFFER_PERCENT: f64 = 0.2;
use super::parse::parse_bounds;
/// Byte count that displays itself with a decimal (powers of 1000) unit suffix.
struct HumanBytes(usize);

impl fmt::Display for HumanBytes {
    /// Writes the count as `B`, `KB` or `MB`; the scaled units use one decimal place.
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        const KILOBYTE: usize = 1_000;
        const MEGABYTE: usize = 1_000_000;
        match self.0 {
            size if size >= MEGABYTE => {
                write!(formatter, "{:.1} MB", size as f64 / 1_000_000.0)
            }
            size if size >= KILOBYTE => {
                write!(formatter, "{:.1} KB", size as f64 / 1_000.0)
            }
            size => write!(formatter, "{} B", size),
        }
    }
}
#[derive(Deserialize)]
pub struct HexagonParams {
@ -28,14 +46,28 @@ struct CellAgg {
count: u32,
mins: Vec<f64>,
maxs: Vec<f64>,
/// Min/max ordinal indices for enum features (255 = no data yet)
enum_mins: Vec<u8>,
enum_maxs: Vec<u8>,
/// Most common postcode in this cell (only tracked at high resolutions)
postcode: Option<String>,
postcode_count: u32,
lat_sum: f64,
lon_sum: f64,
}
impl CellAgg {
fn new(num_features: usize) -> Self {
fn new(num_features: usize, num_enums: usize) -> Self {
CellAgg {
count: 0,
mins: vec![f64::INFINITY; num_features],
maxs: vec![f64::NEG_INFINITY; num_features],
enum_mins: vec![ENUM_NULL; num_enums],
enum_maxs: vec![0; num_enums],
postcode: None,
postcode_count: 0,
lat_sum: 0.0,
lon_sum: 0.0,
}
}
@ -47,49 +79,129 @@ impl CellAgg {
self.count += 1;
let base = row * num_features;
let row_slice = &feature_data[base..base + num_features];
for (i, &v) in row_slice.iter().enumerate() {
if v.is_finite() {
if v < self.mins[i] {
self.mins[i] = v;
for (feat_index, &value) in row_slice.iter().enumerate() {
if value.is_finite() {
if value < self.mins[feat_index] {
self.mins[feat_index] = value;
}
if v > self.maxs[i] {
self.maxs[i] = v;
if value > self.maxs[feat_index] {
self.maxs[feat_index] = value;
}
}
}
}
/// Widen this cell's per-enum ordinal range with the values found at `row`.
/// Rows whose value is `ENUM_NULL` leave the range for that feature untouched.
#[inline]
fn add_enums(&mut self, enum_features: &[crate::data::EnumFeatureData], row: usize) {
    for (slot, feature) in enum_features.iter().enumerate() {
        let ordinal = feature.data[row];
        if ordinal == ENUM_NULL {
            continue;
        }
        let lowest = &mut self.enum_mins[slot];
        if *lowest == ENUM_NULL || ordinal < *lowest {
            *lowest = ordinal;
        }
        let highest = &mut self.enum_maxs[slot];
        if ordinal > *highest {
            *highest = ordinal;
        }
    }
}
/// Accumulate the row's coordinates for the centroid and record its postcode.
/// The first non-empty postcode seen is kept; later rows only bump the counter
/// when they carry that same postcode. Empty postcodes still contribute to the
/// coordinate sums.
#[inline]
fn add_postcode(&mut self, postcode: &str, lat: f64, lon: f64) {
    self.lat_sum += lat;
    self.lon_sum += lon;
    if postcode.is_empty() {
        return;
    }
    match self.postcode.as_deref() {
        None => {
            self.postcode = Some(postcode.to_string());
            self.postcode_count = 1;
        }
        Some(current) if current == postcode => self.postcode_count += 1,
        Some(_) => {}
    }
}
}
/// Append `text` to `buf` with JSON string escaping applied.
/// The surrounding double quotes are not written; the caller supplies them.
pub(crate) fn write_json_escaped(buf: &mut String, text: &str) {
    for ch in text.chars() {
        // Characters that have a dedicated two-character escape.
        let shorthand = match ch {
            '"' => Some("\\\""),
            '\\' => Some("\\\\"),
            '\n' => Some("\\n"),
            '\r' => Some("\\r"),
            '\t' => Some("\\t"),
            _ => None,
        };
        if let Some(escape) = shorthand {
            buf.push_str(escape);
        } else if ch < '\x20' {
            // Remaining control characters use the generic \u00XX form.
            let _ = write!(buf, "\\u{:04x}", ch as u32);
        } else {
            buf.push(ch);
        }
    }
}
/// Write the hexagons JSON response directly to a String buffer,
/// avoiding serde_json::Value allocations entirely.
#[allow(clippy::too_many_arguments)]
fn write_hexagons_json(
buf: &mut String,
groups: &FxHashMap<u64, CellAgg>,
min_keys: &[String],
max_keys: &[String],
num_features: usize,
enum_min_keys: &[String],
enum_max_keys: &[String],
num_enums: usize,
include_postcode: bool,
) {
buf.push_str("{\"features\":[");
let mut first = true;
for (&cell_id, agg) in groups {
for (&cell_id, aggregation) in groups {
let Some(cell) = h3o::CellIndex::try_from(cell_id).ok() else {
continue;
};
if !first {
buf.push(',');
}
first = false;
let cell = h3o::CellIndex::try_from(cell_id).unwrap();
write!(buf, "{{\"h3\":\"{}\",\"count\":{}", cell, agg.count).unwrap();
let _ = write!(buf, "{{\"h3\":\"{}\",\"count\":{}", cell, aggregation.count);
for i in 0..num_features {
if agg.mins[i] != f64::INFINITY {
write!(
for feat_index in 0..num_features {
if aggregation.mins[feat_index].is_finite() && aggregation.maxs[feat_index].is_finite() {
let _ = write!(
buf,
",\"{}\":{},\"{}\":{}",
min_keys[i], agg.mins[i], max_keys[i], agg.maxs[i]
)
.unwrap();
min_keys[feat_index], aggregation.mins[feat_index], max_keys[feat_index], aggregation.maxs[feat_index]
);
}
}
for enum_index in 0..num_enums {
if aggregation.enum_mins[enum_index] != ENUM_NULL {
let _ = write!(
buf,
",\"{}\":{},\"{}\":{}",
enum_min_keys[enum_index], aggregation.enum_mins[enum_index],
enum_max_keys[enum_index], aggregation.enum_maxs[enum_index]
);
}
}
if include_postcode {
if let Some(ref postcode) = aggregation.postcode {
let total = aggregation.count as f64;
let centroid_lat = aggregation.lat_sum / total;
let centroid_lon = aggregation.lon_sum / total;
if centroid_lat.is_finite() && centroid_lon.is_finite() {
buf.push_str(",\"postcode\":\"");
write_json_escaped(buf, postcode);
let _ = write!(buf, "\",\"lat\":{},\"lon\":{}", centroid_lat, centroid_lon);
}
}
}
buf.push('}');
}
buf.push_str("]}");
@ -101,7 +213,10 @@ pub async fn get_hexagons(
) -> Result<impl IntoResponse, (StatusCode, String)> {
let resolution = params.resolution;
if resolution < H3_PRECOMPUTE_MIN || resolution > H3_PRECOMPUTE_MAX {
warn!(resolution, "Resolution out of range [{}, {}]", H3_PRECOMPUTE_MIN, H3_PRECOMPUTE_MAX);
warn!(
resolution,
"Resolution out of range [{}, {}]", H3_PRECOMPUTE_MIN, H3_PRECOMPUTE_MAX
);
return Err((
StatusCode::BAD_REQUEST,
format!(
@ -116,25 +231,7 @@ pub async fn get_hexagons(
"bounds parameter is required".into(),
))?;
let parts: Vec<f64> = bounds_str
.split(',')
.map(|s| s.trim().parse::<f64>())
.collect::<Result<Vec<_>, _>>()
.map_err(|_| {
(
StatusCode::BAD_REQUEST,
"Invalid bounds format. Use: south,west,north,east".into(),
)
})?;
if parts.len() != 4 {
return Err((
StatusCode::BAD_REQUEST,
"Invalid bounds format. Use: south,west,north,east".into(),
));
}
let (mut south, mut west, mut north, mut east) = (parts[0], parts[1], parts[2], parts[3]);
let (mut south, mut west, mut north, mut east) = parse_bounds(&bounds_str)?;
let lat_range = north - south;
let lng_range = east - west;
@ -143,11 +240,10 @@ pub async fn get_hexagons(
west -= lng_range * BOUNDS_BUFFER_PERCENT;
east += lng_range * BOUNDS_BUFFER_PERCENT;
let precision = 0.01;
south = (south / precision).floor() * precision;
west = (west / precision).floor() * precision;
north = (north / precision).ceil() * precision;
east = (east / precision).ceil() * precision;
south = (south / BOUNDS_QUANTIZATION).floor() * BOUNDS_QUANTIZATION;
west = (west / BOUNDS_QUANTIZATION).floor() * BOUNDS_QUANTIZATION;
north = (north / BOUNDS_QUANTIZATION).ceil() * BOUNDS_QUANTIZATION;
east = (east / BOUNDS_QUANTIZATION).ceil() * BOUNDS_QUANTIZATION;
let filters_str = params.filters.clone();
let (parsed_filters, parsed_enum_filters) = parse_filters(
@ -157,44 +253,38 @@ pub async fn get_hexagons(
);
let num_filters = parsed_filters.len() + parsed_enum_filters.len();
let json_body = tokio::task::spawn_blocking(move || {
let json_body = tokio::task::spawn_blocking(move || -> Result<String, String> {
let t0 = std::time::Instant::now();
let num_features = state.data.num_features;
let num_enums = state.data.enum_features.len();
let feature_data = &state.data.feature_data;
let min_keys: Vec<String> = state
.data
.feature_names
.iter()
.map(|n| format!("min_{}", n))
.collect();
let max_keys: Vec<String> = state
.data
.feature_names
.iter()
.map(|n| format!("max_{}", n))
.collect();
let min_keys = &state.min_keys;
let max_keys = &state.max_keys;
let enum_min_keys = &state.enum_min_keys;
let enum_max_keys = &state.enum_max_keys;
let h3_cells_for_res: Option<&[u64]> = state
.h3_cells
.get(resolution as usize)
.filter(|v| !v.is_empty())
.map(|v| v.as_slice());
.filter(|cells| !cells.is_empty())
.map(|cells| cells.as_slice());
let mut groups: FxHashMap<u64, CellAgg> = FxHashMap::default();
let enum_features = &state.data.enum_features;
let include_postcode = resolution >= POSTCODE_MIN_RESOLUTION;
// Row-level filter check: numeric must be non-NaN and within [min, max],
// enum must have value index in the allowed set
let row_passes = |row: usize| -> bool {
parsed_filters.iter().all(|f| {
let v = feature_data[row * num_features + f.feat_idx];
v.is_finite() && v >= f.min && v <= f.max
}) && parsed_enum_filters.iter().all(|ef| {
let v = enum_features[ef.enum_idx].data[row];
v != 255 && ef.allowed.contains(&v)
parsed_filters.iter().all(|filter| {
let value = feature_data[row * num_features + filter.feat_idx];
value.is_finite() && value >= filter.min && value <= filter.max
}) && parsed_enum_filters.iter().all(|enum_filter| {
let value = enum_features[enum_filter.enum_idx].data[row];
value != ENUM_NULL && enum_filter.allowed.contains(&value)
})
};
@ -207,13 +297,22 @@ pub async fn get_hexagons(
return;
}
let cell_id = precomputed[row];
groups
let aggregation = groups
.entry(cell_id)
.or_insert_with(|| CellAgg::new(num_features))
.add_row(feature_data, row, num_features);
.or_insert_with(|| CellAgg::new(num_features, num_enums));
aggregation.add_row(feature_data, row, num_features);
aggregation.add_enums(enum_features, row);
if include_postcode {
aggregation.add_postcode(
&state.data.postcode[row],
state.data.lat[row],
state.data.lon[row],
);
}
});
} else {
let h3_res = h3o::Resolution::try_from(resolution).unwrap();
let h3_res = h3o::Resolution::try_from(resolution)
.map_err(|error| format!("Invalid H3 resolution {}: {}", resolution, error))?;
state
.grid
.for_each_in_bounds(south, west, north, east, |row_idx| {
@ -222,19 +321,37 @@ pub async fn get_hexagons(
return;
}
let cell_id = h3o::LatLng::new(state.data.lat[row], state.data.lon[row])
.map(|c| u64::from(c.to_cell(h3_res)))
.map(|coord| u64::from(coord.to_cell(h3_res)))
.unwrap_or(0);
groups
let aggregation = groups
.entry(cell_id)
.or_insert_with(|| CellAgg::new(num_features))
.add_row(feature_data, row, num_features);
.or_insert_with(|| CellAgg::new(num_features, num_enums));
aggregation.add_row(feature_data, row, num_features);
aggregation.add_enums(enum_features, row);
if include_postcode {
aggregation.add_postcode(
&state.data.postcode[row],
state.data.lat[row],
state.data.lon[row],
);
}
});
}
let t_agg = t0.elapsed();
let mut json_buf = String::with_capacity(groups.len() * 128);
write_hexagons_json(&mut json_buf, &groups, &min_keys, &max_keys, num_features);
write_hexagons_json(
&mut json_buf,
&groups,
min_keys,
max_keys,
num_features,
enum_min_keys,
enum_max_keys,
num_enums,
include_postcode,
);
let t_total = t0.elapsed();
info!(
@ -244,14 +361,15 @@ pub async fn get_hexagons(
filters_raw = filters_str.as_deref().unwrap_or("-"),
agg_ms = format_args!("{:.1}", t_agg.as_secs_f64() * 1000.0),
total_ms = format_args!("{:.1}", t_total.as_secs_f64() * 1000.0),
bytes = json_buf.len(),
size = format_args!("{}", HumanBytes(json_buf.len())),
"GET /api/hexagons"
);
json_buf
Ok(json_buf)
})
.await
.unwrap();
.map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error.to_string()))?
.map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error))?;
Ok(([("content-type", "application/json")], json_body))
}

View file

@ -1,9 +1,12 @@
mod features;
mod hexagons;
pub(crate) mod hexagons;
mod hexagon_stats;
pub(crate) mod parse;
mod pois;
mod properties;
pub(crate) mod properties;
pub use features::get_features;
pub use hexagon_stats::get_hexagon_stats;
pub use hexagons::get_hexagons;
pub use pois::{get_poi_categories, get_pois};
pub use properties::get_hexagon_properties;

View file

@ -1,9 +1,38 @@
use axum::http::StatusCode;
/// Axis-aligned lat/lon bounding box around an H3 cell's boundary vertices,
/// expanded on every side by `buffer`.
///
/// Returns `(min_lat, min_lon, max_lat, max_lon)`.
pub fn h3_cell_bounds(cell: h3o::CellIndex, buffer: f64) -> (f64, f64, f64, f64) {
    /// Grow a `(low, high)` interval so that it contains `sample`.
    fn widen(span: &mut (f64, f64), sample: f64) {
        if sample < span.0 {
            span.0 = sample;
        }
        if sample > span.1 {
            span.1 = sample;
        }
    }

    let mut lat_span = (f64::INFINITY, f64::NEG_INFINITY);
    let mut lon_span = (f64::INFINITY, f64::NEG_INFINITY);
    let outline = cell.boundary();
    for corner in outline.iter() {
        widen(&mut lat_span, corner.lat());
        widen(&mut lon_span, corner.lng());
    }
    (
        lat_span.0 - buffer,
        lon_span.0 - buffer,
        lat_span.1 + buffer,
        lon_span.1 + buffer,
    )
}
pub fn parse_bounds(bounds_str: &str) -> Result<(f64, f64, f64, f64), (StatusCode, String)> {
let parts: Vec<f64> = bounds_str
.split(',')
.map(|s| s.trim().parse::<f64>())
.map(|part| part.trim().parse::<f64>())
.collect::<Result<Vec<_>, _>>()
.map_err(|_| {
(

View file

@ -39,37 +39,56 @@ pub async fn get_pois(
let category_filter: Option<rustc_hash::FxHashSet<String>> = params
.categories
.as_deref()
.filter(|s| !s.is_empty())
.map(|s| s.split(',').map(|c| c.trim().to_string()).collect());
.filter(|text| !text.is_empty())
.map(|text| text.split(',').map(|part| part.trim().to_string()).collect());
let num_categories = category_filter.as_ref().map(|c| c.len()).unwrap_or(0);
let num_categories = category_filter.as_ref().map(|cats| cats.len()).unwrap_or(0);
let result = tokio::task::spawn_blocking(move || {
let t0 = std::time::Instant::now();
let row_indices = state.poi_grid.query(south, west, north, east);
let pois: Vec<POI> = row_indices
// Collect matching row indices first, then sample randomly so the
// subset covers the viewport uniformly instead of clustering in one area.
let mut matching_rows: Vec<usize> = row_indices
.iter()
.filter_map(|&row_idx| {
let row = row_idx as usize;
if let Some(ref categories) = category_filter {
if !categories.contains(&state.poi_data.category[row]) {
return None;
}
}
Some(POI {
id: state.poi_data.id[row].clone(),
name: state.poi_data.name[row].clone(),
category: state.poi_data.category[row].clone(),
group: state.poi_data.group[row].clone(),
lat: state.poi_data.lat[row],
lng: state.poi_data.lng[row],
emoji: state.poi_data.emoji[row].clone(),
})
Some(row)
})
.collect();
if matching_rows.len() > MAX_POIS_PER_REQUEST {
// Use a power-of-2 sampling step so each POI's inclusion depends
// only on its own priority hash, not on what other POIs are in
// the viewport. This prevents visible reshuffling when panning.
let ratio = (matching_rows.len() / MAX_POIS_PER_REQUEST) as u32;
let step = ratio.next_power_of_two();
let mask = step - 1;
matching_rows.retain(|&row| state.poi_data.priority[row] & mask == 0);
// Statistical noise may leave us slightly over the limit
if matching_rows.len() > MAX_POIS_PER_REQUEST {
matching_rows.sort_unstable_by_key(|&row| state.poi_data.priority[row]);
matching_rows.truncate(MAX_POIS_PER_REQUEST);
}
}
let pois: Vec<POI> = matching_rows
.iter()
.map(|&row| POI {
id: state.poi_data.id[row].clone(),
name: state.poi_data.name[row].clone(),
category: state.poi_data.category[row].clone(),
group: state.poi_data.group[row].clone(),
lat: state.poi_data.lat[row],
lng: state.poi_data.lng[row],
emoji: state.poi_data.emoji[row].clone(),
})
.take(MAX_POIS_PER_REQUEST)
.collect();
let elapsed = t0.elapsed();
@ -85,7 +104,7 @@ pub async fn get_pois(
POIsResponse { pois }
})
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
.map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error.to_string()))?;
Ok(Json(result))
}
@ -98,7 +117,7 @@ pub struct POICategoriesResponse {
pub async fn get_poi_categories(state: Arc<AppState>) -> Json<POICategoriesResponse> {
let groups: Vec<POICategoryGroup> = state.poi_category_groups.clone();
let total: usize = groups.iter().map(|g| g.categories.len()).sum();
let total: usize = groups.iter().map(|group| group.categories.len()).sum();
info!(
count = total,
groups = groups.len(),

View file

@ -8,9 +8,13 @@ use rustc_hash::FxHashMap;
use serde::{Deserialize, Serialize};
use tracing::{info, warn};
use crate::consts::{DEFAULT_PROPERTIES_LIMIT, ENUM_NULL, MAX_PROPERTIES_LIMIT};
use crate::data::EnumFeatureData;
use crate::filter::{parse_filters, row_passes_filters};
use crate::state::AppState;
use super::parse::h3_cell_bounds;
#[derive(Deserialize)]
pub struct HexagonPropertiesParams {
pub h3: String,
@ -35,6 +39,8 @@ pub struct Property {
pub lat: f64,
pub lon: f64,
pub is_construction_date_approximate: Option<bool>,
#[serde(flatten)]
pub features: FxHashMap<String, f64>,
}
@ -48,20 +54,51 @@ pub struct HexagonPropertiesResponse {
pub truncated: bool,
}
/// Trims surrounding whitespace from `text` and returns the remainder as an
/// owned `String`.
///
/// Returns `None` when `text` is empty or consists solely of whitespace, so
/// callers can treat blank values as absent.
fn non_empty_string(text: &str) -> Option<String> {
    match text.trim() {
        "" => None,
        content => Some(content.to_string()),
    }
}
/// Resolves the enum value stored for `row`, trying each candidate feature
/// name in `names` in order and returning the first one that yields a value.
///
/// A candidate is skipped when:
/// - its name has no entry in `enum_idx`,
/// - the encoded entry for `row` equals `ENUM_NULL`, or
/// - the encoded entry has no corresponding element in the feature's `values`.
///
/// Returns `None` when every candidate is skipped.
///
/// # Panics
///
/// Panics if an index taken from `enum_idx` is out of range for
/// `enum_features`, or if `row` is out of range for a feature's `data`
/// (both are accessed with plain indexing).
fn lookup_enum_value(
    enum_features: &[EnumFeatureData],
    enum_idx: &FxHashMap<String, usize>,
    row: usize,
    names: &[&str],
) -> Option<String> {
    names.iter().find_map(|name| {
        let &position = enum_idx.get(*name)?;
        let feature = &enum_features[position];
        let encoded = feature.data[row];
        if encoded == ENUM_NULL {
            // Null marker for this row: fall through to the next candidate name.
            return None;
        }
        feature.values.get(encoded as usize).cloned()
    })
}
pub async fn get_hexagon_properties(
state: Arc<AppState>,
Query(params): Query<HexagonPropertiesParams>,
) -> Result<Json<HexagonPropertiesResponse>, (StatusCode, String)> {
let cell = h3o::CellIndex::from_str(&params.h3)
.map_err(|e| {
warn!(h3 = %params.h3, error = %e, "Invalid H3 cell index");
(StatusCode::BAD_REQUEST, format!("Invalid H3 cell: {}", e))
})?;
let cell = h3o::CellIndex::from_str(&params.h3).map_err(|error| {
warn!(h3 = %params.h3, error = %error, "Invalid H3 cell index");
(StatusCode::BAD_REQUEST, format!("Invalid H3 cell: {}", error))
})?;
let cell_u64: u64 = cell.into();
let resolution = params.resolution as usize;
if resolution >= state.h3_cells.len() || state.h3_cells[resolution].is_empty() {
warn!(resolution, "Invalid or non-precomputed resolution for hexagon-properties");
warn!(
resolution,
"Invalid or non-precomputed resolution for hexagon-properties"
);
return Err((
StatusCode::BAD_REQUEST,
"Invalid or non-precomputed resolution".to_string(),
@ -84,31 +121,29 @@ pub async fn get_hexagon_properties(
let feature_data = &state.data.feature_data;
let enum_features = &state.data.enum_features;
let matching_rows: Vec<usize> = h3_data
.iter()
.enumerate()
.filter_map(|(idx, &h3_cell)| {
if h3_cell == cell_u64 {
if row_passes_filters(
idx,
let (min_lat, min_lon, max_lat, max_lon) = h3_cell_bounds(cell, 0.001);
let mut matching_rows: Vec<usize> = Vec::new();
state
.grid
.for_each_in_bounds(min_lat, min_lon, max_lat, max_lon, |row_idx| {
let row = row_idx as usize;
if h3_data[row] == cell_u64
&& row_passes_filters(
row,
&parsed_filters,
&parsed_enum_filters,
feature_data,
num_features,
enum_features,
) {
Some(idx)
} else {
None
}
} else {
None
)
{
matching_rows.push(row);
}
})
.collect();
});
let total = matching_rows.len();
let limit = params.limit.unwrap_or(100).min(500);
let limit = params.limit.unwrap_or(DEFAULT_PROPERTIES_LIMIT).min(MAX_PROPERTIES_LIMIT);
let offset = params.offset.unwrap_or(0);
let truncated = total > offset + limit;
@ -120,49 +155,46 @@ pub async fn get_hexagon_properties(
let mut features = FxHashMap::default();
let base = row * num_features;
for (feat_idx, feat_name) in state.data.feature_names.iter().enumerate() {
let v = feature_data[base + feat_idx];
if v.is_finite() {
features.insert(feat_name.clone(), v);
let value = feature_data[base + feat_idx];
if value.is_finite() {
features.insert(feat_name.clone(), value);
}
}
let get_string = |s: &str| -> Option<String> {
let trimmed = s.trim();
if trimmed.is_empty() {
None
} else {
Some(trimmed.to_string())
}
};
let get_enum_value = |names: &[&str]| -> Option<String> {
for name in names {
if let Some(val) = enum_features.iter().find_map(|ef| {
if ef.name == *name {
let idx = ef.data[row];
if idx == 255 {
None
} else {
ef.values.get(idx as usize).cloned()
}
} else {
None
}
}) {
return Some(val);
}
}
None
};
Property {
address: get_string(&state.data.address[row]),
postcode: get_string(&state.data.postcode[row]),
property_type: get_enum_value(&["Property type", "epc_property_type", "pp_property_type"]),
built_form: get_enum_value(&["Property type/built form", "built_form"]),
duration: get_enum_value(&["Leashold/Freehold", "duration"]),
current_energy_rating: get_enum_value(&["Current energy rating", "current_energy_rating"]),
potential_energy_rating: get_enum_value(&["Potential energy rating", "potential_energy_rating"]),
address: non_empty_string(&state.data.address[row]),
postcode: non_empty_string(&state.data.postcode[row]),
is_construction_date_approximate: Some(state.data.is_approx_build_date[row]),
property_type: lookup_enum_value(
enum_features,
&state.enum_name_to_idx,
row,
&["Property type", "epc_property_type", "pp_property_type"],
),
built_form: lookup_enum_value(
enum_features,
&state.enum_name_to_idx,
row,
&["Property type/built form", "built_form"],
),
duration: lookup_enum_value(
enum_features,
&state.enum_name_to_idx,
row,
&["Leashold/Freehold", "duration"],
),
current_energy_rating: lookup_enum_value(
enum_features,
&state.enum_name_to_idx,
row,
&["Current energy rating", "current_energy_rating"],
),
potential_energy_rating: lookup_enum_value(
enum_features,
&state.enum_name_to_idx,
row,
&["Potential energy rating", "potential_energy_rating"],
),
lat: state.data.lat[row],
lon: state.data.lon[row],
features,
@ -192,7 +224,7 @@ pub async fn get_hexagon_properties(
}
})
.await
.unwrap();
.map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error.to_string()))?;
Ok(Json(result))
}

View file

@ -1,5 +1,14 @@
use rustc_hash::FxHashMap;
use serde::Serialize;
use crate::data::{POIData, PropertyData};
use crate::index::GridIndex;
use crate::grid_index::GridIndex;
/// A named group of POI categories.
///
/// Derives `Serialize` so it can be written out with serde, and `Clone` so
/// owned copies can be made of it.
#[derive(Serialize, Clone)]
pub struct POICategoryGroup {
/// Name of the group.
pub name: String,
/// Categories in this group (presumably category names — confirm against
/// where the groups are built).
pub categories: Vec<String>,
}
pub struct AppState {
pub data: PropertyData,
@ -9,4 +18,16 @@ pub struct AppState {
pub h3_cells: Vec<Vec<u64>>,
pub poi_data: POIData,
pub poi_grid: GridIndex,
/// Precomputed JSON key names: "min_{feature_name}" for each numeric feature
pub min_keys: Vec<String>,
/// Precomputed JSON key names: "max_{feature_name}" for each numeric feature
pub max_keys: Vec<String>,
/// Precomputed JSON key names: "min_{enum_name}" for each enum feature
pub enum_min_keys: Vec<String>,
/// Precomputed JSON key names: "max_{enum_name}" for each enum feature
pub enum_max_keys: Vec<String>,
/// Precomputed POI category groups (sorted)
pub poi_category_groups: Vec<POICategoryGroup>,
/// Precomputed map from enum feature name to index in data.enum_features
pub enum_name_to_idx: FxHashMap<String, usize>,
}

View file

@ -159,8 +159,6 @@ mod filter_tests {
#[cfg(test)]
mod json_tests {
use std::fmt::Write;
#[test]
fn json_escaped_postcode_with_quotes_is_valid() {
use crate::routes::hexagons::write_json_escaped;
@ -199,6 +197,7 @@ mod json_tests {
#[test]
fn nan_is_not_valid_json() {
use std::fmt::Write;
// Verify that raw NaN in write! is still invalid JSON (documenting the risk
// that the is_finite() guard in write_hexagons_json protects against).
let mut buf = String::new();
@ -210,6 +209,7 @@ mod json_tests {
#[test]
fn infinity_is_not_valid_json() {
use std::fmt::Write;
let mut buf = String::new();
write!(buf, "{{\"min_price\":{}}}", f64::INFINITY).unwrap();
@ -225,7 +225,7 @@ mod enum_encoding_tests {
// Documents the underlying u8 wrapping behavior that the truncation
// guard in property.rs now prevents.
let num_values = 300usize;
let indices: Vec<u8> = (0..num_values).map(|i| i as u8).collect();
let indices: Vec<u8> = (0..num_values).map(|index| index as u8).collect();
assert_eq!(indices[0], indices[256], "u8 wraps: 0 == 256");
assert_eq!(indices[1], indices[257], "u8 wraps: 1 == 257");
@ -235,7 +235,7 @@ mod enum_encoding_tests {
let value_to_idx: HashMap<&str, u8> = values
.iter()
.enumerate()
.map(|(i, v)| (v.as_str(), i as u8))
.map(|(index, value)| (value.as_str(), index as u8))
.collect();
let unique_indices: std::collections::HashSet<u8> =