This commit is contained in:
Andras Schmelczer 2026-05-06 22:40:46 +01:00
parent 28323f145e
commit 94f9c0d594
76 changed files with 3238 additions and 1230 deletions

View file

@ -19,6 +19,7 @@ use crate::parsing::{
use crate::state::SharedState;
use super::stats;
use super::travel_time::{load_travel_data, parse_optional_travel, row_passes_travel_filters};
#[derive(Serialize)]
pub struct HistogramStats {
@ -76,6 +77,9 @@ pub struct HexagonStatsParams {
/// shortest travel time for this mode+slug (so it has journey data).
pub journey_mode: Option<String>,
pub journey_slug: Option<String>,
/// Pipe-separated travel time entries: `mode:slug|mode:slug:min:max`.
/// Optional min:max applies as a filter (exclude properties outside range).
pub travel: Option<String>,
/// Share-link code; grants bbox-scoped access for unlicensed users.
pub share: Option<String>,
}
@ -118,6 +122,9 @@ pub async fn get_hexagon_stats(
let (fields_specified, field_set) = parse_field_set(params.fields.as_deref());
let travel_entries = parse_optional_travel(params.travel.as_deref())
.map_err(|err| (StatusCode::BAD_REQUEST, err).into_response())?;
// Load travel time data for central_postcode selection (if requested)
let journey_travel_data = match (&params.journey_mode, &params.journey_slug) {
(Some(mode), Some(slug)) if state.travel_time_store.has_destination(mode, slug) => {
@ -134,6 +141,8 @@ pub async fn get_hexagon_stats(
let need_parent = needs_parent(resolution);
let num_features = state.data.num_features;
let feature_data = &state.data.feature_data;
let travel_data = load_travel_data(&state.travel_time_store, &travel_entries)?;
let has_travel = !travel_entries.is_empty();
let (min_lat, min_lon, max_lat, max_lon) = h3_cell_bounds(cell, 0.001);
@ -153,6 +162,12 @@ pub async fn get_hexagon_stats(
num_features,
)
{
if has_travel {
let postcode = state.data.postcode(row);
if !row_passes_travel_filters(postcode, &travel_entries, &travel_data) {
return;
}
}
matching_rows.push(row);
}
});
@ -235,6 +250,7 @@ pub async fn get_hexagon_stats(
total_count,
filters = num_filters,
filters_raw = filters_str.as_deref().unwrap_or("-"),
travel_entries = travel_entries.len(),
ms = format_args!("{:.1}", elapsed.as_secs_f64() * 1000.0),
"GET /api/hexagon-stats"
);

View file

@ -3,24 +3,21 @@ use std::sync::Arc;
use axum::extract::{Query, State};
use axum::http::StatusCode;
use axum::response::Json;
use rustc_hash::FxHashMap;
use serde::{Deserialize, Serialize};
use tracing::info;
use crate::consts::MAX_POIS_PER_REQUEST;
use crate::data::{POICategoryGroup, POIData};
use crate::data::POICategoryGroup;
use crate::parsing::require_bounds;
use crate::state::SharedState;
const TUBE_STATION_CATEGORY: &str = "Tube station";
const TUBE_STATION_MERGE_RADIUS_DEGREES: f32 = 0.01;
#[derive(Serialize)]
#[allow(clippy::upper_case_acronyms)]
pub struct POI {
id: String,
name: String,
category: String,
icon_category: String,
group: String,
lat: f32,
lng: f32,
@ -39,167 +36,6 @@ pub struct POIParams {
categories: Option<String>,
}
/// A POI chosen for the response. Either a single source row, or several
/// "Tube station" rows that were folded together by `merge_tube_station`.
struct SelectedPOIRow {
/// Index into `POIData` of the row this entry was created from. Used as the
/// fallback source for the id and name when no override is set.
row: usize,
/// Id to report instead of the id of `row`. Populated at construction when
/// identity overriding is requested, and replaced during a merge when a
/// better-scoring name is found.
id_override: Option<String>,
/// Name to report instead of the name of `row`; maintained together with
/// `id_override`.
name_override: Option<String>,
/// Reported latitude: the row's own latitude, or the mean over merged rows.
lat: f32,
/// Reported longitude: the row's own longitude, or the mean over merged rows.
lng: f32,
/// Running sum of the latitudes of all rows folded into this entry.
lat_sum: f32,
/// Running sum of the longitudes of all rows folded into this entry.
lng_sum: f32,
/// Number of rows folded into this entry (starts at 1).
count: u32,
/// Smallest `priority` value among the rows folded into this entry.
priority: u32,
}
impl SelectedPOIRow {
    /// Creates an entry that represents exactly one POI row.
    ///
    /// With `override_identity` set, the row's id and name are copied into the
    /// override slots immediately; otherwise both slots stay `None` and the
    /// values are read from `data` on demand by `id` / `name`.
    fn new(data: &POIData, row: usize, override_identity: bool) -> Self {
        let (id_override, name_override) = if override_identity {
            (Some(data.id(row).to_string()), Some(data.name[row].clone()))
        } else {
            (None, None)
        };
        let (lat, lng) = (data.lat[row], data.lng[row]);

        Self {
            row,
            id_override,
            name_override,
            lat,
            lng,
            lat_sum: lat,
            lng_sum: lng,
            count: 1,
            priority: data.priority[row],
        }
    }

    /// Folds another row into this entry.
    ///
    /// The reported position becomes the mean of every folded row, the
    /// priority becomes the smallest one seen, and the id/name switch to the
    /// new row when its name scores strictly lower in
    /// `tube_station_name_score`.
    fn merge_tube_station(&mut self, data: &POIData, row: usize) {
        self.lat_sum += data.lat[row];
        self.lng_sum += data.lng[row];
        self.count += 1;

        let folded = self.count as f32;
        self.lat = self.lat_sum / folded;
        self.lng = self.lng_sum / folded;
        self.priority = self.priority.min(data.priority[row]);

        let fallback_name = data.name[self.row].as_str();
        let incumbent = self.name_override.as_deref().unwrap_or(fallback_name);
        let challenger = &data.name[row];

        // Decide first, mutate afterwards, so the borrow of the current name
        // has ended before the override slots are written.
        let challenger_wins =
            tube_station_name_score(challenger) < tube_station_name_score(incumbent);
        if challenger_wins {
            self.id_override = Some(data.id(row).to_string());
            self.name_override = Some(challenger.clone());
        }
    }

    /// Returns the id to report: the override if present, else the id of `row`.
    fn id(&self, data: &POIData) -> String {
        match &self.id_override {
            Some(id) => id.clone(),
            None => data.id(self.row).to_string(),
        }
    }

    /// Returns the name to report: the override if present, else the name of `row`.
    fn name(&self, data: &POIData) -> String {
        match &self.name_override {
            Some(name) => name.clone(),
            None => data.name[self.row].clone(),
        }
    }
}
/// Converts matching POI rows into `SelectedPOIRow`s, collapsing rows of the
/// "Tube station" category that share a canonical name and lie within the
/// merge radius of an already selected station.
///
/// Rows of any other category, and tube rows whose canonical name is empty,
/// are passed through one-to-one. Output order follows first appearance.
fn dedupe_tube_stations(data: &POIData, rows: Vec<usize>) -> Vec<SelectedPOIRow> {
    let mut merged: Vec<SelectedPOIRow> = Vec::with_capacity(rows.len());
    // Canonical station name -> positions in `merged` of stations with that name.
    // Several positions can exist because equally named stations that are far
    // apart are kept separate.
    let mut slots_by_name: FxHashMap<String, Vec<usize>> = FxHashMap::default();

    for row in rows {
        // An empty key means "do not attempt to merge this row".
        let key = if data.category.get(row) == TUBE_STATION_CATEGORY {
            canonical_tube_station_name(&data.name[row])
        } else {
            String::new()
        };
        if key.is_empty() {
            merged.push(SelectedPOIRow::new(data, row, false));
            continue;
        }

        let (lat, lng) = (data.lat[row], data.lng[row]);
        let nearby = slots_by_name.get(&key).and_then(|slots| {
            slots
                .iter()
                .copied()
                .find(|&slot| same_tube_station_area(&merged[slot], lat, lng))
        });

        match nearby {
            Some(slot) => merged[slot].merge_tube_station(data, row),
            None => {
                slots_by_name.entry(key).or_default().push(merged.len());
                merged.push(SelectedPOIRow::new(data, row, true));
            }
        }
    }

    merged
}
/// Reduces a station name to a lowercase key used to group spellings of the
/// same station.
///
/// Normalisation steps:
/// - text inside parentheses is discarded (the parentheses act as word breaks);
/// - apostrophes (straight `'`, typographic U+2019, and backtick) are removed
///   without leaving a gap, so "Earl's" becomes "earls";
/// - `&` is spelled out as the word "and";
/// - ASCII letters and digits are lowercased, every other character is
///   treated as a word separator;
/// - trailing transport suffixes ("underground station", "tram stop", ...)
///   are stripped repeatedly.
///
/// Returns the remaining words joined by single spaces. The result is empty
/// when nothing but suffixes or separators remain.
fn canonical_tube_station_name(name: &str) -> String {
    // Multi-word suffixes come first so they are removed as a unit.
    const SUFFIXES: &[&[&str]] = &[
        &["underground", "station"],
        &["tube", "station"],
        &["dlr", "station"],
        &["metro", "station"],
        &["tram", "stop"],
        &["rail", "station"],
        &["railway", "station"],
        &["station"],
        &["stop"],
    ];

    let mut normalized = String::with_capacity(name.len());
    let mut paren_depth = 0u32;
    for ch in name.chars() {
        match ch {
            '(' => {
                paren_depth += 1;
                normalized.push(' ');
            }
            ')' => {
                // Saturate so a stray ')' cannot underflow the depth counter.
                paren_depth = paren_depth.saturating_sub(1);
                normalized.push(' ');
            }
            _ if paren_depth > 0 => {}
            // The typographic apostrophe is written as an escape so it cannot
            // be lost to an encoding mishap (an empty `''` literal is invalid).
            '\'' | '\u{2019}' | '`' => {}
            '&' => normalized.push_str(" and "),
            _ if ch.is_ascii_alphanumeric() => normalized.push(ch.to_ascii_lowercase()),
            _ => normalized.push(' '),
        }
    }

    let mut words: Vec<&str> = normalized.split_whitespace().collect();
    loop {
        let Some(suffix) = SUFFIXES.iter().find(|suffix| words.ends_with(suffix)) else {
            break;
        };
        words.truncate(words.len() - suffix.len());
    }
    words.join(" ")
}
/// Reports whether the point (`lat`, `lng`) lies within
/// `TUBE_STATION_MERGE_RADIUS_DEGREES` of `station`'s current position.
///
/// The longitude difference is scaled by the cosine of the station's latitude
/// before the squared distance is compared with the squared radius.
fn same_tube_station_area(station: &SelectedPOIRow, lat: f32, lng: f32) -> bool {
    let lat_delta = station.lat - lat;
    let lng_scale = station.lat.to_radians().cos();
    let lng_delta = (station.lng - lng) * lng_scale;

    let distance_sq = lat_delta * lat_delta + lng_delta * lng_delta;
    distance_sq <= TUBE_STATION_MERGE_RADIUS_DEGREES.powi(2)
}
/// Ranks a station display name; lower tuples are preferred.
///
/// The first component is 1 when the name (compared case-insensitively for
/// ASCII) ends in one of the transport suffixes and 0 otherwise. The second
/// component is the name's length in bytes, so shorter names win ties.
fn tube_station_name_score(name: &str) -> (u8, usize) {
    const TRANSPORT_SUFFIXES: [&str; 7] = [
        " underground station",
        " tube station",
        " dlr station",
        " metro station",
        " tram stop",
        " station",
        " stop",
    ];

    let lowered = name.to_ascii_lowercase();
    let has_suffix = TRANSPORT_SUFFIXES
        .iter()
        .any(|&suffix| lowered.ends_with(suffix));

    (u8::from(has_suffix), name.len())
}
pub async fn get_pois(
State(shared): State<Arc<SharedState>>,
Query(params): Query<POIParams>,
@ -246,32 +82,30 @@ pub async fn get_pois(
})
.collect();
let mut matching_pois = dedupe_tube_stations(&state.poi_data, matching_rows);
let mut matching_pois = matching_rows;
if matching_pois.len() > MAX_POIS_PER_REQUEST {
let ratio = (matching_pois.len() / MAX_POIS_PER_REQUEST) as u32;
let step = ratio.next_power_of_two();
let mask = step - 1;
matching_pois.retain(|poi| poi.priority & mask == 0);
matching_pois.retain(|&row| state.poi_data.priority[row] & mask == 0);
if matching_pois.len() > MAX_POIS_PER_REQUEST {
matching_pois.sort_unstable_by_key(|poi| poi.priority);
matching_pois.sort_unstable_by_key(|&row| state.poi_data.priority[row]);
matching_pois.truncate(MAX_POIS_PER_REQUEST);
}
}
let pois: Vec<POI> = matching_pois
.iter()
.map(|poi| {
let row = poi.row;
POI {
id: poi.id(&state.poi_data),
name: poi.name(&state.poi_data),
category: state.poi_data.category.get(row).to_string(),
group: state.poi_data.group.get(row).to_string(),
lat: poi.lat,
lng: poi.lng,
emoji: state.poi_data.emoji.get(row).to_string(),
}
.map(|&row| POI {
id: state.poi_data.id(row).to_string(),
name: state.poi_data.name[row].clone(),
category: state.poi_data.category.get(row).to_string(),
icon_category: state.poi_data.icon_category.get(row).to_string(),
group: state.poi_data.group.get(row).to_string(),
lat: state.poi_data.lat[row],
lng: state.poi_data.lng[row],
emoji: state.poi_data.emoji.get(row).to_string(),
})
.collect();
@ -313,53 +147,3 @@ pub async fn get_poi_categories(
Json(POICategoriesResponse { groups })
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a single-row station fixture located at the given coordinates.
    fn station_at(lat: f32, lng: f32) -> SelectedPOIRow {
        SelectedPOIRow {
            row: 0,
            id_override: None,
            name_override: None,
            lat,
            lng,
            lat_sum: lat,
            lng_sum: lng,
            count: 1,
            priority: 0,
        }
    }

    #[test]
    fn canonical_tube_station_name_strips_transport_suffixes() {
        // (raw display name, expected canonical key)
        let cases = [
            ("Bank", "bank"),
            ("Bank Underground Station", "bank"),
            ("Bank DLR Station", "bank"),
            ("Pleasure Beach (Blackpool Tramway)", "pleasure beach"),
            ("Earl's Court Tube Station", "earls court"),
        ];

        for &(raw, expected) in &cases {
            assert_eq!(canonical_tube_station_name(raw), expected);
        }
    }

    #[test]
    fn same_tube_station_area_keeps_distant_names_separate() {
        let station = station_at(51.5130, -0.0889);

        // A point a few ten-thousandths of a degree away is the same area ...
        assert!(same_tube_station_area(&station, 51.5132, -0.0885));
        // ... while one several degrees away is not.
        assert!(!same_tube_station_area(&station, 55.0140, -1.6781));
    }

    #[test]
    fn tube_station_name_score_prefers_plain_station_names() {
        // (preferred name, less preferred name)
        let pairs = [
            ("Bank", "Bank DLR Station"),
            ("Acton Town", "Acton Town Underground Station"),
        ];

        for &(plain, suffixed) in &pairs {
            assert!(tube_station_name_score(plain) < tube_station_name_score(suffixed));
        }
    }
}

View file

@ -15,11 +15,15 @@ use crate::state::SharedState;
use crate::utils::normalize_postcode;
use super::properties::{HexagonPropertiesResponse, Property};
use super::travel_time::{load_travel_data, parse_optional_travel, row_passes_travel_filters};
#[derive(Deserialize)]
pub struct PostcodePropertiesParams {
pub postcode: String,
pub filters: Option<String>,
/// Pipe-separated travel time entries: `mode:slug|mode:slug:min:max`.
/// Optional min:max applies as a filter (exclude properties outside range).
pub travel: Option<String>,
pub limit: Option<usize>,
pub offset: Option<usize>,
/// Exact address to rank first when opening properties from address search.
@ -67,6 +71,8 @@ pub async fn get_postcode_properties(
.map_err(|err| (StatusCode::BAD_REQUEST, err).into_response())?;
let num_filters = parsed_filters.len() + parsed_enum_filters.len();
let filters_str = params.filters;
let travel_entries = parse_optional_travel(params.travel.as_deref())
.map_err(|err| (StatusCode::BAD_REQUEST, err).into_response())?;
let postcode_str = normalized;
let focus_address = params
@ -83,6 +89,8 @@ pub async fn get_postcode_properties(
let feature_names = &state.data.feature_names;
let feature_name_to_index = &state.feature_name_to_index;
let enum_values = &state.data.enum_values;
let travel_data = load_travel_data(&state.travel_time_store, &travel_entries)?;
let has_travel = !travel_entries.is_empty();
let offset_deg: f64 = POSTCODE_SEARCH_OFFSET;
let min_lat = centroid_lat as f64 - offset_deg;
@ -104,6 +112,15 @@ pub async fn get_postcode_properties(
num_features,
)
{
if has_travel
&& !row_passes_travel_filters(
state.data.postcode(row),
&travel_entries,
&travel_data,
)
{
return;
}
matching_rows.push(row);
}
});
@ -154,6 +171,7 @@ pub async fn get_postcode_properties(
offset = page_offset,
filters = num_filters,
filters_raw = filters_str.as_deref().unwrap_or("-"),
travel_entries = travel_entries.len(),
ms = format_args!("{:.1}", elapsed.as_secs_f64() * 1000.0),
"GET /api/postcode-properties"
);

View file

@ -16,6 +16,7 @@ use crate::utils::normalize_postcode;
use super::hexagon_stats::HexagonStatsResponse;
use super::stats;
use super::travel_time::{load_travel_data, parse_optional_travel, row_passes_travel_filters};
#[derive(Deserialize)]
pub struct PostcodeStatsParams {
@ -24,6 +25,9 @@ pub struct PostcodeStatsParams {
/// Comma-separated feature names to include in stats response.
/// Only listed features are computed; if absent or empty, no features are returned.
pub fields: Option<String>,
/// Pipe-separated travel time entries: `mode:slug|mode:slug:min:max`.
/// Optional min:max applies as a filter (exclude properties outside range).
pub travel: Option<String>,
/// Share-link code; grants bbox-scoped access for unlicensed users.
pub share: Option<String>,
}
@ -71,6 +75,8 @@ pub async fn get_postcode_stats(
let filters_str = params.filters;
let (fields_specified, field_set) = parse_field_set(params.fields.as_deref());
let travel_entries = parse_optional_travel(params.travel.as_deref())
.map_err(|err| (StatusCode::BAD_REQUEST, err).into_response())?;
let postcode_str = normalized;
@ -78,6 +84,8 @@ pub async fn get_postcode_stats(
let start_time = std::time::Instant::now();
let num_features = state.data.num_features;
let feature_data = &state.data.feature_data;
let travel_data = load_travel_data(&state.travel_time_store, &travel_entries)?;
let has_travel = !travel_entries.is_empty();
// Search around centroid (generous for a postcode)
let offset: f64 = POSTCODE_SEARCH_OFFSET;
@ -101,6 +109,11 @@ pub async fn get_postcode_stats(
num_features,
)
{
if has_travel
&& !row_passes_travel_filters(row_postcode, &travel_entries, &travel_data)
{
return;
}
matching_rows.push(row);
}
});
@ -126,6 +139,7 @@ pub async fn get_postcode_stats(
total_count,
filters = num_filters,
filters_raw = filters_str.as_deref().unwrap_or("-"),
travel_entries = travel_entries.len(),
ms = format_args!("{:.1}", elapsed.as_secs_f64() * 1000.0),
"GET /api/postcode-stats"
);

View file

@ -19,11 +19,16 @@ use crate::parsing::{
};
use crate::state::{AppState, SharedState};
use super::travel_time::{load_travel_data, parse_optional_travel, row_passes_travel_filters};
#[derive(Deserialize)]
pub struct HexagonPropertiesParams {
pub h3: String,
pub resolution: u8,
pub filters: Option<String>,
/// Pipe-separated travel time entries: `mode:slug|mode:slug:min:max`.
/// Optional min:max applies as a filter (exclude properties outside range).
pub travel: Option<String>,
pub limit: Option<usize>,
pub offset: Option<usize>,
/// Share-link code; grants bbox-scoped access for unlicensed users.
@ -203,6 +208,8 @@ pub async fn get_hexagon_properties(
.map_err(|err| (StatusCode::BAD_REQUEST, err).into_response())?;
let num_filters = parsed_filters.len() + parsed_enum_filters.len();
let filters_str = params.filters;
let travel_entries = parse_optional_travel(params.travel.as_deref())
.map_err(|err| (StatusCode::BAD_REQUEST, err).into_response())?;
let result = tokio::task::spawn_blocking(move || {
let t0 = std::time::Instant::now();
@ -215,6 +222,8 @@ pub async fn get_hexagon_properties(
let feature_names = &state.data.feature_names;
let feature_name_to_index = &state.feature_name_to_index;
let enum_values = &state.data.enum_values;
let travel_data = load_travel_data(&state.travel_time_store, &travel_entries)?;
let has_travel = !travel_entries.is_empty();
let (min_lat, min_lon, max_lat, max_lon) = h3_cell_bounds(cell, 0.001);
@ -234,6 +243,12 @@ pub async fn get_hexagon_properties(
num_features,
)
{
if has_travel {
let postcode = state.data.postcode(row);
if !row_passes_travel_filters(postcode, &travel_entries, &travel_data) {
return;
}
}
matching_rows.push(row);
}
});
@ -273,6 +288,7 @@ pub async fn get_hexagon_properties(
offset,
filters = num_filters,
filters_raw = filters_str.as_deref().unwrap_or("-"),
travel_entries = travel_entries.len(),
ms = format_args!("{:.1}", elapsed.as_secs_f64() * 1000.0),
"GET /api/hexagon-properties"
);

View file

@ -17,14 +17,13 @@ pub fn extract_price_history(
let year_idx = feature_name_to_index
.get("Date of last transaction")
.copied();
let price_idx = feature_name_to_index.get("Last known price").copied();
match (year_idx, price_idx) {
(Some(yi), Some(pi)) => {
match year_idx {
Some(yi) => {
let mut points: Vec<PricePoint> = matching_rows
.iter()
.filter_map(|&row| {
let year = data.get_feature(row, yi);
let price = data.get_feature(row, pi);
let price = data.last_known_price_raw(row);
if year.is_finite() && price.is_finite() {
Some(PricePoint { year, price })
} else {
@ -46,7 +45,7 @@ pub fn extract_price_history(
}
points
}
_ => Vec::new(),
None => Vec::new(),
}
}

View file

@ -1,3 +1,5 @@
use crate::data::travel_time::{TravelData, TravelTimeStore};
/// Parse the optional `travel` query param, returning an empty Vec when absent or empty.
pub fn parse_optional_travel(travel: Option<&str>) -> Result<Vec<TravelEntry>, String> {
match travel.filter(|val| !val.is_empty()) {
@ -15,6 +17,46 @@ pub struct TravelEntry {
pub filter_max: Option<f32>,
}
/// Fetches the travel data set for every entry, in entry order.
///
/// Each entry's `mode` and `slug` are looked up in `store`. The first lookup
/// that fails aborts the whole call.
///
/// # Errors
///
/// Returns `"Failed to load travel data: <cause>"` for the first entry whose
/// lookup fails.
pub fn load_travel_data(
    store: &TravelTimeStore,
    entries: &[TravelEntry],
) -> Result<Vec<TravelData>, String> {
    let mut loaded = Vec::with_capacity(entries.len());
    for entry in entries {
        let data = store
            .get(&entry.mode, &entry.slug)
            .map_err(|err| format!("Failed to load travel data: {}", err))?;
        loaded.push(data);
    }
    Ok(loaded)
}
/// Checks a row's postcode against every travel entry that carries a filter.
///
/// `travel_data[i]` is consulted for `entries[i]`. An entry restricts the row
/// only when both `filter_min` and `filter_max` are set; entries lacking
/// either bound are ignored. For a restricting entry the row is rejected when
/// there is no data for `postcode`, or when its travel time lies outside the
/// inclusive `[filter_min, filter_max]` range. With `use_best` set, the
/// `best_minutes` value is used when available, falling back to `minutes`.
///
/// Returns `true` when no entry rejects the row.
#[inline]
pub fn row_passes_travel_filters(
    postcode: &str,
    entries: &[TravelEntry],
    travel_data: &[TravelData],
) -> bool {
    entries.iter().enumerate().all(|(position, entry)| {
        let (fmin, fmax) = match (entry.filter_min, entry.filter_max) {
            (Some(lower), Some(upper)) => (lower, upper),
            // No complete range on this entry: nothing to enforce.
            _ => return true,
        };

        let record = match travel_data
            .get(position)
            .and_then(|data| data.get(postcode))
        {
            Some(record) => record,
            // A filtered entry with no data for this postcode rejects the row.
            None => return false,
        };

        let raw_minutes = if entry.use_best {
            record.best_minutes.unwrap_or(record.minutes)
        } else {
            record.minutes
        };
        let minutes = raw_minutes as f32;

        // Kept as "not outside" rather than "inside" so that comparisons
        // involving NaN behave exactly as before.
        let outside = minutes < fmin || minutes > fmax;
        !outside
    })
}
/// Parse `travel` param into a list of travel entries.
/// Format: `mode:slug` or `mode:slug:best` or `mode:slug:min:max` or `mode:slug:best:min:max`
fn parse_travel_entries(travel_str: &str) -> Result<Vec<TravelEntry>, String> {