Codex changes

This commit is contained in:
Andras Schmelczer 2026-05-04 16:19:09 +01:00
parent 0bae902e08
commit d4dde21ad2
46 changed files with 4953 additions and 966 deletions

View file

@ -1953,3 +1953,6 @@
2026-04-04T21:54:19.870055Z INFO property_map_server::data::property: Loading properties from "/app/data/properties.parquet"
2026-04-04T21:54:22.182709Z INFO property_map_server::data::property: Properties joined with postcodes rows=15203393
2026-04-04T22:00:48.160218Z INFO property_map_server: Prometheus metrics initialized
2026-04-04T22:12:50.835405Z INFO property_map_server: Prometheus metrics initialized
2026-04-04T22:13:40.021088Z INFO property_map_server: Prometheus metrics initialized
2026-04-04T22:14:09.136995Z INFO property_map_server: Prometheus metrics initialized

File diff suppressed because it is too large Load diff

View file

@ -514,10 +514,7 @@ pub fn precompute_h3(lat: &[f32], lon: &[f32]) -> anyhow::Result<Vec<u64>> {
}
impl PropertyData {
pub fn load(
properties_path: &Path,
postcode_features_path: &Path,
) -> anyhow::Result<Self> {
pub fn load(properties_path: &Path, postcode_features_path: &Path) -> anyhow::Result<Self> {
// Load postcode.parquet
tracing::info!(
"Loading postcode features from {:?}",
@ -643,11 +640,22 @@ impl PropertyData {
}
let df = combined
.lazy()
.filter(col("lat").is_not_null().and(col("lon").is_not_null()))
.select(select_exprs)
.collect()
.context("Failed to select columns from combined data")?;
let row_count = df.height();
if row_count == 0 {
bail!("No property rows have usable coordinates after joining postcode data");
}
let dropped_coordinate_rows = total_rows.saturating_sub(row_count);
if dropped_coordinate_rows > 0 {
tracing::warn!(
rows = dropped_coordinate_rows,
"Dropped properties with missing postcode coordinates"
);
}
tracing::info!(rows = row_count, "Combined data selected");
let lat_series = df
@ -659,8 +667,8 @@ impl PropertyData {
.f32()
.context("Failed to read 'lat' as f32")?
.into_iter()
.map(|value| value.unwrap_or(0.0))
.collect();
.map(|value| value.context("Missing 'lat' value after coordinate filter"))
.collect::<anyhow::Result<Vec<_>>>()?;
let lon_series = df
.column("lon")
@ -671,8 +679,14 @@ impl PropertyData {
.f32()
.context("Failed to read 'lon' as f32")?
.into_iter()
.map(|value| value.unwrap_or(0.0))
.collect();
.map(|value| value.context("Missing 'lon' value after coordinate filter"))
.collect::<anyhow::Result<Vec<_>>>()?;
for (row, (&latitude, &longitude)) in lat.iter().zip(&lon).enumerate() {
if !(-90.0..=90.0).contains(&latitude) || !(-180.0..=180.0).contains(&longitude) {
bail!("Invalid coordinates at row {row}: lat={latitude}, lon={longitude}");
}
}
tracing::info!("Extracting numeric feature columns");
let numeric_col_major: Vec<Vec<f32>> = numeric_names

View file

@ -32,8 +32,7 @@ pub struct FeatureConfig {
/// Features whose histogram bins should be exactly 1 unit wide (one per integer).
/// p1/p99 are snapped to integer boundaries before binning.
pub const INTEGER_BIN_FEATURES: &[&str] =
&["Number of bedrooms & living rooms"];
pub const INTEGER_BIN_FEATURES: &[&str] = &["Number of bedrooms & living rooms"];
pub struct EnumFeatureConfig {
pub name: &'static str,
@ -302,6 +301,36 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
raw: false,
absolute: false,
}),
Feature::Numeric(FeatureConfig {
name: "Outstanding primary schools within 2km",
bounds: Bounds::Fixed {
min: 0.0,
max: 10.0,
},
step: 1.0,
description: "Primary schools rated Outstanding by Ofsted within 2km",
detail: "State-funded primary schools within 2km with a current Ofsted rating of Outstanding. Schools not yet inspected are excluded.",
source: "ofsted",
prefix: "",
suffix: "",
raw: false,
absolute: false,
}),
Feature::Numeric(FeatureConfig {
name: "Outstanding secondary schools within 2km",
bounds: Bounds::Fixed {
min: 0.0,
max: 5.0,
},
step: 1.0,
description: "Secondary schools rated Outstanding by Ofsted within 2km",
detail: "State-funded secondary schools within 2km with a current Ofsted rating of Outstanding. Schools not yet inspected are excluded.",
source: "ofsted",
prefix: "",
suffix: "",
raw: false,
absolute: false,
}),
Feature::Numeric(FeatureConfig {
name: "Good+ primary schools within 5km",
bounds: Bounds::Fixed {
@ -332,6 +361,36 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
raw: false,
absolute: false,
}),
Feature::Numeric(FeatureConfig {
name: "Outstanding primary schools within 5km",
bounds: Bounds::Fixed {
min: 0.0,
max: 30.0,
},
step: 1.0,
description: "Primary schools rated Outstanding by Ofsted within 5km",
detail: "State-funded primary schools within 5km with a current Ofsted rating of Outstanding. Schools not yet inspected are excluded.",
source: "ofsted",
prefix: "",
suffix: "",
raw: false,
absolute: false,
}),
Feature::Numeric(FeatureConfig {
name: "Outstanding secondary schools within 5km",
bounds: Bounds::Fixed {
min: 0.0,
max: 15.0,
},
step: 1.0,
description: "Secondary schools rated Outstanding by Ofsted within 5km",
detail: "State-funded secondary schools within 5km with a current Ofsted rating of Outstanding. Schools not yet inspected are excluded.",
source: "ofsted",
prefix: "",
suffix: "",
raw: false,
absolute: false,
}),
Feature::Numeric(FeatureConfig {
name: "Education, Skills and Training Score",
bounds: Bounds::Percentile {

View file

@ -165,10 +165,7 @@ async fn main() -> anyhow::Result<()> {
cli.properties.display(),
cli.postcode_features.display(),
);
let property_data = data::PropertyData::load(
&cli.properties,
&cli.postcode_features,
)?;
let property_data = data::PropertyData::load(&cli.properties, &cli.postcode_features)?;
info!(
rows = property_data.lat.len(),
features = property_data.num_features,
@ -450,7 +447,10 @@ async fn main() -> anyhow::Result<()> {
"/api/postcode-properties",
get(routes::get_postcode_properties),
)
.route("/api/screenshot", get(routes::get_screenshot))
.route(
"/api/screenshot",
get(routes::get_screenshot).layer(ConcurrencyLimitLayer::new(3)),
)
.route(
"/api/export",
get(routes::get_export).layer(ConcurrencyLimitLayer::new(3)),

View file

@ -281,6 +281,7 @@ pub fn build_system_prompt(
- \"cheap\" / \"affordable\" = lower price range. \"expensive\" = higher price range.\n\
- \"low crime\" / \"safe\" = low values on Serious crime and Minor crime summary features. \
\"quiet\" = low Noise (dB). \"green\" / \"near parks\" = high Number of parks within 1km.\n\
- \"good schools\" = Good+ school features. \"outstanding schools\" = Outstanding school features.\n\
- When the user says a number like \"under 400k\", interpret it as 400000.\n\
- When the user says \"3 bed\" or \"3 bedroom\", use Number of bedrooms & living rooms \
(note: this counts bedrooms + living rooms combined, so 3 bed ~ min 4).\n\
@ -424,6 +425,16 @@ pub fn build_system_prompt(
.to_string(),
);
parts.push(
"\nUser: \"quiet area with outstanding schools\"\n\
Output: {\"numeric_filters\": [\
{\"name\": \"Noise (dB)\", \"bound\": \"max\", \"value\": 55}, \
{\"name\": \"Outstanding primary schools within 2km\", \"bound\": \"min\", \"value\": 1}, \
{\"name\": \"Outstanding secondary schools within 2km\", \"bound\": \"min\", \"value\": 1}], \
\"enum_filters\": [], \"travel_time_filters\": [], \"notes\": \"\"}"
.to_string(),
);
parts.push(
"\nUser: \"3 bed flat under 300k with fast broadband near the beach\"\n\
Output: {\"numeric_filters\": [\

View file

@ -14,6 +14,7 @@ use tracing::{info, warn};
use crate::auth::OptionalUser;
use crate::consts::NAN_U16;
use crate::data::QuantRef;
use crate::features::INTEGER_BIN_FEATURES;
use crate::licensing::check_license_bounds;
use crate::parsing::{parse_field_indices, parse_filters, require_bounds, row_passes_filters};
use crate::routes::{fetch_screenshot_bytes, FeatureInfo};
@ -315,6 +316,11 @@ pub async fn get_export(
})
.collect();
let integer_feature_indices: FxHashSet<usize> = INTEGER_BIN_FEATURES
.iter()
.filter_map(|name| state.feature_name_to_index.get(*name).copied())
.collect();
// Build Excel number formats per feature index for unit display
let mut feat_num_fmts: FxHashMap<usize, Format> = FxHashMap::default();
for &feat_idx in &all_feature_indices {
@ -324,6 +330,8 @@ pub async fn get_export(
}
let num_fmt_str = if !prefix.is_empty() {
format!("\"{}\"#,##0", prefix)
} else if integer_feature_indices.contains(&feat_idx) {
format!("#,##0\"{}\"", suffix)
} else {
format!("#,##0.0\"{}\"", suffix)
};
@ -488,7 +496,11 @@ pub async fn get_export(
} else {
let fc = agg.finite_counts[feat_idx];
if fc > 0 {
let mean = (agg.sums[feat_idx] / fc as f64 * 100.0).round() / 100.0;
let mean = if integer_feature_indices.contains(&feat_idx) {
(agg.sums[feat_idx] / fc as f64).round()
} else {
(agg.sums[feat_idx] / fc as f64 * 100.0).round() / 100.0
};
if let Some(fmt) = feat_num_fmts.get(&feat_idx) {
sheet
.write_number_with_format(row, col, mean, fmt)

View file

@ -1,4 +1,5 @@
use std::sync::Arc;
use std::collections::HashSet;
use std::sync::{Arc, LazyLock, Mutex};
use axum::extract::{Path, State};
use axum::http::StatusCode;
@ -7,9 +8,39 @@ use axum::{Extension, Json};
use serde::{Deserialize, Serialize};
use tracing::{info, warn};
use crate::auth::OptionalUser;
use crate::auth::{OptionalUser, PocketBaseUser};
use crate::pocketbase::get_superuser_token;
use crate::state::SharedState;
use crate::state::{AppState, SharedState};
/// Invite codes whose redemption is currently being handled by this process.
static INVITE_REDEMPTIONS_IN_PROGRESS: LazyLock<Mutex<HashSet<String>>> =
    LazyLock::new(|| Mutex::new(HashSet::new()));

/// Token showing that its holder has claimed an invite code in
/// [`INVITE_REDEMPTIONS_IN_PROGRESS`].
///
/// The code is registered on acquisition and removed again when the guard is dropped,
/// so at most one guard per code exists at a time within this process.
struct InviteRedemptionGuard {
    code: String,
}

impl InviteRedemptionGuard {
    /// Locks the in-progress set; a poisoned lock is recovered rather than propagated.
    fn in_progress() -> std::sync::MutexGuard<'static, HashSet<String>> {
        match INVITE_REDEMPTIONS_IN_PROGRESS.lock() {
            Ok(codes) => codes,
            Err(poisoned) => poisoned.into_inner(),
        }
    }

    /// Claims `code`, returning `None` when another guard already holds it.
    fn acquire(code: &str) -> Option<Self> {
        let newly_claimed = Self::in_progress().insert(code.to_string());
        newly_claimed.then(|| Self {
            code: code.to_string(),
        })
    }
}

impl Drop for InviteRedemptionGuard {
    /// Releases the claim so the same code can be acquired again.
    fn drop(&mut self) {
        Self::in_progress().remove(&self.code);
    }
}
#[derive(Serialize)]
struct InviteResponse {
@ -87,6 +118,207 @@ fn generate_invite_code() -> String {
chars.into_iter().collect()
}
/// Returns the current system time as whole seconds since the Unix epoch, formatted as a
/// decimal string. A clock that reads earlier than the epoch yields `"0"`.
fn current_unix_secs_string() -> String {
    let since_epoch = std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH);
    let whole_secs = match since_epoch {
        Ok(elapsed) => elapsed.as_secs(),
        Err(_) => 0,
    };
    whole_secs.to_string()
}
/// Looks up an invite by `code` that has not been redeemed yet.
///
/// Queries the PocketBase `invites` collection with the filter
/// `code="<code>" && used_by_id=""` (at most one record requested), authorizing with
/// `token`. Returns the first matching record, or `None` when nothing matches.
///
/// # Errors
///
/// Returns a `502 Bad Gateway` response when the request cannot be sent, PocketBase
/// answers with a non-success status, or the response body is not valid JSON.
async fn lookup_unused_invite(
    state: &AppState,
    pb_url: &str,
    token: &str,
    code: &str,
) -> Result<Option<serde_json::Value>, Response> {
    // `code` is caller-supplied and is embedded in a double-quoted filter literal below.
    // A `"` would end the literal early and let the remainder of the input rewrite the
    // filter (for example so that it matches any unused invite), and a `\` may interfere
    // with the closing quote. Such input is answered as "no matching invite" without
    // querying PocketBase.
    // NOTE(review): assumes generated invite codes never contain `"` or `\` — confirm
    // against `generate_invite_code`.
    if code.chars().any(|ch| matches!(ch, '"' | '\\')) {
        warn!("Rejected invite code containing filter metacharacters");
        return Ok(None);
    }

    let filter = format!("code=\"{}\" && used_by_id=\"\"", code);
    let lookup_url = format!(
        "{pb_url}/api/collections/invites/records?filter={}&perPage=1",
        urlencoding::encode(&filter)
    );

    let res = match state
        .http_client
        .get(&lookup_url)
        .header("Authorization", format!("Bearer {token}"))
        .send()
        .await
    {
        Ok(resp) => resp,
        Err(err) => {
            warn!("Failed to look up invite: {err}");
            return Err(StatusCode::BAD_GATEWAY.into_response());
        }
    };

    if !res.status().is_success() {
        let status = res.status();
        let text = res.text().await.unwrap_or_default();
        warn!("PocketBase invite lookup failed ({status}): {text}");
        return Err(StatusCode::BAD_GATEWAY.into_response());
    }

    let body: serde_json::Value = match res.json().await {
        Ok(value) => value,
        Err(err) => {
            warn!("Failed to parse invite lookup response: {err}");
            return Err(StatusCode::BAD_GATEWAY.into_response());
        }
    };

    // A missing or non-array `items` field is treated the same as an empty result.
    Ok(body["items"]
        .as_array()
        .and_then(|arr| arr.first())
        .cloned())
}
/// Records on the PocketBase invite record `invite_id` that `user_id` redeemed it.
///
/// Sends a PATCH that sets `used_by_id` to `user_id` and `used_at` to the value of
/// `current_unix_secs_string()`, authorizing with `token`.
///
/// # Errors
///
/// Returns a `502 Bad Gateway` response when the request cannot be sent or PocketBase
/// answers with a non-success status.
async fn mark_invite_used(
    state: &AppState,
    pb_url: &str,
    token: &str,
    invite_id: &str,
    user_id: &str,
) -> Result<(), Response> {
    let record_url = format!("{pb_url}/api/collections/invites/records/{invite_id}");
    let payload = serde_json::json!({
        "used_by_id": user_id,
        "used_at": current_unix_secs_string(),
    });

    let outcome = state
        .http_client
        .patch(record_url)
        .header("Authorization", format!("Bearer {token}"))
        .json(&payload)
        .send()
        .await;
    let resp = outcome.map_err(|err| {
        warn!("Failed to mark invite as used: {err}");
        StatusCode::BAD_GATEWAY.into_response()
    })?;

    let status = resp.status();
    if status.is_success() {
        return Ok(());
    }

    let text = resp.text().await.unwrap_or_default();
    warn!("PocketBase invite usage update failed ({status}): {text}");
    Err(StatusCode::BAD_GATEWAY.into_response())
}
/// Sets the PocketBase user record `user_id` to `subscription: "licensed"`.
///
/// On success, `state.token_cache.invalidate_by_user_id(user_id)` is called before
/// returning.
///
/// # Errors
///
/// Returns a `502 Bad Gateway` response when the request cannot be sent or PocketBase
/// answers with a non-success status; the token cache is left untouched in that case.
async fn grant_license_for_invite(
    state: &AppState,
    pb_url: &str,
    token: &str,
    user_id: &str,
) -> Result<(), Response> {
    let update_url = format!("{pb_url}/api/collections/users/records/{user_id}");
    let payload = serde_json::json!({ "subscription": "licensed" });

    let outcome = state
        .http_client
        .patch(&update_url)
        .header("Authorization", format!("Bearer {token}"))
        .json(&payload)
        .send()
        .await;
    let resp = outcome.map_err(|err| {
        warn!("Failed to update user subscription for admin invite: {err}");
        StatusCode::BAD_GATEWAY.into_response()
    })?;

    let status = resp.status();
    if !status.is_success() {
        let text = resp.text().await.unwrap_or_default();
        warn!("PocketBase user subscription update failed ({status}): {text}");
        return Err(StatusCode::BAD_GATEWAY.into_response());
    }

    state.token_cache.invalidate_by_user_id(user_id);
    Ok(())
}
/// Creates a Stripe Checkout session for a referral invite and returns its URL.
///
/// The unit price comes from `pricing::price_for_count` applied to the current
/// licensed-user count, and the coupon `state.stripe_referral_coupon_id` is attached as a
/// discount. `user` supplies the client reference id and customer email.
///
/// # Errors
///
/// * `503 Service Unavailable` when the licensed-user count cannot be obtained.
/// * `502 Bad Gateway` when the Stripe request fails, Stripe answers with a non-success
///   status, the body is not valid JSON, or the body carries no non-empty `url`.
async fn create_referral_checkout(
    state: &AppState,
    user: &PocketBaseUser,
) -> Result<String, Response> {
    let licensed_count = super::pricing::count_licensed_users(state)
        .await
        .map_err(|err| {
            warn!("Failed to count licensed users for invite checkout: {err}");
            StatusCode::SERVICE_UNAVAILABLE.into_response()
        })?;
    let price_pence = super::pricing::price_for_count(licensed_count);

    let public_url = &state.public_url;
    let form_params = vec![
        ("mode", "payment".to_string()),
        (
            "line_items[0][price_data][unit_amount]",
            price_pence.to_string(),
        ),
        ("line_items[0][price_data][currency]", "gbp".to_string()),
        (
            "line_items[0][price_data][product_data][name]",
            "Perfect Postcodes Lifetime License".to_string(),
        ),
        ("line_items[0][quantity]", "1".to_string()),
        (
            "success_url",
            format!("{public_url}/pricing?license_success=1"),
        ),
        ("cancel_url", format!("{public_url}/pricing")),
        ("client_reference_id", user.id.clone()),
        ("customer_email", user.email.clone()),
        (
            "discounts[0][coupon]",
            state.stripe_referral_coupon_id.clone(),
        ),
    ];

    let sent = state
        .http_client
        .post("https://api.stripe.com/v1/checkout/sessions")
        .basic_auth(&state.stripe_secret_key, None::<&str>)
        .form(&form_params)
        .send()
        .await;
    let resp = match sent {
        Ok(resp) => resp,
        Err(err) => {
            warn!("Stripe request error for referral invite: {err}");
            return Err(StatusCode::BAD_GATEWAY.into_response());
        }
    };

    let status = resp.status();
    if !status.is_success() {
        let text = resp.text().await.unwrap_or_default();
        warn!("Failed to create Stripe checkout for referral invite ({status}): {text}");
        return Err(StatusCode::BAD_GATEWAY.into_response());
    }

    let stripe_body = resp.json::<serde_json::Value>().await.map_err(|err| {
        warn!("Failed to parse Stripe checkout response: {err}");
        StatusCode::BAD_GATEWAY.into_response()
    })?;

    // A missing, non-string, or empty `url` all count as "no URL".
    match stripe_body["url"].as_str() {
        Some(url) if !url.is_empty() => Ok(url.to_string()),
        _ => {
            warn!("Stripe checkout response did not include a URL");
            Err(StatusCode::BAD_GATEWAY.into_response())
        }
    }
}
/// Create an invite. Admins create "admin" invites (free license) by default,
/// but can explicitly request "referral" type. Licensed non-admin users always create "referral" invites (30% off).
pub async fn post_invites(
@ -319,154 +551,80 @@ pub async fn post_redeem_invite(
}
};
// Look up invite
let filter = format!("code=\"{}\" && used_by_id=\"\"", req.code);
let lookup_url = format!(
"{pb_url}/api/collections/invites/records?filter={}&perPage=1",
urlencoding::encode(&filter)
);
let _redemption_guard = match InviteRedemptionGuard::acquire(&req.code) {
Some(guard) => guard,
None => {
return (
StatusCode::CONFLICT,
"Invite redemption is already in progress",
)
.into_response()
}
};
let res = match state
.http_client
.get(&lookup_url)
.header("Authorization", format!("Bearer {token}"))
.send()
.await
{
Ok(r) => r,
Err(err) => {
warn!("Failed to look up invite: {err}");
let invite = match lookup_unused_invite(&state, pb_url, &token, &req.code).await {
Ok(Some(invite)) => invite,
Ok(None) => {
return (StatusCode::NOT_FOUND, "Invalid or already used invite code").into_response()
}
Err(response) => return response,
};
let invite_id = match invite["id"].as_str().filter(|id| !id.is_empty()) {
Some(id) => id,
None => {
warn!(code = %req.code, "Invite lookup returned record without id");
return StatusCode::BAD_GATEWAY.into_response();
}
};
let invite_type = match invite["invite_type"].as_str() {
Some("admin") => "admin",
Some("referral") => "referral",
Some(other) => {
warn!(code = %req.code, invite_type = other, "Invite has unsupported type");
return StatusCode::BAD_GATEWAY.into_response();
}
None => {
warn!(code = %req.code, "Invite lookup returned record without invite_type");
return StatusCode::BAD_GATEWAY.into_response();
}
};
let body: serde_json::Value = match res.json().await {
Ok(v) => v,
Err(_) => return StatusCode::BAD_GATEWAY.into_response(),
};
let invite = match body["items"].as_array().and_then(|arr| arr.first()) {
Some(inv) => inv.clone(),
None => {
return (StatusCode::NOT_FOUND, "Invalid or already used invite code").into_response()
}
};
let invite_id = invite["id"].as_str().unwrap_or("");
let invite_type = invite["invite_type"].as_str().unwrap_or("");
// Mark invite as used
let now = {
let dur = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap_or_default();
dur.as_secs().to_string()
};
let _ = state
.http_client
.patch(format!(
"{pb_url}/api/collections/invites/records/{invite_id}"
))
.header("Authorization", format!("Bearer {token}"))
.json(&serde_json::json!({
"used_by_id": user.id,
"used_at": now,
}))
.send()
.await;
if invite_type == "admin" {
// Grant license directly
let update_url = format!("{pb_url}/api/collections/users/records/{}", user.id);
let res = state
.http_client
.patch(&update_url)
.header("Authorization", format!("Bearer {token}"))
.json(&serde_json::json!({ "subscription": "licensed" }))
.send()
.await;
match res {
Ok(resp) if resp.status().is_success() => {
state.token_cache.invalidate_by_user_id(&user.id);
info!(user_id = %user.id, code = %req.code, "Admin invite redeemed — user licensed");
Json(RedeemResponse {
result: "licensed".to_string(),
checkout_url: None,
})
.into_response()
}
_ => {
warn!("Failed to update user subscription for admin invite");
StatusCode::BAD_GATEWAY.into_response()
}
if let Err(response) = grant_license_for_invite(&state, pb_url, &token, &user.id).await {
return response;
}
} else {
// Referral invite — create discounted checkout with dynamic pricing
let count = match super::pricing::count_licensed_users(&state).await {
Ok(c) => c,
Err(err) => {
warn!("Failed to count licensed users for invite checkout: {err}");
return StatusCode::SERVICE_UNAVAILABLE.into_response();
}
};
let price_pence = super::pricing::price_for_count(count);
let secret_key = &state.stripe_secret_key;
let public_url = &state.public_url;
let success_url = format!("{public_url}/pricing?license_success=1");
let cancel_url = format!("{public_url}/pricing");
let form_params = vec![
("mode", "payment".to_string()),
(
"line_items[0][price_data][unit_amount]",
price_pence.to_string(),
),
("line_items[0][price_data][currency]", "gbp".to_string()),
(
"line_items[0][price_data][product_data][name]",
"Perfect Postcodes Lifetime License".to_string(),
),
("line_items[0][quantity]", "1".to_string()),
("success_url", success_url),
("cancel_url", cancel_url),
("client_reference_id", user.id.clone()),
("customer_email", user.email.clone()),
(
"discounts[0][coupon]",
state.stripe_referral_coupon_id.clone(),
),
];
let stripe_res = state
.http_client
.post("https://api.stripe.com/v1/checkout/sessions")
.basic_auth(secret_key, None::<&str>)
.form(&form_params)
.send()
.await;
match stripe_res {
Ok(resp) if resp.status().is_success() => {
let stripe_body: serde_json::Value = resp.json().await.unwrap_or_default();
let checkout_url = stripe_body["url"].as_str().unwrap_or_default().to_string();
info!(user_id = %user.id, code = %req.code, "Referral invite redeemed — checkout created");
Json(RedeemResponse {
result: "checkout".to_string(),
checkout_url: Some(checkout_url),
})
.into_response()
}
_ => {
warn!("Failed to create Stripe checkout for referral invite");
StatusCode::BAD_GATEWAY.into_response()
}
if let Err(response) = mark_invite_used(&state, pb_url, &token, invite_id, &user.id).await {
return response;
}
info!(user_id = %user.id, code = %req.code, "Admin invite redeemed; user licensed");
return Json(RedeemResponse {
result: "licensed".to_string(),
checkout_url: None,
})
.into_response();
}
let checkout_url = match create_referral_checkout(&state, &user).await {
Ok(url) => url,
Err(response) => return response,
};
if let Err(response) = mark_invite_used(&state, pb_url, &token, invite_id, &user.id).await {
return response;
}
info!(user_id = %user.id, code = %req.code, "Referral invite redeemed; checkout created");
Json(RedeemResponse {
result: "checkout".to_string(),
checkout_url: Some(checkout_url),
})
.into_response()
}
/// List invites. Admins see all invites; licensed users see only their own.
/// List invites. Users only see invites they created, including admins.
pub async fn get_invites(
State(shared): State<Arc<SharedState>>,
Extension(user): Extension<OptionalUser>,
@ -487,16 +645,9 @@ pub async fn get_invites(
}
};
let filter = if user.is_admin {
String::new()
} else {
format!("created_by=\"{}\"", user.id)
};
let filter = format!("created_by=\"{}\"", user.id);
let mut url = format!("{pb_url}/api/collections/invites/records?sort=-created&perPage=200");
if !filter.is_empty() {
url.push_str(&format!("&filter={}", urlencoding::encode(&filter)));
}
url.push_str(&format!("&filter={}", urlencoding::encode(&filter)));
let res = match state
.http_client

View file

@ -3,14 +3,18 @@ use std::sync::Arc;
use axum::extract::{Query, State};
use axum::http::StatusCode;
use axum::response::Json;
use rustc_hash::FxHashMap;
use serde::{Deserialize, Serialize};
use tracing::info;
use crate::consts::MAX_POIS_PER_REQUEST;
use crate::data::POICategoryGroup;
use crate::data::{POICategoryGroup, POIData};
use crate::parsing::require_bounds;
use crate::state::SharedState;
/// Category label compared against each POI row's category to decide whether the row
/// takes part in tube-station de-duplication.
const TUBE_STATION_CATEGORY: &str = "Tube station";
/// Maximum separation, in degrees, at which two same-named tube-station rows are merged.
/// It is squared and compared against a squared latitude/longitude difference in which the
/// longitude term is scaled by the cosine of the latitude.
const TUBE_STATION_MERGE_RADIUS_DEGREES: f32 = 0.01;
#[derive(Serialize)]
#[allow(clippy::upper_case_acronyms)]
pub struct POI {
@ -35,6 +39,167 @@ pub struct POIParams {
categories: Option<String>,
}
/// A POI chosen for the response: either a single data row or several tube-station rows
/// merged into one entry.
struct SelectedPOIRow {
/// Index of the row this entry was created from; also used for fallback id/name lookups.
row: usize,
/// Identifier to report instead of the one stored at `row`, when set.
id_override: Option<String>,
/// Name to report instead of the one stored at `row`, when set.
name_override: Option<String>,
/// Reported latitude; after merging, the mean latitude of all merged rows.
lat: f32,
/// Reported longitude; after merging, the mean longitude of all merged rows.
lng: f32,
/// Running sum of the latitudes of all merged rows.
lat_sum: f32,
/// Running sum of the longitudes of all merged rows.
lng_sum: f32,
/// Number of rows folded into this entry (starts at 1).
count: u32,
/// Smallest `priority` value among the merged rows.
priority: u32,
}
impl SelectedPOIRow {
    /// Builds an entry for a single data row.
    ///
    /// With `override_identity` set, the row's id and name are copied into the override
    /// fields immediately; otherwise they are looked up from `data` on demand.
    fn new(data: &POIData, row: usize, override_identity: bool) -> Self {
        let (lat, lng) = (data.lat[row], data.lng[row]);
        let (id_override, name_override) = if override_identity {
            (Some(data.id(row).to_string()), Some(data.name[row].clone()))
        } else {
            (None, None)
        };

        Self {
            row,
            id_override,
            name_override,
            lat,
            lng,
            lat_sum: lat,
            lng_sum: lng,
            count: 1,
            priority: data.priority[row],
        }
    }

    /// Folds another tube-station row into this entry.
    ///
    /// The position becomes the mean of all merged rows, the priority becomes the smallest
    /// one seen, and the id/name switch to the new row when its name scores strictly lower
    /// under `tube_station_name_score`.
    fn merge_tube_station(&mut self, data: &POIData, row: usize) {
        self.lat_sum += data.lat[row];
        self.lng_sum += data.lng[row];
        self.count += 1;

        let merged_rows = self.count as f32;
        self.lat = self.lat_sum / merged_rows;
        self.lng = self.lng_sum / merged_rows;
        self.priority = self.priority.min(data.priority[row]);

        let candidate_name = &data.name[row];
        let candidate_score = tube_station_name_score(candidate_name);
        let current_score = tube_station_name_score(
            self.name_override
                .as_deref()
                .unwrap_or(&data.name[self.row]),
        );
        if candidate_score < current_score {
            self.id_override = Some(data.id(row).to_string());
            self.name_override = Some(candidate_name.clone());
        }
    }

    /// Identifier to report: the override when present, otherwise the id stored at `row`.
    fn id(&self, data: &POIData) -> String {
        match &self.id_override {
            Some(id) => id.clone(),
            None => data.id(self.row).to_string(),
        }
    }

    /// Name to report: the override when present, otherwise the name stored at `row`.
    fn name(&self, data: &POIData) -> String {
        match &self.name_override {
            Some(name) => name.clone(),
            None => data.name[self.row].clone(),
        }
    }
}
/// Converts matching rows into response entries, merging tube-station rows that share a
/// canonical name and lie within the merge radius of an already selected entry.
///
/// Rows of any other category, and tube stations whose canonical name is empty, are passed
/// through one-to-one. Output order follows the first occurrence of each entry in `rows`.
fn dedupe_tube_stations(data: &POIData, rows: Vec<usize>) -> Vec<SelectedPOIRow> {
    let mut entries: Vec<SelectedPOIRow> = Vec::with_capacity(rows.len());
    // Canonical station name -> positions in `entries` holding stations with that name.
    let mut slots_by_name: FxHashMap<String, Vec<usize>> = FxHashMap::default();

    for row in rows {
        // An empty key marks "not mergeable": wrong category or nothing left of the name.
        let station_key = if data.category.get(row) == TUBE_STATION_CATEGORY {
            canonical_tube_station_name(&data.name[row])
        } else {
            String::new()
        };
        if station_key.is_empty() {
            entries.push(SelectedPOIRow::new(data, row, false));
            continue;
        }

        let (lat, lng) = (data.lat[row], data.lng[row]);
        let nearby_slot = slots_by_name
            .get(&station_key)
            .into_iter()
            .flatten()
            .copied()
            .find(|&slot| same_tube_station_area(&entries[slot], lat, lng));

        match nearby_slot {
            Some(slot) => entries[slot].merge_tube_station(data, row),
            None => {
                slots_by_name
                    .entry(station_key)
                    .or_default()
                    .push(entries.len());
                entries.push(SelectedPOIRow::new(data, row, true));
            }
        }
    }

    entries
}
/// Reduces a station name to a lowercase key for grouping differently labelled entries.
///
/// Normalisation steps:
/// * parenthesised text is dropped;
/// * apostrophes (`'`, the typographic `’`) and backticks are removed without leaving a gap;
/// * `&` becomes the word `and`;
/// * ASCII letters and digits are lowercased, every other character acts as a separator;
/// * trailing transport suffixes such as `underground station` or `tram stop` are stripped
///   repeatedly.
///
/// Returns an empty string when nothing but suffixes or separators remains.
fn canonical_tube_station_name(name: &str) -> String {
    let mut normalized = String::with_capacity(name.len());
    let mut paren_depth = 0u32;
    for ch in name.chars() {
        match ch {
            '(' => {
                paren_depth += 1;
                normalized.push(' ');
            }
            ')' => {
                // Saturating so an unmatched `)` cannot underflow the depth.
                paren_depth = paren_depth.saturating_sub(1);
                normalized.push(' ');
            }
            _ if paren_depth > 0 => {}
            // U+2019 is written as an escape so the typographic apostrophe survives
            // encoding round-trips; an empty char literal here does not compile.
            '\'' | '\u{2019}' | '`' => {}
            '&' => normalized.push_str(" and "),
            _ if ch.is_ascii_alphanumeric() => normalized.push(ch.to_ascii_lowercase()),
            _ => normalized.push(' '),
        }
    }

    let mut words: Vec<&str> = normalized.split_whitespace().collect();
    // Multi-word suffixes are listed before the single-word ones they end with.
    const SUFFIXES: &[&[&str]] = &[
        &["underground", "station"],
        &["tube", "station"],
        &["dlr", "station"],
        &["metro", "station"],
        &["tram", "stop"],
        &["rail", "station"],
        &["railway", "station"],
        &["station"],
        &["stop"],
    ];
    // Strip repeatedly so stacked suffixes are all removed.
    loop {
        let Some(suffix) = SUFFIXES.iter().find(|suffix| words.ends_with(suffix)) else {
            break;
        };
        words.truncate(words.len() - suffix.len());
    }
    words.join(" ")
}
/// Reports whether the point (`lat`, `lng`) lies within the merge radius of `station`.
///
/// The comparison is done on squared degree differences, with the longitude difference
/// scaled by the cosine of the station's latitude.
fn same_tube_station_area(station: &SelectedPOIRow, lat: f32, lng: f32) -> bool {
    let lng_scale = station.lat.to_radians().cos();
    let dlat = station.lat - lat;
    let dlng = (station.lng - lng) * lng_scale;
    let squared_separation = dlat * dlat + dlng * dlng;
    squared_separation <= TUBE_STATION_MERGE_RADIUS_DEGREES.powi(2)
}
/// Ranks a station name for display; lower tuples compare as better.
///
/// The first component is 1 when the name ends (ASCII case-insensitively) with a transport
/// suffix such as ` station` or ` stop`, otherwise 0. The second is the name's byte length.
fn tube_station_name_score(name: &str) -> (u8, usize) {
    const TRANSPORT_SUFFIXES: [&str; 7] = [
        " underground station",
        " tube station",
        " dlr station",
        " metro station",
        " tram stop",
        " station",
        " stop",
    ];

    let lowered = name.to_ascii_lowercase();
    let has_transport_suffix = TRANSPORT_SUFFIXES
        .iter()
        .any(|suffix| lowered.ends_with(*suffix));
    (u8::from(has_transport_suffix), name.len())
}
pub async fn get_pois(
State(shared): State<Arc<SharedState>>,
Query(params): Query<POIParams>,
@ -68,7 +233,7 @@ pub async fn get_pois(
let t0 = std::time::Instant::now();
let row_indices = state.poi_grid.query(south, west, north, east);
let mut matching_rows: Vec<usize> = row_indices
let matching_rows: Vec<usize> = row_indices
.iter()
.filter_map(|&row_idx| {
let row = row_idx as usize;
@ -81,27 +246,32 @@ pub async fn get_pois(
})
.collect();
if matching_rows.len() > MAX_POIS_PER_REQUEST {
let ratio = (matching_rows.len() / MAX_POIS_PER_REQUEST) as u32;
let mut matching_pois = dedupe_tube_stations(&state.poi_data, matching_rows);
if matching_pois.len() > MAX_POIS_PER_REQUEST {
let ratio = (matching_pois.len() / MAX_POIS_PER_REQUEST) as u32;
let step = ratio.next_power_of_two();
let mask = step - 1;
matching_rows.retain(|&row| state.poi_data.priority[row] & mask == 0);
if matching_rows.len() > MAX_POIS_PER_REQUEST {
matching_rows.sort_unstable_by_key(|&row| state.poi_data.priority[row]);
matching_rows.truncate(MAX_POIS_PER_REQUEST);
matching_pois.retain(|poi| poi.priority & mask == 0);
if matching_pois.len() > MAX_POIS_PER_REQUEST {
matching_pois.sort_unstable_by_key(|poi| poi.priority);
matching_pois.truncate(MAX_POIS_PER_REQUEST);
}
}
let pois: Vec<POI> = matching_rows
let pois: Vec<POI> = matching_pois
.iter()
.map(|&row| POI {
id: state.poi_data.id(row).to_string(),
name: state.poi_data.name[row].clone(),
category: state.poi_data.category.get(row).to_string(),
group: state.poi_data.group.get(row).to_string(),
lat: state.poi_data.lat[row],
lng: state.poi_data.lng[row],
emoji: state.poi_data.emoji.get(row).to_string(),
.map(|poi| {
let row = poi.row;
POI {
id: poi.id(&state.poi_data),
name: poi.name(&state.poi_data),
category: state.poi_data.category.get(row).to_string(),
group: state.poi_data.group.get(row).to_string(),
lat: poi.lat,
lng: poi.lng,
emoji: state.poi_data.emoji.get(row).to_string(),
}
})
.collect();
@ -143,3 +313,53 @@ pub async fn get_poi_categories(
Json(POICategoriesResponse { groups })
}
// Unit tests for the tube-station de-duplication helpers.
#[cfg(test)]
mod tests {
use super::*;
// Names that differ only by transport suffix, parenthesised text, or an apostrophe must
// reduce to the same canonical key.
#[test]
fn canonical_tube_station_name_strips_transport_suffixes() {
assert_eq!(canonical_tube_station_name("Bank"), "bank");
assert_eq!(
canonical_tube_station_name("Bank Underground Station"),
"bank"
);
assert_eq!(canonical_tube_station_name("Bank DLR Station"), "bank");
assert_eq!(
canonical_tube_station_name("Pleasure Beach (Blackpool Tramway)"),
"pleasure beach"
);
assert_eq!(
canonical_tube_station_name("Earl's Court Tube Station"),
"earls court"
);
}
// A point a fraction of a thousandth of a degree away is inside the merge radius; a point
// several degrees away is not.
#[test]
fn same_tube_station_area_keeps_distant_names_separate() {
let station = SelectedPOIRow {
row: 0,
id_override: None,
name_override: None,
lat: 51.5130,
lng: -0.0889,
lat_sum: 51.5130,
lng_sum: -0.0889,
count: 1,
priority: 0,
};
assert!(same_tube_station_area(&station, 51.5132, -0.0885));
assert!(!same_tube_station_area(&station, 55.0140, -1.6781));
}
// Lower scores are better: a bare name must rank ahead of the same name with a suffix.
#[test]
fn tube_station_name_score_prefers_plain_station_names() {
assert!(tube_station_name_score("Bank") < tube_station_name_score("Bank DLR Station"));
assert!(
tube_station_name_score("Acton Town")
< tube_station_name_score("Acton Town Underground Station")
);
}
}

View file

@ -13,13 +13,13 @@ use tracing::info;
use crate::aggregation::{Aggregator, EnumDistConfig};
use crate::auth::OptionalUser;
use crate::consts::MAX_CELLS_PER_REQUEST;
use crate::pocketbase::log_user_location;
use crate::data::travel_time::TravelData;
use crate::licensing::check_license_bounds;
use crate::parsing::{
bounds_intersect, parse_enum_dist, parse_field_indices, parse_filters, require_bounds,
row_passes_filters,
};
use crate::pocketbase::log_user_location;
use crate::routes::travel_time::{parse_optional_travel, TravelTimeAgg};
use crate::state::SharedState;
use crate::utils::normalize_postcode;