good stuff

This commit is contained in:
Andras Schmelczer 2026-03-15 21:10:54 +00:00
parent ea8389ef40
commit f4de0eeb9f
39 changed files with 5165 additions and 348 deletions

View file

@ -52,6 +52,23 @@ impl Aggregator {
}
}
/// Fold the statistics accumulated by `other` into `self`.
///
/// The overall row count is always added. Per-feature state (min, max, sum
/// and per-feature count) is merged only for features that `other` actually
/// recorded, i.e. where `other.feat_counts[i] > 0`; features `other` never
/// saw leave `self` untouched.
///
/// # Panics
///
/// Panics if `other` tracks fewer features than `self` (indexing is driven
/// by `self.mins.len()`).
pub fn merge(&mut self, other: &Aggregator) {
    self.count += other.count;

    let feature_total = self.mins.len();
    // Only visit the features for which `other` holds at least one sample.
    let observed = (0..feature_total).filter(|&idx| other.feat_counts[idx] > 0);

    for idx in observed {
        let low = other.mins[idx];
        if low < self.mins[idx] {
            self.mins[idx] = low;
        }

        let high = other.maxs[idx];
        if high > self.maxs[idx] {
            self.maxs[idx] = high;
        }

        self.sums[idx] += other.sums[idx];
        self.feat_counts[idx] += other.feat_counts[idx];
    }
}
/// Add a row, only aggregating the features at the given indices.
#[inline]
pub fn add_row_selective(

View file

@ -38,11 +38,12 @@ struct Properties {
pub struct PostcodeData {
/// Postcode strings.
///
/// `postcodes`, `polygons`, `centroids`, `aabbs` and `geometries` are parallel
/// vectors: entry `i` of each describes the same postcode.
pub postcodes: Vec<String>,
/// All polygon parts per postcode: polygons[i] = list of outer rings
/// Single Polygon → 1 ring, MultiPolygon → N rings
/// Each ring vertex is stored as `[lon, lat]` (note: the opposite order to `centroids`).
pub polygons: Vec<Vec<Vec<[f32; 2]>>>,
/// Centroid (lat, lon) for lookups
pub centroids: Vec<(f32, f32)>,
/// Precomputed AABB per postcode: (south, west, north, east) as f32,
/// taken over the vertices of every ring of that postcode.
pub aabbs: Vec<(f32, f32, f32, f32)>,
/// Precomputed GeoJSON geometry Value per postcode:
/// a `Polygon` when the postcode has exactly one ring, otherwise a `MultiPolygon`.
pub geometries: Vec<serde_json::Value>,
/// Lookup from postcode string to its index into the parallel vectors above
pub postcode_to_idx: FxHashMap<String, usize>,
}
@ -96,6 +97,7 @@ impl PostcodeData {
let mut local_postcodes = Vec::new();
let mut local_polygons = Vec::new();
let mut local_centroids = Vec::new();
let mut local_aabbs: Vec<(f32, f32, f32, f32)> = Vec::new();
for feature in collection.features {
let postcode = feature.properties.postcodes;
@ -140,20 +142,44 @@ impl PostcodeData {
(sum_lat / count, sum_lon / count)
};
// Compute AABB across all rings
let (mut aabb_south, mut aabb_north) = (f32::INFINITY, f32::NEG_INFINITY);
let (mut aabb_west, mut aabb_east) = (f32::INFINITY, f32::NEG_INFINITY);
for ring in &rings {
for &[lon, lat] in ring {
if lat < aabb_south {
aabb_south = lat;
}
if lat > aabb_north {
aabb_north = lat;
}
if lon < aabb_west {
aabb_west = lon;
}
if lon > aabb_east {
aabb_east = lon;
}
}
}
local_postcodes.push(postcode);
local_polygons.push(rings);
local_centroids.push(centroid);
local_aabbs.push((aabb_south, aabb_west, aabb_north, aabb_east));
}
Ok::<_, anyhow::Error>((local_postcodes, local_polygons, local_centroids))
Ok::<_, anyhow::Error>((local_postcodes, local_polygons, local_centroids, local_aabbs))
})
.collect::<Result<Vec<_>, _>>()?;
let mut aabbs: Vec<(f32, f32, f32, f32)> = Vec::new();
// Flatten results
for (local_postcodes, local_polygons, local_centroids) in file_results {
for (local_postcodes, local_polygons, local_centroids, local_aabbs) in file_results {
postcodes.extend(local_postcodes);
polygons.extend(local_polygons);
centroids.extend(local_centroids);
aabbs.extend(local_aabbs);
}
debug!(
@ -167,12 +193,49 @@ impl PostcodeData {
postcode_to_idx.insert(postcode.clone(), idx);
}
// Precompute GeoJSON geometry for each postcode
let geometries: Vec<serde_json::Value> = polygons
.iter()
.map(|rings| {
if rings.len() == 1 {
let coords: Vec<serde_json::Value> = rings[0]
.iter()
.map(|[lon, lat]| {
serde_json::Value::Array(vec![
serde_json::Value::from(*lon as f64),
serde_json::Value::from(*lat as f64),
])
})
.collect();
serde_json::json!({"type": "Polygon", "coordinates": [coords]})
} else {
let polys: Vec<serde_json::Value> = rings
.iter()
.map(|ring| {
let coords: Vec<serde_json::Value> = ring
.iter()
.map(|[lon, lat]| {
serde_json::Value::Array(vec![
serde_json::Value::from(*lon as f64),
serde_json::Value::from(*lat as f64),
])
})
.collect();
serde_json::Value::Array(vec![serde_json::Value::Array(coords)])
})
.collect();
serde_json::json!({"type": "MultiPolygon", "coordinates": polys})
}
})
.collect();
info!(postcodes = postcodes.len(), "Postcode boundary data ready");
Ok(PostcodeData {
postcodes,
polygons,
centroids,
aabbs,
geometries,
postcode_to_idx,
})
}

View file

@ -80,7 +80,7 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: "",
raw: false,
absolute: true,
modes: &[],
modes: &["historical"],
linked: "",
},
FeatureConfig {
@ -114,7 +114,7 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: "",
raw: false,
absolute: false,
modes: &[],
modes: &["historical"],
linked: "",
},
FeatureConfig {
@ -132,7 +132,24 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
raw: false,
absolute: false,
modes: &["historical"],
linked: "",
linked: "Asking price per sqm",
},
FeatureConfig {
name: "Asking price per sqm",
bounds: Bounds::Percentile {
low: 0.0,
high: 98.0,
},
step: 100.0,
description: "Asking price divided by total floor area",
detail: "Calculated by dividing the listed asking price by the total floor area. Only available for properties currently listed for sale where floor area data exists.",
source: "online-listings",
prefix: "£",
suffix: "",
raw: false,
absolute: false,
modes: &["buy"],
linked: "Est. price per sqm",
},
FeatureConfig {
name: "Total floor area (sqm)",
@ -165,7 +182,7 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: " m",
raw: false,
absolute: false,
modes: &[],
modes: &["historical"],
linked: "",
},
FeatureConfig {
@ -213,7 +230,7 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
suffix: "",
raw: true,
absolute: false,
modes: &[],
modes: &["historical"],
linked: "",
},
FeatureConfig {

View file

@ -339,7 +339,7 @@ async fn main() -> anyhow::Result<()> {
}
info!("Loading travel time data from {}", tt_path.display());
let travel_time_store = {
let store = data::TravelTimeStore::load(tt_path, 50)?;
let store = data::TravelTimeStore::load(tt_path, 200)?;
info!(
modes = store.available_modes.len(),
"Travel time store loaded"
@ -399,6 +399,9 @@ async fn main() -> anyhow::Result<()> {
stripe_referral_coupon_id: cli.stripe_referral_coupon_id,
});
// Start background PocketBase metrics poller (users, saved searches/properties counts)
pocketbase::start_metrics_poller(state.clone());
let cors = CorsLayer::new()
.allow_origin(
state
@ -440,6 +443,7 @@ async fn main() -> anyhow::Result<()> {
let state_invite_get = state.clone();
let state_redeem_invite = state.clone();
let state_journey = state.clone();
let state_telemetry = state.clone();
let api = Router::new()
.route(
@ -573,6 +577,13 @@ async fn main() -> anyhow::Result<()> {
.route(
"/s/{code}",
get(move |path| routes::get_short_url(state_short_url.clone(), path)),
)
.route(
"/api/telemetry",
post(move |ext, headers, body| {
let _ = state_telemetry.clone();
routes::post_telemetry(ext, headers, body)
}),
);
// Add tile routes

View file

@ -44,11 +44,69 @@ pub async fn track_metrics(request: Request<Body>, next: Next) -> Response {
}
/// Normalize paths to avoid high cardinality from dynamic segments.
///
/// Groups dynamic segments into parameterized placeholders and collapses
/// static assets, PocketBase proxy paths, and unknown paths to prevent
/// Prometheus label cardinality explosion from bot scans and unique URLs.
///
/// Rules are checked in order; the first match wins:
///
/// * `/api/tiles/...` (except `style.json`) → `/api/tiles/:z/:x/:y`
/// * `/api/invite/...` → `/api/invite/:code`
/// * `/pb/api/files/...` → `/pb/api/files/:path`
/// * `/pb/api/collections/<c>/records/<id>...` → `/pb/api/collections/<c>/records/:id`;
///   other `/pb/api/collections/...` paths are kept verbatim
/// * `/s/...` → `/s/:code`, `/invite/...` → `/invite/:code`, `/assets/...` → `/assets/:file`
/// * any other `/api/...` path and the fixed list of known pages/files → kept verbatim
/// * everything else → `/other`
fn normalize_path(path: &str) -> String {
    // Tiles: /api/tiles/5/16/10 → /api/tiles/:z/:x/:y
    if path.starts_with("/api/tiles/") && !path.ends_with("style.json") {
        return "/api/tiles/:z/:x/:y".to_string();
    }

    // Invite API: /api/invite/abc123 → /api/invite/:code
    if path.starts_with("/api/invite/") {
        return "/api/invite/:code".to_string();
    }

    // PocketBase proxy: /pb/api/files/... → /pb/api/files/:path
    if path.starts_with("/pb/api/files/") {
        return "/pb/api/files/:path".to_string();
    }

    // PocketBase proxy: /pb/api/... → keep collection-level granularity
    if let Some(rest) = path.strip_prefix("/pb/api/collections/") {
        // /pb/api/collections/users/auth-with-password → keep as-is (bounded set)
        // /pb/api/collections/saved_searches/records/abc → /pb/api/collections/saved_searches/records/:id
        //
        // Work on the part after the prefix so segment positions are not shifted by the
        // empty string that splitting an absolute path on '/' produces before the first '/'.
        let mut segments = rest.splitn(3, '/');
        if let (Some(collection), Some("records"), Some(id)) =
            (segments.next(), segments.next(), segments.next())
        {
            // Only record-level URLs carry an unbounded id; the bare list endpoint
            // (`.../records` or `.../records/`) stays as-is.
            if !id.is_empty() {
                return format!("/pb/api/collections/{collection}/records/:id");
            }
        }
        return path.to_string();
    }

    // Short URLs: /s/abc → /s/:code
    if path.starts_with("/s/") {
        return "/s/:code".to_string();
    }

    // Invite pages: /invite/abc → /invite/:code
    if path.starts_with("/invite/") {
        return "/invite/:code".to_string();
    }

    // Static assets: /assets/* → /assets/:file
    if path.starts_with("/assets/") {
        return "/assets/:file".to_string();
    }

    // Known application routes and API endpoints — keep as-is
    if path.starts_with("/api/")
        || matches!(
            path,
            "/" | "/health"
                | "/metrics"
                | "/dashboard"
                | "/pricing"
                | "/account"
                | "/saved"
                | "/invites"
                | "/learn"
                | "/bundle.js"
                | "/main.css"
                | "/favicon.ico"
                | "/house.png"
                | "/robots.txt"
                | "/sitemap.xml"
        )
    {
        return path.to_string();
    }

    // Everything else (bot scans, probes, etc.) → /other
    "/other".to_string()
}
/// Handler for the /metrics endpoint.

View file

@ -6,4 +6,4 @@ mod h3;
pub use bounds::{bounds_intersect, h3_cell_bounds, parse_bounds, require_bounds};
pub use fields::{parse_field_indices, parse_field_set};
pub use filters::{parse_filters, row_passes_filters, ParsedEnumFilter, ParsedFilter};
pub use h3::{cell_for_row, needs_parent, validate_h3_resolution};
pub use h3::{cell_for_row, cell_for_row_cached, needs_parent, validate_h3_resolution};

View file

@ -89,6 +89,10 @@ pub fn parse_filters(
}
}
// Sort by selectivity: more selective filters first for early rejection
numeric.sort_unstable_by_key(|f| f.max_u16.wrapping_sub(f.min_u16));
enums.sort_unstable_by_key(|f| f.allowed.len());
Ok((numeric, enums))
}

View file

@ -1,4 +1,5 @@
use axum::http::StatusCode;
use rustc_hash::FxHashMap;
use tracing::warn;
use crate::consts::{H3_PRECOMPUTE_MAX, H3_REQUEST_MAX, H3_REQUEST_MIN};
@ -45,6 +46,28 @@ pub fn cell_for_row(
)
}
/// Like cell_for_row, but memoises parent lookups in the caller-supplied map.
///
/// Returns `precomputed[row]` unchanged when `need_parent` is false or the
/// stored value is `0`. Otherwise returns the parent of that cell at `h3_res`,
/// computing it at most once per distinct precomputed cell.
///
/// The cache is keyed by the precomputed cell only, so one `cache` must not be
/// reused across calls with different `h3_res` values.
///
/// # Panics
///
/// Panics if `row` is out of bounds, if the precomputed value is not a valid
/// H3 cell index, or if `h3_res` is not a valid parent resolution for it.
#[inline]
pub fn cell_for_row_cached(
    row: usize,
    precomputed: &[u64],
    h3_res: h3o::Resolution,
    need_parent: bool,
    cache: &mut FxHashMap<u64, u64>,
) -> u64 {
    let finest = precomputed[row];

    if need_parent && finest != 0 {
        // Fast path: this cell's parent was already resolved.
        if let Some(&known_parent) = cache.get(&finest) {
            return known_parent;
        }

        let index =
            h3o::CellIndex::try_from(finest).expect("precomputed H3 cell must be valid");
        let parent = u64::from(
            index
                .parent(h3_res)
                .expect("parent resolution must be valid"),
        );
        cache.insert(finest, parent);
        parent
    } else {
        finest
    }
}
/// Whether the given resolution requires computing a parent from precomputed cells.
#[inline]
pub fn needs_parent(resolution: u8) -> bool {

View file

@ -1,6 +1,12 @@
use std::sync::Arc;
use std::time::Duration;
use metrics::gauge;
use reqwest::Client;
use serde::{Deserialize, Serialize};
use tracing::info;
use tracing::{info, warn};
use crate::state::AppState;
#[derive(Deserialize)]
struct AuthResponse {
@ -344,6 +350,116 @@ async fn ensure_saved_searches_rules(
ensure_user_owned_rules(client, base_url, token, "saved_searches").await
}
/// Make sure the `saved_searches` collection carries a `screenshot` file field.
///
/// The field was introduced after the initial collection schema, so existing
/// deployments need it patched in for the frontend to attach screenshot images
/// to saved searches. The collection is fetched, and if no field named
/// `screenshot` exists the full field list plus the new field is PATCHed back.
/// Calling this when the field already exists is a no-op.
///
/// # Errors
///
/// Fails if either HTTP request errors or returns a non-success status, or if
/// the fetched collection has no `fields` array.
async fn ensure_screenshot_field(
    client: &Client,
    base_url: &str,
    token: &str,
) -> anyhow::Result<()> {
    let collection_url = format!("{base_url}/api/collections/saved_searches");
    let bearer = format!("Bearer {token}");

    // Step 1: read the current schema.
    let fetched = client
        .get(&collection_url)
        .header("Authorization", bearer.clone())
        .send()
        .await?;
    let status = fetched.status();
    if !status.is_success() {
        let text = fetched.text().await.unwrap_or_default();
        anyhow::bail!("Failed to fetch saved_searches collection ({status}): {text}");
    }

    let collection: serde_json::Value = fetched.json().await?;
    let existing = collection["fields"]
        .as_array()
        .ok_or_else(|| anyhow::anyhow!("saved_searches collection has no fields array"))?;

    let already_present = existing.iter().any(|field| field["name"] == "screenshot");
    if already_present {
        return Ok(());
    }

    // Step 2: send back the existing fields with the new one appended.
    let screenshot_field = serde_json::json!({
        "name": "screenshot",
        "type": "file",
        "required": false,
        "maxSelect": 1,
        "maxSize": 10485760,
        "mimeTypes": ["image/png", "image/jpeg", "image/webp"],
    });
    let updated_fields: Vec<serde_json::Value> = existing
        .iter()
        .cloned()
        .chain(std::iter::once(screenshot_field))
        .collect();

    let patched = client
        .patch(&collection_url)
        .header("Authorization", bearer)
        .json(&serde_json::json!({ "fields": updated_fields }))
        .send()
        .await?;
    let status = patched.status();
    if !status.is_success() {
        let text = patched.text().await.unwrap_or_default();
        anyhow::bail!("Failed to add screenshot field to saved_searches ({status}): {text}");
    }

    info!("Added screenshot file field to PocketBase collection 'saved_searches'");
    Ok(())
}
/// Make sure `collection_name` has an optional `notes` text field for user annotations.
///
/// The collection is fetched, and if no field named `notes` exists the full
/// field list plus the new field is PATCHed back. Calling this when the field
/// already exists is a no-op.
///
/// # Errors
///
/// Fails if either HTTP request errors or returns a non-success status, or if
/// the fetched collection has no `fields` array.
async fn ensure_notes_field(
    client: &Client,
    base_url: &str,
    token: &str,
    collection_name: &str,
) -> anyhow::Result<()> {
    let collection_url = format!("{base_url}/api/collections/{collection_name}");
    let bearer = format!("Bearer {token}");

    // Step 1: read the current schema.
    let fetched = client
        .get(&collection_url)
        .header("Authorization", bearer.clone())
        .send()
        .await?;
    let status = fetched.status();
    if !status.is_success() {
        let text = fetched.text().await.unwrap_or_default();
        anyhow::bail!("Failed to fetch {collection_name} collection ({status}): {text}");
    }

    let collection: serde_json::Value = fetched.json().await?;
    let existing = collection["fields"]
        .as_array()
        .ok_or_else(|| anyhow::anyhow!("{collection_name} collection has no fields array"))?;

    let already_present = existing.iter().any(|field| field["name"] == "notes");
    if already_present {
        return Ok(());
    }

    // Step 2: send back the existing fields with the new one appended.
    let notes_field = serde_json::json!({
        "name": "notes",
        "type": "text",
        "required": false,
    });
    let updated_fields: Vec<serde_json::Value> = existing
        .iter()
        .cloned()
        .chain(std::iter::once(notes_field))
        .collect();

    let patched = client
        .patch(&collection_url)
        .header("Authorization", bearer)
        .json(&serde_json::json!({ "fields": updated_fields }))
        .send()
        .await?;
    let status = patched.status();
    if !status.is_success() {
        let text = patched.text().await.unwrap_or_default();
        anyhow::bail!("Failed to add notes field to {collection_name} ({status}): {text}");
    }

    info!("Added notes text field to PocketBase collection '{collection_name}'");
    Ok(())
}
/// Ensure a collection has `created` and `updated` autodate fields.
/// PocketBase 0.23+ no longer adds these automatically — they must be explicit.
async fn ensure_autodate_fields(
@ -445,6 +561,7 @@ pub async fn ensure_collections(
Field::text("name", true),
Field::text("params", true),
Field::file("screenshot", vec!["image/png", "image/jpeg", "image/webp"]),
Field::text("notes", false),
Field::autodate("created", true, false),
Field::autodate("updated", true, true),
],
@ -459,6 +576,8 @@ pub async fn ensure_collections(
} else {
ensure_saved_searches_rules(client, base_url, &token).await?;
ensure_autodate_fields(client, base_url, &token, "saved_searches").await?;
ensure_screenshot_field(client, base_url, &token).await?;
ensure_notes_field(client, base_url, &token, "saved_searches").await?;
}
if !existing.iter().any(|n| n == "saved_properties") {
@ -476,6 +595,7 @@ pub async fn ensure_collections(
Field::text("address", true),
Field::text("postcode", true),
Field::text("data", false),
Field::text("notes", false),
Field::autodate("created", true, false),
Field::autodate("updated", true, true),
],
@ -490,6 +610,7 @@ pub async fn ensure_collections(
} else {
ensure_user_owned_rules(client, base_url, &token, "saved_properties").await?;
ensure_autodate_fields(client, base_url, &token, "saved_properties").await?;
ensure_notes_field(client, base_url, &token, "saved_properties").await?;
}
if !existing.iter().any(|n| n == "invites") {
@ -639,3 +760,106 @@ pub async fn ensure_oauth_providers(
info!("PocketBase OAuth configured on users collection");
Ok(())
}
/// Launch a detached background task that refreshes the PocketBase gauges.
///
/// The task loops forever on a 60-second `tokio` interval and calls
/// `poll_pocketbase_counts` on every tick. The task's join handle is dropped,
/// so the poller cannot be awaited or cancelled by the caller.
pub fn start_metrics_poller(state: Arc<AppState>) {
    let poll_forever = async move {
        let mut ticker = tokio::time::interval(Duration::from_secs(60));
        loop {
            ticker.tick().await;
            poll_pocketbase_counts(&state).await;
        }
    };

    tokio::spawn(poll_forever);
}
/// Run one polling round: authenticate against PocketBase and publish record
/// counts as gauges.
///
/// Publishes one gauge each for the `users`, `saved_searches` and
/// `saved_properties` collections, plus an `invites_total` gauge labelled by
/// `type` (`all`, `admin`, `referral`, `redeemed`). A failed superuser login
/// aborts the round with a warning; a count that cannot be fetched simply
/// leaves its gauge untouched for this round.
async fn poll_pocketbase_counts(state: &AppState) {
    let pb_url = state.pocketbase_url.trim_end_matches('/');

    let login = auth_superuser(
        &state.http_client,
        pb_url,
        &state.pocketbase_admin_email,
        &state.pocketbase_admin_password,
    )
    .await;
    let token = match login {
        Ok(tk) => tk,
        Err(err) => {
            warn!("PocketBase metrics poll auth failed: {err}");
            return;
        }
    };

    // Simple collection counts
    let collection_gauges = [
        ("users", "pocketbase_users_total"),
        ("saved_searches", "pocketbase_saved_searches_total"),
        ("saved_properties", "pocketbase_saved_properties_total"),
    ];
    for (collection, metric_name) in collection_gauges {
        let counted = pb_count(&state.http_client, pb_url, &token, collection, None).await;
        if let Some(total) = counted {
            gauge!(metric_name).set(total as f64);
        }
    }

    // Invite metrics: by type and redeemed status
    let invite_gauges = [
        (None, "invites_total", ("type", "all")),
        (
            Some(r#"invite_type="admin""#),
            "invites_total",
            ("type", "admin"),
        ),
        (
            Some(r#"invite_type="referral""#),
            "invites_total",
            ("type", "referral"),
        ),
        (
            Some(r#"used_by_id!=""#),
            "invites_total",
            ("type", "redeemed"),
        ),
    ];
    for (filter, metric, labels) in invite_gauges {
        let counted = pb_count(&state.http_client, pb_url, &token, "invites", filter).await;
        if let Some(total) = counted {
            gauge!(metric, labels.0 => labels.1.to_string()).set(total as f64);
        }
    }
}
/// Ask PocketBase how many records `collection` holds, optionally restricted
/// by a filter expression.
///
/// Requests a single-item page (`perPage=1`) and reads the `totalItems` field
/// of the JSON response. The filter, when given, is URL-encoded into the
/// `filter` query parameter.
///
/// Returns `None` — after logging a warning for transport errors and
/// non-success statuses — whenever the count cannot be obtained, including
/// when the body is not JSON or lacks an unsigned-integer `totalItems`.
async fn pb_count(
    client: &reqwest::Client,
    pb_url: &str,
    token: &str,
    collection: &str,
    filter: Option<&str>,
) -> Option<u64> {
    let mut url = format!("{pb_url}/api/collections/{collection}/records?perPage=1");
    if let Some(expr) = filter {
        url.push_str("&filter=");
        url.push_str(&urlencoding::encode(expr));
    }

    let outcome = client
        .get(&url)
        .header("Authorization", format!("Bearer {token}"))
        .send()
        .await;

    let resp = match outcome {
        Ok(resp) => resp,
        Err(err) => {
            warn!("PocketBase {collection} count query error: {err}");
            return None;
        }
    };

    if !resp.status().is_success() {
        warn!(
            "PocketBase {collection} count query failed: {}",
            resp.status()
        );
        return None;
    }

    // A malformed body is treated as "no count" without logging.
    let body = resp.json::<serde_json::Value>().await.ok()?;
    body.get("totalItems").and_then(|v| v.as_u64())
}

View file

@ -21,6 +21,7 @@ mod shorten;
mod stats;
mod streetview;
mod stripe_webhook;
mod telemetry;
mod tiles;
mod travel_destinations;
mod travel_modes;
@ -48,6 +49,7 @@ pub use screenshot::{fetch_screenshot_bytes, get_screenshot};
pub use shorten::{get_short_url, post_shorten};
pub use streetview::get_streetview;
pub use stripe_webhook::post_stripe_webhook;
pub use telemetry::post_telemetry;
pub use tiles::{get_style, get_tile, init_tile_reader};
pub use travel_destinations::get_travel_destinations;
pub use travel_modes::get_travel_modes;

View file

@ -5,6 +5,7 @@ use axum::response::Json;
use axum::Extension;
use serde::{Deserialize, Serialize};
use serde_json::{json, Value};
use metrics::counter;
use tracing::{info, warn};
use crate::auth::OptionalUser;
@ -527,6 +528,7 @@ pub async fn post_ai_filters(
};
if tokens_used >= AI_FILTERS_WEEKLY_TOKEN_LIMIT {
counter!("ai_requests_total", "status" => "rate_limited").increment(1);
return Err((
StatusCode::TOO_MANY_REQUESTS,
"Weekly AI usage limit reached. Resets next week.".into(),
@ -695,6 +697,9 @@ pub async fn post_ai_filters(
let new_total = tokens_used + total_tokens_accumulated;
update_ai_usage(&state, &user.id, new_total, current_week).await;
counter!("ai_tokens_total").increment(total_tokens_accumulated);
counter!("ai_requests_total", "status" => "success").increment(1);
return Ok(Json(AiFiltersResponse {
filters,
travel_time_filters,

View file

@ -6,14 +6,15 @@ use axum::extract::Query;
use axum::http::StatusCode;
use axum::response::{IntoResponse, Json};
use axum::Extension;
use rustc_hash::FxHashMap;
use serde::{Deserialize, Serialize};
use tracing::{info, warn};
use crate::auth::OptionalUser;
use crate::licensing::check_license_bounds;
use crate::parsing::{
cell_for_row, h3_cell_bounds, needs_parent, parse_field_set, parse_filters, row_passes_filters,
validate_h3_resolution,
cell_for_row_cached, h3_cell_bounds, needs_parent, parse_field_set, parse_filters,
row_passes_filters, validate_h3_resolution,
};
use crate::state::AppState;
@ -132,12 +133,14 @@ pub async fn get_hexagon_stats(
let (min_lat, min_lon, max_lat, max_lon) = h3_cell_bounds(cell, 0.001);
let mut h3_cache: FxHashMap<u64, u64> = FxHashMap::default();
let mut matching_rows: Vec<usize> = Vec::new();
state
.grid
.for_each_in_bounds(min_lat, min_lon, max_lat, max_lon, |row_idx| {
let row = row_idx as usize;
if cell_for_row(row, precomputed, h3_res, need_parent) == cell_u64
if cell_for_row_cached(row, precomputed, h3_res, need_parent, &mut h3_cache)
== cell_u64
&& row_passes_filters(
row,
&parsed_filters,

View file

@ -4,9 +4,11 @@ use axum::extract::Query;
use axum::http::StatusCode;
use axum::response::{IntoResponse, Json};
use axum::Extension;
use rayon::prelude::*;
use rustc_hash::FxHashMap;
use serde::{Deserialize, Serialize};
use serde_json::{Map, Value};
use metrics::histogram;
use tracing::info;
use crate::aggregation::Aggregator;
@ -15,12 +17,15 @@ use crate::consts::{DEMO_BOUNDS, MAX_CELLS_PER_REQUEST};
use crate::data::travel_time::TravelData;
use crate::licensing::check_license_bounds;
use crate::parsing::{
bounds_intersect, cell_for_row, h3_cell_bounds, needs_parent, parse_field_indices,
bounds_intersect, cell_for_row_cached, h3_cell_bounds, needs_parent, parse_field_indices,
parse_filters, require_bounds, row_passes_filters, validate_h3_resolution,
};
use crate::routes::travel_time::{parse_optional_travel, TravelTimeAgg};
use crate::state::AppState;
/// Row count threshold above which we use rayon parallel aggregation.
const PARALLEL_THRESHOLD: usize = 50_000;
#[derive(Serialize)]
pub struct HexagonsResponse {
features: Vec<Map<String, Value>>,
@ -202,11 +207,67 @@ pub async fn get_hexagons(
.map(|_| FxHashMap::default())
.collect();
// Main aggregation loop
let aggregate_row =
|row: usize,
groups: &mut FxHashMap<u64, Aggregator>,
travel_aggs: &mut [FxHashMap<u64, TravelTimeAgg>]| {
// Collect row indices for threshold-based sequential/parallel aggregation
let row_indices = state.grid.query(south, west, north, east);
if row_indices.len() >= PARALLEL_THRESHOLD && !has_travel {
// Parallel path: split rows across rayon threads, each with local accumulators
let chunk_size = (row_indices.len() / rayon::current_num_threads()).max(1000);
let thread_results: Vec<FxHashMap<u64, Aggregator>> = row_indices
.par_chunks(chunk_size)
.map(|chunk| {
let mut local_groups: FxHashMap<u64, Aggregator> = FxHashMap::default();
let mut h3_cache: FxHashMap<u64, u64> = FxHashMap::default();
for &row_idx in chunk {
let row = row_idx as usize;
if !row_passes_filters(
row,
&parsed_filters,
&parsed_enum_filters,
feature_data,
num_features,
) {
continue;
}
let cell_id =
cell_for_row_cached(row, precomputed, h3_res, need_parent, &mut h3_cache);
let agg = local_groups
.entry(cell_id)
.or_insert_with(|| Aggregator::new(num_features));
if let Some(sel_indices) = field_indices.as_deref() {
agg.add_row_selective(
feature_data,
row,
num_features,
sel_indices,
&quant,
);
} else {
agg.add_row(feature_data, row, num_features, &quant);
}
}
local_groups
})
.collect();
// Merge thread-local results into the main groups map
for local_groups in thread_results {
for (cell_id, local_agg) in local_groups {
let agg = groups
.entry(cell_id)
.or_insert_with(|| Aggregator::new(num_features));
agg.merge(&local_agg);
}
}
} else {
// Sequential path (also handles travel time which needs postcode lookups)
let mut travel_minutes: Vec<Option<i16>> = Vec::with_capacity(travel_entries.len());
let mut h3_cache: FxHashMap<u64, u64> = FxHashMap::default();
'row: for &row_idx in &row_indices {
let row = row_idx as usize;
// Regular filters
if !row_passes_filters(
row,
@ -215,14 +276,13 @@ pub async fn get_hexagons(
feature_data,
num_features,
) {
return;
continue;
}
// Travel time filter: check each entry with a range
let mut travel_minutes: Vec<Option<i16>> = Vec::new();
if has_travel {
travel_minutes.clear();
let postcode = pc_interner.resolve(&pc_keys[row]);
travel_minutes.reserve(travel_entries.len());
for (ti, entry) in travel_entries.iter().enumerate() {
let row_data = travel_data[ti].get(postcode);
let minutes = row_data.map(|r| {
@ -236,13 +296,14 @@ pub async fn get_hexagons(
if let (Some(fmin), Some(fmax)) = (entry.filter_min, entry.filter_max) {
match minutes {
Some(mins) if (mins as f32) >= fmin && (mins as f32) <= fmax => {}
_ => return, // Filtered out
_ => continue 'row, // Filtered out (jump to next row_idx)
}
}
}
}
let cell_id = cell_for_row(row, precomputed, h3_res, need_parent);
let cell_id =
cell_for_row_cached(row, precomputed, h3_res, need_parent, &mut h3_cache);
// Aggregate regular features
let aggregation = groups
@ -269,13 +330,8 @@ pub async fn get_hexagons(
agg.add(*mins as f32);
}
}
};
state
.grid
.for_each_in_bounds(south, west, north, east, |row_idx| {
aggregate_row(row_idx as usize, &mut groups, &mut travel_aggs);
});
}
};
let t_agg = t0.elapsed();
@ -296,9 +352,12 @@ pub async fn get_hexagons(
features.truncate(MAX_CELLS_PER_REQUEST);
}
let parallel = row_indices.len() >= PARALLEL_THRESHOLD && !has_travel;
let t_total = t0.elapsed();
info!(
resolution,
rows = row_indices.len(),
parallel,
cells_before_filter = groups.len(),
cells_after_filter = features.len(),
truncated,
@ -311,6 +370,8 @@ pub async fn get_hexagons(
"GET /api/hexagons"
);
histogram!("hexagons_response_count").record(features.len() as f64);
Ok(HexagonsResponse { features })
})
.await

View file

@ -41,13 +41,22 @@ pub async fn get_pois(
) -> Result<Json<POIsResponse>, (StatusCode, String)> {
let (south, west, north, east) = require_bounds(params.bounds)?;
let category_filter: Option<rustc_hash::FxHashSet<String>> = params
let category_filter: Option<rustc_hash::FxHashSet<u16>> = params
.categories
.as_deref()
.filter(|text| !text.is_empty())
.map(|text| {
text.split(',')
.map(|part| part.trim().to_string())
.filter_map(|part| {
let name = part.trim();
state
.poi_data
.category
.values
.iter()
.position(|v| v == name)
.map(|pos| pos as u16)
})
.collect()
});
let categories_raw = params.categories;
@ -63,7 +72,7 @@ pub async fn get_pois(
.filter_map(|&row_idx| {
let row = row_idx as usize;
if let Some(ref categories) = category_filter {
if !categories.contains(state.poi_data.category.get(row)) {
if !categories.contains(&state.poi_data.category.indices[row]) {
return None;
}
}

View file

@ -7,6 +7,7 @@ use axum::Extension;
use rustc_hash::FxHashMap;
use serde::{Deserialize, Serialize};
use serde_json::{Map, Value};
use metrics::histogram;
use tracing::info;
use crate::aggregation::Aggregator;
@ -38,34 +39,6 @@ pub struct PostcodeParams {
travel: Option<String>,
}
/// Convert a postcode's polygon rings into a GeoJSON geometry object.
///
/// Exactly one ring produces a `Polygon` whose coordinates contain that single
/// ring. Any other number of rings produces a `MultiPolygon` in which every
/// ring becomes its own single-ring polygon. Vertices are read as `[lon, lat]`
/// and widened from `f32` to `f64`.
fn build_postcode_geometry(rings: &[Vec<[f32; 2]>]) -> Value {
    // One ring as a JSON array of `[lon, lat]` positions.
    let ring_to_json = |ring: &Vec<[f32; 2]>| -> Value {
        let positions = ring
            .iter()
            .map(|&[lon, lat]| Value::Array(vec![Value::from(lon as f64), Value::from(lat as f64)]))
            .collect();
        Value::Array(positions)
    };

    match rings {
        [only] => serde_json::json!({ "type": "Polygon", "coordinates": [ring_to_json(only)] }),
        _ => {
            let polygons: Vec<Value> = rings
                .iter()
                .map(|ring| Value::Array(vec![ring_to_json(ring)]))
                .collect();
            serde_json::json!({ "type": "MultiPolygon", "coordinates": polygons })
        }
    }
}
pub async fn get_postcodes(
state: Arc<AppState>,
Extension(user): Extension<OptionalUser>,
@ -128,9 +101,8 @@ pub async fn get_postcodes(
let has_selective = field_indices.is_some();
let sel_indices = field_indices.as_deref().unwrap_or(&[]);
// Build postcode -> rows mapping by iterating properties in bounds
// and grouping by their postcode
let mut postcode_rows: FxHashMap<usize, Vec<usize>> = FxHashMap::default();
// Single-pass: aggregate directly into postcode_aggs while iterating properties in bounds
let mut postcode_aggs: FxHashMap<usize, Aggregator> = FxHashMap::default();
state
.grid
@ -146,16 +118,22 @@ pub async fn get_postcodes(
return;
}
// Get postcode for this property
let postcode = state.data.postcode(row);
if let Some(&pc_idx) = postcode_data.postcode_to_idx.get(postcode) {
postcode_rows.entry(pc_idx).or_default().push(row);
let agg = postcode_aggs
.entry(pc_idx)
.or_insert_with(|| Aggregator::new(num_features));
if has_selective {
agg.add_row_selective(feature_data, row, num_features, sel_indices, &quant);
} else {
agg.add_row(feature_data, row, num_features, &quant);
}
}
});
// Filter postcodes by travel time range (if specified)
if has_travel {
postcode_rows.retain(|&pc_idx, _rows| {
postcode_aggs.retain(|&pc_idx, _agg| {
let postcode = &postcode_data.postcodes[pc_idx];
for (ti, entry) in travel_entries.iter().enumerate() {
if let (Some(fmin), Some(fmax)) = (entry.filter_min, entry.filter_max) {
@ -176,26 +154,10 @@ pub async fn get_postcodes(
});
}
// Aggregate for each postcode that has properties in bounds
// (polygon intersection check happens later when building response)
let mut postcode_aggs: FxHashMap<usize, Aggregator> = FxHashMap::default();
// Travel time aggregation per postcode
let mut travel_aggs: FxHashMap<usize, Vec<TravelTimeAgg>> = FxHashMap::default();
for (&pc_idx, rows) in &postcode_rows {
let agg = postcode_aggs
.entry(pc_idx)
.or_insert_with(|| Aggregator::new(num_features));
for &row in rows {
if has_selective {
agg.add_row_selective(feature_data, row, num_features, sel_indices, &quant);
} else {
agg.add_row(feature_data, row, num_features, &quant);
}
}
// Aggregate travel times for this postcode
if has_travel {
if has_travel {
for &pc_idx in postcode_aggs.keys() {
let postcode = &postcode_data.postcodes[pc_idx];
let tt_aggs = travel_aggs.entry(pc_idx).or_insert_with(|| {
(0..travel_entries.len())
@ -225,37 +187,24 @@ pub async fn get_postcodes(
continue;
}
// Compute postcode polygon bounding box across ALL parts and check intersection
let rings = &postcode_data.polygons[pc_idx];
let (mut pc_south, mut pc_north) = (f64::INFINITY, f64::NEG_INFINITY);
let (mut pc_west, mut pc_east) = (f64::INFINITY, f64::NEG_INFINITY);
for ring in rings {
for &[lon, lat] in ring {
let lon_f = lon as f64;
let lat_f = lat as f64;
if lat_f < pc_south {
pc_south = lat_f;
}
if lat_f > pc_north {
pc_north = lat_f;
}
if lon_f < pc_west {
pc_west = lon_f;
}
if lon_f > pc_east {
pc_east = lon_f;
}
}
}
// Use precomputed AABB for bounds intersection check
let (pc_south, pc_west, pc_north, pc_east) = postcode_data.aabbs[pc_idx];
if !bounds_intersect(
pc_south, pc_west, pc_north, pc_east, south, west, north, east,
pc_south as f64,
pc_west as f64,
pc_north as f64,
pc_east as f64,
south,
west,
north,
east,
) {
filtered_out += 1;
continue;
}
let geometry = build_postcode_geometry(rings);
let geometry = postcode_data.geometries[pc_idx].clone();
// Build properties
let centroid = postcode_data.centroids[pc_idx];
@ -327,6 +276,8 @@ pub async fn get_postcodes(
}
}
histogram!("postcodes_response_count").record(features.len() as f64);
let truncated = features.len() > MAX_CELLS_PER_REQUEST;
let t_total = t0.elapsed();
info!(
@ -365,8 +316,7 @@ pub async fn get_postcode_lookup(
if let Some(&idx) = postcode_data.postcode_to_idx.get(&normalized) {
let (lat, lon) = postcode_data.centroids[idx];
let rings = &postcode_data.polygons[idx];
let geometry = build_postcode_geometry(rings);
let geometry = postcode_data.geometries[idx].clone();
info!(postcode = %normalized, "GET /api/postcode/{postcode}");
Ok(Json(serde_json::json!({

View file

@ -14,7 +14,7 @@ use crate::consts::{DEFAULT_PROPERTIES_LIMIT, MAX_PROPERTIES_LIMIT};
use crate::data::RenovationEvent;
use crate::licensing::check_license_bounds;
use crate::parsing::{
cell_for_row, h3_cell_bounds, needs_parent, parse_filters, row_passes_filters,
cell_for_row_cached, h3_cell_bounds, needs_parent, parse_filters, row_passes_filters,
validate_h3_resolution,
};
use crate::state::AppState;
@ -220,12 +220,14 @@ pub async fn get_hexagon_properties(
let (min_lat, min_lon, max_lat, max_lon) = h3_cell_bounds(cell, 0.001);
let mut h3_cache: FxHashMap<u64, u64> = FxHashMap::default();
let mut matching_rows: Vec<usize> = Vec::new();
state
.grid
.for_each_in_bounds(min_lat, min_lon, max_lat, max_lon, |row_idx| {
let row = row_idx as usize;
if cell_for_row(row, precomputed, h3_res, need_parent) == cell_u64
if cell_for_row_cached(row, precomputed, h3_res, need_parent, &mut h3_cache)
== cell_u64
&& row_passes_filters(
row,
&parsed_filters,

View file

@ -3,6 +3,7 @@ use std::sync::Arc;
use axum::http::header::HeaderValue;
use axum::http::{header, HeaderMap, StatusCode, Uri};
use axum::response::IntoResponse;
use metrics::histogram;
use tracing::{info, warn};
use crate::state::AppState;
@ -44,8 +45,14 @@ pub async fn get_screenshot(
) -> impl IntoResponse {
let qs = uri.query().unwrap_or_default();
let auth = headers.get(header::AUTHORIZATION);
let is_og = qs.contains("og=1");
match fetch_screenshot_bytes(&state, qs, auth).await {
let t0 = std::time::Instant::now();
let result = fetch_screenshot_bytes(&state, qs, auth).await;
let kind = if is_og { "og" } else { "export" };
histogram!("screenshot_duration_seconds", "kind" => kind).record(t0.elapsed().as_secs_f64());
match result {
Ok(bytes) => (
StatusCode::OK,
[

View file

@ -50,7 +50,32 @@ pub fn extract_price_history(
}
}
/// Per-feature accumulator kind, determined once before the row loop.
///
/// `compute_feature_stats` creates one value of this type per feature index,
/// updates it in place for every matching row, and finally converts it into
/// the corresponding response struct.
enum FeatureAccum {
/// Numeric: track count, min, max, sum, histogram bins.
Numeric {
/// Number of finite values seen so far.
count: usize,
/// Smallest finite value seen; starts at `f32::INFINITY`.
min_value: f32,
/// Largest finite value seen; starts at `f32::NEG_INFINITY`.
max_value: f32,
/// Sum of the finite values, accumulated in `f64`.
sum: f64,
/// Per-bin counts: bin 0 takes values below `p1`, the last bin takes
/// values at or above `p99`, the bins in between split `[p1, p99)`.
bins: Vec<u64>,
/// `p1` of the feature's global histogram (lower edge of the middle bins).
p1: f32,
/// `p99` of the feature's global histogram (upper edge of the middle bins).
p99: f32,
/// Width of one middle bin, `(p99 - p1) / (num_bins - 2)`;
/// `0.0` when there are no middle bins or `p99 <= p1`.
middle_width: f32,
/// Number of bins in the global histogram; `bins` is allocated with this length.
num_bins: usize,
/// `min` of the global histogram, copied into the response unchanged.
global_min: f32,
/// `max` of the global histogram, copied into the response unchanged.
global_max: f32,
},
/// Enum: count occurrences per variant index.
Enum {
/// One counter per enum variant, indexed by the feature value cast to `usize`.
value_counts: Vec<u64>,
},
/// Feature skipped (fields were specified and this feature is not in `field_set`).
Skip,
}
/// Compute per-feature stats (numeric histograms + enum counts) for the given rows.
/// Single-pass: iterates rows in the outer loop for cache-friendly row-major access.
#[allow(clippy::too_many_arguments)]
pub fn compute_feature_stats(
matching_rows: &[usize],
@ -61,107 +86,161 @@ pub fn compute_feature_stats(
fields_specified: bool,
field_set: &HashSet<String>,
) -> (Vec<NumericFeatureStats>, Vec<EnumFeatureStats>) {
let num_features = feature_names.len();
// Pre-allocate accumulators for all features
let mut accums: Vec<FeatureAccum> = (0..num_features)
.map(|fi| {
let feature_name = &feature_names[fi];
if fields_specified && !field_set.contains(feature_name.as_str()) {
return FeatureAccum::Skip;
}
if let Some(ev) = enum_values.get(&fi) {
FeatureAccum::Enum {
value_counts: vec![0u64; ev.len()],
}
} else {
let global_hist = &feature_stats_data[fi].histogram;
let p1 = global_hist.p1;
let p99 = global_hist.p99;
let num_bins = global_hist.counts.len();
let middle_bins = num_bins.saturating_sub(2);
let middle_width = if middle_bins > 0 && p99 > p1 {
(p99 - p1) / middle_bins as f32
} else {
0.0
};
FeatureAccum::Numeric {
count: 0,
min_value: f32::INFINITY,
max_value: f32::NEG_INFINITY,
sum: 0.0,
bins: vec![0u64; num_bins],
p1,
p99,
middle_width,
num_bins,
global_min: global_hist.min,
global_max: global_hist.max,
}
}
})
.collect();
// Single pass: outer loop = rows, inner loop = features (cache-friendly row-major access)
for &row in matching_rows {
for (fi, accum) in accums.iter_mut().enumerate() {
match accum {
FeatureAccum::Skip => {}
FeatureAccum::Enum { value_counts } => {
let value = data.get_feature(row, fi);
if value.is_finite() {
let idx = value as usize;
if idx < value_counts.len() {
value_counts[idx] += 1;
} else {
warn!(
feature = feature_names[fi].as_str(),
idx,
max = value_counts.len(),
"Enum index out of bounds — possible data/schema mismatch"
);
}
}
}
FeatureAccum::Numeric {
count,
min_value,
max_value,
sum,
bins,
p1,
p99,
middle_width,
num_bins,
..
} => {
let value = data.get_feature(row, fi);
if value.is_finite() {
*count += 1;
if value < *min_value {
*min_value = value;
}
if value > *max_value {
*max_value = value;
}
*sum += value as f64;
let bin = if value < *p1 {
0
} else if value >= *p99 {
*num_bins - 1
} else if *middle_width > 0.0 {
let middle_bin = ((value - *p1) / *middle_width) as usize;
(1 + middle_bin).min(*num_bins - 2)
} else {
*num_bins / 2
};
bins[bin] += 1;
}
}
}
}
}
// Build response structs from accumulators
let mut numeric_features = Vec::new();
let mut enum_features_out = Vec::new();
for (feature_index, feature_name) in feature_names.iter().enumerate() {
if fields_specified && !field_set.contains(feature_name.as_str()) {
continue;
}
for (fi, accum) in accums.into_iter().enumerate() {
match accum {
FeatureAccum::Skip => {}
FeatureAccum::Enum { value_counts } => {
let ev = &enum_values[&fi];
let counts: HashMap<String, u64> = value_counts
.iter()
.enumerate()
.filter(|(_, &count)| count > 0)
.map(|(idx, &count)| (ev[idx].clone(), count))
.collect();
if let Some(ev) = enum_values.get(&feature_index) {
let mut value_counts = vec![0u64; ev.len()];
for &row in matching_rows {
let value = data.get_feature(row, feature_index);
if value.is_finite() {
let idx = value as usize;
if idx < value_counts.len() {
value_counts[idx] += 1;
} else {
warn!(
feature = feature_name.as_str(),
idx,
max = value_counts.len(),
"Enum index out of bounds — possible data/schema mismatch"
);
}
if !counts.is_empty() {
enum_features_out.push(EnumFeatureStats {
name: feature_names[fi].clone(),
counts,
});
}
}
let counts: HashMap<String, u64> = value_counts
.iter()
.enumerate()
.filter(|(_, &count)| count > 0)
.map(|(idx, &count)| (ev[idx].clone(), count))
.collect();
if !counts.is_empty() {
enum_features_out.push(EnumFeatureStats {
name: feature_name.clone(),
counts,
});
}
} else {
let global_hist = &feature_stats_data[feature_index].histogram;
let p1 = global_hist.p1;
let p99 = global_hist.p99;
let num_bins = global_hist.counts.len();
let mut count = 0usize;
let mut min_value = f32::INFINITY;
let mut max_value = f32::NEG_INFINITY;
let mut sum = 0.0f64;
let mut bins = vec![0u64; num_bins];
let middle_bins = num_bins.saturating_sub(2);
let middle_width = if middle_bins > 0 && p99 > p1 {
(p99 - p1) / middle_bins as f32
} else {
0.0
};
for &row in matching_rows {
let value = data.get_feature(row, feature_index);
if value.is_finite() {
count += 1;
if value < min_value {
min_value = value;
}
if value > max_value {
max_value = value;
}
sum += value as f64;
let bin = if value < p1 {
0
} else if value >= p99 {
num_bins - 1
} else if middle_width > 0.0 {
let middle_bin = ((value - p1) / middle_width) as usize;
(1 + middle_bin).min(num_bins - 2)
} else {
num_bins / 2
};
bins[bin] += 1;
FeatureAccum::Numeric {
count,
min_value,
max_value,
sum,
bins,
p1,
p99,
global_min,
global_max,
..
} => {
if count > 0 {
numeric_features.push(NumericFeatureStats {
name: feature_names[fi].clone(),
count,
min: min_value as f64,
max: max_value as f64,
mean: sum / count as f64,
histogram: HistogramStats {
min: global_min as f64,
max: global_max as f64,
p1: p1 as f64,
p99: p99 as f64,
counts: bins,
},
});
}
}
if count > 0 {
numeric_features.push(NumericFeatureStats {
name: feature_name.clone(),
count,
min: min_value as f64,
max: max_value as f64,
mean: sum / count as f64,
histogram: HistogramStats {
min: global_hist.min as f64,
max: global_hist.max as f64,
p1: p1 as f64,
p99: p99 as f64,
counts: bins,
},
});
}
}
}

View file

@ -0,0 +1,77 @@
use axum::http::{HeaderMap, StatusCode};
use axum::response::Json;
use axum::Extension;
use metrics::{counter, gauge};
use serde::Deserialize;
use crate::auth::OptionalUser;
/// JSON body accepted by `post_telemetry`.
#[derive(Deserialize)]
pub struct TelemetryPayload {
/// Value written to the `user_session_seconds` gauge.
session_seconds: u64,
/// Value written to the `user_active_filters` gauge.
filter_count: u64,
/// Sent once on first beacon: the entry page path
#[serde(default)]
entry_path: Option<String>,
/// Sent once on first beacon: the document.referrer domain (or "direct")
#[serde(default)]
referrer: Option<String>,
}
/// Handle a telemetry beacon: publish the session gauges and, when the beacon
/// carries an entry path, count the entry point.
///
/// * `user` – the optionally authenticated caller; callers without a user are
///   labelled `"anonymous"`.
/// * `headers` – only `user-agent` is read, to derive a coarse browser label.
/// * `payload` – the deserialized beacon body.
///
/// Always returns `204 No Content`.
pub async fn post_telemetry(
    Extension(user): Extension<OptionalUser>,
    headers: HeaderMap,
    Json(payload): Json<TelemetryPayload>,
) -> StatusCode {
    // NOTE(review): the e-mail address is used verbatim as a label value, so the
    // number of series grows with the user base and PII lands in the metrics
    // store — confirm this is intended.
    let user_label = match &user.0 {
        Some(u) => u.email.clone(),
        None => "anonymous".to_string(),
    };

    let ua = headers
        .get("user-agent")
        .and_then(|v| v.to_str().ok())
        .unwrap_or("unknown");
    let browser = parse_browser(ua);

    gauge!("user_session_seconds", "user" => user_label.clone(), "browser" => browser.clone())
        .set(payload.session_seconds as f64);
    gauge!("user_active_filters", "user" => user_label, "browser" => browser)
        .set(payload.filter_count as f64);

    // Entrypoint tracking (sent once per session)
    if let Some(path) = &payload.entry_path {
        counter!(
            "entrypoint_total",
            "path" => normalize_entry_path(path),
            "referrer" => normalize_referrer(payload.referrer.as_deref())
        )
        .increment(1);
    }

    StatusCode::NO_CONTENT
}

/// Reduce the client-supplied referrer to a value that is safe to use as a metric label.
///
/// The referrer comes straight from the request body, so it is never used verbatim:
/// a missing or blank value becomes `"direct"`, a value shaped like `host` or
/// `host:port` is lowercased, and everything else collapses to `"other"`.
///
/// NOTE(review): this rejects arbitrary junk but does not cap the number of distinct
/// hostnames; an allow-list of known referrers would bound cardinality completely.
fn normalize_referrer(referrer: Option<&str>) -> String {
    // Longest hostname DNS allows, plus room for a `:port` suffix.
    const MAX_LEN: usize = 253 + 6;

    let raw = referrer.map(str::trim).unwrap_or("");
    if raw.is_empty() {
        return "direct".to_string();
    }

    let host_like = raw.len() <= MAX_LEN
        && raw
            .bytes()
            .all(|b| b.is_ascii_alphanumeric() || matches!(b, b'.' | b'-' | b':'));
    if host_like {
        raw.to_ascii_lowercase()
    } else {
        "other".to_string()
    }
}
/// Collapse an entry path into a small, fixed set of metric label values.
///
/// Known static routes are returned unchanged, invite and share links are
/// replaced by their route template, and every other path becomes `"/other"`.
fn normalize_entry_path(path: &str) -> String {
    // Routes without dynamic segments; these are reported as-is.
    const STATIC_ROUTES: [&str; 7] = [
        "/",
        "/dashboard",
        "/pricing",
        "/learn",
        "/saved",
        "/invites",
        "/account",
    ];

    let label = if STATIC_ROUTES.contains(&path) {
        path
    } else if path.starts_with("/invite/") {
        "/invite/:code"
    } else if path.starts_with("/s/") {
        "/s/:code"
    } else {
        "/other"
    };
    label.to_owned()
}
/// Map a `User-Agent` header value to a coarse browser family label.
///
/// Returns one of `"Firefox"`, `"Edge"`, `"Chrome"`, `"Safari"` or `"Other"`.
/// The checks are ordered from most to least specific because Chromium-based
/// browsers also advertise `Chrome` and `Safari`, and iOS browsers advertise
/// `Safari` alongside their own token (`FxiOS`, `EdgiOS`, `CriOS`).
fn parse_browser(ua: &str) -> String {
    /// True when `ua` contains at least one of `tokens`.
    fn contains_any(ua: &str, tokens: &[&str]) -> bool {
        tokens.iter().any(|token| ua.contains(*token))
    }

    let family = if contains_any(ua, &["Firefox", "FxiOS"]) {
        "Firefox"
    } else if contains_any(ua, &["Edg/", "EdgA/", "EdgiOS/", "Edge/"]) {
        // Desktop Chromium Edge, Edge for Android, Edge for iOS, legacy EdgeHTML.
        "Edge"
    } else if contains_any(ua, &["Chrome", "CriOS"]) {
        "Chrome"
    } else if ua.contains("Safari") {
        "Safari"
    } else {
        "Other"
    };
    family.to_string()
}

View file

@ -34,7 +34,7 @@ pub struct AppState {
pub features_response: FeaturesResponse,
/// URL of the screenshot service (e.g. http://screenshot:8002)
pub screenshot_url: String,
/// Public-facing URL for absolute og:image URLs (e.g. https://perfectpostcodes.schmelczer.dev)
/// Public-facing URL for absolute og:image URLs (e.g. https://perfectpostcodes.dev)
pub public_url: String,
/// True when --dist is not provided (no static serving, relaxed auth checks)
pub is_dev: bool,