Format and lint

This commit is contained in:
Andras Schmelczer 2026-02-08 12:37:07 +00:00
parent 42ee2d4c51
commit 04a78e7bfe
75 changed files with 1290 additions and 719 deletions

1
server-rs/Cargo.lock generated
View file

@ -2375,7 +2375,6 @@ dependencies = [
"pmtiles",
"polars",
"rayon",
"regex",
"reqwest",
"rust_xlsxwriter",
"rustc-hash",

View file

@ -22,7 +22,6 @@ tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt"] }
metrics = "0.24"
metrics-exporter-prometheus = "0.16"
reqwest = { version = "0.12", features = ["rustls-tls", "json"] }
regex = "1"
urlencoding = "2"
rust_xlsxwriter = "0.79"
pmtiles = { version = "0.12", features = ["mmap-async-tokio"] }

View file

@ -8,6 +8,7 @@ pub const SERVER_ADDRESS: &str = "0.0.0.0:8001";
pub const GRID_CELL_SIZE: f32 = 0.01;
pub const MAX_POIS_PER_REQUEST: usize = 2500;
pub const MAX_CELLS_PER_REQUEST: usize = 5000;
pub const DEFAULT_PROPERTIES_LIMIT: usize = 100;
pub const MAX_PROPERTIES_LIMIT: usize = 500;

View file

@ -233,7 +233,10 @@ async fn main() -> anyhow::Result<()> {
);
info!("PocketBase configured: {}", cli.pocketbase_url);
info!("Ollama configured: {} (model: {})", cli.ollama_url, cli.ollama_model);
info!(
"Ollama configured: {} (model: {})",
cli.ollama_url, cli.ollama_model
);
let token_cache = Arc::new(auth::TokenCache::new());
@ -273,10 +276,11 @@ async fn main() -> anyhow::Result<()> {
let state_poi_categories = state.clone();
let state_hexagon_properties = state.clone();
let state_hexagon_stats = state.clone();
let state_og_image = state.clone();
let state_screenshot = state.clone();
let state_export = state.clone();
let state_crawler = state.clone();
let state_pb = state.clone();
let state_postcode_stats = state.clone();
let state_area_summary = state.clone();
let api = Router::new()
@ -315,8 +319,12 @@ async fn main() -> anyhow::Result<()> {
get(move |query| routes::get_hexagon_stats(state_hexagon_stats.clone(), query)),
)
.route(
"/api/og-image",
get(move |query| routes::get_og_image(state_og_image.clone(), query)),
"/api/postcode-stats",
get(move |query| routes::get_postcode_stats(state_postcode_stats.clone(), query)),
)
.route(
"/api/screenshot",
get(move |query| routes::get_screenshot(state_screenshot.clone(), query)),
)
.route(
"/api/export",

View file

@ -41,9 +41,9 @@ pub async fn og_middleware(request: Request, next: Next) -> Response {
// Build OG-injected HTML (og=1 triggers heading overlay on screenshot)
let og_image_url = if query_string.is_empty() {
format!("{}/api/og-image?og=1", state.public_url)
format!("{}/api/screenshot?og=1", state.public_url)
} else {
format!("{}/api/og-image?og=1&{}", state.public_url, query_string)
format!("{}/api/screenshot?og=1&{}", state.public_url, query_string)
};
let og_tags = format!(

View file

@ -3,6 +3,7 @@ use axum::http::StatusCode;
/// Check if two bounding boxes intersect.
/// Both boxes are (south, west, north, east) / (min_lat, min_lon, max_lat, max_lon).
#[inline]
#[allow(clippy::too_many_arguments)]
pub fn bounds_intersect(
a_south: f64,
a_west: f64,

View file

@ -4,11 +4,12 @@ mod features;
mod hexagon_stats;
pub(crate) mod hexagons;
mod me;
mod og_image;
mod pb_proxy;
mod pois;
mod postcode_stats;
mod postcodes;
pub(crate) mod properties;
mod screenshot;
mod tiles;
pub use area_summary::post_area_summary;
@ -17,9 +18,10 @@ pub use features::{build_features_response, get_features, FeatureInfo, FeaturesR
pub use hexagon_stats::get_hexagon_stats;
pub use hexagons::get_hexagons;
pub use me::get_me;
pub use og_image::get_og_image;
pub use pb_proxy::proxy_to_pocketbase;
pub use pois::{get_poi_categories, get_pois};
pub use postcode_stats::get_postcode_stats;
pub use postcodes::{get_postcode_lookup, get_postcodes};
pub use properties::get_hexagon_properties;
pub use screenshot::get_screenshot;
pub use tiles::{get_style, get_tile, init_tile_reader};

View file

@ -15,7 +15,7 @@ use crate::routes::FeatureInfo;
use crate::state::AppState;
const MAX_EXPORT_POSTCODES: usize = 250;
/// Height (in pixels) reserved for the OG image row
/// Height (in pixels) reserved for the screenshot row
const IMAGE_ROW_HEIGHT: f64 = 225.0;
#[derive(Deserialize)]
@ -152,7 +152,7 @@ pub async fn get_export(
let public_url = state.public_url.clone();
// Compute view param for OG image and dashboard URL
// Compute view param for screenshot and dashboard URL
let center_lat = (south + north) / 2.0;
let center_lon = (west + east) / 2.0;
let lat_span = north - south;
@ -164,7 +164,7 @@ pub async fn get_export(
let view_param = format!("{:.4},{:.4},{:.1}", center_lat, center_lon, zoom);
// Fetch screenshot (async, before spawn_blocking)
let og_image_bytes = fetch_screenshot(&state, &view_param, filters_str.as_deref()).await;
let screenshot_bytes = fetch_screenshot(&state, &view_param, filters_str.as_deref()).await;
// Build feature name → description map from the precomputed features response
let feature_descriptions: FxHashMap<String, String> = state
@ -335,16 +335,16 @@ pub async fn get_export(
.set_row_format(0, &link_fmt)
.map_err(|err| format!("Failed to set row format: {err}"))?;
// Row 1: OG image (if available)
// Row 1: screenshot (if available)
let mut current_row = 1u32;
if let Some(ref img_bytes) = og_image_bytes {
if let Some(ref img_bytes) = screenshot_bytes {
match Image::new_from_buffer(img_bytes) {
Ok(mut image) => {
// Scale image to fit: ~400px wide, auto height preserving aspect ratio
image = image.set_scale_to_size(400, 300, true);
sheet
.insert_image(current_row, 0, &image)
.map_err(|err| format!("Failed to insert OG image: {err}"))?;
.map_err(|err| format!("Failed to insert screenshot: {err}"))?;
// Set row height to accommodate the image
sheet
.set_row_height(current_row, IMAGE_ROW_HEIGHT)
@ -352,7 +352,7 @@ pub async fn get_export(
current_row += 1;
}
Err(err) => {
warn!("Failed to parse OG image for export: {err}");
warn!("Failed to parse screenshot for export: {err}");
// Skip image row, don't leave a gap
}
}
@ -479,7 +479,7 @@ pub async fn get_export(
postcodes = postcode_aggs.len(),
sampled = was_sampled,
features = all_feature_indices.len(),
has_og_image = og_image_bytes.is_some(),
has_screenshot = screenshot_bytes.is_some(),
bytes = buf.len(),
total_ms = format_args!("{:.1}", t_total.as_secs_f64() * 1000.0),
"GET /api/export"

View file

@ -8,12 +8,12 @@ use crate::data::{Histogram, PropertyData};
use crate::features::{ENUM_FEATURE_GROUPS, FEATURE_GROUPS};
use crate::state::AppState;
fn is_empty(v: &str) -> bool {
v.is_empty()
fn is_empty(val: &str) -> bool {
val.is_empty()
}
fn is_false(v: &bool) -> bool {
!v
fn is_false(val: &bool) -> bool {
!val
}
#[derive(Clone, Serialize)]

View file

@ -14,44 +14,44 @@ use crate::state::AppState;
#[derive(Serialize)]
pub struct HistogramStats {
min: f64,
max: f64,
pub min: f64,
pub max: f64,
/// 1st percentile (left edge of main distribution)
p1: f64,
pub p1: f64,
/// 99th percentile (right edge of main distribution)
p99: f64,
counts: Vec<u64>,
pub p99: f64,
pub counts: Vec<u64>,
}
#[derive(Serialize)]
pub struct NumericFeatureStats {
name: String,
count: usize,
min: f64,
max: f64,
mean: f64,
histogram: HistogramStats,
pub name: String,
pub count: usize,
pub min: f64,
pub max: f64,
pub mean: f64,
pub histogram: HistogramStats,
}
#[derive(Serialize)]
pub struct EnumFeatureStats {
name: String,
counts: HashMap<String, u64>,
pub name: String,
pub counts: HashMap<String, u64>,
}
#[derive(Serialize)]
pub struct PricePoint {
year: f32,
price: f32,
pub year: f32,
pub price: f32,
}
#[derive(Serialize)]
pub struct HexagonStatsResponse {
count: usize,
numeric_features: Vec<NumericFeatureStats>,
enum_features: Vec<EnumFeatureStats>,
pub count: usize,
pub numeric_features: Vec<NumericFeatureStats>,
pub enum_features: Vec<EnumFeatureStats>,
#[serde(skip_serializing_if = "Vec::is_empty")]
price_history: Vec<PricePoint>,
pub price_history: Vec<PricePoint>,
}
#[derive(Deserialize)]
@ -158,7 +158,10 @@ pub async fn get_hexagon_stats(
// Collect price history (year, price) pairs
let price_history = {
let year_idx = state.feature_name_to_index.get("Date of last transaction").copied();
let year_idx = state
.feature_name_to_index
.get("Date of last transaction")
.copied();
let price_idx = state.feature_name_to_index.get("Last known price").copied();
match (year_idx, price_idx) {
(Some(yi), Some(pi)) => {

View file

@ -8,7 +8,7 @@ use serde::{Deserialize, Serialize};
use serde_json::{Map, Value};
use tracing::{info, warn};
use crate::consts::{H3_PRECOMPUTE_MAX, H3_REQUEST_MAX, H3_REQUEST_MIN};
use crate::consts::{H3_PRECOMPUTE_MAX, H3_REQUEST_MAX, H3_REQUEST_MIN, MAX_CELLS_PER_REQUEST};
use crate::parsing::{
bounds_intersect, h3_cell_bounds, parse_bounds, parse_filters, row_passes_filters,
};
@ -293,7 +293,7 @@ pub async fn get_hexagons(
let t_agg = t0.elapsed();
let features = build_feature_maps(
let mut features = build_feature_maps(
&groups,
min_keys,
max_keys,
@ -303,11 +303,17 @@ pub async fn get_hexagons(
(south, west, north, east),
);
let truncated = features.len() > MAX_CELLS_PER_REQUEST;
if truncated {
features.truncate(MAX_CELLS_PER_REQUEST);
}
let t_total = t0.elapsed();
info!(
resolution,
cells_before_filter = groups.len(),
cells_after_filter = features.len(),
truncated,
bounds = format_args!("{:.4},{:.4},{:.4},{:.4}", south, west, north, east),
filters = num_filters,
filters_raw = filters_str.as_deref().unwrap_or("-"),

View file

@ -0,0 +1,268 @@
use std::collections::HashMap;
use std::sync::Arc;
use axum::extract::Query;
use axum::http::StatusCode;
use axum::response::Json;
use serde::Deserialize;
use tracing::{info, warn};
use crate::parsing::{parse_filters, row_passes_filters};
use crate::state::AppState;
use super::hexagon_stats::{
EnumFeatureStats, HexagonStatsResponse, HistogramStats, NumericFeatureStats, PricePoint,
};
/// Query-string parameters accepted by `GET /api/postcode-stats`.
#[derive(Deserialize)]
pub struct PostcodeStatsParams {
/// Postcode to report on. The handler uppercases it and collapses runs of
/// whitespace into a single space before looking it up.
pub postcode: String,
/// Optional raw filter expression; it is handed unchanged to `parse_filters`.
pub filters: Option<String>,
/// Comma-separated feature names to include in the stats response.
/// Entries are trimmed and empty entries are ignored.
///
/// When the parameter is present, only the listed features are computed, so an
/// empty value yields no feature stats. When the parameter is absent, the handler
/// applies no restriction and computes stats for every feature.
///
/// NOTE(review): this comment previously stated that an absent `fields` returns no
/// features, which is not what the handler does — confirm which behaviour is intended.
pub fields: Option<String>,
}
pub async fn get_postcode_stats(
state: Arc<AppState>,
Query(params): Query<PostcodeStatsParams>,
) -> Result<Json<HexagonStatsResponse>, (StatusCode, String)> {
// Normalize postcode: uppercase, collapse whitespace
let normalized = params
.postcode
.to_uppercase()
.split_whitespace()
.collect::<Vec<_>>()
.join(" ");
// Look up postcode centroid for spatial search
let pc_idx = match state.postcode_data.postcode_to_idx.get(&normalized) {
Some(&idx) => idx,
None => {
warn!(postcode = %normalized, "Postcode not found");
return Err((
StatusCode::NOT_FOUND,
format!("Postcode not found: {}", normalized),
));
}
};
let (centroid_lat, centroid_lon) = state.postcode_data.centroids[pc_idx];
let filters_str = params.filters.clone();
let (parsed_filters, parsed_enum_filters) = parse_filters(
params.filters.as_deref(),
&state.feature_name_to_index,
&state.data.enum_values,
);
let num_filters = parsed_filters.len() + parsed_enum_filters.len();
let fields_specified = params.fields.is_some();
let field_set: std::collections::HashSet<String> = params
.fields
.as_ref()
.map(|fields_str| {
fields_str
.split(',')
.map(|field| field.trim().to_string())
.filter(|field| !field.is_empty())
.collect()
})
.unwrap_or_default();
let postcode_str = normalized.clone();
let response = tokio::task::spawn_blocking(move || {
let start_time = std::time::Instant::now();
let num_features = state.data.num_features;
let feature_data = &state.data.feature_data;
// Search ±0.02° around centroid (~2km, generous for a postcode)
let offset: f64 = 0.02;
let min_lat = centroid_lat as f64 - offset;
let max_lat = centroid_lat as f64 + offset;
let min_lon = centroid_lon as f64 - offset;
let max_lon = centroid_lon as f64 + offset;
let mut matching_rows: Vec<usize> = Vec::new();
state
.grid
.for_each_in_bounds(min_lat, min_lon, max_lat, max_lon, |row_idx| {
let row = row_idx as usize;
let row_postcode = state.data.postcode(row);
if row_postcode == postcode_str
&& row_passes_filters(
row,
&parsed_filters,
&parsed_enum_filters,
feature_data,
num_features,
)
{
matching_rows.push(row);
}
});
let total_count = matching_rows.len();
// Collect price history (year, price) pairs
let price_history = {
let year_idx = state
.feature_name_to_index
.get("Date of last transaction")
.copied();
let price_idx = state.feature_name_to_index.get("Last known price").copied();
match (year_idx, price_idx) {
(Some(yi), Some(pi)) => {
let mut points: Vec<PricePoint> = matching_rows
.iter()
.filter_map(|&row| {
let year = feature_data[row * num_features + yi];
let price = feature_data[row * num_features + pi];
if year.is_finite() && price.is_finite() {
Some(PricePoint { year, price })
} else {
None
}
})
.collect();
// Cap at 5000 points by evenly sampling
if points.len() > 5000 {
let step = points.len() as f64 / 5000.0;
points = (0..5000)
.map(|i| {
let idx = (i as f64 * step) as usize;
PricePoint {
year: points[idx].year,
price: points[idx].price,
}
})
.collect();
}
points
}
_ => Vec::new(),
}
};
let mut numeric_features = Vec::new();
let mut enum_features_out = Vec::new();
for (feature_index, feature_name) in state.data.feature_names.iter().enumerate() {
if fields_specified && !field_set.contains(feature_name.as_str()) {
continue;
}
if let Some(enum_values) = state.data.enum_values.get(&feature_index) {
// Enum feature: count occurrences of each value
let mut value_counts = vec![0u64; enum_values.len()];
for &row in &matching_rows {
let value = feature_data[row * num_features + feature_index];
if value.is_finite() {
let idx = value as usize;
if idx < value_counts.len() {
value_counts[idx] += 1;
}
}
}
let counts: HashMap<String, u64> = value_counts
.iter()
.enumerate()
.filter(|(_, &count)| count > 0)
.map(|(idx, &count)| (enum_values[idx].clone(), count))
.collect();
if !counts.is_empty() {
enum_features_out.push(EnumFeatureStats {
name: feature_name.clone(),
counts,
});
}
} else {
// Numeric feature: compute stats and histogram
let global_hist = &state.data.feature_stats[feature_index].histogram;
let p1 = global_hist.p1;
let p99 = global_hist.p99;
let num_bins = global_hist.counts.len();
let mut count = 0usize;
let mut min_value = f32::INFINITY;
let mut max_value = f32::NEG_INFINITY;
let mut sum = 0.0f64;
let mut bins = vec![0u64; num_bins];
let middle_bins = num_bins.saturating_sub(2);
let middle_width = if middle_bins > 0 && p99 > p1 {
(p99 - p1) / middle_bins as f32
} else {
0.0
};
for &row in &matching_rows {
let value = feature_data[row * num_features + feature_index];
if value.is_finite() {
count += 1;
if value < min_value {
min_value = value;
}
if value > max_value {
max_value = value;
}
sum += value as f64;
let bin = if value < p1 {
0
} else if value >= p99 {
num_bins - 1
} else if middle_width > 0.0 {
let middle_bin = ((value - p1) / middle_width) as usize;
(1 + middle_bin).min(num_bins - 2)
} else {
num_bins / 2
};
bins[bin] += 1;
}
}
if count > 0 {
numeric_features.push(NumericFeatureStats {
name: feature_name.clone(),
count,
min: min_value as f64,
max: max_value as f64,
mean: sum / count as f64,
histogram: HistogramStats {
min: global_hist.min as f64,
max: global_hist.max as f64,
p1: p1 as f64,
p99: p99 as f64,
counts: bins,
},
});
}
}
}
let elapsed = start_time.elapsed();
info!(
postcode = %postcode_str,
total_count,
filters = num_filters,
filters_raw = filters_str.as_deref().unwrap_or("-"),
ms = format_args!("{:.1}", elapsed.as_secs_f64() * 1000.0),
"GET /api/postcode-stats"
);
Ok(HexagonStatsResponse {
count: total_count,
numeric_features,
enum_features: enum_features_out,
price_history,
})
})
.await
.map_err(|error| (StatusCode::INTERNAL_SERVER_ERROR, error.to_string()))?
.map_err(|error: String| (StatusCode::INTERNAL_SERVER_ERROR, error))?;
Ok(Json(response))
}

View file

@ -8,6 +8,7 @@ use serde::{Deserialize, Serialize};
use serde_json::{Map, Value};
use tracing::info;
use crate::consts::MAX_CELLS_PER_REQUEST;
use crate::parsing::{bounds_intersect, parse_bounds, parse_filters, row_passes_filters};
use crate::state::AppState;
@ -282,7 +283,8 @@ pub async fn get_postcodes(
for feat_index in iter {
if aggregation.feat_counts[feat_index] > 0 {
let avg = aggregation.sums[feat_index] / aggregation.feat_counts[feat_index] as f64;
let avg =
aggregation.sums[feat_index] / aggregation.feat_counts[feat_index] as f64;
if let (Some(min_num), Some(max_num), Some(avg_num)) = (
serde_json::Number::from_f64(aggregation.mins[feat_index] as f64),
serde_json::Number::from_f64(aggregation.maxs[feat_index] as f64),
@ -302,13 +304,19 @@ pub async fn get_postcodes(
feature.insert("properties".into(), Value::Object(props));
features.push(feature);
if features.len() >= MAX_CELLS_PER_REQUEST {
break;
}
}
let truncated = features.len() >= MAX_CELLS_PER_REQUEST;
let t_total = t0.elapsed();
info!(
postcodes_before_filter,
postcodes_after_filter = features.len(),
filtered_out,
truncated,
bounds = format_args!("{:.6},{:.6},{:.6},{:.6}", south, west, north, east),
filters = num_filters,
filters_raw = filters_str.as_deref().unwrap_or("-"),

View file

@ -8,7 +8,7 @@ use tracing::{info, warn};
use crate::state::AppState;
#[derive(serde::Deserialize)]
pub struct OgImageQuery {
pub struct ScreenshotQuery {
#[serde(rename = "v")]
view: Option<String>,
#[serde(rename = "f")]
@ -19,9 +19,9 @@ pub struct OgImageQuery {
og: Option<String>,
}
pub async fn get_og_image(
pub async fn get_screenshot(
state: Arc<AppState>,
Query(query): Query<OgImageQuery>,
Query(query): Query<ScreenshotQuery>,
) -> impl IntoResponse {
let screenshot_base = &state.screenshot_url;

View file

@ -12,9 +12,9 @@ pub type TileReader = AsyncPmTilesReader<MmapBackend>;
pub async fn get_tile(
State(reader): State<Arc<TileReader>>,
Path((z, x, y)): Path<(u8, u32, u32)>,
Path((zoom, col, row)): Path<(u8, u32, u32)>,
) -> Response {
match reader.get_tile(z, x as u64, y as u64).await {
match reader.get_tile(zoom, col as u64, row as u64).await {
Ok(Some(tile_bytes)) => (
StatusCode::OK,
[
@ -27,7 +27,7 @@ pub async fn get_tile(
.into_response(),
Ok(None) => StatusCode::NO_CONTENT.into_response(),
Err(err) => {
warn!(z, x, y, error = %err, "Failed to get tile");
warn!(zoom, col, row, error = %err, "Failed to get tile");
StatusCode::INTERNAL_SERVER_ERROR.into_response()
}
}
@ -57,7 +57,7 @@ pub async fn get_style(
// Parse the JSON string
let metadata: serde_json::Value = match serde_json::from_str(&metadata_str) {
Ok(v) => v,
Ok(val) => val,
Err(err) => {
warn!(error = %err, "Failed to parse PMTiles metadata JSON");
serde_json::Value::Object(serde_json::Map::new())
@ -67,14 +67,14 @@ pub async fn get_style(
// Extract tilestats for layer info if available
let layers: Vec<serde_json::Value> = metadata
.get("vector_layers")
.and_then(|v| v.as_array())
.and_then(|vl| vl.as_array())
.cloned()
.unwrap_or_default();
// Build absolute tile URL using the request host
let host = headers
.get(header::HOST)
.and_then(|h| h.to_str().ok())
.and_then(|hv| hv.to_str().ok())
.unwrap_or("localhost:8001");
let tile_url = format!("http://{}/api/tiles/{{z}}/{{x}}/{{y}}", host);
let style = build_style(is_dark, &layers, &tile_url);
@ -101,7 +101,7 @@ fn build_style(is_dark: bool, layers: &[serde_json::Value], tile_url: &str) -> s
// Build layer list from metadata
let layer_ids: Vec<&str> = layers
.iter()
.filter_map(|l| l.get("id").and_then(|v| v.as_str()))
.filter_map(|ly| ly.get("id").and_then(|id| id.as_str()))
.collect();
let mut style_layers = vec![serde_json::json!({