Add pocketbase and other changes

This commit is contained in:
Andras Schmelczer 2026-02-07 19:20:22 +00:00
parent a9717d570d
commit 229150b641
14 changed files with 1178 additions and 91 deletions

View file

@ -0,0 +1,507 @@
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
use std::sync::Arc;
use axum::extract::Query;
use axum::http::{header, StatusCode};
use axum::response::IntoResponse;
use rust_xlsxwriter::{Format, FormatAlign, FormatBorder, Image, Url, Workbook};
use rustc_hash::{FxHashMap, FxHashSet};
use serde::Deserialize;
use tracing::{info, warn};
use crate::parsing::{parse_bounds, parse_filters, row_passes_filters};
use crate::routes::FeatureInfo;
use crate::state::AppState;
/// Maximum number of postcodes written to a single export; when more postcodes
/// match, the result set is sampled down to this many.
const MAX_EXPORT_POSTCODES: usize = 250;
/// Height given to `set_row_height` for the row that holds the OG image.
/// NOTE(review): `set_row_height` appears to take points rather than pixels
/// (225 pt would match the 300 px image box) — confirm against the
/// rust_xlsxwriter documentation.
const IMAGE_ROW_HEIGHT: f64 = 225.0;
/// Query-string parameters accepted by `get_export`.
#[derive(Deserialize)]
pub struct ExportParams {
/// Bounding box to export. Required: `get_export` answers 400 when it is
/// absent. Decoded by `parse_bounds` into `(south, west, north, east)`.
bounds: Option<String>,
/// Optional filter expression handed to `parse_filters`. The raw text is also
/// forwarded as the `f` query parameter of the sidecar and dashboard URLs.
filters: Option<String>,
/// Optional comma-separated feature names selecting the exported columns.
/// Names that are not known features are dropped. When absent, every feature
/// is exported, with the features named in `filters` first.
fields: Option<String>,
}
/// Per-postcode accumulator for export aggregation (mean for numeric, mode for enum).
struct PostcodeExportAgg {
/// Number of rows folded in so far (incremented once per `add_row` call).
count: u32,
/// Running sum of the finite values of each non-enum feature, indexed by feature index.
sums: Vec<f64>,
/// Number of finite values that contributed to the matching entry of `sums`.
finite_counts: Vec<u32>,
/// feat_idx -> (value_bits -> count) for enum mode calculation;
/// `value_bits` is the `f32::to_bits` representation of the stored value.
enum_freqs: FxHashMap<usize, FxHashMap<u32, u32>>,
}
impl PostcodeExportAgg {
    /// Creates an empty accumulator with one numeric slot per feature.
    fn new(num_features: usize) -> Self {
        let sums = vec![0.0; num_features];
        let finite_counts = vec![0; num_features];
        Self {
            count: 0,
            sums,
            finite_counts,
            enum_freqs: FxHashMap::default(),
        }
    }

    /// Folds one row of the flat `feature_data` buffer into the accumulator.
    ///
    /// Row `row` is read from `feature_data[row * num_features..][..num_features]`.
    /// Non-finite values are ignored. A finite value of a feature listed in
    /// `enum_indices` bumps that feature's frequency table (keyed by the value's
    /// bit pattern); any other finite value is added to the feature's running
    /// sum and finite-value count.
    ///
    /// # Panics
    ///
    /// Panics if the row's slice lies outside `feature_data`, or if
    /// `num_features` exceeds the size this accumulator was created with.
    #[inline]
    fn add_row(
        &mut self,
        feature_data: &[f32],
        row: usize,
        num_features: usize,
        enum_indices: &FxHashMap<usize, ()>,
    ) {
        self.count += 1;

        let start = row * num_features;
        let values = &feature_data[start..start + num_features];
        let finite = values
            .iter()
            .copied()
            .enumerate()
            .filter(|(_, value)| value.is_finite());

        for (feat_idx, value) in finite {
            if !enum_indices.contains_key(&feat_idx) {
                self.sums[feat_idx] += f64::from(value);
                self.finite_counts[feat_idx] += 1;
                continue;
            }
            let histogram = self.enum_freqs.entry(feat_idx).or_default();
            *histogram.entry(value.to_bits()).or_insert(0) += 1;
        }
    }
}
/// Collects the feature names mentioned in the `filters` parameter.
///
/// The input is split on `,`; for every entry containing a `:`, the text before
/// the first `:` (trimmed) is taken as the feature name. Entries without a `:`
/// are skipped. Each name is reported once, in order of first appearance.
/// `None` and the empty string both yield an empty list.
fn extract_filter_feature_names(filters_str: Option<&str>) -> Vec<String> {
    let raw = filters_str.unwrap_or("");
    let mut ordered: Vec<String> = Vec::new();

    let keys = raw
        .split(',')
        .filter_map(|entry| entry.split_once(':'))
        .map(|(key, _)| key.trim());

    for key in keys {
        let already_seen = ordered.iter().any(|seen| seen.as_str() == key);
        if !already_seen {
            ordered.push(key.to_owned());
        }
    }
    ordered
}
/// Requests the OG screenshot for the given view from the sidecar service.
///
/// Issues `GET {og_sidecar_url}/screenshot?v=<view>[&f=<filters>]`, with both
/// values percent-encoded and `f` included only for a non-empty filter string.
///
/// Returns the response body on success. Returns `None` when no sidecar URL is
/// configured, when the request cannot be sent, when the sidecar answers with a
/// non-success status, or when the body cannot be read; every failure except
/// the missing configuration is logged as a warning.
async fn fetch_og_image(
    state: &AppState,
    view_param: &str,
    filters_str: Option<&str>,
) -> Option<Vec<u8>> {
    let base = state.og_sidecar_url.as_deref()?;

    let mut url = format!("{}/screenshot?v={}", base, urlencoding::encode(view_param));
    if let Some(fs) = filters_str.filter(|fs| !fs.is_empty()) {
        url.push_str("&f=");
        url.push_str(&urlencoding::encode(fs));
    }

    let resp = match state.http_client.get(&url).send().await {
        Ok(resp) => resp,
        Err(err) => {
            warn!("Failed to reach OG sidecar for export: {err}");
            return None;
        }
    };

    if !resp.status().is_success() {
        warn!(status = %resp.status(), "OG sidecar returned error for export");
        return None;
    }

    match resp.bytes().await {
        Ok(bytes) => {
            info!(bytes = bytes.len(), "Fetched OG image for export");
            Some(bytes.to_vec())
        }
        Err(err) => {
            warn!("Failed to read OG sidecar response for export: {err}");
            None
        }
    }
}
pub async fn get_export(
state: Arc<AppState>,
Query(params): Query<ExportParams>,
) -> Result<impl IntoResponse, (StatusCode, String)> {
let bounds_str = params.bounds.ok_or((
StatusCode::BAD_REQUEST,
"bounds parameter is required".into(),
))?;
let (south, west, north, east) = parse_bounds(&bounds_str)?;
let filters_str = params.filters.clone();
let fields_str = params.fields.clone();
let (parsed_filters, parsed_enum_filters) = parse_filters(
params.filters.as_deref(),
&state.feature_name_to_index,
&state.data.enum_values,
);
let public_url = state.public_url.clone();
// Compute view param for OG image and dashboard URL
let center_lat = (south + north) / 2.0;
let center_lon = (west + east) / 2.0;
let lat_span = north - south;
let zoom = if lat_span > 0.0 {
(360.0 / lat_span).log2().clamp(1.0, 18.0)
} else {
12.0
};
let view_param = format!("{:.4},{:.4},{:.1}", center_lat, center_lon, zoom);
// Fetch OG image from sidecar (async, before spawn_blocking)
let og_image_bytes = fetch_og_image(&state, &view_param, filters_str.as_deref()).await;
// Build feature name → description map from the precomputed features response
let feature_descriptions: FxHashMap<String, String> = state
.features_response
.groups
.iter()
.flat_map(|group| &group.features)
.map(|feat| match feat {
FeatureInfo::Numeric {
name, description, ..
} => (name.clone(), description.to_string()),
FeatureInfo::Enum {
name, description, ..
} => (name.clone(), description.to_string()),
})
.collect();
let bytes = tokio::task::spawn_blocking(move || -> Result<Vec<u8>, String> {
let t0 = std::time::Instant::now();
let num_features = state.data.num_features;
let feature_data = &state.data.feature_data;
let feature_names = &state.data.feature_names;
let enum_values = &state.data.enum_values;
let postcode_data = &state.postcode_data;
// Build set of enum feature indices for quick lookup
let enum_indices: FxHashMap<usize, ()> = enum_values.keys().map(|&idx| (idx, ())).collect();
// Group rows by postcode
let mut postcode_rows: FxHashMap<usize, Vec<usize>> = FxHashMap::default();
state
.grid
.for_each_in_bounds(south, west, north, east, |row_idx| {
let row = row_idx as usize;
if !row_passes_filters(
row,
&parsed_filters,
&parsed_enum_filters,
feature_data,
num_features,
) {
return;
}
let postcode = state.data.postcode(row);
if let Some(&pc_idx) = postcode_data.postcode_to_idx.get(postcode) {
postcode_rows.entry(pc_idx).or_default().push(row);
}
});
// Aggregate per postcode
let mut postcode_aggs: Vec<(usize, PostcodeExportAgg)> =
Vec::with_capacity(postcode_rows.len());
for (pc_idx, rows) in postcode_rows {
let mut agg = PostcodeExportAgg::new(num_features);
for &row in &rows {
agg.add_row(feature_data, row, num_features, &enum_indices);
}
if agg.count > 0 {
postcode_aggs.push((pc_idx, agg));
}
}
// Sort by property count descending
postcode_aggs.sort_unstable_by(|lhs, rhs| rhs.1.count.cmp(&lhs.1.count));
// Sample if too many postcodes
let was_sampled = postcode_aggs.len() > MAX_EXPORT_POSTCODES;
if was_sampled {
let mut hasher = DefaultHasher::new();
bounds_str.hash(&mut hasher);
let seed = hasher.finish();
let len = postcode_aggs.len();
for pick in 0..MAX_EXPORT_POSTCODES {
let swap_idx = pick
+ ((seed.wrapping_mul(pick as u64 + 1).wrapping_add(pick as u64)) as usize
% (len - pick));
postcode_aggs.swap(pick, swap_idx);
}
postcode_aggs.truncate(MAX_EXPORT_POSTCODES);
postcode_aggs.sort_unstable_by(|lhs, rhs| rhs.1.count.cmp(&lhs.1.count));
}
// Determine column order: filter features first, then remaining
let filter_feature_names = extract_filter_feature_names(filters_str.as_deref());
let field_indices: Option<Vec<usize>> = fields_str.as_ref().map(|fs| {
if fs.is_empty() {
return Vec::new();
}
fs.split(',')
.filter_map(|name| {
let name = name.trim();
if name.is_empty() {
return None;
}
state.feature_name_to_index.get(name).copied()
})
.collect()
});
let all_feature_indices: Vec<usize> = if let Some(ref indices) = field_indices {
indices.clone()
} else {
let mut ordered = Vec::with_capacity(num_features);
let mut used = FxHashSet::default();
for name in &filter_feature_names {
if let Some(&idx) = state.feature_name_to_index.get(name.as_str()) {
if used.insert(idx) {
ordered.push(idx);
}
}
}
for idx in 0..num_features {
if used.insert(idx) {
ordered.push(idx);
}
}
ordered
};
// Build Excel workbook
let mut workbook = Workbook::new();
let sheet = workbook.add_worksheet();
// Formats
let header_fmt = Format::new()
.set_bold()
.set_border_bottom(FormatBorder::Thin)
.set_align(FormatAlign::Center);
let desc_fmt = Format::new()
.set_italic()
.set_font_color("#666666")
.set_font_size(9)
.set_align(FormatAlign::Center)
.set_text_wrap();
let link_fmt = Format::new()
.set_font_color("#0563C1")
.set_underline(rust_xlsxwriter::FormatUnderline::Single)
.set_font_size(11);
let note_fmt = Format::new()
.set_italic()
.set_font_color("#666666")
.set_align(FormatAlign::Left);
// Row 0: "View on Narrowit" link
let mut dashboard_url = format!("{}/", public_url);
let mut query_parts: Vec<String> = Vec::new();
query_parts.push(format!("v={}", view_param));
if let Some(ref fs) = filters_str {
if !fs.is_empty() {
query_parts.push(format!("f={}", urlencoding::encode(fs)));
}
}
if !query_parts.is_empty() {
dashboard_url.push('?');
dashboard_url.push_str(&query_parts.join("&"));
}
sheet
.write_url(0, 0, Url::new(&dashboard_url).set_text("View on Narrowit"))
.map_err(|err| format!("Failed to write URL: {err}"))?;
sheet
.set_row_format(0, &link_fmt)
.map_err(|err| format!("Failed to set row format: {err}"))?;
// Row 1: OG image (if available)
let mut current_row = 1u32;
if let Some(ref img_bytes) = og_image_bytes {
match Image::new_from_buffer(img_bytes) {
Ok(mut image) => {
// Scale image to fit: ~400px wide, auto height preserving aspect ratio
image = image.set_scale_to_size(400, 300, true);
sheet
.insert_image(current_row, 0, &image)
.map_err(|err| format!("Failed to insert OG image: {err}"))?;
// Set row height to accommodate the image
sheet
.set_row_height(current_row, IMAGE_ROW_HEIGHT)
.map_err(|err| format!("Failed to set image row height: {err}"))?;
current_row += 1;
}
Err(err) => {
warn!("Failed to parse OG image for export: {err}");
// Skip image row, don't leave a gap
}
}
}
// Leave a blank row between image and header
current_row += 1;
// Header row
let header_row = current_row;
sheet
.write_string_with_format(header_row, 0, "Postcode", &header_fmt)
.map_err(|err| format!("Failed to write header: {err}"))?;
sheet
.write_string_with_format(header_row, 1, "Properties", &header_fmt)
.map_err(|err| format!("Failed to write header: {err}"))?;
for (col_offset, &feat_idx) in all_feature_indices.iter().enumerate() {
let col = (col_offset + 2) as u16;
sheet
.write_string_with_format(header_row, col, &feature_names[feat_idx], &header_fmt)
.map_err(|err| format!("Failed to write header: {err}"))?;
}
// Description row (below header)
let desc_row = header_row + 1;
// Empty descriptions for Postcode and Properties columns
sheet
.write_string_with_format(desc_row, 0, "", &desc_fmt)
.map_err(|err| format!("Failed to write desc: {err}"))?;
sheet
.write_string_with_format(desc_row, 1, "Count of properties", &desc_fmt)
.map_err(|err| format!("Failed to write desc: {err}"))?;
for (col_offset, &feat_idx) in all_feature_indices.iter().enumerate() {
let col = (col_offset + 2) as u16;
let desc = feature_descriptions
.get(&feature_names[feat_idx])
.map(String::as_str)
.unwrap_or("");
sheet
.write_string_with_format(desc_row, col, desc, &desc_fmt)
.map_err(|err| format!("Failed to write desc: {err}"))?;
}
// Write data rows (starting after description row)
let data_start_row = desc_row + 1;
for (row_offset, (pc_idx, agg)) in postcode_aggs.iter().enumerate() {
let row = data_start_row + row_offset as u32;
sheet
.write_string(row, 0, &postcode_data.postcodes[*pc_idx])
.map_err(|err| format!("Failed to write postcode: {err}"))?;
sheet
.write_number(row, 1, agg.count as f64)
.map_err(|err| format!("Failed to write count: {err}"))?;
for (col_offset, &feat_idx) in all_feature_indices.iter().enumerate() {
let col = (col_offset + 2) as u16;
if enum_indices.contains_key(&feat_idx) {
if let Some(freqs) = agg.enum_freqs.get(&feat_idx) {
if let Some((&mode_bits, _)) = freqs.iter().max_by_key(|(_, &count)| count)
{
let mode_f32 = f32::from_bits(mode_bits);
let mode_idx = mode_f32 as usize;
if let Some(values) = enum_values.get(&feat_idx) {
if mode_idx < values.len() {
sheet.write_string(row, col, &values[mode_idx]).map_err(
|err| format!("Failed to write enum value: {err}"),
)?;
}
}
}
}
} else {
let fc = agg.finite_counts[feat_idx];
if fc > 0 {
let mean = agg.sums[feat_idx] / fc as f64;
sheet
.write_number(row, col, mean)
.map_err(|err| format!("Failed to write numeric value: {err}"))?;
}
}
}
}
// If sampled, add a note at the bottom
if was_sampled {
let note_row = data_start_row + postcode_aggs.len() as u32 + 1;
let total_cols = (all_feature_indices.len() + 2) as u16;
sheet
.merge_range(
note_row,
0,
note_row,
total_cols.saturating_sub(1),
&format!(
"Only the first {} postcodes shown (randomly sampled from results)",
MAX_EXPORT_POSTCODES
),
&note_fmt,
)
.map_err(|err| format!("Failed to write note: {err}"))?;
}
// Column widths
sheet.set_column_width(0, 12).ok();
sheet.set_column_width(1, 12).ok();
for col_offset in 0..all_feature_indices.len() {
let col = (col_offset + 2) as u16;
let feat_name = &feature_names[all_feature_indices[col_offset]];
let width = (feat_name.len() as f64 * 1.1).clamp(10.0, 30.0);
sheet.set_column_width(col, width).ok();
}
let buf = workbook
.save_to_buffer()
.map_err(|err| format!("Failed to save workbook: {err}"))?;
let t_total = t0.elapsed();
info!(
postcodes = postcode_aggs.len(),
sampled = was_sampled,
features = all_feature_indices.len(),
has_og_image = og_image_bytes.is_some(),
bytes = buf.len(),
total_ms = format_args!("{:.1}", t_total.as_secs_f64() * 1000.0),
"GET /api/export"
);
Ok(buf)
})
.await
.map_err(|err| (StatusCode::INTERNAL_SERVER_ERROR, err.to_string()))?
.map_err(|err| (StatusCode::INTERNAL_SERVER_ERROR, err))?;
Ok((
[
(
header::CONTENT_TYPE,
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
),
(
header::CONTENT_DISPOSITION,
"attachment; filename=\"narrowit-export.xlsx\"",
),
],
bytes,
))
}