Add pocketbase and other changes
This commit is contained in:
parent
a9717d570d
commit
229150b641
14 changed files with 1178 additions and 91 deletions
507
server-rs/src/routes/export.rs
Normal file
507
server-rs/src/routes/export.rs
Normal file
|
|
@ -0,0 +1,507 @@
|
|||
use std::collections::hash_map::DefaultHasher;
|
||||
use std::hash::{Hash, Hasher};
|
||||
use std::sync::Arc;
|
||||
|
||||
use axum::extract::Query;
|
||||
use axum::http::{header, StatusCode};
|
||||
use axum::response::IntoResponse;
|
||||
use rust_xlsxwriter::{Format, FormatAlign, FormatBorder, Image, Url, Workbook};
|
||||
use rustc_hash::{FxHashMap, FxHashSet};
|
||||
use serde::Deserialize;
|
||||
use tracing::{info, warn};
|
||||
|
||||
use crate::parsing::{parse_bounds, parse_filters, row_passes_filters};
|
||||
use crate::routes::FeatureInfo;
|
||||
use crate::state::AppState;
|
||||
|
||||
/// Maximum number of postcode rows written to one export; larger result sets
/// are sampled down to this many before the sheet is built.
const MAX_EXPORT_POSTCODES: usize = 250;
|
||||
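/// Height reserved for the OG image row, as passed to `set_row_height`, i.e. in
/// Excel row-height units (points) rather than pixels: 225 pt is about 300 px,
/// the maximum height the image is scaled to.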
|
||||
const IMAGE_ROW_HEIGHT: f64 = 225.0;
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct ExportParams {
|
||||
bounds: Option<String>,
|
||||
filters: Option<String>,
|
||||
fields: Option<String>,
|
||||
}
|
||||
|
||||
/// Per-postcode accumulator for export aggregation (mean for numeric, mode for enum).
|
||||
struct PostcodeExportAgg {
|
||||
count: u32,
|
||||
sums: Vec<f64>,
|
||||
finite_counts: Vec<u32>,
|
||||
/// feat_idx -> (value_bits -> count) for enum mode calculation
|
||||
enum_freqs: FxHashMap<usize, FxHashMap<u32, u32>>,
|
||||
}
|
||||
|
||||
impl PostcodeExportAgg {
|
||||
fn new(num_features: usize) -> Self {
|
||||
Self {
|
||||
count: 0,
|
||||
sums: vec![0.0; num_features],
|
||||
finite_counts: vec![0; num_features],
|
||||
enum_freqs: FxHashMap::default(),
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn add_row(
|
||||
&mut self,
|
||||
feature_data: &[f32],
|
||||
row: usize,
|
||||
num_features: usize,
|
||||
enum_indices: &FxHashMap<usize, ()>,
|
||||
) {
|
||||
self.count += 1;
|
||||
let base = row * num_features;
|
||||
let row_slice = &feature_data[base..base + num_features];
|
||||
for (feat_idx, &value) in row_slice.iter().enumerate() {
|
||||
if !value.is_finite() {
|
||||
continue;
|
||||
}
|
||||
if enum_indices.contains_key(&feat_idx) {
|
||||
*self
|
||||
.enum_freqs
|
||||
.entry(feat_idx)
|
||||
.or_default()
|
||||
.entry(value.to_bits())
|
||||
.or_insert(0) += 1;
|
||||
} else {
|
||||
self.sums[feat_idx] += value as f64;
|
||||
self.finite_counts[feat_idx] += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Extracts the feature names referenced in the `filters` query parameter.
///
/// The input is split on `,`; each entry is expected to look like
/// `name:rest`, where everything before the first `:` (trimmed) is the
/// feature name. Entries without a `:` are skipped. Names are returned in
/// first-seen order with duplicates removed. `None` or an empty string yields
/// an empty vector.
fn extract_filter_feature_names(filters_str: Option<&str>) -> Vec<String> {
    let input = match filters_str {
        Some(text) if !text.is_empty() => text,
        _ => return Vec::new(),
    };

    let mut names: Vec<String> = Vec::new();
    for (raw_name, _) in input.split(',').filter_map(|entry| entry.split_once(':')) {
        let name = raw_name.trim();
        // Linear scan is fine here: filter lists are short, and it keeps
        // insertion order. Allocate only for names that are actually kept.
        if !names.iter().any(|seen| seen == name) {
            names.push(name.to_owned());
        }
    }
    names
}
|
||||
|
||||
/// Fetch the OG screenshot image from the sidecar service.
|
||||
async fn fetch_og_image(
|
||||
state: &AppState,
|
||||
view_param: &str,
|
||||
filters_str: Option<&str>,
|
||||
) -> Option<Vec<u8>> {
|
||||
let sidecar_url = state.og_sidecar_url.as_deref()?;
|
||||
|
||||
let mut params = vec![format!("v={}", urlencoding::encode(view_param))];
|
||||
if let Some(fs) = filters_str {
|
||||
if !fs.is_empty() {
|
||||
params.push(format!("f={}", urlencoding::encode(fs)));
|
||||
}
|
||||
}
|
||||
let url = format!("{}/screenshot?{}", sidecar_url, params.join("&"));
|
||||
|
||||
match state.http_client.get(&url).send().await {
|
||||
Ok(resp) if resp.status().is_success() => match resp.bytes().await {
|
||||
Ok(bytes) => {
|
||||
info!(bytes = bytes.len(), "Fetched OG image for export");
|
||||
Some(bytes.to_vec())
|
||||
}
|
||||
Err(err) => {
|
||||
warn!("Failed to read OG sidecar response for export: {err}");
|
||||
None
|
||||
}
|
||||
},
|
||||
Ok(resp) => {
|
||||
warn!(status = %resp.status(), "OG sidecar returned error for export");
|
||||
None
|
||||
}
|
||||
Err(err) => {
|
||||
warn!("Failed to reach OG sidecar for export: {err}");
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn get_export(
|
||||
state: Arc<AppState>,
|
||||
Query(params): Query<ExportParams>,
|
||||
) -> Result<impl IntoResponse, (StatusCode, String)> {
|
||||
let bounds_str = params.bounds.ok_or((
|
||||
StatusCode::BAD_REQUEST,
|
||||
"bounds parameter is required".into(),
|
||||
))?;
|
||||
|
||||
let (south, west, north, east) = parse_bounds(&bounds_str)?;
|
||||
|
||||
let filters_str = params.filters.clone();
|
||||
let fields_str = params.fields.clone();
|
||||
let (parsed_filters, parsed_enum_filters) = parse_filters(
|
||||
params.filters.as_deref(),
|
||||
&state.feature_name_to_index,
|
||||
&state.data.enum_values,
|
||||
);
|
||||
|
||||
let public_url = state.public_url.clone();
|
||||
|
||||
// Compute view param for OG image and dashboard URL
|
||||
let center_lat = (south + north) / 2.0;
|
||||
let center_lon = (west + east) / 2.0;
|
||||
let lat_span = north - south;
|
||||
let zoom = if lat_span > 0.0 {
|
||||
(360.0 / lat_span).log2().clamp(1.0, 18.0)
|
||||
} else {
|
||||
12.0
|
||||
};
|
||||
let view_param = format!("{:.4},{:.4},{:.1}", center_lat, center_lon, zoom);
|
||||
|
||||
// Fetch OG image from sidecar (async, before spawn_blocking)
|
||||
let og_image_bytes = fetch_og_image(&state, &view_param, filters_str.as_deref()).await;
|
||||
|
||||
// Build feature name → description map from the precomputed features response
|
||||
let feature_descriptions: FxHashMap<String, String> = state
|
||||
.features_response
|
||||
.groups
|
||||
.iter()
|
||||
.flat_map(|group| &group.features)
|
||||
.map(|feat| match feat {
|
||||
FeatureInfo::Numeric {
|
||||
name, description, ..
|
||||
} => (name.clone(), description.to_string()),
|
||||
FeatureInfo::Enum {
|
||||
name, description, ..
|
||||
} => (name.clone(), description.to_string()),
|
||||
})
|
||||
.collect();
|
||||
|
||||
let bytes = tokio::task::spawn_blocking(move || -> Result<Vec<u8>, String> {
|
||||
let t0 = std::time::Instant::now();
|
||||
let num_features = state.data.num_features;
|
||||
let feature_data = &state.data.feature_data;
|
||||
let feature_names = &state.data.feature_names;
|
||||
let enum_values = &state.data.enum_values;
|
||||
let postcode_data = &state.postcode_data;
|
||||
|
||||
// Build set of enum feature indices for quick lookup
|
||||
let enum_indices: FxHashMap<usize, ()> = enum_values.keys().map(|&idx| (idx, ())).collect();
|
||||
|
||||
// Group rows by postcode
|
||||
let mut postcode_rows: FxHashMap<usize, Vec<usize>> = FxHashMap::default();
|
||||
state
|
||||
.grid
|
||||
.for_each_in_bounds(south, west, north, east, |row_idx| {
|
||||
let row = row_idx as usize;
|
||||
if !row_passes_filters(
|
||||
row,
|
||||
&parsed_filters,
|
||||
&parsed_enum_filters,
|
||||
feature_data,
|
||||
num_features,
|
||||
) {
|
||||
return;
|
||||
}
|
||||
let postcode = state.data.postcode(row);
|
||||
if let Some(&pc_idx) = postcode_data.postcode_to_idx.get(postcode) {
|
||||
postcode_rows.entry(pc_idx).or_default().push(row);
|
||||
}
|
||||
});
|
||||
|
||||
// Aggregate per postcode
|
||||
let mut postcode_aggs: Vec<(usize, PostcodeExportAgg)> =
|
||||
Vec::with_capacity(postcode_rows.len());
|
||||
for (pc_idx, rows) in postcode_rows {
|
||||
let mut agg = PostcodeExportAgg::new(num_features);
|
||||
for &row in &rows {
|
||||
agg.add_row(feature_data, row, num_features, &enum_indices);
|
||||
}
|
||||
if agg.count > 0 {
|
||||
postcode_aggs.push((pc_idx, agg));
|
||||
}
|
||||
}
|
||||
|
||||
// Sort by property count descending
|
||||
postcode_aggs.sort_unstable_by(|lhs, rhs| rhs.1.count.cmp(&lhs.1.count));
|
||||
|
||||
// Sample if too many postcodes
|
||||
let was_sampled = postcode_aggs.len() > MAX_EXPORT_POSTCODES;
|
||||
if was_sampled {
|
||||
let mut hasher = DefaultHasher::new();
|
||||
bounds_str.hash(&mut hasher);
|
||||
let seed = hasher.finish();
|
||||
|
||||
let len = postcode_aggs.len();
|
||||
for pick in 0..MAX_EXPORT_POSTCODES {
|
||||
let swap_idx = pick
|
||||
+ ((seed.wrapping_mul(pick as u64 + 1).wrapping_add(pick as u64)) as usize
|
||||
% (len - pick));
|
||||
postcode_aggs.swap(pick, swap_idx);
|
||||
}
|
||||
postcode_aggs.truncate(MAX_EXPORT_POSTCODES);
|
||||
postcode_aggs.sort_unstable_by(|lhs, rhs| rhs.1.count.cmp(&lhs.1.count));
|
||||
}
|
||||
|
||||
// Determine column order: filter features first, then remaining
|
||||
let filter_feature_names = extract_filter_feature_names(filters_str.as_deref());
|
||||
|
||||
let field_indices: Option<Vec<usize>> = fields_str.as_ref().map(|fs| {
|
||||
if fs.is_empty() {
|
||||
return Vec::new();
|
||||
}
|
||||
fs.split(',')
|
||||
.filter_map(|name| {
|
||||
let name = name.trim();
|
||||
if name.is_empty() {
|
||||
return None;
|
||||
}
|
||||
state.feature_name_to_index.get(name).copied()
|
||||
})
|
||||
.collect()
|
||||
});
|
||||
|
||||
let all_feature_indices: Vec<usize> = if let Some(ref indices) = field_indices {
|
||||
indices.clone()
|
||||
} else {
|
||||
let mut ordered = Vec::with_capacity(num_features);
|
||||
let mut used = FxHashSet::default();
|
||||
|
||||
for name in &filter_feature_names {
|
||||
if let Some(&idx) = state.feature_name_to_index.get(name.as_str()) {
|
||||
if used.insert(idx) {
|
||||
ordered.push(idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
for idx in 0..num_features {
|
||||
if used.insert(idx) {
|
||||
ordered.push(idx);
|
||||
}
|
||||
}
|
||||
ordered
|
||||
};
|
||||
|
||||
// Build Excel workbook
|
||||
let mut workbook = Workbook::new();
|
||||
let sheet = workbook.add_worksheet();
|
||||
|
||||
// Formats
|
||||
let header_fmt = Format::new()
|
||||
.set_bold()
|
||||
.set_border_bottom(FormatBorder::Thin)
|
||||
.set_align(FormatAlign::Center);
|
||||
|
||||
let desc_fmt = Format::new()
|
||||
.set_italic()
|
||||
.set_font_color("#666666")
|
||||
.set_font_size(9)
|
||||
.set_align(FormatAlign::Center)
|
||||
.set_text_wrap();
|
||||
|
||||
let link_fmt = Format::new()
|
||||
.set_font_color("#0563C1")
|
||||
.set_underline(rust_xlsxwriter::FormatUnderline::Single)
|
||||
.set_font_size(11);
|
||||
|
||||
let note_fmt = Format::new()
|
||||
.set_italic()
|
||||
.set_font_color("#666666")
|
||||
.set_align(FormatAlign::Left);
|
||||
|
||||
// Row 0: "View on Narrowit" link
|
||||
let mut dashboard_url = format!("{}/", public_url);
|
||||
let mut query_parts: Vec<String> = Vec::new();
|
||||
query_parts.push(format!("v={}", view_param));
|
||||
if let Some(ref fs) = filters_str {
|
||||
if !fs.is_empty() {
|
||||
query_parts.push(format!("f={}", urlencoding::encode(fs)));
|
||||
}
|
||||
}
|
||||
if !query_parts.is_empty() {
|
||||
dashboard_url.push('?');
|
||||
dashboard_url.push_str(&query_parts.join("&"));
|
||||
}
|
||||
|
||||
sheet
|
||||
.write_url(0, 0, Url::new(&dashboard_url).set_text("View on Narrowit"))
|
||||
.map_err(|err| format!("Failed to write URL: {err}"))?;
|
||||
sheet
|
||||
.set_row_format(0, &link_fmt)
|
||||
.map_err(|err| format!("Failed to set row format: {err}"))?;
|
||||
|
||||
// Row 1: OG image (if available)
|
||||
let mut current_row = 1u32;
|
||||
if let Some(ref img_bytes) = og_image_bytes {
|
||||
match Image::new_from_buffer(img_bytes) {
|
||||
Ok(mut image) => {
|
||||
// Scale image to fit: ~400px wide, auto height preserving aspect ratio
|
||||
image = image.set_scale_to_size(400, 300, true);
|
||||
sheet
|
||||
.insert_image(current_row, 0, &image)
|
||||
.map_err(|err| format!("Failed to insert OG image: {err}"))?;
|
||||
// Set row height to accommodate the image
|
||||
sheet
|
||||
.set_row_height(current_row, IMAGE_ROW_HEIGHT)
|
||||
.map_err(|err| format!("Failed to set image row height: {err}"))?;
|
||||
current_row += 1;
|
||||
}
|
||||
Err(err) => {
|
||||
warn!("Failed to parse OG image for export: {err}");
|
||||
// Skip image row, don't leave a gap
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Leave a blank row between image and header
|
||||
current_row += 1;
|
||||
|
||||
// Header row
|
||||
let header_row = current_row;
|
||||
sheet
|
||||
.write_string_with_format(header_row, 0, "Postcode", &header_fmt)
|
||||
.map_err(|err| format!("Failed to write header: {err}"))?;
|
||||
sheet
|
||||
.write_string_with_format(header_row, 1, "Properties", &header_fmt)
|
||||
.map_err(|err| format!("Failed to write header: {err}"))?;
|
||||
|
||||
for (col_offset, &feat_idx) in all_feature_indices.iter().enumerate() {
|
||||
let col = (col_offset + 2) as u16;
|
||||
sheet
|
||||
.write_string_with_format(header_row, col, &feature_names[feat_idx], &header_fmt)
|
||||
.map_err(|err| format!("Failed to write header: {err}"))?;
|
||||
}
|
||||
|
||||
// Description row (below header)
|
||||
let desc_row = header_row + 1;
|
||||
// Empty descriptions for Postcode and Properties columns
|
||||
sheet
|
||||
.write_string_with_format(desc_row, 0, "", &desc_fmt)
|
||||
.map_err(|err| format!("Failed to write desc: {err}"))?;
|
||||
sheet
|
||||
.write_string_with_format(desc_row, 1, "Count of properties", &desc_fmt)
|
||||
.map_err(|err| format!("Failed to write desc: {err}"))?;
|
||||
|
||||
for (col_offset, &feat_idx) in all_feature_indices.iter().enumerate() {
|
||||
let col = (col_offset + 2) as u16;
|
||||
let desc = feature_descriptions
|
||||
.get(&feature_names[feat_idx])
|
||||
.map(String::as_str)
|
||||
.unwrap_or("");
|
||||
sheet
|
||||
.write_string_with_format(desc_row, col, desc, &desc_fmt)
|
||||
.map_err(|err| format!("Failed to write desc: {err}"))?;
|
||||
}
|
||||
|
||||
// Write data rows (starting after description row)
|
||||
let data_start_row = desc_row + 1;
|
||||
for (row_offset, (pc_idx, agg)) in postcode_aggs.iter().enumerate() {
|
||||
let row = data_start_row + row_offset as u32;
|
||||
|
||||
sheet
|
||||
.write_string(row, 0, &postcode_data.postcodes[*pc_idx])
|
||||
.map_err(|err| format!("Failed to write postcode: {err}"))?;
|
||||
|
||||
sheet
|
||||
.write_number(row, 1, agg.count as f64)
|
||||
.map_err(|err| format!("Failed to write count: {err}"))?;
|
||||
|
||||
for (col_offset, &feat_idx) in all_feature_indices.iter().enumerate() {
|
||||
let col = (col_offset + 2) as u16;
|
||||
|
||||
if enum_indices.contains_key(&feat_idx) {
|
||||
if let Some(freqs) = agg.enum_freqs.get(&feat_idx) {
|
||||
if let Some((&mode_bits, _)) = freqs.iter().max_by_key(|(_, &count)| count)
|
||||
{
|
||||
let mode_f32 = f32::from_bits(mode_bits);
|
||||
let mode_idx = mode_f32 as usize;
|
||||
if let Some(values) = enum_values.get(&feat_idx) {
|
||||
if mode_idx < values.len() {
|
||||
sheet.write_string(row, col, &values[mode_idx]).map_err(
|
||||
|err| format!("Failed to write enum value: {err}"),
|
||||
)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let fc = agg.finite_counts[feat_idx];
|
||||
if fc > 0 {
|
||||
let mean = agg.sums[feat_idx] / fc as f64;
|
||||
sheet
|
||||
.write_number(row, col, mean)
|
||||
.map_err(|err| format!("Failed to write numeric value: {err}"))?;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If sampled, add a note at the bottom
|
||||
if was_sampled {
|
||||
let note_row = data_start_row + postcode_aggs.len() as u32 + 1;
|
||||
let total_cols = (all_feature_indices.len() + 2) as u16;
|
||||
sheet
|
||||
.merge_range(
|
||||
note_row,
|
||||
0,
|
||||
note_row,
|
||||
total_cols.saturating_sub(1),
|
||||
&format!(
|
||||
"Only the first {} postcodes shown (randomly sampled from results)",
|
||||
MAX_EXPORT_POSTCODES
|
||||
),
|
||||
¬e_fmt,
|
||||
)
|
||||
.map_err(|err| format!("Failed to write note: {err}"))?;
|
||||
}
|
||||
|
||||
// Column widths
|
||||
sheet.set_column_width(0, 12).ok();
|
||||
sheet.set_column_width(1, 12).ok();
|
||||
for col_offset in 0..all_feature_indices.len() {
|
||||
let col = (col_offset + 2) as u16;
|
||||
let feat_name = &feature_names[all_feature_indices[col_offset]];
|
||||
let width = (feat_name.len() as f64 * 1.1).clamp(10.0, 30.0);
|
||||
sheet.set_column_width(col, width).ok();
|
||||
}
|
||||
|
||||
let buf = workbook
|
||||
.save_to_buffer()
|
||||
.map_err(|err| format!("Failed to save workbook: {err}"))?;
|
||||
|
||||
let t_total = t0.elapsed();
|
||||
info!(
|
||||
postcodes = postcode_aggs.len(),
|
||||
sampled = was_sampled,
|
||||
features = all_feature_indices.len(),
|
||||
has_og_image = og_image_bytes.is_some(),
|
||||
bytes = buf.len(),
|
||||
total_ms = format_args!("{:.1}", t_total.as_secs_f64() * 1000.0),
|
||||
"GET /api/export"
|
||||
);
|
||||
|
||||
Ok(buf)
|
||||
})
|
||||
.await
|
||||
.map_err(|err| (StatusCode::INTERNAL_SERVER_ERROR, err.to_string()))?
|
||||
.map_err(|err| (StatusCode::INTERNAL_SERVER_ERROR, err))?;
|
||||
|
||||
Ok((
|
||||
[
|
||||
(
|
||||
header::CONTENT_TYPE,
|
||||
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
||||
),
|
||||
(
|
||||
header::CONTENT_DISPOSITION,
|
||||
"attachment; filename=\"narrowit-export.xlsx\"",
|
||||
),
|
||||
],
|
||||
bytes,
|
||||
))
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue