Checkpoint all changes

This commit is contained in:
Andras Schmelczer 2026-02-01 19:30:33 +00:00
parent 65877acf95
commit 66c2a25457
28 changed files with 3035 additions and 621 deletions

View file

@ -1,15 +1,20 @@
mod consts;
mod data;
mod features;
mod filter;
mod index;
mod grid_index;
mod routes;
mod state;
#[cfg(test)]
mod tests;
use std::path::PathBuf;
use std::sync::Arc;
use anyhow::{bail, Context};
use axum::routing::get;
use axum::Router;
use clap::Parser;
use tower_http::compression::CompressionLayer;
use tower_http::cors::{Any, CorsLayer};
use tower_http::services::ServeDir;
@ -19,8 +24,24 @@ use tracing_subscriber::EnvFilter;
use state::AppState;
// Command-line arguments for the server binary, parsed with clap's derive API
// (`main` obtains them via `Cli::parse()`).
//
// NOTE: the notes here are plain `//` comments on purpose. clap's derive turns
// the `///` doc comments on the fields into `--help` text, so editing those
// changes what the program prints.
#[derive(Parser)]
#[command(name = "narrowit", about = "Narrowit property map server")]
struct Cli {
// `--data <PATH>`, required (non-`Option` field). `main` checks that the path
// exists and passes it to `data::PropertyData::load`.
/// Path to the wide property parquet file
#[arg(long)]
data: PathBuf,
// `--pois <PATH>`, required (non-`Option` field). `main` passes it to
// `data::POIData::load`.
/// Path to the POI parquet file
#[arg(long)]
pois: PathBuf,
// `--dist <PATH>`, optional. When omitted, `main` looks for a `dist`
// directory next to the running executable and otherwise uses `frontend/dist`.
/// Path to the frontend dist directory
#[arg(long)]
dist: Option<PathBuf>,
}
#[tokio::main]
async fn main() {
async fn main() -> anyhow::Result<()> {
tracing_subscriber::fmt()
.with_env_filter(
EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info")),
@ -28,18 +49,18 @@ async fn main() {
.with_ansi(true)
.init();
let parquet_path = PathBuf::from(
std::env::args()
.nth(1)
.unwrap_or_else(|| "data_sources/processed/wide.parquet".to_string()),
);
let cli = Cli::parse();
let parquet_path = &cli.data;
if !parquet_path.exists() {
tracing::error!("Parquet file not found: {}", parquet_path.display());
std::process::exit(1);
bail!(
"Property parquet file not found: {}",
parquet_path.display()
);
}
info!("Loading property data from {}", parquet_path.display());
let property_data = data::PropertyData::load(&parquet_path);
let property_data = data::PropertyData::load(parquet_path)?;
info!(
rows = property_data.lat.len(),
features = property_data.num_features,
@ -48,32 +69,90 @@ async fn main() {
);
info!("Building spatial grid index (0.01° cells)");
let grid = index::GridIndex::build(&property_data.lat, &property_data.lon, 0.01);
let grid = grid_index::GridIndex::build(&property_data.lat, &property_data.lon, 0.01);
info!("Precomputing H3 cells for resolutions {}-{}", consts::H3_PRECOMPUTE_MIN, consts::H3_PRECOMPUTE_MAX);
let h3_cells = data::precompute_h3(&property_data.lat, &property_data.lon);
info!(
"Precomputing H3 cells for resolutions {}-{}",
consts::H3_PRECOMPUTE_MIN,
consts::H3_PRECOMPUTE_MAX
);
let h3_cells = data::precompute_h3(&property_data.lat, &property_data.lon)?;
let poi_path = PathBuf::from("/volumes/syncthing/Projects/property-map/data/filtered_uk_pois.parquet");
let poi_path = cli.pois;
let poi_data = if poi_path.exists() {
info!("Loading POI data from {}", poi_path.display());
let pd = data::POIData::load(&poi_path);
info!(pois = pd.lat.len(), "POI data loaded");
pd
} else {
tracing::warn!("POI file not found: {}. POI endpoints will be unavailable.", poi_path.display());
data::POIData {
id: Vec::new(),
name: Vec::new(),
category: Vec::new(),
lat: Vec::new(),
lng: Vec::new(),
emoji: Vec::new(),
}
};
if !poi_path.exists() {
bail!("POI parquet file not found: {}", poi_path.display());
}
info!("Loading POI data from {}", poi_path.display());
let poi_data = data::POIData::load(&poi_path)?;
info!(pois = poi_data.lat.len(), "POI data loaded");
info!("Building POI spatial grid index");
let poi_grid = index::GridIndex::build(&poi_data.lat, &poi_data.lng, 0.01);
let poi_grid = grid_index::GridIndex::build(&poi_data.lat, &poi_data.lng, 0.01);
let min_keys: Vec<String> = property_data
.feature_names
.iter()
.map(|name| format!("min_{}", name))
.collect();
let max_keys: Vec<String> = property_data
.feature_names
.iter()
.map(|name| format!("max_{}", name))
.collect();
let enum_min_keys: Vec<String> = property_data
.enum_features
.iter()
.map(|enum_feature| format!("min_{}", enum_feature.name))
.collect();
let enum_max_keys: Vec<String> = property_data
.enum_features
.iter()
.map(|enum_feature| format!("max_{}", enum_feature.name))
.collect();
// Precompute POI category groups
let poi_category_groups = {
let mut group_cats: std::collections::HashMap<String, std::collections::HashSet<String>> =
std::collections::HashMap::new();
for (category, group) in poi_data.category.iter().zip(poi_data.group.iter()) {
group_cats
.entry(group.clone())
.or_default()
.insert(category.clone());
}
// Validate that data groups match the hardcoded order exactly
let expected: std::collections::HashSet<&str> =
consts::POI_GROUP_ORDER.iter().copied().collect();
let actual: std::collections::HashSet<&str> =
group_cats.keys().map(|key| key.as_str()).collect();
let missing_from_data: Vec<&&str> = expected.difference(&actual).collect();
let missing_from_order: Vec<&&str> = actual.difference(&expected).collect();
if !missing_from_data.is_empty() || !missing_from_order.is_empty() {
bail!(
"POI group mismatch!\n In POI_GROUP_ORDER but not in data: {:?}\n In data but not in POI_GROUP_ORDER: {:?}",
missing_from_data, missing_from_order
);
}
consts::POI_GROUP_ORDER.iter().map(|group_name| group_name.to_string()).collect::<Vec<_>>()
.into_iter()
.map(|name| {
let mut categories: Vec<String> =
group_cats.remove(&name).context("POI group validated but missing from map")?.into_iter().collect();
categories.sort();
Ok(state::POICategoryGroup { name, categories })
})
.collect::<anyhow::Result<Vec<_>>>()?
};
// Precompute enum name → index map
let enum_name_to_idx: rustc_hash::FxHashMap<String, usize> = property_data
.enum_features
.iter()
.enumerate()
.map(|(index, enum_feature)| (enum_feature.name.clone(), index))
.collect();
let state = Arc::new(AppState {
data: property_data,
@ -81,6 +160,12 @@ async fn main() {
h3_cells,
poi_data,
poi_grid,
min_keys,
max_keys,
enum_min_keys,
enum_max_keys,
poi_category_groups,
enum_name_to_idx,
});
let cors = CorsLayer::new()
@ -93,6 +178,7 @@ async fn main() {
let state_pois = state.clone();
let state_poi_categories = state.clone();
let state_hexagon_properties = state.clone();
let state_hexagon_stats = state.clone();
let api = Router::new()
.route(
@ -116,9 +202,23 @@ async fn main() {
get(move |query| {
routes::get_hexagon_properties(state_hexagon_properties.clone(), query)
}),
)
.route(
"/api/hexagon-stats",
get(move |query| routes::get_hexagon_stats(state_hexagon_stats.clone(), query)),
);
let frontend_dist = PathBuf::from("frontend/dist");
let frontend_dist = cli.dist.unwrap_or_else(|| {
// Check next to the binary first, then fall back to working directory
if let Ok(executable) = std::env::current_exe() {
let executable_dir = executable.parent().unwrap_or_else(|| std::path::Path::new("."));
let dist_next_to_binary = executable_dir.join("dist");
if dist_next_to_binary.exists() {
return dist_next_to_binary;
}
}
PathBuf::from("frontend/dist")
});
let app = if frontend_dist.exists() {
api.fallback_service(ServeDir::new(frontend_dist))
} else {
@ -127,12 +227,16 @@ async fn main() {
let app = app
.layer(cors)
.layer(CompressionLayer::new().gzip(true))
.layer(CompressionLayer::new().zstd(true).gzip(true))
.layer(TraceLayer::new_for_http());
let addr = "0.0.0.0:8001";
let addr = consts::SERVER_ADDRESS;
let listener = tokio::net::TcpListener::bind(addr)
.await
.with_context(|| format!("Failed to bind to {addr}"))?;
info!("Server listening on {}", addr);
let listener = tokio::net::TcpListener::bind(addr).await.unwrap();
axum::serve(listener, app).await.unwrap();
axum::serve(listener, app)
.await
.context("Server error")?;
Ok(())
}