lmao
This commit is contained in:
parent
03445188ea
commit
524580eb25
102 changed files with 36625 additions and 1295 deletions
149
server-rs/src/data/poi.rs
Normal file
149
server-rs/src/data/poi.rs
Normal file
|
|
@ -0,0 +1,149 @@
|
|||
use std::collections::{HashMap, HashSet};
|
||||
use std::path::Path;
|
||||
|
||||
use anyhow::{bail, Context};
|
||||
use polars::frame::DataFrame;
|
||||
use polars::lazy::frame::LazyFrame;
|
||||
use polars::prelude::*;
|
||||
use serde::Serialize;
|
||||
use tracing::info;
|
||||
|
||||
use crate::features::POI_GROUP_ORDER;
|
||||
use crate::utils::{generate_priorities, InternedColumn};
|
||||
|
||||
#[derive(Serialize, Clone)]
|
||||
pub struct POICategoryGroup {
|
||||
pub name: String,
|
||||
pub categories: Vec<String>,
|
||||
}
|
||||
|
||||
pub struct POIData {
|
||||
pub id: Vec<String>,
|
||||
pub group: InternedColumn,
|
||||
pub category: InternedColumn,
|
||||
pub name: Vec<String>,
|
||||
pub lat: Vec<f32>,
|
||||
pub lng: Vec<f32>,
|
||||
pub emoji: InternedColumn,
|
||||
/// Deterministic pseudo-random priority per row, used to select a spatially
|
||||
/// uniform subset when the POI count exceeds the per-request limit.
|
||||
/// Computed once at load time so the same POIs are always chosen for a given viewport.
|
||||
pub priority: Vec<u32>,
|
||||
}
|
||||
|
||||
/// Copies the string column `name` of `df` into an owned `Vec<String>`.
///
/// Null cells are turned into empty strings.
///
/// # Errors
///
/// Fails when the column does not exist or is not a string column.
fn extract_str_col(df: &DataFrame, name: &str) -> anyhow::Result<Vec<String>> {
    let strings = df
        .column(name)
        .with_context(|| format!("Missing column '{name}' in POI data"))?
        .str()
        .with_context(|| format!("Column '{name}' is not a string column"))?;

    let values = strings
        .into_iter()
        .map(|cell| match cell {
            Some(text) => text.to_string(),
            // A null cell is represented by the empty string.
            None => String::new(),
        })
        .collect();

    Ok(values)
}
|
||||
|
||||
fn extract_f32_col(df: &DataFrame, name: &str, default: f32) -> anyhow::Result<Vec<f32>> {
|
||||
let column = df
|
||||
.column(name)
|
||||
.with_context(|| format!("Missing column '{name}' in POI data"))?;
|
||||
let cast = column
|
||||
.cast(&DataType::Float32)
|
||||
.with_context(|| format!("Failed to cast column '{name}' to Float32"))?;
|
||||
let float_column = cast
|
||||
.f32()
|
||||
.with_context(|| format!("Column '{name}' is not a float32 column"))?;
|
||||
Ok(float_column
|
||||
.into_iter()
|
||||
.map(|value| value.unwrap_or(default))
|
||||
.collect())
|
||||
}
|
||||
|
||||
impl POIData {
|
||||
pub fn load(parquet_path: &Path) -> anyhow::Result<Self> {
|
||||
info!("Loading POI data from {:?}...", parquet_path);
|
||||
|
||||
let df = LazyFrame::scan_parquet(parquet_path, Default::default())
|
||||
.context("Failed to scan POI parquet")?
|
||||
.collect()
|
||||
.context("Failed to read POI parquet")?;
|
||||
|
||||
let row_count = df.height();
|
||||
info!("Loaded {} POIs", row_count);
|
||||
|
||||
let id: Vec<String> = extract_str_col(&df, "id")?;
|
||||
let name = extract_str_col(&df, "name")?;
|
||||
let category_raw = extract_str_col(&df, "category")?;
|
||||
let group_raw = extract_str_col(&df, "group")?;
|
||||
let lat = extract_f32_col(&df, "lat", 0.0)?;
|
||||
let lng = extract_f32_col(&df, "lng", 0.0)?;
|
||||
let emoji_raw = extract_str_col(&df, "emoji")?;
|
||||
|
||||
let category = InternedColumn::build(&category_raw);
|
||||
let group = InternedColumn::build(&group_raw);
|
||||
let emoji = InternedColumn::build(&emoji_raw);
|
||||
|
||||
info!(
|
||||
category_unique = category.values.len(),
|
||||
group_unique = group.values.len(),
|
||||
emoji_unique = emoji.values.len(),
|
||||
"POI string columns interned"
|
||||
);
|
||||
|
||||
// Assign a deterministic pseudo-random priority to each row.
|
||||
// This ensures the same POIs are selected across requests,
|
||||
// preventing visual "shuffling" when panning the map.
|
||||
let priority = generate_priorities(row_count);
|
||||
|
||||
info!("POI data loading complete.");
|
||||
|
||||
Ok(POIData {
|
||||
id,
|
||||
name,
|
||||
category,
|
||||
group,
|
||||
lat,
|
||||
lng,
|
||||
emoji,
|
||||
priority,
|
||||
})
|
||||
}
|
||||
|
||||
/// Build category groups from the loaded POI data, validated against POI_GROUP_ORDER.
|
||||
pub fn category_groups(&self) -> anyhow::Result<Vec<POICategoryGroup>> {
|
||||
let mut group_cats: HashMap<String, HashSet<String>> = HashMap::new();
|
||||
let num_pois = self.category.indices.len();
|
||||
for row in 0..num_pois {
|
||||
let category = self.category.get(row).to_string();
|
||||
let group = self.group.get(row).to_string();
|
||||
group_cats.entry(group).or_default().insert(category);
|
||||
}
|
||||
|
||||
// Validate that data groups match the hardcoded order exactly
|
||||
let expected: HashSet<&str> = POI_GROUP_ORDER.iter().copied().collect();
|
||||
let actual: HashSet<&str> = group_cats.keys().map(|key| key.as_str()).collect();
|
||||
let missing_from_data: Vec<&&str> = expected.difference(&actual).collect();
|
||||
let missing_from_order: Vec<&&str> = actual.difference(&expected).collect();
|
||||
if !missing_from_data.is_empty() || !missing_from_order.is_empty() {
|
||||
bail!(
|
||||
"POI group mismatch!\n In POI_GROUP_ORDER but not in data: {:?}\n In data but not in POI_GROUP_ORDER: {:?}",
|
||||
missing_from_data, missing_from_order
|
||||
);
|
||||
}
|
||||
|
||||
POI_GROUP_ORDER
|
||||
.iter()
|
||||
.map(|group_name| {
|
||||
let name = group_name.to_string();
|
||||
let mut categories: Vec<String> = group_cats
|
||||
.remove(&name)
|
||||
.context("POI group validated but missing from map")?
|
||||
.into_iter()
|
||||
.collect();
|
||||
categories.sort();
|
||||
Ok(POICategoryGroup { name, categories })
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue