Rust things
This commit is contained in:
parent
fc10381692
commit
3debacab4f
30 changed files with 3257 additions and 647 deletions
|
|
@ -5,6 +5,7 @@ use anyhow::{bail, Context};
|
|||
use polars::frame::DataFrame;
|
||||
use polars::lazy::frame::LazyFrame;
|
||||
use polars::prelude::*;
|
||||
use rustc_hash::FxHashSet;
|
||||
use serde::Serialize;
|
||||
use tracing::info;
|
||||
|
||||
|
|
@ -17,6 +18,94 @@ pub struct POICategoryGroup {
|
|||
pub categories: Vec<String>,
|
||||
}
|
||||
|
||||
/// Category labels shown under the dashboard's "Groceries" group.
///
/// The slice order is preserved as written; the entries are compared against
/// POI category strings verbatim, so spelling and capitalisation matter.
const GROCERY_DASHBOARD_CATEGORIES: &[&str] = &[
    // Generic store types.
    "Supermarket",
    "Convenience Store",
    "Bakery",
    "Greengrocer",
    // Named chains.
    "Aldi",
    "Amazon",
    "Asda",
    "Booths",
    "Budgens",
    "Centra",
    "Co-op",
    "COOK",
    "Costco",
    "Dunnes Stores",
    "Farmfoods",
    "Heron Foods",
    "Iceland",
    "Lidl",
    "Makro",
    "M&S",
    "Morrisons",
    "Planet Organic",
    "Sainsbury's",
    "Spar",
    "Tesco",
    "The Food Warehouse",
    "Waitrose",
    "Whole Foods Market",
];
|
||||
|
||||
const DASHBOARD_POI_GROUPS: &[(&str, &[&str])] = &[
|
||||
(
|
||||
"Public Transport",
|
||||
&[
|
||||
"Rail station",
|
||||
"Tube station",
|
||||
"Bus station",
|
||||
"Bus stop",
|
||||
"Airport",
|
||||
],
|
||||
),
|
||||
("Groceries", GROCERY_DASHBOARD_CATEGORIES),
|
||||
("Food & Drink", &["Café", "Restaurant", "Pub", "Fast Food"]),
|
||||
("Green Space", &["Park", "Playground"]),
|
||||
("Education", &["School"]),
|
||||
(
|
||||
"Health",
|
||||
&["GP Surgery", "Pharmacy", "Dentist", "Hospital & Clinic"],
|
||||
),
|
||||
(
|
||||
"Leisure",
|
||||
&[
|
||||
"Gym & Fitness",
|
||||
"Sports Centre",
|
||||
"Cinema",
|
||||
"Theatre",
|
||||
"Library",
|
||||
],
|
||||
),
|
||||
(
|
||||
"Practical",
|
||||
&["Post Office", "Bank", "EV Charging", "Fuel Station"],
|
||||
),
|
||||
];
|
||||
|
||||
/// Records the index of `category` within `category_values` in `selected`.
///
/// Matching is an exact string comparison and only the first occurrence is
/// considered. Nothing is inserted when `category` is absent.
///
/// Indices are stored as `u16`, so only the first `u16::MAX + 1` entries of
/// `category_values` are addressable. A match beyond that range is ignored
/// rather than being wrapped onto an unrelated, smaller index.
fn add_category_filter_index(
    category_values: &[String],
    category: &str,
    selected: &mut FxHashSet<u16>,
) {
    // Pairing the values with a bounded `u16` counter yields the index already
    // in its final type and stops the scan where `u16` runs out, so no
    // truncating `as` cast is needed.
    let first_match = (0..=u16::MAX)
        .zip(category_values)
        .find(|(_, value)| value.as_str() == category);

    if let Some((index, _)) = first_match {
        selected.insert(index);
    }
}
|
||||
|
||||
pub fn resolve_poi_category_filter(category_values: &[String], categories: &str) -> FxHashSet<u16> {
|
||||
let mut selected = FxHashSet::default();
|
||||
for part in categories.split(',') {
|
||||
let category = part.trim();
|
||||
if category.is_empty() {
|
||||
continue;
|
||||
}
|
||||
add_category_filter_index(category_values, category, &mut selected);
|
||||
}
|
||||
selected
|
||||
}
|
||||
|
||||
pub struct POIData {
|
||||
/// Contiguous buffer holding all POI ID strings end-to-end.
|
||||
id_buffer: String,
|
||||
|
|
@ -53,13 +142,18 @@ fn extract_str_col(df: &DataFrame, name: &str) -> anyhow::Result<Vec<String>> {
|
|||
let string_column = column
|
||||
.str()
|
||||
.with_context(|| format!("Column '{name}' is not a string column"))?;
|
||||
Ok(string_column
|
||||
string_column
|
||||
.into_iter()
|
||||
.map(|value| value.unwrap_or("").to_string())
|
||||
.collect())
|
||||
.enumerate()
|
||||
.map(|(row, value)| {
|
||||
value
|
||||
.map(ToString::to_string)
|
||||
.with_context(|| format!("Column '{name}' has null at row {row}"))
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn extract_f32_col(df: &DataFrame, name: &str, default: f32) -> anyhow::Result<Vec<f32>> {
|
||||
fn extract_f32_col(df: &DataFrame, name: &str) -> anyhow::Result<Vec<f32>> {
|
||||
let column = df
|
||||
.column(name)
|
||||
.with_context(|| format!("Missing column '{name}' in POI data"))?;
|
||||
|
|
@ -69,16 +163,23 @@ fn extract_f32_col(df: &DataFrame, name: &str, default: f32) -> anyhow::Result<V
|
|||
let float_column = cast
|
||||
.f32()
|
||||
.with_context(|| format!("Column '{name}' is not a float32 column"))?;
|
||||
Ok(float_column
|
||||
float_column
|
||||
.into_iter()
|
||||
.map(|value| value.unwrap_or(default))
|
||||
.collect())
|
||||
.enumerate()
|
||||
.map(|(row, value)| value.with_context(|| format!("Column '{name}' has null at row {row}")))
|
||||
.collect()
|
||||
}
|
||||
|
||||
impl POIData {
|
||||
pub fn load(parquet_path: &Path) -> anyhow::Result<Self> {
|
||||
super::run_polars_io(|| Self::load_inner(parquet_path))
|
||||
}
|
||||
|
||||
fn load_inner(parquet_path: &Path) -> anyhow::Result<Self> {
|
||||
info!("Loading POI data from {:?}...", parquet_path);
|
||||
|
||||
let parquet_path = PlRefPath::try_from_path(parquet_path)
|
||||
.context("Failed to normalize POI parquet path")?;
|
||||
let df = LazyFrame::scan_parquet(parquet_path, Default::default())
|
||||
.context("Failed to scan POI parquet")?
|
||||
.collect()
|
||||
|
|
@ -91,18 +192,10 @@ impl POIData {
|
|||
let name = extract_str_col(&df, "name")?;
|
||||
let category_raw = extract_str_col(&df, "category")?;
|
||||
let group_raw = extract_str_col(&df, "group")?;
|
||||
let lat = extract_f32_col(&df, "lat", 0.0)?;
|
||||
let lng = extract_f32_col(&df, "lng", 0.0)?;
|
||||
let lat = extract_f32_col(&df, "lat")?;
|
||||
let lng = extract_f32_col(&df, "lng")?;
|
||||
let emoji_raw = extract_str_col(&df, "emoji")?;
|
||||
let icon_category_raw = if df
|
||||
.get_column_names()
|
||||
.iter()
|
||||
.any(|name| name.as_str() == "icon_category")
|
||||
{
|
||||
extract_str_col(&df, "icon_category")?
|
||||
} else {
|
||||
category_raw.clone()
|
||||
};
|
||||
let icon_category_raw = extract_str_col(&df, "icon_category")?;
|
||||
|
||||
// Pack POI IDs into a contiguous buffer
|
||||
let total_id_bytes: usize = id_raw.iter().map(|s| s.len()).sum();
|
||||
|
|
@ -152,7 +245,7 @@ impl POIData {
|
|||
})
|
||||
}
|
||||
|
||||
/// Build category groups from the loaded POI data, validated against POI_GROUP_ORDER.
|
||||
/// Build dashboard category groups from every category present in the loaded POI data.
|
||||
pub fn category_groups(&self) -> anyhow::Result<Vec<POICategoryGroup>> {
|
||||
let mut group_cats: HashMap<String, HashSet<String>> = HashMap::new();
|
||||
let num_pois = self.category.indices.len();
|
||||
|
|
@ -174,18 +267,78 @@ impl POIData {
|
|||
);
|
||||
}
|
||||
|
||||
POI_GROUP_ORDER
|
||||
let preferred_order: HashMap<&str, HashMap<&str, usize>> = DASHBOARD_POI_GROUPS
|
||||
.iter()
|
||||
.map(|group_name| {
|
||||
let name = group_name.to_string();
|
||||
let mut categories: Vec<String> = group_cats
|
||||
.remove(&name)
|
||||
.context("POI group validated but missing from map")?
|
||||
.into_iter()
|
||||
.collect();
|
||||
categories.sort();
|
||||
Ok(POICategoryGroup { name, categories })
|
||||
.map(|(group, categories)| {
|
||||
(
|
||||
*group,
|
||||
categories
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(idx, category)| (*category, idx))
|
||||
.collect(),
|
||||
)
|
||||
})
|
||||
.collect()
|
||||
.collect();
|
||||
|
||||
let groups: Vec<POICategoryGroup> = POI_GROUP_ORDER
|
||||
.iter()
|
||||
.filter_map(|group_name| {
|
||||
let mut categories: Vec<String> = group_cats
|
||||
.get(*group_name)
|
||||
.map(|categories| categories.iter().cloned().collect())
|
||||
.unwrap_or_default();
|
||||
if categories.is_empty() {
|
||||
return None;
|
||||
}
|
||||
let group_order = preferred_order.get(*group_name);
|
||||
categories.sort_by(|a, b| {
|
||||
let a_order = group_order.and_then(|order| order.get(a.as_str())).copied();
|
||||
let b_order = group_order.and_then(|order| order.get(b.as_str())).copied();
|
||||
match (a_order, b_order) {
|
||||
(Some(left), Some(right)) => left.cmp(&right),
|
||||
(Some(_), None) => std::cmp::Ordering::Less,
|
||||
(None, Some(_)) => std::cmp::Ordering::Greater,
|
||||
(None, None) => a.cmp(b),
|
||||
}
|
||||
});
|
||||
Some(POICategoryGroup {
|
||||
name: (*group_name).to_string(),
|
||||
categories,
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(groups)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Turns string literals into the owned category values the filter expects.
    fn owned(names: &[&str]) -> Vec<String> {
        names.iter().map(|name| name.to_string()).collect()
    }

    /// Names present in the category list resolve to exactly their positions.
    #[test]
    fn category_filter_matches_exact_present_categories() {
        let values = owned(&["Supermarket", "Tesco", "Aldi", "Rail station"]);

        let selected = resolve_poi_category_filter(&values, "Supermarket,Rail station");

        let expected: FxHashSet<u16> = [0u16, 3].iter().copied().collect();
        assert_eq!(selected, expected);
    }

    /// A name that is not in the category list selects nothing.
    #[test]
    fn unknown_category_filter_matches_nothing() {
        let values = owned(&["Supermarket"]);

        let selected = resolve_poi_category_filter(&values, "Unknown");

        assert_eq!(selected.len(), 0);
    }
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue