These work

This commit is contained in:
Andras Schmelczer 2026-02-11 21:32:33 +00:00
parent 3599803589
commit 1588c01b19
19 changed files with 260 additions and 201 deletions

View file

@ -191,20 +191,11 @@ async fn main() -> anyhow::Result<()> {
let poi_category_groups = poi_data.category_groups()?;
// Read index.html at startup so Open Graph (OG) meta tags can be injected for crawlers
let frontend_dist = cli.dist.unwrap_or_else(|| {
if let Ok(executable) = std::env::current_exe() {
let executable_dir = executable
.parent()
.unwrap_or_else(|| std::path::Path::new("."));
let dist_next_to_binary = executable_dir.join("dist");
if dist_next_to_binary.exists() {
return dist_next_to_binary;
}
}
PathBuf::from("frontend/dist")
});
let frontend_dist = cli
.dist
.unwrap_or_else(|| PathBuf::from("frontend/dist"));
let index_html = if frontend_dist.exists() {
let index_html = {
let index_path = frontend_dist.join("index.html");
match std::fs::read_to_string(&index_path) {
Ok(html) => {
@ -212,12 +203,14 @@ async fn main() -> anyhow::Result<()> {
Some(html)
}
Err(err) => {
warn!("Could not read index.html: {}", err);
warn!(
"Could not read {}: {} (OG injection disabled)",
index_path.display(),
err
);
None
}
}
} else {
None
};
let http_client = reqwest::Client::new();

View file

@ -1,6 +1,7 @@
use rustc_hash::{FxHashMap, FxHashSet};
/// Filter for numeric features: value must be in [min, max] range.
#[derive(Debug)]
pub struct ParsedFilter {
pub feat_idx: usize,
pub min: f32,
@ -9,6 +10,7 @@ pub struct ParsedFilter {
/// Filter for enum features: value must be one of the allowed indices.
/// Uses FxHashSet<u32> (each allowed index stored as its f32 bit pattern) for O(1) lookups instead of O(n) Vec::contains.
#[derive(Debug)]
pub struct ParsedEnumFilter {
pub feat_idx: usize,
/// Allowed enum indices stored as f32 bits for exact comparison
@ -18,31 +20,33 @@ pub struct ParsedEnumFilter {
/// Parse comma-separated filter string into numeric and enum filters.
/// Numeric format: `name:min:max`
/// Enum format: `name:val1|val2|val3` (pipe-separated string values)
///
/// Returns an error if any filter entry is malformed or references an unknown feature.
pub fn parse_filters(
filter_str: Option<&str>,
feature_name_to_index: &FxHashMap<String, usize>,
enum_values: &FxHashMap<usize, Vec<String>>,
) -> (Vec<ParsedFilter>, Vec<ParsedEnumFilter>) {
) -> Result<(Vec<ParsedFilter>, Vec<ParsedEnumFilter>), String> {
let mut numeric = Vec::new();
let mut enums = Vec::new();
let input = match filter_str.filter(|text| !text.is_empty()) {
Some(text) => text,
None => return (numeric, enums),
None => return Ok((numeric, enums)),
};
for entry in input.split(',') {
let parts: Vec<&str> = entry.splitn(2, ':').collect();
if parts.len() != 2 {
continue;
return Err(format!("Malformed filter entry (missing ':'): '{entry}'"));
}
let name = parts[0].trim();
let rest = parts[1].trim();
// Find feature index by name (O(1) lookup)
let Some(&feat_idx) = feature_name_to_index.get(name) else {
continue;
};
let &feat_idx = feature_name_to_index
.get(name)
.ok_or_else(|| format!("Unknown feature in filter: '{name}'"))?;
// Check if this is an enum feature
if let Some(values) = enum_values.get(&feat_idx) {
@ -62,21 +66,23 @@ pub fn parse_filters(
// Numeric filter: parse min:max
let num_parts: Vec<&str> = rest.splitn(2, ':').collect();
if num_parts.len() != 2 {
continue;
return Err(format!(
"Numeric filter '{name}' must have format 'name:min:max', got '{entry}'"
));
}
let min = match num_parts[0].trim().parse::<f32>() {
Ok(value) => value,
Err(_) => continue,
};
let max = match num_parts[1].trim().parse::<f32>() {
Ok(value) => value,
Err(_) => continue,
};
let min = num_parts[0]
.trim()
.parse::<f32>()
.map_err(|err| format!("Invalid min value in filter '{name}': {err}"))?;
let max = num_parts[1]
.trim()
.parse::<f32>()
.map_err(|err| format!("Invalid max value in filter '{name}': {err}"))?;
numeric.push(ParsedFilter { feat_idx, min, max });
}
}
(numeric, enums)
Ok((numeric, enums))
}
/// Check if a row passes all filters.
@ -155,7 +161,8 @@ mod tests {
Some("price:100:500"),
&feature_name_to_index(),
&enum_values(),
);
)
.unwrap();
assert_eq!(numeric.len(), 1);
assert_eq!(numeric[0].feat_idx, 0);
assert_eq!(numeric[0].min, 100.0);
@ -166,7 +173,7 @@ mod tests {
#[test]
fn parse_filters_enum() {
let (numeric, enums) =
parse_filters(Some("rating:A|C"), &feature_name_to_index(), &enum_values());
parse_filters(Some("rating:A|C"), &feature_name_to_index(), &enum_values()).unwrap();
assert!(numeric.is_empty());
assert_eq!(enums.len(), 1);
assert_eq!(enums[0].feat_idx, 2);
@ -176,19 +183,23 @@ mod tests {
}
#[test]
fn parse_filters_empty_and_invalid() {
let (n, e) = parse_filters(None, &feature_name_to_index(), &enum_values());
fn parse_filters_empty() {
let (n, e) = parse_filters(None, &feature_name_to_index(), &enum_values()).unwrap();
assert!(n.is_empty() && e.is_empty());
let (n, e) = parse_filters(Some(""), &feature_name_to_index(), &enum_values());
let (n, e) = parse_filters(Some(""), &feature_name_to_index(), &enum_values()).unwrap();
assert!(n.is_empty() && e.is_empty());
}
let (n, e) = parse_filters(
#[test]
fn parse_filters_unknown_feature_errors() {
let result = parse_filters(
Some("unknown:1:2"),
&feature_name_to_index(),
&enum_values(),
);
assert!(n.is_empty() && e.is_empty());
assert!(result.is_err());
assert!(result.unwrap_err().contains("Unknown feature"));
}
#[test]
@ -226,7 +237,8 @@ mod tests {
Some("Price:100000:500000,Area:50:200"),
&extended_feature_map(),
&extended_enum_values(),
);
)
.unwrap();
assert_eq!(numeric.len(), 2);
assert_eq!(numeric[0].feat_idx, 0);
@ -239,22 +251,23 @@ mod tests {
Some("Price:100000:500000,Type:Semi|Terraced"),
&extended_feature_map(),
&extended_enum_values(),
);
)
.unwrap();
assert_eq!(numeric.len(), 1);
assert_eq!(enums.len(), 1);
}
#[test]
fn parse_invalid_numeric_format_ignored() {
let (numeric, enums) = parse_filters(
fn parse_invalid_numeric_format_errors() {
let result = parse_filters(
Some("Price:not_a_number:500000"),
&extended_feature_map(),
&extended_enum_values(),
);
assert!(numeric.is_empty());
assert!(enums.is_empty());
assert!(result.is_err());
assert!(result.unwrap_err().contains("Invalid min value"));
}
#[test]
@ -263,7 +276,8 @@ mod tests {
Some("Type:Detached|Unknown|Flat"),
&extended_feature_map(),
&extended_enum_values(),
);
)
.unwrap();
assert_eq!(enums.len(), 1);
assert!(enums[0].allowed.contains(&(0.0_f32).to_bits())); // Detached
@ -277,7 +291,8 @@ mod tests {
Some("Price : 100000 : 500000 , Type : Detached | Flat"),
&extended_feature_map(),
&extended_enum_values(),
);
)
.unwrap();
assert_eq!(numeric.len(), 1);
assert_eq!(enums.len(), 1);

View file

@ -90,7 +90,8 @@ pub async fn get_hexagon_stats(
params.filters.as_deref(),
&state.feature_name_to_index,
&state.data.enum_values,
);
)
.map_err(|err| (StatusCode::BAD_REQUEST, err))?;
let num_filters = parsed_filters.len() + parsed_enum_filters.len();
let (fields_specified, field_set) = parse_field_set(params.fields.as_deref());

View file

@ -52,7 +52,8 @@ pub async fn get_postcode_stats(
params.filters.as_deref(),
&state.feature_name_to_index,
&state.data.enum_values,
);
)
.map_err(|err| (StatusCode::BAD_REQUEST, err))?;
let num_filters = parsed_filters.len() + parsed_enum_filters.len();
let (fields_specified, field_set) = parse_field_set(params.fields.as_deref());

View file

@ -63,7 +63,7 @@ fn non_empty_string(text: &str) -> Option<String> {
}
}
/// Look up an enum feature value by trying multiple possible column names.
/// Look up an enum feature value by column name.
/// Uses the unified feature model: enum values are stored as f32 indices in feature_data.
fn lookup_enum_value(
feature_name_to_index: &FxHashMap<String, usize>,
@ -71,22 +71,17 @@ fn lookup_enum_value(
num_features: usize,
enum_values: &FxHashMap<usize, Vec<String>>,
row: usize,
names: &[&str],
name: &str,
) -> Option<String> {
for name in names {
if let Some(&feat_idx) = feature_name_to_index.get(*name) {
if let Some(values) = enum_values.get(&feat_idx) {
let value = feature_data[row * num_features + feat_idx];
if value.is_finite() {
let idx = value as usize;
if let Some(str_value) = values.get(idx) {
return Some(str_value.clone());
}
}
}
}
let &feat_idx = feature_name_to_index.get(name)?;
let values = enum_values.get(&feat_idx)?;
let value = feature_data[row * num_features + feat_idx];
if value.is_finite() {
let idx = value as usize;
values.get(idx).cloned()
} else {
None
}
None
}
pub async fn get_hexagon_properties(
@ -111,7 +106,8 @@ pub async fn get_hexagon_properties(
params.filters.as_deref(),
&state.feature_name_to_index,
&state.data.enum_values,
);
)
.map_err(|err| (StatusCode::BAD_REQUEST, err))?;
let num_filters = parsed_filters.len() + parsed_enum_filters.len();
let result = tokio::task::spawn_blocking(move || {
@ -182,7 +178,7 @@ pub async fn get_hexagon_properties(
num_features,
enum_values,
row,
&["Property type", "epc_property_type", "pp_property_type"],
"Property type",
),
built_form: lookup_enum_value(
feature_name_to_index,
@ -190,7 +186,7 @@ pub async fn get_hexagon_properties(
num_features,
enum_values,
row,
&["Property type/built form", "built_form"],
"Property type/built form",
),
duration: lookup_enum_value(
feature_name_to_index,
@ -198,7 +194,7 @@ pub async fn get_hexagon_properties(
num_features,
enum_values,
row,
&["Leashold/Freehold", "duration"],
"Leashold/Freehold",
),
current_energy_rating: lookup_enum_value(
feature_name_to_index,
@ -206,7 +202,7 @@ pub async fn get_hexagon_properties(
num_features,
enum_values,
row,
&["Current energy rating", "current_energy_rating"],
"Current energy rating",
),
potential_energy_rating: lookup_enum_value(
feature_name_to_index,
@ -214,7 +210,7 @@ pub async fn get_hexagon_properties(
num_features,
enum_values,
row,
&["Potential energy rating", "potential_energy_rating"],
"Potential energy rating",
),
lat: state.data.lat[row],
lon: state.data.lon[row],