These work
This commit is contained in:
parent
3599803589
commit
1588c01b19
19 changed files with 260 additions and 201 deletions
|
|
@ -191,20 +191,11 @@ async fn main() -> anyhow::Result<()> {
|
|||
let poi_category_groups = poi_data.category_groups()?;
|
||||
|
||||
// Read index.html at startup for crawler OG injection
|
||||
let frontend_dist = cli.dist.unwrap_or_else(|| {
|
||||
if let Ok(executable) = std::env::current_exe() {
|
||||
let executable_dir = executable
|
||||
.parent()
|
||||
.unwrap_or_else(|| std::path::Path::new("."));
|
||||
let dist_next_to_binary = executable_dir.join("dist");
|
||||
if dist_next_to_binary.exists() {
|
||||
return dist_next_to_binary;
|
||||
}
|
||||
}
|
||||
PathBuf::from("frontend/dist")
|
||||
});
|
||||
let frontend_dist = cli
|
||||
.dist
|
||||
.unwrap_or_else(|| PathBuf::from("frontend/dist"));
|
||||
|
||||
let index_html = if frontend_dist.exists() {
|
||||
let index_html = {
|
||||
let index_path = frontend_dist.join("index.html");
|
||||
match std::fs::read_to_string(&index_path) {
|
||||
Ok(html) => {
|
||||
|
|
@ -212,12 +203,14 @@ async fn main() -> anyhow::Result<()> {
|
|||
Some(html)
|
||||
}
|
||||
Err(err) => {
|
||||
warn!("Could not read index.html: {}", err);
|
||||
warn!(
|
||||
"Could not read {}: {} (OG injection disabled)",
|
||||
index_path.display(),
|
||||
err
|
||||
);
|
||||
None
|
||||
}
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let http_client = reqwest::Client::new();
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
use rustc_hash::{FxHashMap, FxHashSet};
|
||||
|
||||
/// Filter for numeric features: value must be in [min, max] range.
|
||||
#[derive(Debug)]
|
||||
pub struct ParsedFilter {
|
||||
pub feat_idx: usize,
|
||||
pub min: f32,
|
||||
|
|
@ -9,6 +10,7 @@ pub struct ParsedFilter {
|
|||
|
||||
/// Filter for enum features: value must be one of the allowed indices.
|
||||
/// Uses FxHashSet<u32> (f32 bits) for O(1) lookups instead of O(n) Vec::contains.
|
||||
#[derive(Debug)]
|
||||
pub struct ParsedEnumFilter {
|
||||
pub feat_idx: usize,
|
||||
/// Allowed enum indices stored as f32 bits for exact comparison
|
||||
|
|
@ -18,31 +20,33 @@ pub struct ParsedEnumFilter {
|
|||
/// Parse comma-separated filter string into numeric and enum filters.
|
||||
/// Numeric format: `name:min:max`
|
||||
/// Enum format: `name:val1|val2|val3` (pipe-separated string values)
|
||||
///
|
||||
/// Returns an error if any filter entry is malformed or references an unknown feature.
|
||||
pub fn parse_filters(
|
||||
filter_str: Option<&str>,
|
||||
feature_name_to_index: &FxHashMap<String, usize>,
|
||||
enum_values: &FxHashMap<usize, Vec<String>>,
|
||||
) -> (Vec<ParsedFilter>, Vec<ParsedEnumFilter>) {
|
||||
) -> Result<(Vec<ParsedFilter>, Vec<ParsedEnumFilter>), String> {
|
||||
let mut numeric = Vec::new();
|
||||
let mut enums = Vec::new();
|
||||
|
||||
let input = match filter_str.filter(|text| !text.is_empty()) {
|
||||
Some(text) => text,
|
||||
None => return (numeric, enums),
|
||||
None => return Ok((numeric, enums)),
|
||||
};
|
||||
|
||||
for entry in input.split(',') {
|
||||
let parts: Vec<&str> = entry.splitn(2, ':').collect();
|
||||
if parts.len() != 2 {
|
||||
continue;
|
||||
return Err(format!("Malformed filter entry (missing ':'): '{entry}'"));
|
||||
}
|
||||
let name = parts[0].trim();
|
||||
let rest = parts[1].trim();
|
||||
|
||||
// Find feature index by name (O(1) lookup)
|
||||
let Some(&feat_idx) = feature_name_to_index.get(name) else {
|
||||
continue;
|
||||
};
|
||||
let &feat_idx = feature_name_to_index
|
||||
.get(name)
|
||||
.ok_or_else(|| format!("Unknown feature in filter: '{name}'"))?;
|
||||
|
||||
// Check if this is an enum feature
|
||||
if let Some(values) = enum_values.get(&feat_idx) {
|
||||
|
|
@ -62,21 +66,23 @@ pub fn parse_filters(
|
|||
// Numeric filter: parse min:max
|
||||
let num_parts: Vec<&str> = rest.splitn(2, ':').collect();
|
||||
if num_parts.len() != 2 {
|
||||
continue;
|
||||
return Err(format!(
|
||||
"Numeric filter '{name}' must have format 'name:min:max', got '{entry}'"
|
||||
));
|
||||
}
|
||||
let min = match num_parts[0].trim().parse::<f32>() {
|
||||
Ok(value) => value,
|
||||
Err(_) => continue,
|
||||
};
|
||||
let max = match num_parts[1].trim().parse::<f32>() {
|
||||
Ok(value) => value,
|
||||
Err(_) => continue,
|
||||
};
|
||||
let min = num_parts[0]
|
||||
.trim()
|
||||
.parse::<f32>()
|
||||
.map_err(|err| format!("Invalid min value in filter '{name}': {err}"))?;
|
||||
let max = num_parts[1]
|
||||
.trim()
|
||||
.parse::<f32>()
|
||||
.map_err(|err| format!("Invalid max value in filter '{name}': {err}"))?;
|
||||
numeric.push(ParsedFilter { feat_idx, min, max });
|
||||
}
|
||||
}
|
||||
|
||||
(numeric, enums)
|
||||
Ok((numeric, enums))
|
||||
}
|
||||
|
||||
/// Check if a row passes all filters.
|
||||
|
|
@ -155,7 +161,8 @@ mod tests {
|
|||
Some("price:100:500"),
|
||||
&feature_name_to_index(),
|
||||
&enum_values(),
|
||||
);
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(numeric.len(), 1);
|
||||
assert_eq!(numeric[0].feat_idx, 0);
|
||||
assert_eq!(numeric[0].min, 100.0);
|
||||
|
|
@ -166,7 +173,7 @@ mod tests {
|
|||
#[test]
|
||||
fn parse_filters_enum() {
|
||||
let (numeric, enums) =
|
||||
parse_filters(Some("rating:A|C"), &feature_name_to_index(), &enum_values());
|
||||
parse_filters(Some("rating:A|C"), &feature_name_to_index(), &enum_values()).unwrap();
|
||||
assert!(numeric.is_empty());
|
||||
assert_eq!(enums.len(), 1);
|
||||
assert_eq!(enums[0].feat_idx, 2);
|
||||
|
|
@ -176,19 +183,23 @@ mod tests {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn parse_filters_empty_and_invalid() {
|
||||
let (n, e) = parse_filters(None, &feature_name_to_index(), &enum_values());
|
||||
fn parse_filters_empty() {
|
||||
let (n, e) = parse_filters(None, &feature_name_to_index(), &enum_values()).unwrap();
|
||||
assert!(n.is_empty() && e.is_empty());
|
||||
|
||||
let (n, e) = parse_filters(Some(""), &feature_name_to_index(), &enum_values());
|
||||
let (n, e) = parse_filters(Some(""), &feature_name_to_index(), &enum_values()).unwrap();
|
||||
assert!(n.is_empty() && e.is_empty());
|
||||
}
|
||||
|
||||
let (n, e) = parse_filters(
|
||||
#[test]
|
||||
fn parse_filters_unknown_feature_errors() {
|
||||
let result = parse_filters(
|
||||
Some("unknown:1:2"),
|
||||
&feature_name_to_index(),
|
||||
&enum_values(),
|
||||
);
|
||||
assert!(n.is_empty() && e.is_empty());
|
||||
assert!(result.is_err());
|
||||
assert!(result.unwrap_err().contains("Unknown feature"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -226,7 +237,8 @@ mod tests {
|
|||
Some("Price:100000:500000,Area:50:200"),
|
||||
&extended_feature_map(),
|
||||
&extended_enum_values(),
|
||||
);
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(numeric.len(), 2);
|
||||
assert_eq!(numeric[0].feat_idx, 0);
|
||||
|
|
@ -239,22 +251,23 @@ mod tests {
|
|||
Some("Price:100000:500000,Type:Semi|Terraced"),
|
||||
&extended_feature_map(),
|
||||
&extended_enum_values(),
|
||||
);
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(numeric.len(), 1);
|
||||
assert_eq!(enums.len(), 1);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_invalid_numeric_format_ignored() {
|
||||
let (numeric, enums) = parse_filters(
|
||||
fn parse_invalid_numeric_format_errors() {
|
||||
let result = parse_filters(
|
||||
Some("Price:not_a_number:500000"),
|
||||
&extended_feature_map(),
|
||||
&extended_enum_values(),
|
||||
);
|
||||
|
||||
assert!(numeric.is_empty());
|
||||
assert!(enums.is_empty());
|
||||
assert!(result.is_err());
|
||||
assert!(result.unwrap_err().contains("Invalid min value"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -263,7 +276,8 @@ mod tests {
|
|||
Some("Type:Detached|Unknown|Flat"),
|
||||
&extended_feature_map(),
|
||||
&extended_enum_values(),
|
||||
);
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(enums.len(), 1);
|
||||
assert!(enums[0].allowed.contains(&(0.0_f32).to_bits())); // Detached
|
||||
|
|
@ -277,7 +291,8 @@ mod tests {
|
|||
Some("Price : 100000 : 500000 , Type : Detached | Flat"),
|
||||
&extended_feature_map(),
|
||||
&extended_enum_values(),
|
||||
);
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(numeric.len(), 1);
|
||||
assert_eq!(enums.len(), 1);
|
||||
|
|
|
|||
|
|
@ -90,7 +90,8 @@ pub async fn get_hexagon_stats(
|
|||
params.filters.as_deref(),
|
||||
&state.feature_name_to_index,
|
||||
&state.data.enum_values,
|
||||
);
|
||||
)
|
||||
.map_err(|err| (StatusCode::BAD_REQUEST, err))?;
|
||||
let num_filters = parsed_filters.len() + parsed_enum_filters.len();
|
||||
|
||||
let (fields_specified, field_set) = parse_field_set(params.fields.as_deref());
|
||||
|
|
|
|||
|
|
@ -52,7 +52,8 @@ pub async fn get_postcode_stats(
|
|||
params.filters.as_deref(),
|
||||
&state.feature_name_to_index,
|
||||
&state.data.enum_values,
|
||||
);
|
||||
)
|
||||
.map_err(|err| (StatusCode::BAD_REQUEST, err))?;
|
||||
let num_filters = parsed_filters.len() + parsed_enum_filters.len();
|
||||
|
||||
let (fields_specified, field_set) = parse_field_set(params.fields.as_deref());
|
||||
|
|
|
|||
|
|
@ -63,7 +63,7 @@ fn non_empty_string(text: &str) -> Option<String> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Look up an enum feature value by trying multiple possible column names.
|
||||
/// Look up an enum feature value by column name.
|
||||
/// Uses the unified feature model: enum values stored as f32 indices in feature_data.
|
||||
fn lookup_enum_value(
|
||||
feature_name_to_index: &FxHashMap<String, usize>,
|
||||
|
|
@ -71,22 +71,17 @@ fn lookup_enum_value(
|
|||
num_features: usize,
|
||||
enum_values: &FxHashMap<usize, Vec<String>>,
|
||||
row: usize,
|
||||
names: &[&str],
|
||||
name: &str,
|
||||
) -> Option<String> {
|
||||
for name in names {
|
||||
if let Some(&feat_idx) = feature_name_to_index.get(*name) {
|
||||
if let Some(values) = enum_values.get(&feat_idx) {
|
||||
let value = feature_data[row * num_features + feat_idx];
|
||||
if value.is_finite() {
|
||||
let idx = value as usize;
|
||||
if let Some(str_value) = values.get(idx) {
|
||||
return Some(str_value.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
let &feat_idx = feature_name_to_index.get(name)?;
|
||||
let values = enum_values.get(&feat_idx)?;
|
||||
let value = feature_data[row * num_features + feat_idx];
|
||||
if value.is_finite() {
|
||||
let idx = value as usize;
|
||||
values.get(idx).cloned()
|
||||
} else {
|
||||
None
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
pub async fn get_hexagon_properties(
|
||||
|
|
@ -111,7 +106,8 @@ pub async fn get_hexagon_properties(
|
|||
params.filters.as_deref(),
|
||||
&state.feature_name_to_index,
|
||||
&state.data.enum_values,
|
||||
);
|
||||
)
|
||||
.map_err(|err| (StatusCode::BAD_REQUEST, err))?;
|
||||
let num_filters = parsed_filters.len() + parsed_enum_filters.len();
|
||||
|
||||
let result = tokio::task::spawn_blocking(move || {
|
||||
|
|
@ -182,7 +178,7 @@ pub async fn get_hexagon_properties(
|
|||
num_features,
|
||||
enum_values,
|
||||
row,
|
||||
&["Property type", "epc_property_type", "pp_property_type"],
|
||||
"Property type",
|
||||
),
|
||||
built_form: lookup_enum_value(
|
||||
feature_name_to_index,
|
||||
|
|
@ -190,7 +186,7 @@ pub async fn get_hexagon_properties(
|
|||
num_features,
|
||||
enum_values,
|
||||
row,
|
||||
&["Property type/built form", "built_form"],
|
||||
"Property type/built form",
|
||||
),
|
||||
duration: lookup_enum_value(
|
||||
feature_name_to_index,
|
||||
|
|
@ -198,7 +194,7 @@ pub async fn get_hexagon_properties(
|
|||
num_features,
|
||||
enum_values,
|
||||
row,
|
||||
&["Leashold/Freehold", "duration"],
|
||||
"Leashold/Freehold",
|
||||
),
|
||||
current_energy_rating: lookup_enum_value(
|
||||
feature_name_to_index,
|
||||
|
|
@ -206,7 +202,7 @@ pub async fn get_hexagon_properties(
|
|||
num_features,
|
||||
enum_values,
|
||||
row,
|
||||
&["Current energy rating", "current_energy_rating"],
|
||||
"Current energy rating",
|
||||
),
|
||||
potential_energy_rating: lookup_enum_value(
|
||||
feature_name_to_index,
|
||||
|
|
@ -214,7 +210,7 @@ pub async fn get_hexagon_properties(
|
|||
num_features,
|
||||
enum_values,
|
||||
row,
|
||||
&["Potential energy rating", "potential_energy_rating"],
|
||||
"Potential energy rating",
|
||||
),
|
||||
lat: state.data.lat[row],
|
||||
lon: state.data.lon[row],
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue