Lint & small changes
This commit is contained in:
parent
0c6d207967
commit
55238f59aa
21 changed files with 2522 additions and 423 deletions
1017
server-rs/logs/server.log.2026-04-04
Normal file
1017
server-rs/logs/server.log.2026-04-04
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -1,6 +1,13 @@
|
|||
use crate::consts::NAN_U16;
|
||||
use crate::data::QuantRef;
|
||||
|
||||
/// Optional per-enum-value distribution tracking for a single feature.
///
/// Counts how many rows have each enum value, keyed by the raw `u16` index
/// stored for that feature. Rows whose raw value is the NaN sentinel, or whose
/// index falls outside `counts`, are not counted.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EnumDist {
    /// Index of the tracked feature within a row of feature data.
    pub feat_idx: usize,
    /// `counts[i]` is the number of rows seen whose raw value for the tracked
    /// feature equals `i`.
    pub counts: Box<[u32]>,
}
|
||||
|
||||
/// Per-cell accumulator for aggregating features (min/max/sum/count).
|
||||
/// Uses Box<[T]> instead of Vec<T> to avoid storing capacity (saves 8 bytes per field per cell).
|
||||
/// Shared by hexagon and postcode aggregation routes.
|
||||
|
|
@ -10,16 +17,26 @@ pub struct Aggregator {
|
|||
pub maxs: Box<[f32]>,
|
||||
pub sums: Box<[f64]>,
|
||||
pub feat_counts: Box<[u32]>,
|
||||
/// Optional: per-value counts for a single enum feature (for pie chart visualization).
|
||||
pub enum_dist: Option<EnumDist>,
|
||||
}
|
||||
|
||||
/// Configuration for enum distribution tracking, passed to Aggregator::new.
|
||||
/// The tuple is `(feature_index, number_of_enum_values)`; `None` disables tracking.
|
||||
pub type EnumDistConfig = Option<(usize, usize)>;
|
||||
|
||||
impl Aggregator {
|
||||
pub fn new(num_features: usize) -> Self {
|
||||
pub fn new(num_features: usize, enum_dist_config: EnumDistConfig) -> Self {
|
||||
Aggregator {
|
||||
count: 0,
|
||||
mins: vec![f32::INFINITY; num_features].into_boxed_slice(),
|
||||
maxs: vec![f32::NEG_INFINITY; num_features].into_boxed_slice(),
|
||||
sums: vec![0.0f64; num_features].into_boxed_slice(),
|
||||
feat_counts: vec![0u32; num_features].into_boxed_slice(),
|
||||
enum_dist: enum_dist_config.map(|(feat_idx, num_values)| EnumDist {
|
||||
feat_idx,
|
||||
counts: vec![0u32; num_values].into_boxed_slice(),
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -50,6 +67,17 @@ impl Aggregator {
|
|||
self.feat_counts[feat_index] += 1;
|
||||
}
|
||||
}
|
||||
// Enum distribution: single branch per row (not per feature).
|
||||
// Uses raw u16 directly — enum features are stored as u16 indices.
|
||||
if let Some(ref mut ed) = self.enum_dist {
|
||||
let raw = row_slice[ed.feat_idx];
|
||||
if raw != NAN_U16 {
|
||||
let idx = raw as usize;
|
||||
if idx < ed.counts.len() {
|
||||
ed.counts[idx] += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Merge another aggregator's results into this one.
|
||||
|
|
@ -67,6 +95,12 @@ impl Aggregator {
|
|||
self.feat_counts[i] += other.feat_counts[i];
|
||||
}
|
||||
}
|
||||
// Merge enum distribution counts
|
||||
if let (Some(ref mut mine), Some(ref theirs)) = (&mut self.enum_dist, &other.enum_dist) {
|
||||
for (m, t) in mine.counts.iter_mut().zip(theirs.counts.iter()) {
|
||||
*m += t;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Add a row, only aggregating the features at the given indices.
|
||||
|
|
@ -95,5 +129,15 @@ impl Aggregator {
|
|||
self.feat_counts[feat_index] += 1;
|
||||
}
|
||||
}
|
||||
// Enum distribution (same raw u16 approach)
|
||||
if let Some(ref mut ed) = self.enum_dist {
|
||||
let raw = feature_data[base + ed.feat_idx];
|
||||
if raw != NAN_U16 {
|
||||
let idx = raw as usize;
|
||||
if idx < ed.counts.len() {
|
||||
ed.counts[idx] += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -31,6 +31,33 @@ pub fn parse_field_indices(
|
|||
Ok(Some(indices))
|
||||
}
|
||||
|
||||
/// Parse an optional `?enum_dist=` query param into (feature_index, num_values) for
|
||||
/// per-value distribution counting. Returns None if not requested.
|
||||
/// Returns 400 if the feature name is unknown or not an enum feature.
|
||||
pub fn parse_enum_dist(
|
||||
enum_dist: Option<&str>,
|
||||
name_to_index: &FxHashMap<String, usize>,
|
||||
enum_values: &FxHashMap<usize, Vec<String>>,
|
||||
) -> Result<Option<(usize, usize)>, (StatusCode, String)> {
|
||||
let Some(name) = enum_dist else {
|
||||
return Ok(None);
|
||||
};
|
||||
let name = name.trim();
|
||||
if name.is_empty() {
|
||||
return Ok(None);
|
||||
}
|
||||
let &feat_idx = name_to_index
|
||||
.get(name)
|
||||
.ok_or_else(|| (StatusCode::BAD_REQUEST, format!("Unknown feature: {name}")))?;
|
||||
let values = enum_values.get(&feat_idx).ok_or_else(|| {
|
||||
(
|
||||
StatusCode::BAD_REQUEST,
|
||||
format!("Feature is not an enum: {name}"),
|
||||
)
|
||||
})?;
|
||||
Ok(Some((feat_idx, values.len())))
|
||||
}
|
||||
|
||||
/// Parse an optional `?fields=` query param into a HashSet for stats filtering.
|
||||
/// Returns `(fields_specified, field_set)`.
|
||||
pub fn parse_field_set(fields: Option<&str>) -> (bool, HashSet<String>) {
|
||||
|
|
|
|||
|
|
@ -605,10 +605,14 @@ mod tests {
|
|||
// row 3: price=600, area=300 → fails both
|
||||
let tq = test_quant(2, 2);
|
||||
let feature_data = vec![
|
||||
tq.encode(0, 150.0), tq.encode(1, 100.0), // row 0
|
||||
tq.encode(0, 600.0), tq.encode(1, 100.0), // row 1
|
||||
tq.encode(0, 150.0), tq.encode(1, 300.0), // row 2
|
||||
tq.encode(0, 600.0), tq.encode(1, 300.0), // row 3
|
||||
tq.encode(0, 150.0),
|
||||
tq.encode(1, 100.0), // row 0
|
||||
tq.encode(0, 600.0),
|
||||
tq.encode(1, 100.0), // row 1
|
||||
tq.encode(0, 150.0),
|
||||
tq.encode(1, 300.0), // row 2
|
||||
tq.encode(0, 600.0),
|
||||
tq.encode(1, 300.0), // row 3
|
||||
];
|
||||
let filters = vec![
|
||||
ParsedFilter {
|
||||
|
|
@ -626,10 +630,10 @@ mod tests {
|
|||
let (total, impacts) =
|
||||
count_filter_impacts(&filters, &[], &feature_data, 2, (0..4u32).into_iter());
|
||||
|
||||
assert_eq!(total, 1); // only row 0 passes
|
||||
assert_eq!(impacts[0], 1); // row 1 fails price only
|
||||
assert_eq!(impacts[1], 1); // row 2 fails area only
|
||||
// row 3 fails both → not counted
|
||||
assert_eq!(total, 1); // only row 0 passes
|
||||
assert_eq!(impacts[0], 1); // row 1 fails price only
|
||||
assert_eq!(impacts[1], 1); // row 2 fails area only
|
||||
// row 3 fails both → not counted
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -640,9 +644,12 @@ mod tests {
|
|||
// row 2: price=600, type=0(A) → fails numeric only
|
||||
let tq = test_quant(2, 1);
|
||||
let feature_data = vec![
|
||||
tq.encode(0, 150.0), 0u16, // row 0
|
||||
tq.encode(0, 150.0), 2u16, // row 1
|
||||
tq.encode(0, 600.0), 0u16, // row 2
|
||||
tq.encode(0, 150.0),
|
||||
0u16, // row 0
|
||||
tq.encode(0, 150.0),
|
||||
2u16, // row 1
|
||||
tq.encode(0, 600.0),
|
||||
0u16, // row 2
|
||||
];
|
||||
let num_filters = vec![ParsedFilter {
|
||||
feat_idx: 0,
|
||||
|
|
@ -662,9 +669,9 @@ mod tests {
|
|||
(0..3u32).into_iter(),
|
||||
);
|
||||
|
||||
assert_eq!(total, 1); // row 0
|
||||
assert_eq!(impacts[0], 1); // row 2 fails numeric only → impacts[0]
|
||||
assert_eq!(impacts[1], 1); // row 1 fails enum only → impacts[1]
|
||||
assert_eq!(total, 1); // row 0
|
||||
assert_eq!(impacts[0], 1); // row 2 fails numeric only → impacts[0]
|
||||
assert_eq!(impacts[1], 1); // row 1 fails enum only → impacts[1]
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
|||
|
|
@ -946,10 +946,7 @@ pub async fn post_ai_filters(
|
|||
|
||||
// Auto-inject Listing status filter for historical mode
|
||||
if let Value::Object(ref mut map) = filters {
|
||||
map.insert(
|
||||
"Listing status".to_string(),
|
||||
json!(["Historical sale"]),
|
||||
);
|
||||
map.insert("Listing status".to_string(), json!(["Historical sale"]));
|
||||
}
|
||||
|
||||
// Count matching properties and refine if too restrictive
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue