diff --git a/server-rs/src/data/property.rs b/server-rs/src/data/property.rs index d60564f..25f4601 100644 --- a/server-rs/src/data/property.rs +++ b/server-rs/src/data/property.rs @@ -656,7 +656,7 @@ impl PropertyData { tracing::info!("Extracting numeric feature columns"); let numeric_col_major: Vec> = numeric_names - .iter() + .par_iter() .map(|name| { let column = df .column(name) @@ -733,12 +733,11 @@ impl PropertyData { tracing::info!("Building enum features"); // enum_col_major: Vec<(values_list, encoded_as_f32)> - let mut enum_col_major: Vec<(Vec, Vec)> = Vec::new(); - for &name in &enum_names { - if let Ok(column_data) = df.column(name) { - let string_column = column_data - .str() - .with_context(|| format!("Enum column '{name}' is not a string column"))?; + let enum_col_major: Vec<(Vec, Vec)> = enum_names + .par_iter() + .filter_map(|&name| { + let column_data = df.column(name).ok()?; + let string_column = column_data.str().ok()?; let unique_set: std::collections::HashSet = string_column .into_iter() .filter_map(|value| { @@ -795,9 +794,9 @@ impl PropertyData { .collect(); tracing::debug!(column = %name, unique_values = unique.len(), "Enum feature encoded as f32"); - enum_col_major.push((unique, encoded)); - } - } + Some((unique, encoded)) + }) + .collect(); // Extract is_approx_build_date: 0.0 = exact, anything else (1.0/NaN) = approximate let is_approx_build_date_raw: Vec = if has_approx_col { @@ -920,7 +919,7 @@ impl PropertyData { let grid_cols = ((max_lon_val - min_lon_val) / grid_cell_size).ceil() as u64 + 1; let mut perm: Vec = (0..row_count as u32).collect(); - perm.sort_unstable_by_key(|&perm_index| { + perm.par_sort_unstable_by_key(|&perm_index| { let grid_row = ((lat[perm_index as usize] - min_lat_val) / grid_cell_size) as u64; let grid_col = ((lon[perm_index as usize] - min_lon_val) / grid_cell_size) as u64; grid_row * grid_cols + grid_col @@ -1036,18 +1035,20 @@ impl PropertyData { // Combines numeric and enum features into a single feature_data array. tracing::info!("Transposing to row-major layout (spatially sorted)"); let mut feature_data = vec![f32::NAN; row_count * num_features]; - for (new_row, &old_row) in perm.iter().enumerate() { - let old_index = old_row as usize; - let dst_base = new_row * num_features; - // Numeric features - for (feat_idx, col_vec) in numeric_col_major.iter().enumerate() { - feature_data[dst_base + feat_idx] = col_vec[old_index]; - } - // Enum features (stored as f32 indices) - for (enum_idx, (_, encoded)) in enum_col_major.iter().enumerate() { - feature_data[dst_base + num_numeric + enum_idx] = encoded[old_index]; - } - } + feature_data + .par_chunks_mut(num_features) + .enumerate() + .for_each(|(new_row, row_slice)| { + let old_index = perm[new_row] as usize; + // Numeric features + for (feat_idx, col_vec) in numeric_col_major.iter().enumerate() { + row_slice[feat_idx] = col_vec[old_index]; + } + // Enum features (stored as f32 indices) + for (enum_idx, (_, encoded)) in enum_col_major.iter().enumerate() { + row_slice[num_numeric + enum_idx] = encoded[old_index]; + } + }); tracing::info!("Data loading complete");