server
This commit is contained in:
parent
8dc939d761
commit
d98819b569
12 changed files with 157 additions and 161 deletions
|
|
@ -1,5 +1,5 @@
|
|||
//! Per-LSOA per-crime-type per-year crime counts, loaded from a side parquet
|
||||
//! and used by the right pane to plot crime-over-time. Filtering is not
|
||||
//! Per-postcode per-crime-type per-year crime counts, loaded from a side
|
||||
//! parquet and used by the right pane to plot crime-over-time. Filtering is not
|
||||
//! supported — this data is display-only.
|
||||
|
||||
use std::path::Path;
|
||||
|
|
@ -23,8 +23,8 @@ pub struct YearPoint {
|
|||
pub count: f32,
|
||||
}
|
||||
|
||||
/// One per crime type: ordered list of (year, count) for a single LSOA.
|
||||
pub struct LsoaCrimeSeries {
|
||||
/// One per crime type: ordered list of (year, count) for a single postcode.
|
||||
pub struct PostcodeCrimeSeries {
|
||||
/// Index into `crime_types`.
|
||||
pub type_idx: u16,
|
||||
pub points: Vec<YearPoint>,
|
||||
|
|
@ -35,8 +35,8 @@ pub struct CrimeByYearData {
|
|||
pub crime_types: Vec<String>,
|
||||
/// All years available for each crime type, same order as `crime_types`.
|
||||
pub years_by_type: Vec<Vec<i32>>,
|
||||
/// LSOA code → all available per-type series for that LSOA.
|
||||
pub series_by_lsoa: FxHashMap<String, Vec<LsoaCrimeSeries>>,
|
||||
/// Postcode → all available per-type series for that postcode.
|
||||
pub series_by_postcode: FxHashMap<String, Vec<PostcodeCrimeSeries>>,
|
||||
}
|
||||
|
||||
impl CrimeByYearData {
|
||||
|
|
@ -44,7 +44,7 @@ impl CrimeByYearData {
|
|||
Self {
|
||||
crime_types: Vec::new(),
|
||||
years_by_type: Vec::new(),
|
||||
series_by_lsoa: FxHashMap::default(),
|
||||
series_by_postcode: FxHashMap::default(),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -67,20 +67,20 @@ impl CrimeByYearData {
|
|||
format!("Failed to read crime-by-year parquet at {}", path.display())
|
||||
})?;
|
||||
|
||||
let lsoa_col = df
|
||||
.column("LSOA code")
|
||||
.context("crime-by-year parquet missing 'LSOA code' column")?
|
||||
let postcode_col = df
|
||||
.column("postcode")
|
||||
.context("crime-by-year parquet missing 'postcode' column")?
|
||||
.str()
|
||||
.context("'LSOA code' column is not a string")?;
|
||||
let lsoa_values: Vec<String> = lsoa_col
|
||||
.context("'postcode' column is not a string")?;
|
||||
let postcode_values: Vec<String> = postcode_col
|
||||
.into_iter()
|
||||
.enumerate()
|
||||
.map(|(row, value)| {
|
||||
let value =
|
||||
value.with_context(|| format!("crime-by-year row {row} has null LSOA code"))?;
|
||||
value.with_context(|| format!("crime-by-year row {row} has null postcode"))?;
|
||||
let trimmed = value.trim();
|
||||
if trimmed.is_empty() {
|
||||
bail!("crime-by-year row {row} has blank LSOA code");
|
||||
bail!("crime-by-year row {row} has blank postcode");
|
||||
}
|
||||
Ok(trimmed.to_string())
|
||||
})
|
||||
|
|
@ -106,7 +106,8 @@ impl CrimeByYearData {
|
|||
|
||||
let crime_types: Vec<String> = crime_type_cols.iter().map(|(t, _)| t.clone()).collect();
|
||||
|
||||
let mut series_by_lsoa: FxHashMap<String, Vec<LsoaCrimeSeries>> = FxHashMap::default();
|
||||
let mut series_by_postcode: FxHashMap<String, Vec<PostcodeCrimeSeries>> =
|
||||
FxHashMap::default();
|
||||
let mut years_by_type: Vec<Vec<i32>> = Vec::with_capacity(crime_type_cols.len());
|
||||
let row_count = df.height();
|
||||
|
||||
|
|
@ -161,10 +162,10 @@ impl CrimeByYearData {
|
|||
}
|
||||
points.sort_by_key(|p| p.year);
|
||||
|
||||
series_by_lsoa
|
||||
.entry(lsoa_values[row].clone())
|
||||
series_by_postcode
|
||||
.entry(postcode_values[row].clone())
|
||||
.or_default()
|
||||
.push(LsoaCrimeSeries {
|
||||
.push(PostcodeCrimeSeries {
|
||||
type_idx: type_idx as u16,
|
||||
points,
|
||||
});
|
||||
|
|
@ -173,7 +174,7 @@ impl CrimeByYearData {
|
|||
}
|
||||
|
||||
info!(
|
||||
lsoas = series_by_lsoa.len(),
|
||||
postcodes = series_by_postcode.len(),
|
||||
crime_types = crime_types.len(),
|
||||
"Crime-by-year data loaded"
|
||||
);
|
||||
|
|
@ -181,7 +182,7 @@ impl CrimeByYearData {
|
|||
Ok(Self {
|
||||
crime_types,
|
||||
years_by_type,
|
||||
series_by_lsoa,
|
||||
series_by_postcode,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -831,10 +831,6 @@ pub struct PropertyData {
|
|||
/// Interned postcodes: reader is thread-safe, keys index into it.
|
||||
postcode_interner: lasso::RodeoReader,
|
||||
postcode_keys: Vec<lasso::Spur>,
|
||||
/// Interned LSOA (2021) codes per row.
|
||||
/// Used to look up per-LSOA side tables (e.g. crime time series).
|
||||
lsoa_interner: lasso::RodeoReader,
|
||||
lsoa_keys: Vec<lasso::Spur>,
|
||||
/// Rows for each postcode, keyed by the interned postcode key.
|
||||
postcode_row_index: FxHashMap<lasso::Spur, Vec<u32>>,
|
||||
/// Inverted index from address tokens to property rows.
|
||||
|
|
@ -881,11 +877,6 @@ impl PropertyData {
|
|||
self.postcode_interner.resolve(&self.postcode_keys[row])
|
||||
}
|
||||
|
||||
/// Get the LSOA (2021) code for a given row.
|
||||
pub fn lsoa(&self, row: usize) -> &str {
|
||||
self.lsoa_interner.resolve(&self.lsoa_keys[row])
|
||||
}
|
||||
|
||||
/// Get postcode components for field-level borrowing (avoids conflicting borrows with feature_data).
|
||||
pub fn postcode_parts(&self) -> (&lasso::RodeoReader, &[lasso::Spur]) {
|
||||
(&self.postcode_interner, &self.postcode_keys)
|
||||
|
|
@ -1541,15 +1532,6 @@ impl PropertyData {
|
|||
}
|
||||
}
|
||||
|
||||
// LSOA (2021) code per row, brought in via the postcode join. Used as a
|
||||
// lookup key into per-LSOA side tables (e.g. crime time series).
|
||||
match schema.get("lsoa21") {
|
||||
Some(dtype) if matches!(dtype, DataType::String) || dtype.is_categorical() => {}
|
||||
Some(dtype) => bail!("Column 'lsoa21' has unexpected type {:?}", dtype),
|
||||
None => bail!("Required column 'lsoa21' not found in joined property data"),
|
||||
}
|
||||
select_exprs.push(col("lsoa21").cast(DataType::String));
|
||||
|
||||
// Enum features as String
|
||||
for &name in &enum_names {
|
||||
select_exprs.push(col(name).cast(DataType::String));
|
||||
|
|
@ -1704,33 +1686,8 @@ impl PropertyData {
|
|||
Ok(vec![None; row_count])
|
||||
}
|
||||
};
|
||||
let extract_required_trimmed_string_col =
|
||||
|df: &DataFrame, name: &str| -> anyhow::Result<Vec<String>> {
|
||||
let column = df
|
||||
.column(name)
|
||||
.with_context(|| format!("Required column '{name}' not found in parquet"))?;
|
||||
let string_column = column
|
||||
.str()
|
||||
.with_context(|| format!("Column '{name}' is not a string column"))?;
|
||||
string_column
|
||||
.into_iter()
|
||||
.enumerate()
|
||||
.map(|(row, value)| {
|
||||
let value = value.with_context(|| {
|
||||
format!("Required column '{name}' has null at row {row}")
|
||||
})?;
|
||||
let trimmed = value.trim();
|
||||
if trimmed.is_empty() {
|
||||
bail!("Required column '{name}' has blank value at row {row}");
|
||||
}
|
||||
Ok(trimmed.to_string())
|
||||
})
|
||||
.collect()
|
||||
};
|
||||
|
||||
let property_sub_type_raw = extract_optional_string_col(&df, "Property sub-type")?;
|
||||
let price_qualifier_raw = extract_optional_string_col(&df, "Price qualifier")?;
|
||||
let lsoa_raw = extract_required_trimmed_string_col(&df, "lsoa21")?;
|
||||
|
||||
tracing::info!("Building enum features");
|
||||
// enum_col_major: Vec<(values_list, encoded_as_f32)>
|
||||
|
|
@ -2041,14 +1998,6 @@ impl PropertyData {
|
|||
}
|
||||
let postcode_interner = postcode_rodeo.into_reader();
|
||||
|
||||
// Intern LSOA codes (permuted).
|
||||
let mut lsoa_rodeo = lasso::Rodeo::default();
|
||||
let mut lsoa_keys: Vec<lasso::Spur> = Vec::with_capacity(row_count);
|
||||
for &perm_index in perm.iter() {
|
||||
lsoa_keys.push(lsoa_rodeo.get_or_intern(&lsoa_raw[perm_index as usize]));
|
||||
}
|
||||
let lsoa_interner = lsoa_rodeo.into_reader();
|
||||
|
||||
let row_to_poi_metric_idx: Vec<u32> = if poi_metrics.is_empty() {
|
||||
vec![NO_POI_METRIC_ROW; row_count]
|
||||
} else {
|
||||
|
|
@ -2220,8 +2169,6 @@ impl PropertyData {
|
|||
address_lengths,
|
||||
postcode_interner,
|
||||
postcode_keys,
|
||||
lsoa_interner,
|
||||
lsoa_keys,
|
||||
postcode_row_index,
|
||||
address_token_index,
|
||||
address_prefix_index,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue