server
Some checks failed
Build and publish Docker image / build-and-push (push) Failing after 6m7s
CI / Check (push) Failing after 7m21s

This commit is contained in:
Andras Schmelczer 2026-05-31 13:19:26 +01:00
parent 8dc939d761
commit d98819b569
12 changed files with 157 additions and 161 deletions

View file

@ -1,5 +1,5 @@
//! Per-LSOA per-crime-type per-year crime counts, loaded from a side parquet
//! and used by the right pane to plot crime-over-time. Filtering is not
//! Per-postcode per-crime-type per-year crime counts, loaded from a side
//! parquet and used by the right pane to plot crime-over-time. Filtering is not
//! supported — this data is display-only.
use std::path::Path;
@ -23,8 +23,8 @@ pub struct YearPoint {
pub count: f32,
}
/// One per crime type: ordered list of (year, count) for a single LSOA.
pub struct LsoaCrimeSeries {
/// One per crime type: ordered list of (year, count) for a single postcode.
pub struct PostcodeCrimeSeries {
/// Index into `crime_types`.
pub type_idx: u16,
pub points: Vec<YearPoint>,
@ -35,8 +35,8 @@ pub struct CrimeByYearData {
pub crime_types: Vec<String>,
/// All years available for each crime type, same order as `crime_types`.
pub years_by_type: Vec<Vec<i32>>,
/// LSOA code → all available per-type series for that LSOA.
pub series_by_lsoa: FxHashMap<String, Vec<LsoaCrimeSeries>>,
/// Postcode → all available per-type series for that postcode.
pub series_by_postcode: FxHashMap<String, Vec<PostcodeCrimeSeries>>,
}
impl CrimeByYearData {
@ -44,7 +44,7 @@ impl CrimeByYearData {
Self {
crime_types: Vec::new(),
years_by_type: Vec::new(),
series_by_lsoa: FxHashMap::default(),
series_by_postcode: FxHashMap::default(),
}
}
@ -67,20 +67,20 @@ impl CrimeByYearData {
format!("Failed to read crime-by-year parquet at {}", path.display())
})?;
let lsoa_col = df
.column("LSOA code")
.context("crime-by-year parquet missing 'LSOA code' column")?
let postcode_col = df
.column("postcode")
.context("crime-by-year parquet missing 'postcode' column")?
.str()
.context("'LSOA code' column is not a string")?;
let lsoa_values: Vec<String> = lsoa_col
.context("'postcode' column is not a string")?;
let postcode_values: Vec<String> = postcode_col
.into_iter()
.enumerate()
.map(|(row, value)| {
let value =
value.with_context(|| format!("crime-by-year row {row} has null LSOA code"))?;
value.with_context(|| format!("crime-by-year row {row} has null postcode"))?;
let trimmed = value.trim();
if trimmed.is_empty() {
bail!("crime-by-year row {row} has blank LSOA code");
bail!("crime-by-year row {row} has blank postcode");
}
Ok(trimmed.to_string())
})
@ -106,7 +106,8 @@ impl CrimeByYearData {
let crime_types: Vec<String> = crime_type_cols.iter().map(|(t, _)| t.clone()).collect();
let mut series_by_lsoa: FxHashMap<String, Vec<LsoaCrimeSeries>> = FxHashMap::default();
let mut series_by_postcode: FxHashMap<String, Vec<PostcodeCrimeSeries>> =
FxHashMap::default();
let mut years_by_type: Vec<Vec<i32>> = Vec::with_capacity(crime_type_cols.len());
let row_count = df.height();
@ -161,10 +162,10 @@ impl CrimeByYearData {
}
points.sort_by_key(|p| p.year);
series_by_lsoa
.entry(lsoa_values[row].clone())
series_by_postcode
.entry(postcode_values[row].clone())
.or_default()
.push(LsoaCrimeSeries {
.push(PostcodeCrimeSeries {
type_idx: type_idx as u16,
points,
});
@ -173,7 +174,7 @@ impl CrimeByYearData {
}
info!(
lsoas = series_by_lsoa.len(),
postcodes = series_by_postcode.len(),
crime_types = crime_types.len(),
"Crime-by-year data loaded"
);
@ -181,7 +182,7 @@ impl CrimeByYearData {
Ok(Self {
crime_types,
years_by_type,
series_by_lsoa,
series_by_postcode,
})
}
}

View file

@ -831,10 +831,6 @@ pub struct PropertyData {
/// Interned postcodes: reader is thread-safe, keys index into it.
postcode_interner: lasso::RodeoReader,
postcode_keys: Vec<lasso::Spur>,
/// Interned LSOA (2021) codes per row.
/// Used to look up per-LSOA side tables (e.g. crime time series).
lsoa_interner: lasso::RodeoReader,
lsoa_keys: Vec<lasso::Spur>,
/// Rows for each postcode, keyed by the interned postcode key.
postcode_row_index: FxHashMap<lasso::Spur, Vec<u32>>,
/// Inverted index from address tokens to property rows.
@ -881,11 +877,6 @@ impl PropertyData {
self.postcode_interner.resolve(&self.postcode_keys[row])
}
/// Get the LSOA (2021) code for a given row.
///
/// The row's interned key is read from `lsoa_keys` and resolved back to its
/// string through `lsoa_interner`.
///
/// # Panics
///
/// Panics if `row` is out of bounds for `lsoa_keys`.
pub fn lsoa(&self, row: usize) -> &str {
    let key = &self.lsoa_keys[row];
    self.lsoa_interner.resolve(key)
}
/// Get postcode components for field-level borrowing (avoids conflicting borrows with feature_data).
pub fn postcode_parts(&self) -> (&lasso::RodeoReader, &[lasso::Spur]) {
(&self.postcode_interner, &self.postcode_keys)
@ -1541,15 +1532,6 @@ impl PropertyData {
}
}
// LSOA (2021) code per row, brought in via the postcode join. Used as a
// lookup key into per-LSOA side tables (e.g. crime time series).
match schema.get("lsoa21") {
Some(dtype) if matches!(dtype, DataType::String) || dtype.is_categorical() => {}
Some(dtype) => bail!("Column 'lsoa21' has unexpected type {:?}", dtype),
None => bail!("Required column 'lsoa21' not found in joined property data"),
}
select_exprs.push(col("lsoa21").cast(DataType::String));
// Enum features as String
for &name in &enum_names {
select_exprs.push(col(name).cast(DataType::String));
@ -1704,33 +1686,8 @@ impl PropertyData {
Ok(vec![None; row_count])
}
};
// Reads column `name` of `df` as a required string column and returns one
// trimmed, owned value per row. Fails on the first problem found: the column
// is absent, is not a string column, or a row holds a null or a blank
// (empty after trimming) value.
let extract_required_trimmed_string_col =
    |df: &DataFrame, name: &str| -> anyhow::Result<Vec<String>> {
        let strings = df
            .column(name)
            .with_context(|| format!("Required column '{name}' not found in parquet"))?
            .str()
            .with_context(|| format!("Column '{name}' is not a string column"))?;

        let mut trimmed_values = Vec::new();
        for (row, cell) in strings.into_iter().enumerate() {
            // A null cell is an error for a required column.
            let raw = cell
                .with_context(|| format!("Required column '{name}' has null at row {row}"))?;
            let text = raw.trim();
            if text.is_empty() {
                bail!("Required column '{name}' has blank value at row {row}");
            }
            trimmed_values.push(text.to_string());
        }
        Ok(trimmed_values)
    };
let property_sub_type_raw = extract_optional_string_col(&df, "Property sub-type")?;
let price_qualifier_raw = extract_optional_string_col(&df, "Price qualifier")?;
let lsoa_raw = extract_required_trimmed_string_col(&df, "lsoa21")?;
tracing::info!("Building enum features");
// enum_col_major: Vec<(values_list, encoded_as_f32)>
@ -2041,14 +1998,6 @@ impl PropertyData {
}
let postcode_interner = postcode_rodeo.into_reader();
// Intern LSOA codes (permuted).
let mut lsoa_rodeo = lasso::Rodeo::default();
let mut lsoa_keys: Vec<lasso::Spur> = Vec::with_capacity(row_count);
for &perm_index in perm.iter() {
lsoa_keys.push(lsoa_rodeo.get_or_intern(&lsoa_raw[perm_index as usize]));
}
let lsoa_interner = lsoa_rodeo.into_reader();
let row_to_poi_metric_idx: Vec<u32> = if poi_metrics.is_empty() {
vec![NO_POI_METRIC_ROW; row_count]
} else {
@ -2220,8 +2169,6 @@ impl PropertyData {
address_lengths,
postcode_interner,
postcode_keys,
lsoa_interner,
lsoa_keys,
postcode_row_index,
address_token_index,
address_prefix_index,