Fable findings in data
This commit is contained in:
parent
b98bc6d611
commit
6a33b03fdf
20 changed files with 1502 additions and 274 deletions
|
|
@ -17,6 +17,14 @@ use super::run_polars_io;
|
|||
/// (e.g. `"Burglary (by year)"`). Stripped to derive the display name.
|
||||
pub const BY_YEAR_SUFFIX: &str = " (by year)";
|
||||
|
||||
/// Per-postcode police-force coverage calendar column: `list[struct{year,
|
||||
/// months}]` of the years the postcode's home force published enough months.
|
||||
/// police.uk has multi-year publication gaps for whole forces (e.g. Greater
|
||||
/// Manchester 2019-07 onwards), and a missing year is *no data*, not zero
|
||||
/// crime — consumers must exclude uncovered (postcode, year)s instead of
|
||||
/// charting them as zeros.
|
||||
pub const COVERAGE_COLUMN: &str = "covered_years";
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
pub struct YearPoint {
|
||||
pub year: i32,
|
||||
|
|
@ -37,6 +45,12 @@ pub struct CrimeByYearData {
|
|||
pub years_by_type: Vec<Vec<i32>>,
|
||||
/// Postcode → all available per-type series for that postcode.
|
||||
pub series_by_postcode: FxHashMap<String, Vec<PostcodeCrimeSeries>>,
|
||||
/// Postcode → years its police force actually published data for (from
|
||||
/// the `covered_years` column). An EMPTY vec means the postcode's crime
|
||||
/// picture is unknown (force gap / unusable geometry) — it must not count
|
||||
/// toward any year. A postcode ABSENT from this map (legacy parquet
|
||||
/// without the column, or a null `covered_years` cell) is treated as covered for every year.
|
||||
pub covered_years_by_postcode: FxHashMap<String, Vec<i32>>,
|
||||
}
|
||||
|
||||
impl CrimeByYearData {
|
||||
|
|
@ -165,9 +179,44 @@ impl CrimeByYearData {
|
|||
years_by_type.push(years_for_type.into_iter().collect());
|
||||
}
|
||||
|
||||
// Force-coverage calendar (optional column: legacy parquets predate it;
|
||||
// their postcodes are treated as fully covered). A row with an empty
|
||||
// list is meaningful — zero covered years — so it IS inserted.
|
||||
let mut covered_years_by_postcode: FxHashMap<String, Vec<i32>> =
|
||||
FxHashMap::default();
|
||||
if let Ok(col) = df.column(COVERAGE_COLUMN) {
|
||||
let list_ca = col
|
||||
.list()
|
||||
.with_context(|| format!("Column '{COVERAGE_COLUMN}' is not a list"))?;
|
||||
for (row, postcode) in postcode_values.iter().enumerate().take(row_count) {
|
||||
let Some(inner) = list_ca.get_as_series(row) else {
|
||||
// Null coverage cell: leave the postcode out of the map, so it is treated as legacy / fully covered.
|
||||
continue;
|
||||
};
|
||||
let mut years: Vec<i32> = Vec::with_capacity(inner.len());
|
||||
if !inner.is_empty() {
|
||||
let structs = inner.struct_().with_context(|| {
|
||||
format!("Inner of '{COVERAGE_COLUMN}' is not a struct")
|
||||
})?;
|
||||
let year_field = structs.field_by_name("year").with_context(|| {
|
||||
format!("Missing 'year' field in '{COVERAGE_COLUMN}'")
|
||||
})?;
|
||||
for idx in 0..inner.len() {
|
||||
match year_field.get(idx).ok() {
|
||||
Some(AnyValue::Int32(y)) => years.push(y),
|
||||
Some(AnyValue::Int64(y)) => years.push(y as i32),
|
||||
_ => continue,
|
||||
}
|
||||
}
|
||||
}
|
||||
covered_years_by_postcode.insert(postcode.clone(), years);
|
||||
}
|
||||
}
|
||||
|
||||
info!(
|
||||
postcodes = series_by_postcode.len(),
|
||||
crime_types = crime_types.len(),
|
||||
with_coverage = covered_years_by_postcode.len(),
|
||||
"Crime-by-year data loaded"
|
||||
);
|
||||
|
||||
|
|
@ -175,6 +224,7 @@ impl CrimeByYearData {
|
|||
crime_types,
|
||||
years_by_type,
|
||||
series_by_postcode,
|
||||
covered_years_by_postcode,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue