Fable findings in data
This commit is contained in:
parent
b98bc6d611
commit
6a33b03fdf
20 changed files with 1502 additions and 274 deletions
|
|
@ -17,6 +17,14 @@ use super::run_polars_io;
|
|||
/// (e.g. `"Burglary (by year)"`). Stripped to derive the display name.
|
||||
pub const BY_YEAR_SUFFIX: &str = " (by year)";
|
||||
|
||||
/// Per-postcode police-force coverage calendar column: `list[struct{year,
|
||||
/// months}]` of the years the postcode's home force published enough months.
|
||||
/// police.uk has multi-year publication gaps for whole forces (e.g. Greater
|
||||
/// Manchester 2019-07 onwards), and a missing year is *no data*, not zero
|
||||
/// crime — consumers must exclude uncovered (postcode, year)s instead of
|
||||
/// charting them as zeros.
|
||||
pub const COVERAGE_COLUMN: &str = "covered_years";
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
pub struct YearPoint {
|
||||
pub year: i32,
|
||||
|
|
@ -37,6 +45,12 @@ pub struct CrimeByYearData {
|
|||
pub years_by_type: Vec<Vec<i32>>,
|
||||
/// Postcode → all available per-type series for that postcode.
|
||||
pub series_by_postcode: FxHashMap<String, Vec<PostcodeCrimeSeries>>,
|
||||
/// Postcode → years its police force actually published data for (from
|
||||
/// the `covered_years` column). An EMPTY vec means the postcode's crime
|
||||
/// picture is unknown (force gap / unusable geometry) — it must not count
|
||||
/// toward any year. A postcode ABSENT from this map (legacy parquet
|
||||
/// without the column, or a row whose coverage list is null) is treated as
/// covered for every year.
|
||||
pub covered_years_by_postcode: FxHashMap<String, Vec<i32>>,
|
||||
}
|
||||
|
||||
impl CrimeByYearData {
|
||||
|
|
@ -165,9 +179,44 @@ impl CrimeByYearData {
|
|||
years_by_type.push(years_for_type.into_iter().collect());
|
||||
}
|
||||
|
||||
// Force-coverage calendar (optional column: legacy parquets predate it;
|
||||
// their postcodes are treated as fully covered). A row with an empty
|
||||
// list is meaningful — zero covered years — so it IS inserted.
|
||||
let mut covered_years_by_postcode: FxHashMap<String, Vec<i32>> =
|
||||
FxHashMap::default();
|
||||
if let Ok(col) = df.column(COVERAGE_COLUMN) {
|
||||
let list_ca = col
|
||||
.list()
|
||||
.with_context(|| format!("Column '{COVERAGE_COLUMN}' is not a list"))?;
|
||||
for (row, postcode) in postcode_values.iter().enumerate().take(row_count) {
|
||||
let Some(inner) = list_ca.get_as_series(row) else {
|
||||
// Null coverage: treat as legacy/fully covered (skip).
|
||||
continue;
|
||||
};
|
||||
let mut years: Vec<i32> = Vec::with_capacity(inner.len());
|
||||
if !inner.is_empty() {
|
||||
let structs = inner.struct_().with_context(|| {
|
||||
format!("Inner of '{COVERAGE_COLUMN}' is not a struct")
|
||||
})?;
|
||||
let year_field = structs.field_by_name("year").with_context(|| {
|
||||
format!("Missing 'year' field in '{COVERAGE_COLUMN}'")
|
||||
})?;
|
||||
for idx in 0..inner.len() {
|
||||
match year_field.get(idx).ok() {
|
||||
Some(AnyValue::Int32(y)) => years.push(y),
|
||||
Some(AnyValue::Int64(y)) => years.push(y as i32),
|
||||
_ => continue,
|
||||
}
|
||||
}
|
||||
}
|
||||
covered_years_by_postcode.insert(postcode.clone(), years);
|
||||
}
|
||||
}
|
||||
|
||||
info!(
|
||||
postcodes = series_by_postcode.len(),
|
||||
crime_types = crime_types.len(),
|
||||
with_coverage = covered_years_by_postcode.len(),
|
||||
"Crime-by-year data loaded"
|
||||
);
|
||||
|
||||
|
|
@ -175,6 +224,7 @@ impl CrimeByYearData {
|
|||
crime_types,
|
||||
years_by_type,
|
||||
series_by_postcode,
|
||||
covered_years_by_postcode,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -474,7 +474,7 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
},
|
||||
step: 1.0,
|
||||
description: "Aggregate of serious crime categories per year",
|
||||
detail: "Sum of violence, robbery, burglary, and weapons possession per year within 50m of the postcode, counted from police.uk street-level crime points (anonymised, snapped to nearby map points). Provides a single serious crime metric.",
|
||||
detail: "Sum of violence, robbery, burglary, and weapons possession per year near the postcode, counted from police.uk street-level crime points (anonymised, snapped to nearby map points). This is an area-normalised incident density for the surrounding streets, not a per-resident risk: busy commercial centres rank high however few people live there. Averaged over the months the local police force actually published data; known force gaps (e.g. Greater Manchester since mid-2019) are excluded rather than counted as zero crime.",
|
||||
source: "crime",
|
||||
prefix: "",
|
||||
suffix: "/yr",
|
||||
|
|
@ -489,7 +489,7 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
},
|
||||
step: 1.0,
|
||||
description: "Aggregate of minor crime categories per year",
|
||||
detail: "Sum of anti-social behaviour, shoplifting, bicycle theft, and other lower-severity crime per year within 50m of the postcode, counted from police.uk street-level crime points (anonymised, snapped to nearby map points). Provides a single minor crime metric.",
|
||||
detail: "Sum of anti-social behaviour, shoplifting, bicycle theft, and other lower-severity crime per year near the postcode, counted from police.uk street-level crime points (anonymised, snapped to nearby map points). This is an area-normalised incident density for the surrounding streets, not a per-resident risk: busy commercial centres rank high however few people live there. Averaged over the months the local police force actually published data; known force gaps (e.g. Greater Manchester since mid-2019) are excluded rather than counted as zero crime.",
|
||||
source: "crime",
|
||||
prefix: "",
|
||||
suffix: "/yr",
|
||||
|
|
@ -504,7 +504,7 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
},
|
||||
step: 1.0,
|
||||
description: "Average yearly violent and sexual offences in the area",
|
||||
detail: "Average number of violence and sexual offences per year within 50m of the postcode, from police.uk street-level crime data. Includes assault, harassment, and sexual offences.",
|
||||
detail: "Average number of violence and sexual offences per year near the postcode, from police.uk street-level crime data. Includes assault, harassment, and sexual offences.",
|
||||
source: "crime",
|
||||
prefix: "",
|
||||
suffix: "/yr",
|
||||
|
|
@ -519,7 +519,7 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
},
|
||||
step: 1.0,
|
||||
description: "Average yearly burglary offences in the area",
|
||||
detail: "Average number of burglary offences per year within 50m of the postcode, from police.uk street-level crime data. Includes residential and commercial burglary.",
|
||||
detail: "Average number of burglary offences per year near the postcode, from police.uk street-level crime data. Includes residential and commercial burglary.",
|
||||
source: "crime",
|
||||
prefix: "",
|
||||
suffix: "/yr",
|
||||
|
|
@ -534,7 +534,7 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
},
|
||||
step: 1.0,
|
||||
description: "Average yearly robbery offences in the area",
|
||||
detail: "Average number of robbery offences per year within 50m of the postcode, from police.uk street-level crime data. Robbery involves theft with force or threat of force.",
|
||||
detail: "Average number of robbery offences per year near the postcode, from police.uk street-level crime data. Robbery involves theft with force or threat of force.",
|
||||
source: "crime",
|
||||
prefix: "",
|
||||
suffix: "/yr",
|
||||
|
|
@ -549,7 +549,7 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
},
|
||||
step: 1.0,
|
||||
description: "Average yearly vehicle crime in the area",
|
||||
detail: "Average number of vehicle crime incidents per year within 50m of the postcode, from police.uk street-level crime data. Includes theft of and from vehicles.",
|
||||
detail: "Average number of vehicle crime incidents per year near the postcode, from police.uk street-level crime data. Includes theft of and from vehicles.",
|
||||
source: "crime",
|
||||
prefix: "",
|
||||
suffix: "/yr",
|
||||
|
|
@ -564,7 +564,7 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
},
|
||||
step: 1.0,
|
||||
description: "Average yearly anti-social behaviour incidents in the area",
|
||||
detail: "Average number of anti-social behaviour incidents per year within 50m of the postcode, from police.uk street-level crime data. Includes nuisance, environmental, and personal anti-social behaviour.",
|
||||
detail: "Average number of anti-social behaviour incidents per year near the postcode, from police.uk street-level crime data. Includes nuisance, environmental, and personal anti-social behaviour.",
|
||||
source: "crime",
|
||||
prefix: "",
|
||||
suffix: "/yr",
|
||||
|
|
@ -579,7 +579,7 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
},
|
||||
step: 1.0,
|
||||
description: "Average yearly criminal damage and arson in the area",
|
||||
detail: "Average number of criminal damage and arson incidents per year within 50m of the postcode, from police.uk street-level crime data.",
|
||||
detail: "Average number of criminal damage and arson incidents per year near the postcode, from police.uk street-level crime data.",
|
||||
source: "crime",
|
||||
prefix: "",
|
||||
suffix: "/yr",
|
||||
|
|
@ -594,7 +594,7 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
},
|
||||
step: 1.0,
|
||||
description: "Average yearly other theft offences in the area",
|
||||
detail: "Average number of 'other theft' offences per year within 50m of the postcode, from police.uk street-level crime data. Includes theft not classified under burglary, vehicle crime, shoplifting, or bicycle theft.",
|
||||
detail: "Average number of 'other theft' offences per year near the postcode, from police.uk street-level crime data. Includes theft not classified under burglary, vehicle crime, shoplifting, or bicycle theft.",
|
||||
source: "crime",
|
||||
prefix: "",
|
||||
suffix: "/yr",
|
||||
|
|
@ -609,7 +609,7 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
},
|
||||
step: 1.0,
|
||||
description: "Average yearly theft from the person in the area",
|
||||
detail: "Average number of theft from the person offences per year within 50m of the postcode, from police.uk street-level crime data. Includes pickpocketing and bag snatching without force.",
|
||||
detail: "Average number of theft from the person offences per year near the postcode, from police.uk street-level crime data. Includes pickpocketing and bag snatching without force.",
|
||||
source: "crime",
|
||||
prefix: "",
|
||||
suffix: "/yr",
|
||||
|
|
@ -624,7 +624,7 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
},
|
||||
step: 1.0,
|
||||
description: "Average yearly shoplifting offences in the area",
|
||||
detail: "Average number of shoplifting offences per year within 50m of the postcode, from police.uk street-level crime data.",
|
||||
detail: "Average number of shoplifting offences per year near the postcode, from police.uk street-level crime data.",
|
||||
source: "crime",
|
||||
prefix: "",
|
||||
suffix: "/yr",
|
||||
|
|
@ -639,7 +639,7 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
},
|
||||
step: 1.0,
|
||||
description: "Average yearly bicycle theft in the area",
|
||||
detail: "Average number of bicycle theft offences per year within 50m of the postcode, from police.uk street-level crime data.",
|
||||
detail: "Average number of bicycle theft offences per year near the postcode, from police.uk street-level crime data.",
|
||||
source: "crime",
|
||||
prefix: "",
|
||||
suffix: "/yr",
|
||||
|
|
@ -654,7 +654,7 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
},
|
||||
step: 1.0,
|
||||
description: "Average yearly drug offences in the area",
|
||||
detail: "Average number of drug offences per year within 50m of the postcode, from police.uk street-level crime data. Includes possession and trafficking offences.",
|
||||
detail: "Average number of drug offences per year near the postcode, from police.uk street-level crime data. Includes possession and trafficking offences.",
|
||||
source: "crime",
|
||||
prefix: "",
|
||||
suffix: "/yr",
|
||||
|
|
@ -669,7 +669,7 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
},
|
||||
step: 1.0,
|
||||
description: "Average yearly weapons possession offences in the area",
|
||||
detail: "Average number of possession of weapons offences per year within 50m of the postcode, from police.uk street-level crime data.",
|
||||
detail: "Average number of possession of weapons offences per year near the postcode, from police.uk street-level crime data.",
|
||||
source: "crime",
|
||||
prefix: "",
|
||||
suffix: "/yr",
|
||||
|
|
@ -684,7 +684,7 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
},
|
||||
step: 1.0,
|
||||
description: "Average yearly public order offences in the area",
|
||||
detail: "Average number of public order offences per year within 50m of the postcode, from police.uk street-level crime data. Includes causing fear, alarm, or distress.",
|
||||
detail: "Average number of public order offences per year near the postcode, from police.uk street-level crime data. Includes causing fear, alarm, or distress.",
|
||||
source: "crime",
|
||||
prefix: "",
|
||||
suffix: "/yr",
|
||||
|
|
@ -699,7 +699,7 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
},
|
||||
step: 1.0,
|
||||
description: "Average yearly other crime in the area",
|
||||
detail: "Average number of other crime offences per year within 50m of the postcode, from police.uk street-level crime data. A catch-all category for offences not classified elsewhere.",
|
||||
detail: "Average number of other crime offences per year near the postcode, from police.uk street-level crime data. A catch-all category for offences not classified elsewhere.",
|
||||
source: "crime",
|
||||
prefix: "",
|
||||
suffix: "/yr",
|
||||
|
|
|
|||
|
|
@ -391,7 +391,7 @@ pub fn build_system_prompt(
|
|||
- Use EXACT feature names from the list — spelling, capitalisation, and punctuation must match.\n\
|
||||
- \"cheap\" / \"affordable\" = lower price range. \"expensive\" = higher price range.\n\
|
||||
- \"low crime\" / \"safe\" = low values on the Serious crime (avg/yr) and Minor crime (avg/yr) \
|
||||
features (incidents counted within 50m of the postcode). Prefer these aggregates for broad \
|
||||
features (area-normalised incident density near the postcode). Prefer these aggregates for broad \
|
||||
area safety; use specific crime features only when the user names a crime type.\n\
|
||||
- \"quiet\" = low Noise (dB). \"green\" / \"near parks\" = high Number of amenities (Park) within 2km \
|
||||
or low Distance to nearest park (km), depending on wording.\n\
|
||||
|
|
@ -1167,7 +1167,8 @@ pub async fn post_ai_filters(
|
|||
.to_string();
|
||||
|
||||
// Count matching properties and refine if too restrictive
|
||||
let (match_count, match_bounds) = count_matching_rows(&state, &filters, &travel_time_filters);
|
||||
let (match_count, match_bounds) =
|
||||
count_matching_rows(&state, &filters, &travel_time_filters);
|
||||
info!(
|
||||
match_count = match_count,
|
||||
round = round,
|
||||
|
|
|
|||
|
|
@ -258,10 +258,17 @@ pub fn compute_feature_stats(
|
|||
/// Compute property-weighted per-year crime means across the selection.
|
||||
///
|
||||
/// Each matching property contributes its postcode's per-year counts (incidents
|
||||
/// within 50m of that postcode); this is the same property-weighted-average
|
||||
/// shape used elsewhere in the right pane. Postcodes with no series for a given
|
||||
/// crime type contribute 0 for that type (matching how the `(avg/yr)` columns
|
||||
/// treat missing crime types).
|
||||
/// near that postcode); this is the same property-weighted-average shape used
|
||||
/// elsewhere in the right pane.
|
||||
///
|
||||
/// Denominators are COVERAGE-AWARE: police.uk has multi-year publication gaps
|
||||
/// for whole forces (e.g. Greater Manchester from 2019-07), and the pipeline
|
||||
/// emits a `covered_years` calendar per postcode. A postcode only counts toward
|
||||
/// a year's denominator if its force published that year — and only then does
|
||||
/// its missing bar mean a genuine zero. Years no selected postcode covers are
|
||||
/// omitted entirely (charted as gaps, not zeros). Postcodes without coverage
|
||||
/// info (legacy parquet without the column, or a null coverage entry) count
/// toward every year, restoring
|
||||
/// the previous behaviour.
|
||||
pub fn compute_crime_by_year(
|
||||
matching_rows: &[usize],
|
||||
data: &PropertyData,
|
||||
|
|
@ -273,27 +280,34 @@ pub fn compute_crime_by_year(
|
|||
return Vec::new();
|
||||
}
|
||||
|
||||
// For each crime type, accumulate per-year sums and the count of rows whose
|
||||
// postcode exists in the crime side table.
|
||||
let num_types = crime_by_year.crime_types.len();
|
||||
let mut per_type_year_sums: Vec<FxHashMap<i32, f64>> =
|
||||
(0..num_types).map(|_| FxHashMap::default()).collect();
|
||||
let mut per_type_row_counts: Vec<u32> = vec![0; num_types];
|
||||
// Per-year denominator parts: rows whose coverage calendar includes the
|
||||
// year, plus rows with no calendar at all (legacy: covered everywhere).
|
||||
let mut covered_counts: FxHashMap<i32, u32> = FxHashMap::default();
|
||||
let mut fully_covered_rows: u32 = 0;
|
||||
|
||||
for &row in matching_rows {
|
||||
let postcode = data.postcode(row);
|
||||
|
||||
// A postcode absent from the by-year table has no recorded crime within
|
||||
// 50m, so it contributes 0 to every type's per-year sum. It must still be
|
||||
// counted in the denominator: the matching `(avg/yr)` stat counts those
|
||||
// same zero-crime postcodes as 0.0 (crime_by_postcode.parquet has a dense
|
||||
// row for every boundary postcode), so excluding them here would compute
|
||||
// the chart over a smaller population and report a higher magnitude than
|
||||
// the headline. Property postcodes are guaranteed to be boundary
|
||||
// postcodes by the postcode-boundary-match validation, so "absent" means
|
||||
// genuinely zero-crime, not missing data.
|
||||
match crime_by_year.covered_years_by_postcode.get(postcode) {
|
||||
Some(years) => {
|
||||
// An empty list (force gap for the whole window / unusable
|
||||
// boundary geometry) adds nothing: the postcode's crime
|
||||
// picture is unknown and must not dilute any year's mean.
|
||||
for &year in years {
|
||||
*covered_counts.entry(year).or_insert(0) += 1;
|
||||
}
|
||||
}
|
||||
None => fully_covered_rows += 1,
|
||||
}
|
||||
|
||||
// A postcode with a row but no series for a given type had no recorded
|
||||
// incidents of that type: it contributes 0 to the sums, and its covered
|
||||
// years still count in the denominator — a genuine zero. Uncovered
|
||||
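// years are excluded via the denominators instead.
// NOTE(review): the per-year sums accumulated below do not appear to be
// filtered by coverage — this presumably relies on the pipeline emitting no
// points for a postcode's uncovered years (including empty-calendar
// postcodes); confirm, otherwise numerator and denominator can disagree.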
|
||||
if let Some(series_list) = crime_by_year.series_by_postcode.get(postcode) {
|
||||
// For every type the postcode reports, add its per-year counts.
|
||||
for series in series_list {
|
||||
let acc = &mut per_type_year_sums[series.type_idx as usize];
|
||||
for point in &series.points {
|
||||
|
|
@ -301,9 +315,6 @@ pub fn compute_crime_by_year(
|
|||
}
|
||||
}
|
||||
}
|
||||
for c in per_type_row_counts.iter_mut() {
|
||||
*c += 1;
|
||||
}
|
||||
}
|
||||
|
||||
let mut out = Vec::new();
|
||||
|
|
@ -317,10 +328,6 @@ pub fn compute_crime_by_year(
|
|||
continue;
|
||||
}
|
||||
}
|
||||
let row_count = per_type_row_counts[type_idx];
|
||||
if row_count == 0 {
|
||||
continue;
|
||||
}
|
||||
let years = crime_by_year
|
||||
.years_by_type
|
||||
.get(type_idx)
|
||||
|
|
@ -329,15 +336,26 @@ pub fn compute_crime_by_year(
|
|||
if years.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let denom = row_count as f64;
|
||||
let sums = &per_type_year_sums[type_idx];
|
||||
let points: Vec<CrimeYearPoint> = years
|
||||
.iter()
|
||||
.map(|&year| CrimeYearPoint {
|
||||
year,
|
||||
count: (sums.get(&year).copied().unwrap_or(0.0) / denom) as f32,
|
||||
.filter_map(|&year| {
|
||||
let denom = fully_covered_rows
|
||||
+ covered_counts.get(&year).copied().unwrap_or(0);
|
||||
if denom == 0 {
|
||||
// No selected postcode has published data for this year.
|
||||
return None;
|
||||
}
|
||||
Some(CrimeYearPoint {
|
||||
year,
|
||||
count: (sums.get(&year).copied().unwrap_or(0.0) / denom as f64)
|
||||
as f32,
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
if points.is_empty() {
|
||||
continue;
|
||||
}
|
||||
out.push(CrimeYearStats {
|
||||
name: name.clone(),
|
||||
points,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue