This commit is contained in:
Andras Schmelczer 2026-02-15 22:39:49 +00:00
parent 03445188ea
commit 524580eb25
102 changed files with 36625 additions and 1295 deletions

View file

@ -32,7 +32,8 @@ pub struct FeatureConfig {
/// Features whose histogram bins should be exactly 1 unit wide (one per integer).
/// p1/p99 are snapped to integer boundaries before binning.
pub const INTEGER_BIN_FEATURES: &[&str] = &["Number of bedrooms & living rooms"];
/// Names of the features that get one histogram bin per integer value.
///
/// For every feature listed here the bin width is exactly 1 unit, and the
/// p1/p99 limits are snapped to integer boundaries before binning.
pub const INTEGER_BIN_FEATURES: &[&str] = &[
    "Number of bedrooms & living rooms",
    "Bedrooms",
    "Bathrooms",
];
pub struct FeatureGroup {
pub name: &'static str,
@ -68,6 +69,9 @@ pub const IGNORED_COLUMNS: &[&str] = &[
"Is construction date approximate",
"Current energy rating",
"Potential energy rating",
"Property sub-type",
"Listing URL",
"Price qualifier",
];
pub static FEATURE_GROUPS: &[FeatureGroup] = &[
@ -221,6 +225,81 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
raw: true,
absolute: false,
},
// "Asking price": advertised sale price for current listings.
// Fixed bounds 0 to 2,000,000 with a step of 10,000; prefix "£", no suffix.
// Per `detail`, the value is only populated for 'For sale' listings.
FeatureConfig {
name: "Asking price",
bounds: Bounds::Fixed {
min: 0.0,
max: 2_000_000.0,
},
step: 10000.0,
description: "Listed asking price for properties currently for sale",
detail: "The advertised asking price for properties currently listed for sale on online property portals. Only populated for 'For sale' listings; null for historical sales and rentals.",
source: "online-listings",
prefix: "£",
suffix: "",
raw: false,
absolute: true,
},
// "Asking rent (monthly)": advertised rent expressed as a monthly figure.
// Fixed bounds 0 to 10,000 with a step of 50; prefix "£", suffix "/mo".
// NOTE(review): the conversion factors quoted in `detail` (×52/12, /12,
// ×365.25/12, /3) describe processing that is not visible in this entry;
// keep the text in sync with wherever the normalisation is implemented.
FeatureConfig {
name: "Asking rent (monthly)",
bounds: Bounds::Fixed {
min: 0.0,
max: 10_000.0,
},
step: 50.0,
description: "Listed monthly rent for properties currently for rent",
detail: "The advertised rental price normalized to monthly for properties currently listed for rent on online property portals. Weekly rents are converted (×52/12), yearly (/12), daily (×365.25/12), and quarterly (/3). Only populated for 'For rent' listings.",
source: "online-listings",
prefix: "£",
suffix: "/mo",
raw: false,
absolute: true,
},
// "Bedrooms": bedroom count taken from the online listing.
// Fixed bounds 0 to 10 with a step of 1; no prefix or suffix.
// The same name is listed in INTEGER_BIN_FEATURES.
FeatureConfig {
name: "Bedrooms",
bounds: Bounds::Fixed {
min: 0.0,
max: 10.0,
},
step: 1.0,
description: "Number of bedrooms from online listing",
detail: "Number of bedrooms as advertised in the online property listing. Only populated for online listings (for sale and for rent); null for historical sales.",
source: "online-listings",
prefix: "",
suffix: "",
raw: false,
absolute: true,
},
// "Bathrooms": bathroom count taken from the online listing.
// Fixed bounds 0 to 10 with a step of 1; no prefix or suffix.
// The same name is listed in INTEGER_BIN_FEATURES.
FeatureConfig {
name: "Bathrooms",
bounds: Bounds::Fixed {
min: 0.0,
max: 10.0,
},
step: 1.0,
description: "Number of bathrooms from online listing",
detail: "Number of bathrooms as advertised in the online property listing. Only populated for online listings (for sale and for rent); null for historical sales.",
source: "online-listings",
prefix: "",
suffix: "",
raw: false,
absolute: true,
},
// "Listing date": when the listing first appeared online, expressed as a year.
// Fixed bounds 2006 to 2026 with a step of 1; no prefix or suffix.
// NOTE(review): `detail` says the datetime is converted to a fractional year,
// so a listing dated during 2026 would map above max = 2026.0. Confirm how
// values beyond the fixed bounds are handled, or whether max should be raised.
// NOTE(review): this entry sets raw: true / absolute: false, unlike the price
// and room-count entries before it; the meaning of these flags is defined on
// FeatureConfig, which is not visible here.
FeatureConfig {
name: "Listing date",
bounds: Bounds::Fixed {
min: 2006.0,
max: 2026.0,
},
step: 1.0,
description: "Date the property was first listed online",
detail: "The date when the property listing first appeared on the online property portal. Stored as a datetime; converted to fractional year for filtering. Only populated for online listings.",
source: "online-listings",
prefix: "",
suffix: "",
raw: true,
absolute: false,
},
],
},
FeatureGroup {
@ -442,7 +521,43 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
},
],
},
FeatureGroup {
// "Crime summary" group: the two aggregate crime features.
// Both entries use percentile bounds (2 to 98), a step of 1, the "/yr" suffix
// and the "crime" source.
FeatureGroup {
name: "Crime summary",
features: &[
// Aggregate of the serious categories named in `detail`.
FeatureConfig {
name: "Serious crime (avg/yr)",
bounds: Bounds::Percentile {
low: 2.0,
high: 98.0,
},
step: 1.0,
description: "Aggregate of serious crime categories per year",
detail: "Sum of violence, robbery, burglary, and weapons possession per year in the LSOA, from police.uk street-level crime data (2023-2025). Provides a single serious crime metric.",
source: "crime",
prefix: "",
suffix: "/yr",
raw: false,
absolute: false,
},
// Aggregate of the lower-severity categories named in `detail`.
FeatureConfig {
name: "Minor crime (avg/yr)",
bounds: Bounds::Percentile {
low: 2.0,
high: 98.0,
},
step: 1.0,
description: "Aggregate of minor crime categories per year",
detail: "Sum of anti-social behaviour, shoplifting, bicycle theft, and other lower-severity crime per year in the LSOA, from police.uk street-level crime data (2023-2025). Provides a single minor crime metric.",
source: "crime",
prefix: "",
suffix: "/yr",
raw: false,
absolute: false,
},
],
},
FeatureGroup {
name: "Crime",
features: &[
FeatureConfig {
@ -655,36 +770,6 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
raw: false,
absolute: false,
},
// "Serious crime (avg/yr)": aggregate of the serious categories named in `detail`.
// Percentile bounds 2 to 98 with a step of 1; suffix "/yr".
FeatureConfig {
name: "Serious crime (avg/yr)",
bounds: Bounds::Percentile {
low: 2.0,
high: 98.0,
},
step: 1.0,
description: "Aggregate of serious crime categories per year",
detail: "Sum of violence, robbery, burglary, and weapons possession per year in the LSOA, from police.uk street-level crime data (2023-2025). Provides a single serious crime metric.",
source: "crime",
prefix: "",
suffix: "/yr",
raw: false,
absolute: false,
},
// "Minor crime (avg/yr)": aggregate of the lower-severity categories named in `detail`.
// Percentile bounds 2 to 98 with a step of 1; suffix "/yr".
FeatureConfig {
name: "Minor crime (avg/yr)",
bounds: Bounds::Percentile {
low: 2.0,
high: 98.0,
},
step: 1.0,
description: "Aggregate of minor crime categories per year",
detail: "Sum of anti-social behaviour, shoplifting, bicycle theft, and other lower-severity crime per year in the LSOA, from police.uk street-level crime data (2023-2025). Provides a single minor crime metric.",
source: "crime",
prefix: "",
suffix: "/yr",
raw: false,
absolute: false,
},
],
},
FeatureGroup {
@ -858,6 +943,13 @@ pub static ENUM_FEATURE_GROUPS: &[EnumFeatureGroup] = &[
EnumFeatureGroup {
name: "Property",
features: &[
// "Listing status": enum feature with an explicit value order:
// "Historical sale", "For sale", "For rent".
// Per `detail`, it records which source a property record came from.
EnumFeatureConfig {
name: "Listing status",
order: Some(&["Historical sale", "For sale", "For rent"]),
description: "Whether the property is from historical sales, currently for sale, or for rent",
detail: "Indicates the source of the property record: 'Historical sale' from HM Land Registry Price Paid data, 'For sale' from current online buy listings, or 'For rent' from current online rental listings.",
source: "online-listings",
},
EnumFeatureConfig {
name: "Leashold/Freehold",
order: Some(&["Freehold", "Leasehold"]),