Codex changes

This commit is contained in:
Andras Schmelczer 2026-05-04 16:19:09 +01:00
parent 0bae902e08
commit d4dde21ad2
46 changed files with 4953 additions and 966 deletions

View file

@ -514,10 +514,7 @@ pub fn precompute_h3(lat: &[f32], lon: &[f32]) -> anyhow::Result<Vec<u64>> {
}
impl PropertyData {
pub fn load(
properties_path: &Path,
postcode_features_path: &Path,
) -> anyhow::Result<Self> {
pub fn load(properties_path: &Path, postcode_features_path: &Path) -> anyhow::Result<Self> {
// Load postcode.parquet
tracing::info!(
"Loading postcode features from {:?}",
@ -643,11 +640,22 @@ impl PropertyData {
}
let df = combined
.lazy()
.filter(col("lat").is_not_null().and(col("lon").is_not_null()))
.select(select_exprs)
.collect()
.context("Failed to select columns from combined data")?;
let row_count = df.height();
if row_count == 0 {
bail!("No property rows have usable coordinates after joining postcode data");
}
let dropped_coordinate_rows = total_rows.saturating_sub(row_count);
if dropped_coordinate_rows > 0 {
tracing::warn!(
rows = dropped_coordinate_rows,
"Dropped properties with missing postcode coordinates"
);
}
tracing::info!(rows = row_count, "Combined data selected");
let lat_series = df
@ -659,8 +667,8 @@ impl PropertyData {
.f32()
.context("Failed to read 'lat' as f32")?
.into_iter()
.map(|value| value.unwrap_or(0.0))
.collect();
.map(|value| value.context("Missing 'lat' value after coordinate filter"))
.collect::<anyhow::Result<Vec<_>>>()?;
let lon_series = df
.column("lon")
@ -671,8 +679,14 @@ impl PropertyData {
.f32()
.context("Failed to read 'lon' as f32")?
.into_iter()
.map(|value| value.unwrap_or(0.0))
.collect();
.map(|value| value.context("Missing 'lon' value after coordinate filter"))
.collect::<anyhow::Result<Vec<_>>>()?;
for (row, (&latitude, &longitude)) in lat.iter().zip(&lon).enumerate() {
if !(-90.0..=90.0).contains(&latitude) || !(-180.0..=180.0).contains(&longitude) {
bail!("Invalid coordinates at row {row}: lat={latitude}, lon={longitude}");
}
}
tracing::info!("Extracting numeric feature columns");
let numeric_col_major: Vec<Vec<f32>> = numeric_names