diff --git a/server-rs/src/data/postcodes.rs b/server-rs/src/data/postcodes.rs index 10387bc..d6e0761 100644 --- a/server-rs/src/data/postcodes.rs +++ b/server-rs/src/data/postcodes.rs @@ -133,12 +133,49 @@ pub struct PostcodeData { pub centroids: Vec<(f32, f32)>, /// Precomputed AABB per postcode: (south, west, north, east) as f32 pub aabbs: Vec<(f32, f32, f32, f32)>, - /// Precomputed GeoJSON geometry Value per postcode - pub geometries: Vec, + /// Compact polygon storage: outer Vec is per-postcode, inner Vecs are rings of [lon, lat] f32 pairs. + /// Held as raw f32 to keep boundary memory ~10x smaller than serde_json::Value form. + pub polygons: Vec>>, /// Lookup from postcode string to index pub postcode_to_idx: FxHashMap, } +impl PostcodeData { + /// Build the GeoJSON Value for a postcode polygon on demand. + pub fn geometry_geojson(&self, idx: usize) -> serde_json::Value { + let rings = &self.polygons[idx]; + if rings.len() == 1 { + let coords: Vec = rings[0] + .iter() + .map(|[lon, lat]| { + serde_json::Value::Array(vec![ + serde_json::Value::from(*lon as f64), + serde_json::Value::from(*lat as f64), + ]) + }) + .collect(); + serde_json::json!({"type": "Polygon", "coordinates": [coords]}) + } else { + let polys: Vec = rings + .iter() + .map(|ring| { + let coords: Vec = ring + .iter() + .map(|[lon, lat]| { + serde_json::Value::Array(vec![ + serde_json::Value::from(*lon as f64), + serde_json::Value::from(*lat as f64), + ]) + }) + .collect(); + serde_json::Value::Array(vec![serde_json::Value::Array(coords)]) + }) + .collect(); + serde_json::json!({"type": "MultiPolygon", "coordinates": polys}) + } + } +} + impl PostcodeData { /// Load postcode boundaries from a directory of GeoJSON files. /// Expects the directory to have a `units/` subdirectory containing .geojson files. @@ -295,49 +332,13 @@ impl PostcodeData { postcode_to_idx.insert(postcode.clone(), idx); } - // Precompute GeoJSON geometry for each postcode - let geometries: Vec = polygons - .iter() - .map(|rings| { - if rings.len() == 1 { - let coords: Vec = rings[0] - .iter() - .map(|[lon, lat]| { - serde_json::Value::Array(vec![ - serde_json::Value::from(*lon as f64), - serde_json::Value::from(*lat as f64), - ]) - }) - .collect(); - serde_json::json!({"type": "Polygon", "coordinates": [coords]}) - } else { - let polys: Vec = rings - .iter() - .map(|ring| { - let coords: Vec = ring - .iter() - .map(|[lon, lat]| { - serde_json::Value::Array(vec![ - serde_json::Value::from(*lon as f64), - serde_json::Value::from(*lat as f64), - ]) - }) - .collect(); - serde_json::Value::Array(vec![serde_json::Value::Array(coords)]) - }) - .collect(); - serde_json::json!({"type": "MultiPolygon", "coordinates": polys}) - } - }) - .collect(); - info!(postcodes = postcodes.len(), "Postcode boundary data ready"); Ok(PostcodeData { postcodes, centroids, aabbs, - geometries, + polygons, postcode_to_idx, }) } diff --git a/server-rs/src/data/property.rs b/server-rs/src/data/property.rs index 279a484..bb395a9 100644 --- a/server-rs/src/data/property.rs +++ b/server-rs/src/data/property.rs @@ -1282,23 +1282,20 @@ impl PropertyData { }; let mut poi_metrics = PostcodePoiMetrics::from_postcode_df(&postcode_df, poi_metric_names)?; - // Load properties.parquet and join with postcode data for lat/lon + area features + // Load properties.parquet and join with postcode data lazily so the + // wide combined frame is never fully materialized — projection is + // pushed down into the join, keeping peak memory bounded. tracing::info!("Loading properties from {:?}", properties_path); let properties_path = PlRefPath::try_from_path(properties_path) .context("Failed to normalize properties parquet path")?; let properties_lf = LazyFrame::scan_parquet(properties_path, Default::default()) .context("Failed to scan properties parquet")?; - let combined = properties_lf - .join( - postcode_df.clone().lazy(), - [col("Postcode")], - [col("Postcode")], - JoinArgs::new(JoinType::Left), - ) - .collect() - .context("Failed to join properties with postcodes")?; - let total_rows = combined.height(); - tracing::info!(rows = total_rows, "Properties joined with postcodes"); + let combined_lf = properties_lf.join( + postcode_df.lazy(), + [col("Postcode")], + [col("Postcode")], + JoinArgs::new(JoinType::Left), + ); // Get configured feature/enum names in config order. Dynamic POI // metrics live in a postcode-level side table so they do not widen the @@ -1306,7 +1303,10 @@ impl PropertyData { let configured_numeric_names = features::all_numeric_feature_names(); let enum_names = features::all_enum_feature_names(); - let schema = combined.schema(); + let schema = combined_lf + .clone() + .collect_schema() + .context("Failed to collect joined schema")?; let numeric_names: Vec = configured_numeric_names .iter() .map(|name| (*name).to_string()) @@ -1402,24 +1402,16 @@ impl PropertyData { if has_renovation_history { select_exprs.push(col("renovation_history")); } - let df = combined - .lazy() + let df = combined_lf .filter(col("lat").is_not_null().and(col("lon").is_not_null())) .select(select_exprs) .collect() - .context("Failed to select columns from combined data")?; + .context("Failed to select columns from joined frame")?; let row_count = df.height(); if row_count == 0 { bail!("No property rows have usable coordinates after joining postcode data"); } - let dropped_coordinate_rows = total_rows.saturating_sub(row_count); - if dropped_coordinate_rows > 0 { - tracing::warn!( - rows = dropped_coordinate_rows, - "Dropped properties with missing postcode coordinates" - ); - } tracing::info!(rows = row_count, "Combined data selected"); let lat_series = df @@ -1692,6 +1684,9 @@ impl PropertyData { FxHashMap::default() }; + // Free the projected joined frame before building the row-major matrix. + drop(df); + // Sort all rows by spatial locality so that grid queries access // contiguous memory (sequential reads instead of random DRAM accesses). tracing::info!("Sorting rows by spatial locality");