Lots of improvements

This commit is contained in:
Andras Schmelczer 2026-03-10 22:05:51 +00:00
parent ef921361ec
commit 80a5a2a774
21 changed files with 489 additions and 337 deletions

View file

@ -464,7 +464,7 @@ impl PropertyData {
tracing::info!("Concatenating all data sources");
let buy_count = listings_buy.height();
let rent_count = listings_rent.height();
let mut combined = concat(
let combined = concat(
[
properties_joined.lazy(),
listings_buy.lazy(),
@ -495,36 +495,8 @@ impl PropertyData {
let numeric_names = features::all_numeric_feature_names();
let enum_names = features::all_enum_feature_names();
// Fill in NaN/empty placeholder columns for features that don't exist in all
// sources (e.g. Listing date only comes from listings, Estimated current price
// only from properties). Without this, diagonal concat leaves them absent.
{
let schema = combined.schema();
let mut fill_exprs: Vec<Expr> = Vec::new();
for &name in &numeric_names {
if schema.get(name).is_none() {
tracing::info!(feature = %name, "Adding NaN placeholder for missing numeric feature");
fill_exprs.push(lit(f32::NAN).alias(name));
}
}
for &name in &enum_names {
if schema.get(name).is_none() {
tracing::info!(feature = %name, "Adding empty placeholder for missing enum feature");
fill_exprs.push(lit("").alias(name));
}
}
if !fill_exprs.is_empty() {
combined = combined
.lazy()
.with_columns(fill_exprs)
.collect()
.context("Failed to add placeholder columns for missing features")?;
}
}
let schema = combined.schema();
// Validate: every configured feature exists in combined schema
for name in &numeric_names {
match schema.get(name) {
Some(dtype) if is_numeric_dtype(dtype) => {}