Lots of improvements
This commit is contained in:
parent
ef921361ec
commit
80a5a2a774
21 changed files with 489 additions and 337 deletions
|
|
@@ -464,7 +464,7 @@ impl PropertyData {
|
|||
tracing::info!("Concatenating all data sources");
|
||||
let buy_count = listings_buy.height();
|
||||
let rent_count = listings_rent.height();
|
||||
let mut combined = concat(
|
||||
let combined = concat(
|
||||
[
|
||||
properties_joined.lazy(),
|
||||
listings_buy.lazy(),
|
||||
|
|
@@ -495,36 +495,8 @@ impl PropertyData {
|
|||
let numeric_names = features::all_numeric_feature_names();
|
||||
let enum_names = features::all_enum_feature_names();
|
||||
|
||||
// Fill in NaN/empty placeholder columns for features that don't exist in all
|
||||
// sources (e.g. Listing date only comes from listings, Estimated current price
|
||||
// only from properties). Without this, diagonal concat leaves them absent.
|
||||
{
|
||||
let schema = combined.schema();
|
||||
let mut fill_exprs: Vec<Expr> = Vec::new();
|
||||
for &name in &numeric_names {
|
||||
if schema.get(name).is_none() {
|
||||
tracing::info!(feature = %name, "Adding NaN placeholder for missing numeric feature");
|
||||
fill_exprs.push(lit(f32::NAN).alias(name));
|
||||
}
|
||||
}
|
||||
for &name in &enum_names {
|
||||
if schema.get(name).is_none() {
|
||||
tracing::info!(feature = %name, "Adding empty placeholder for missing enum feature");
|
||||
fill_exprs.push(lit("").alias(name));
|
||||
}
|
||||
}
|
||||
if !fill_exprs.is_empty() {
|
||||
combined = combined
|
||||
.lazy()
|
||||
.with_columns(fill_exprs)
|
||||
.collect()
|
||||
.context("Failed to add placeholder columns for missing features")?;
|
||||
}
|
||||
}
|
||||
|
||||
let schema = combined.schema();
|
||||
|
||||
// Validate: every configured feature exists in combined schema
|
||||
for name in &numeric_names {
|
||||
match schema.get(name) {
|
||||
Some(dtype) if is_numeric_dtype(dtype) => {}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue