"""Tests for private helpers imported from ``pipeline.transform.merge``."""

import polars as pl

from pipeline.transform.merge import (
    _AREA_COLUMNS,
    _STATIC_POI_DISTANCE_RENAMES,
    _is_dynamic_poi_metric_column,
    _less_deprived_percentile_expr,
)


def test_less_deprived_percentile_expr_preserves_direction_and_nulls() -> None:
    """The smallest score becomes 100, the largest 0, and a null stays null."""
    column = "Income Score (rate)"
    frame = pl.DataFrame({column: [1.0, 2.0, 3.0, None]})

    percentile_expr = _less_deprived_percentile_expr(column)
    scored = frame.lazy().with_columns(percentile_expr).collect()

    assert scored[column].to_list() == [100.0, 50.0, 0.0, None]


def test_less_deprived_percentile_expr_uses_exact_scale_endpoints() -> None:
    """Tied minimum values all map to 100 and tied maximum values all map to 0."""
    column = "Income Score (rate)"
    frame = pl.DataFrame({column: [1.0, 1.0, 2.0, 3.0, 3.0]})

    percentile_expr = _less_deprived_percentile_expr(column)
    scored = frame.lazy().with_columns(percentile_expr).collect()

    assert scored[column].to_list() == [100.0, 100.0, 50.0, 0.0, 0.0]


def test_dynamic_poi_metric_columns_are_area_level() -> None:
    """The predicate accepts "(Cafe)" amenity metric names and rejects a plain count name."""
    accepted_names = (
        "Distance to nearest amenity (Cafe) (km)",
        "Number of amenities (Cafe) within 2km",
        "Number of amenities (Cafe) within 5km",
    )
    for column_name in accepted_names:
        assert _is_dynamic_poi_metric_column(column_name)

    # A count column without the "amenities (<type>)" form must not match.
    assert not _is_dynamic_poi_metric_column("Number of restaurants within 2km")


def test_static_poi_distance_columns_are_renamed_to_configured_area_features() -> None:
    """The rename mapping matches the expected table and targets only area columns."""
    expected_renames = {
        "parks_nearest_km": "Distance to nearest park (km)",
        "grocery_store_nearest_km": "Distance to nearest grocery store (km)",
        "cafe_nearest_km": "Distance to nearest cafe (km)",
        "pub_nearest_km": "Distance to nearest pub (km)",
        "restaurant_nearest_km": "Distance to nearest restaurant (km)",
    }

    assert _STATIC_POI_DISTANCE_RENAMES == expected_renames

    # Every renamed column must also be listed in _AREA_COLUMNS.
    renamed_targets = set(expected_renames.values())
    assert renamed_targets.issubset(_AREA_COLUMNS)