lgtm
This commit is contained in:
parent
8708bf000d
commit
11711c57e6
38 changed files with 5361 additions and 265 deletions
99
pipeline/transform/test_tree_density.py
Normal file
99
pipeline/transform/test_tree_density.py
Normal file
|
|
@ -0,0 +1,99 @@
|
|||
from pathlib import Path
|
||||
|
||||
import polars as pl
|
||||
import pytest
|
||||
|
||||
from pipeline.transform.tree_density import (
|
||||
STREET_TREE_COVERAGE_COL,
|
||||
STREET_TREE_DENSITY_COL,
|
||||
_coverage_percentile_expr,
|
||||
_metric_columns,
|
||||
_postcode_density_percentile_col,
|
||||
_with_postcode_density_percentiles,
|
||||
_write_street_rollups,
|
||||
)
|
||||
|
||||
|
||||
def test_coverage_percentile_expr_ranks_higher_coverage_higher() -> None:
    """Larger coverage values must receive larger percentile values.

    The three non-null inputs are expected to map to 0, 50 and 100 in input
    order, and the null input is expected to remain null.
    """
    frame = pl.DataFrame({"coverage": [0.0, 5.0, 10.0, None]})
    percentile_expr = _coverage_percentile_expr("coverage", "percentile")

    ranked = frame.lazy().with_columns(percentile_expr).collect()

    expected = [0.0, 50.0, 100.0, None]
    assert ranked["percentile"].to_list() == expected
|
||||
|
||||
|
||||
def test_coverage_percentile_expr_uses_exact_scale_endpoints() -> None:
    """Tied minimum and maximum values must land exactly on 0 and 100.

    With duplicated lowest and highest coverage values, both lowest rows are
    expected at 0.0, both highest rows at 100.0, and the middle row at 50.0.
    """
    frame = pl.DataFrame({"coverage": [0.0, 0.0, 5.0, 10.0, 10.0]})
    percentile_expr = _coverage_percentile_expr("coverage", "percentile")

    ranked = frame.lazy().with_columns(percentile_expr).collect()

    expected = [0.0, 0.0, 50.0, 100.0, 100.0]
    assert ranked["percentile"].to_list() == expected
|
||||
|
||||
|
||||
def test_street_rollup_percentiles_are_ranked_over_raw_street_coverage(
    tmp_path: Path,
) -> None:
    """Run ``_write_street_rollups`` on a small fixture and check its outputs.

    Per-postcode metrics for three postcodes are passed in together with a
    price-paid parquet file holding four rows spread over two streets.  Both
    parquet outputs are then read back to check the street coverage values,
    the street percentile values, and the presence of the expected columns in
    the address output.
    """
    radius_m = 50
    density_col, area_col, count_col, height_col = _metric_columns(radius_m)
    percentile_col = _postcode_density_percentile_col(radius_m)

    # Postcode-level inputs, enriched with the postcode percentile column.
    raw_postcode_metrics = pl.DataFrame(
        {
            "postcode": ["AA1 1AA", "AA1 1AB", "AA1 1AC"],
            density_col: [10.0, 30.0, 50.0],
            area_col: [100.0, 300.0, 500.0],
            count_col: [1, 3, 5],
            height_col: [4.0, 6.0, 8.0],
        }
    )
    postcode_metrics = _with_postcode_density_percentiles(
        raw_postcode_metrics,
        radius_m,
    )

    # Four transactions: three on "Oak Road" (two sharing a postcode) and one
    # on "Elm Street".
    n_rows = 4
    price_paid = pl.DataFrame(
        {
            "postcode": ["AA1 1AA", "AA1 1AA", "AA1 1AB", "AA1 1AC"],
            "paon": ["1", "2", "3", "4"],
            "saon": [""] * n_rows,
            "street": ["Oak Road", "Oak Road", "Oak Road", "Elm Street"],
            "locality": [""] * n_rows,
            "town_city": ["Test Town"] * n_rows,
            "district": ["Test District"] * n_rows,
            "county": ["Test County"] * n_rows,
            "date_of_transfer": [
                "2024-01-01",
                "2024-01-02",
                "2024-01-03",
                "2024-01-04",
            ],
        }
    )
    price_paid_file = tmp_path / "price-paid.parquet"
    streets_out = tmp_path / "streets.parquet"
    addresses_out = tmp_path / "addresses.parquet"
    price_paid.write_parquet(price_paid_file)

    _write_street_rollups(
        postcode_metrics=postcode_metrics,
        price_paid_path=price_paid_file,
        output_streets=streets_out,
        output_addresses=addresses_out,
        radius_m=radius_m,
    )

    # Sort so the row order is deterministic: "Elm Street" before "Oak Road".
    streets = pl.read_parquet(streets_out).sort("street")
    addresses = pl.read_parquet(addresses_out)

    assert streets["street"].to_list() == ["Elm Street", "Oak Road"]

    # NOTE(review): pytest.approx is used with its default tolerances, so 16.7
    # presumably matches an already-rounded value written by the rollup --
    # confirm against _write_street_rollups.
    expected_coverage = pytest.approx([50.0, 16.7])
    assert streets[STREET_TREE_COVERAGE_COL].to_list() == expected_coverage

    expected_percentiles = [
        ("Elm Street", 100.0),
        ("Oak Road", 0.0),
    ]
    street_percentiles = streets.select("street", STREET_TREE_DENSITY_COL).rows()
    assert street_percentiles == expected_percentiles

    for required_col in (
        percentile_col,
        STREET_TREE_COVERAGE_COL,
        STREET_TREE_DENSITY_COL,
    ):
        assert required_col in addresses.columns
|
||||
Loading…
Add table
Add a link
Reference in a new issue