try
This commit is contained in:
parent
843d14b7ba
commit
c938b71904
13 changed files with 698 additions and 109 deletions
|
|
@ -378,7 +378,10 @@ def test_run_new_build_keeps_early_first_transfer_when_sub_min_price(tmp_path: P
|
|||
price_paid_path = tmp_path / "price-paid.parquet"
|
||||
pl.DataFrame(
|
||||
{
|
||||
"price": [30_000, 300_000],
|
||||
# 5_000 is below MIN_PRICE (10_000) — a nominal/junk transfer that
|
||||
# must still anchor the construction year but stay out of the price
|
||||
# aggregations.
|
||||
"price": [5_000, 300_000],
|
||||
"date_of_transfer": [date(2015, 2, 3), date(2022, 2, 3)],
|
||||
"property_type": ["T", "T"],
|
||||
"postcode": ["AA1 1AA", "AA1 1AA"],
|
||||
|
|
@ -408,6 +411,48 @@ def test_run_new_build_keeps_early_first_transfer_when_sub_min_price(tmp_path: P
|
|||
assert df.get_column("historical_prices").list.len().to_list() == [1]
|
||||
|
||||
|
||||
def test_run_keeps_sale_above_lowered_min_price(tmp_path: Path):
    """Pin the 50k -> 10k minimum-price change for the price aggregations.

    A genuine cheap sale of 30_000 sits between the OLD floor (50k) and the
    NEW floor (10k), so it must now be RETAINED. On the pre-fix 50k floor the
    30k sale was excluded, giving historical_prices length 1 and a
    latest_price of 250_000, so this test fails there.
    """
    # Render the single-certificate EPC CSV in memory first ...
    epc_csv = io.StringIO()
    epc_writer = csv.DictWriter(epc_csv, fieldnames=EPC_SOURCE_COLUMNS)
    epc_writer.writeheader()
    epc_writer.writerow(_row())

    # ... then store it as the only member of the EPC zip archive.
    zip_path = tmp_path / "domestic-csv.zip"
    with zipfile.ZipFile(zip_path, "w", compression=zipfile.ZIP_DEFLATED) as epc_zip:
        epc_zip.writestr("certificates-2024.csv", epc_csv.getvalue())

    # Two transfers for the same address: 250_000 in 2018, then 30_000 in 2022.
    sales = {
        "price": [250_000, 30_000],
        "date_of_transfer": [date(2018, 2, 3), date(2022, 2, 3)],
        "property_type": ["T", "T"],
        "postcode": ["AA1 1AA", "AA1 1AA"],
        "paon": ["1", "1"],
        "saon": [None, None],
        "street": ["Example Street", "Example Street"],
        "locality": [None, None],
        "town_city": ["Exampletown", "Exampletown"],
        "duration": ["F", "F"],
        "old_new": ["N", "N"],
        "ppd_category": ["A", "A"],
    }
    price_paid_path = tmp_path / "price-paid.parquet"
    pl.DataFrame(sales).write_parquet(price_paid_path)

    output_path = tmp_path / "epc-pp.parquet"
    _run(zip_path, price_paid_path, output_path, tmp_path)

    merged = pl.read_parquet(output_path)
    assert merged.height == 1

    # Both sales now survive the 10k floor; the 30_000 (2022) is the most recent.
    price_counts = merged.get_column("historical_prices").list.len().to_list()
    assert price_counts == [2]
    latest_prices = merged.get_column("latest_price").to_list()
    assert latest_prices == [30_000]
|
||||
|
||||
|
||||
def test_epc_band_to_year_uses_midpoint_and_clamps():
|
||||
import polars as pl
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue