perfect-postcode/pipeline/download/test_rental_prices.py
Andras Schmelczer f59d01227b
Some checks failed
Build and publish Docker image / build-and-push (push) Failing after 15s
CI / Check (push) Failing after 1m58s
Split up
2026-06-12 21:51:37 +01:00

51 lines
1.9 KiB
Python

import polars as pl
from pipeline.download.rental_prices import _latest_rents_long
def test_latest_rents_long_adds_iod_alias_codes_for_south_yorkshire():
    """A row for E08000038 must also be emitted under the alias E08000016.

    The fixture mimics the raw sheet: a title row, a header row, then a
    single data row for Barnsley. Only the one-bed figure is checked, and
    it is expected to appear with the same rent under both area codes.
    """
    workbook = pl.DataFrame(
        {
            "column_1": ["title", "Time period", "2026-02-01 00:00:00"],
            "column_2": ["", "Area code", "E08000038"],
            "column_3": ["", "Area name", "Barnsley"],
            "column_12": ["", "One bed", "486"],
            "column_16": ["", "Two bed", "595"],
            "column_20": ["", "Three bed", "705"],
            "column_24": ["", "Four or more bed", "900"],
        }
    )

    long_rents = _latest_rents_long(workbook)
    # Sort so the comparison below does not depend on output row order.
    one_bed = long_rents.filter(pl.col("bedrooms") == 1).sort("area_code")
    observed = one_bed.select("area_code", "mean_monthly_rent").to_dicts()

    expected = [
        {"area_code": "E08000016", "mean_monthly_rent": 486.0},
        {"area_code": "E08000038", "mean_monthly_rent": 486.0},
    ]
    assert observed == expected
def test_latest_rents_long_locates_header_in_variable_preamble():
    """The header row must be found even with an extra preamble row.

    The live workbook has THREE preamble rows (title, contents note,
    header); a fixed two-row slice left the header in the data and only
    the area-code filter happened to drop it.
    """
    # Cells that sit above the header row in every column except the first.
    preamble_blanks = ["", ""]
    workbook = pl.DataFrame(
        {
            "column_1": [
                "title",
                "This worksheet contains one table.",
                "Time period",
                "2026-02-01 00:00:00",
            ],
            "column_2": [*preamble_blanks, "Area code", "E08000038"],
            "column_3": [*preamble_blanks, "Area name", "Barnsley"],
            "column_12": [*preamble_blanks, "One bed", "486"],
            "column_16": [*preamble_blanks, "Two bed", "595"],
            "column_20": [*preamble_blanks, "Three bed", "705"],
            "column_24": [*preamble_blanks, "Four or more bed", "900"],
        }
    )

    long_rents = _latest_rents_long(workbook)

    # NOTE(review): five rows are expected from four bedroom columns --
    # presumably the helper emits an extra bedroom band; confirm against
    # _latest_rents_long.
    barnsley_rows = long_rents.filter(pl.col("area_code") == "E08000038")
    assert barnsley_rows.height == 5

    # Presumably a header row leaking into the data would surface as a
    # null rent, so none may remain.
    rent_column = long_rents["mean_monthly_rent"]
    assert rent_column.null_count() == 0