51 lines
1.9 KiB
Python
51 lines
1.9 KiB
Python
import polars as pl
|
|
|
|
from pipeline.download.rental_prices import _latest_rents_long
|
|
|
|
|
|
def test_latest_rents_long_adds_iod_alias_codes_for_south_yorkshire():
    """One Barnsley input row must appear under two area codes.

    The fixture holds a title row, a header row and a single data row for
    E08000038. After narrowing the output to one-bedroom rents, the same
    486.0 figure is expected under both E08000016 and E08000038 (the alias
    pairing named in the test title).
    """
    sheet_columns = {
        "column_1": ["title", "Time period", "2026-02-01 00:00:00"],
        "column_2": ["", "Area code", "E08000038"],
        "column_3": ["", "Area name", "Barnsley"],
        "column_12": ["", "One bed", "486"],
        "column_16": ["", "Two bed", "595"],
        "column_20": ["", "Three bed", "705"],
        "column_24": ["", "Four or more bed", "900"],
    }
    raw = pl.DataFrame(sheet_columns)

    long_rents = _latest_rents_long(raw)
    one_bed_only = long_rents.filter(pl.col("bedrooms") == 1)
    # Sort so the comparison below does not depend on output row order.
    result = one_bed_only.sort("area_code")

    expected_rows = [
        {"area_code": "E08000016", "mean_monthly_rent": 486.0},
        {"area_code": "E08000038", "mean_monthly_rent": 486.0},
    ]
    observed_rows = result.select("area_code", "mean_monthly_rent").to_dicts()
    assert observed_rows == expected_rows
|
|
|
|
|
|
def test_latest_rents_long_locates_header_in_variable_preamble():
    """Header detection must cope with a three-row preamble.

    The live workbook puts a title, a contents note and then the header
    before the data. A fixed two-row slice used to leave the header row in
    the data, and it was only the area-code filter that happened to remove
    it. This fixture reproduces that layout with a single Barnsley row.
    """
    # Title and contents-note rows are empty in every column but the first.
    blank_preamble = ["", ""]
    sheet_columns = {
        "column_1": [
            "title",
            "This worksheet contains one table.",
            "Time period",
            "2026-02-01 00:00:00",
        ],
        "column_2": [*blank_preamble, "Area code", "E08000038"],
        "column_3": [*blank_preamble, "Area name", "Barnsley"],
        "column_12": [*blank_preamble, "One bed", "486"],
        "column_16": [*blank_preamble, "Two bed", "595"],
        "column_20": [*blank_preamble, "Three bed", "705"],
        "column_24": [*blank_preamble, "Four or more bed", "900"],
    }
    raw = pl.DataFrame(sheet_columns)

    result = _latest_rents_long(raw)

    barnsley_rows = result.filter(pl.col("area_code") == "E08000038")
    assert barnsley_rows.height == 5

    # A header row leaking into the data would fail numeric parsing and
    # show up as a null rent.
    rent_values = result.get_column("mean_monthly_rent")
    assert rent_values.null_count() == 0
|