vibes
This commit is contained in:
parent
39ef5c6646
commit
c995f12f8b
78 changed files with 4830 additions and 1619 deletions
|
|
@ -2,16 +2,23 @@ import polars as pl
|
|||
import pyarrow as pa
|
||||
import pytest
|
||||
from shapely import box, to_wkb
|
||||
from shapely.geometry import Point
|
||||
|
||||
from pipeline.transform.merge import (
|
||||
_AREA_COLUMNS,
|
||||
CONSERVATION_AREA_FEATURE,
|
||||
LISTED_BUILDING_FEATURE,
|
||||
TREE_DENSITY_FEATURE,
|
||||
_is_unpublished_conservation_area_record,
|
||||
_LISTING_OVERLAY_SOURCES,
|
||||
_build_unmatched_listing_seed_rows,
|
||||
_canonical_postcode_expr,
|
||||
_finalize_listings,
|
||||
_integrate_listings,
|
||||
_match_direct_epc,
|
||||
_is_dynamic_poi_metric_column,
|
||||
_less_deprived_percentile_expr,
|
||||
_load_conservation_area_geometries,
|
||||
_load_listings_for_merge,
|
||||
_matched_listed_building_flags,
|
||||
_postcode_conservation_area_flags,
|
||||
_postcode_listed_building_candidates,
|
||||
|
|
@ -85,31 +92,28 @@ def test_postcode_conservation_area_flags_marks_point_membership() -> None:
|
|||
]
|
||||
|
||||
|
||||
def test_unpublished_conservation_area_records_are_identified() -> None:
    """Only the "no data available" placeholder name counts as unpublished.

    An ordinary area name and a missing name (``None``) must both be
    reported as not unpublished.
    """
    placeholder_name = "No data available for publication by HE"
    assert _is_unpublished_conservation_area_record(placeholder_name)

    for ordinary_name in ("Bloomsbury", None):
        assert not _is_unpublished_conservation_area_record(ordinary_name)
|
||||
|
||||
|
||||
def test_load_conservation_area_geometries_skips_unpublished_placeholders(
|
||||
def test_load_conservation_area_geometries_uses_current_planning_data_records(
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
tmp_path,
|
||||
) -> None:
|
||||
real_area = box(0, 0, 1, 1)
|
||||
placeholder_area = box(-100, -100, 100, 100)
|
||||
ended_area = box(2, 2, 3, 3)
|
||||
other_dataset_area = box(4, 4, 5, 5)
|
||||
point = Point(0.5, 0.5)
|
||||
|
||||
def fake_read_arrow(path, columns):
|
||||
assert path == tmp_path / "conservation_areas.gpkg"
|
||||
assert columns == ["NAME"]
|
||||
def fake_read_arrow(path):
|
||||
assert path == tmp_path / "conservation_areas.geojson"
|
||||
table = pa.table(
|
||||
{
|
||||
"NAME": [
|
||||
"Central Village",
|
||||
"No data available for publication by HE",
|
||||
"dataset": [
|
||||
"conservation-area",
|
||||
"conservation-area",
|
||||
"listed-building",
|
||||
"conservation-area",
|
||||
],
|
||||
"SHAPE": to_wkb([real_area, placeholder_area]),
|
||||
"end-date": ["", "2025-01-01", "", ""],
|
||||
"name": ["Central Village", "Old Boundary", "Other", "Point Record"],
|
||||
"SHAPE": to_wkb([real_area, ended_area, other_dataset_area, point]),
|
||||
}
|
||||
)
|
||||
return {"geometry_name": "SHAPE", "crs": "EPSG:4326"}, table
|
||||
|
|
@ -117,7 +121,7 @@ def test_load_conservation_area_geometries_skips_unpublished_placeholders(
|
|||
monkeypatch.setattr("pipeline.transform.merge.pyogrio.read_arrow", fake_read_arrow)
|
||||
|
||||
geometries, crs = _load_conservation_area_geometries(
|
||||
tmp_path / "conservation_areas.gpkg"
|
||||
tmp_path / "conservation_areas.geojson"
|
||||
)
|
||||
|
||||
assert crs == "EPSG:4326"
|
||||
|
|
@ -290,3 +294,440 @@ def test_tree_density_by_postcode_requires_postcode_and_density_columns(
|
|||
|
||||
with pytest.raises(ValueError, match="missing required column: postcode"):
|
||||
_tree_density_by_postcode(missing_postcode_path)
|
||||
|
||||
|
||||
def _sample_listings_frame() -> pl.DataFrame:
    """Return a one-row listings frame used as input by the listing tests.

    Each column is declared once as ``(name, dtype, value)`` so the data and
    the schema handed to polars always list the same columns in the same
    order.
    """
    columns = [
        ("Bedrooms", pl.Int32, 3),
        ("Bathrooms", pl.Int32, 2),
        ("Number of bedrooms & living rooms", pl.Int32, 4),
        ("lon", pl.Float64, -0.1),
        ("lat", pl.Float64, 51.5),
        # Deliberately lower-case and unspaced.
        ("Postcode", pl.Utf8, "sw1a1aa"),
        ("Address per Property Register", pl.Utf8, "1 Example Road"),
        ("Leasehold/Freehold", pl.Utf8, "Freehold"),
        ("Property type", pl.Utf8, "Terraced"),
        ("Property sub-type", pl.Utf8, "Mid-Terrace"),
        ("Price qualifier", pl.Utf8, ""),
        ("Total floor area (sqm)", pl.Float64, 120.0),
        ("Listing URL", pl.Utf8, "https://example.test/abc"),
        ("Listing features", pl.List(pl.Utf8), ["Garden", "Off-street parking"]),
        ("Listing date", pl.Datetime("us"), None),
        ("Listing status", pl.Utf8, "For sale"),
        ("Asking price", pl.Int64, 750_000),
        ("Asking price per sqm", pl.Int32, 6_250),
    ]
    data = {name: [value] for name, _dtype, value in columns}
    schema = {name: dtype for name, dtype, _value in columns}
    return pl.DataFrame(data, schema=schema)
|
||||
|
||||
|
||||
def _stub_arcgis(path) -> None:
    """Write a one-row postcode lookup parquet file to ``path``.

    The row holds postcode ``SW1A 1AA`` with a country code, a null
    ``doterm`` and easting/northing coordinates.
    """
    schema = {
        "pcds": pl.Utf8,
        "ctry25cd": pl.Utf8,
        "doterm": pl.Utf8,
        "east1m": pl.Float64,
        "north1m": pl.Float64,
    }
    row = {
        "pcds": ["SW1A 1AA"],
        "ctry25cd": ["E92000001"],
        "doterm": [None],
        "east1m": [530000.0],
        "north1m": [180000.0],
    }
    lookup = pl.DataFrame(row, schema=schema)
    lookup.write_parquet(path)
|
||||
|
||||
|
||||
def test_canonical_postcode_expr_formats_compact_postcodes() -> None:
    """Compact and spaced postcodes both canonicalise to ``SW1A 1AA``.

    The value ``"bad"`` and a null input are both expected to yield null.
    """
    raw_postcodes = ["sw1a1aa", "SW1A 1AA", "bad", None]
    expected = ["SW1A 1AA", "SW1A 1AA", None, None]

    frame = pl.DataFrame({"Postcode": raw_postcodes})
    canonical = _canonical_postcode_expr("Postcode").alias("canonical")
    result = frame.with_columns(canonical)

    assert result["canonical"].to_list() == expected
|
||||
|
||||
|
||||
def test_load_listings_for_merge_canonicalises_and_exposes_overlay_columns(
    tmp_path,
) -> None:
    """Loading the sample listing yields canonical keys and ``_actual_*`` columns."""
    listings_path = tmp_path / "listings.parquet"
    arcgis_path = tmp_path / "arcgis.parquet"
    _sample_listings_frame().write_parquet(listings_path)
    _stub_arcgis(arcgis_path)

    loaded = _load_listings_for_merge(listings_path, arcgis_path)

    # Checked in order; the first mismatch fails the test.
    expected_by_column = {
        "postcode": ["SW1A 1AA"],
        "pp_address": ["1 Example Road"],
        "_actual_listing_url": ["https://example.test/abc"],
        "_actual_asking_price": [750_000],
        "_actual_lat": [51.5],
    }
    for column, expected in expected_by_column.items():
        assert loaded[column].to_list() == expected
|
||||
|
||||
|
||||
def test_build_unmatched_listing_seed_rows_fills_property_shape_fields(
    tmp_path,
) -> None:
    """Seed rows built from an unmatched listing carry the listing's own values.

    The template schema combines a handful of property columns with every
    overlay destination column from ``_LISTING_OVERLAY_SOURCES``.
    """
    listings_path = tmp_path / "listings.parquet"
    arcgis_path = tmp_path / "arcgis.parquet"
    _sample_listings_frame().write_parquet(listings_path)
    _stub_arcgis(arcgis_path)
    listings = _load_listings_for_merge(listings_path, arcgis_path)

    property_columns = {
        "postcode": pl.Utf8,
        "pp_address": pl.Utf8,
        "pp_property_type": pl.Utf8,
        "duration": pl.Utf8,
        "total_floor_area": pl.Float64,
        "number_habitable_rooms": pl.Int16,
        "latest_price": pl.Int64,
        "epc_address": pl.Utf8,
    }
    overlay_columns = {dst: dtype for _src, dst, dtype in _LISTING_OVERLAY_SOURCES}
    template_schema = pl.Schema({**property_columns, **overlay_columns})

    # Treat every loaded listing as unmatched.
    unmatched_idxs = listings.select("_listing_idx")
    seed = _build_unmatched_listing_seed_rows(
        unmatched_idxs, listings, template_schema
    )

    assert seed.height == 1
    expected_by_column = {
        "postcode": ["SW1A 1AA"],
        "pp_address": ["1 Example Road"],
        "pp_property_type": ["Terraced"],
        "duration": ["Freehold"],
        "total_floor_area": [120.0],
        "number_habitable_rooms": [4],
        "latest_price": [750_000],
        # Columns not populated from the listing default to null.
        "epc_address": [None],
        # Overlay columns flow through 1:1.
        "_actual_listing_url": ["https://example.test/abc"],
    }
    for column, expected in expected_by_column.items():
        assert seed[column].to_list() == expected
|
||||
|
||||
|
||||
def test_build_unmatched_listing_seed_rows_uses_direct_epc_fallbacks(
    tmp_path,
) -> None:
    """Seed rows take values from the ``_direct_*`` columns.

    The listing is written with null floor area and room count, and the
    loaded frame is given literal ``_direct_*`` columns before seeding.
    """
    listings_path = tmp_path / "listings.parquet"
    arcgis_path = tmp_path / "arcgis.parquet"

    # Blank out the listing's own floor area and room count.
    blanked_floor_area = pl.lit(None, dtype=pl.Float64).alias("Total floor area (sqm)")
    blanked_rooms = pl.lit(None, dtype=pl.Int32).alias(
        "Number of bedrooms & living rooms"
    )
    _sample_listings_frame().with_columns(
        blanked_floor_area,
        blanked_rooms,
    ).write_parquet(listings_path)
    _stub_arcgis(arcgis_path)

    direct_epc_columns = [
        pl.lit("1 Example Road").alias("_direct_epc_address"),
        pl.lit("C").alias("_direct_current_energy_rating"),
        pl.lit("B").alias("_direct_potential_energy_rating"),
        pl.lit(98.0).alias("_direct_total_floor_area"),
        pl.lit(4, dtype=pl.Int16).alias("_direct_number_habitable_rooms"),
        pl.lit(2.4).alias("_direct_floor_height"),
        pl.lit(1930, dtype=pl.UInt16).alias("_direct_construction_age_band"),
        pl.lit(1, dtype=pl.UInt8).alias("_direct_is_construction_date_approximate"),
        pl.lit("No").alias("_direct_was_council_house"),
    ]
    loaded = _load_listings_for_merge(listings_path, arcgis_path)
    listings = loaded.with_columns(*direct_epc_columns)

    property_columns = {
        "postcode": pl.Utf8,
        "pp_address": pl.Utf8,
        "total_floor_area": pl.Float64,
        "number_habitable_rooms": pl.Int16,
        "epc_address": pl.Utf8,
        "current_energy_rating": pl.Utf8,
        "was_council_house": pl.Utf8,
    }
    overlay_columns = {dst: dtype for _src, dst, dtype in _LISTING_OVERLAY_SOURCES}
    template_schema = pl.Schema({**property_columns, **overlay_columns})

    seed = _build_unmatched_listing_seed_rows(
        listings.select("_listing_idx"), listings, template_schema
    )

    expected_by_column = {
        "total_floor_area": [98.0],
        "number_habitable_rooms": [4],
        "epc_address": ["1 Example Road"],
        "current_energy_rating": ["C"],
        "was_council_house": ["No"],
    }
    for column, expected in expected_by_column.items():
        assert seed[column].to_list() == expected
|
||||
|
||||
|
||||
def test_match_direct_epc_considers_nearby_postcodes() -> None:
    """A listing matches an EPC candidate filed under a different postcode.

    The listing (postcode ``AA11AA``) and the candidate (postcode ``BB11BB``)
    share the same match address and property type, and their coordinates
    differ by 20 east / 10 north.
    """

    def one_row_frame(columns) -> pl.DataFrame:
        # Turn (name, dtype, value) triples into a single-row typed frame.
        data = {name: [value] for name, _dtype, value in columns}
        schema = {name: dtype for name, dtype, _value in columns}
        return pl.DataFrame(data, schema=schema)

    listing_matches = one_row_frame(
        [
            ("_listing_idx", pl.UInt32, 0),
            ("_listing_match_address", pl.Utf8, "1 EXAMPLE ROAD"),
            ("_listing_match_postcode", pl.Utf8, "AA11AA"),
            ("_listing_east", pl.Float64, 1000.0),
            ("_listing_north", pl.Float64, 1000.0),
            ("_actual_property_type", pl.Utf8, "Terraced"),
            ("_actual_total_floor_area", pl.Float64, 100.0),
            ("_actual_number_habitable_rooms", pl.Int16, 4),
        ]
    )
    epc_candidates = one_row_frame(
        [
            ("_direct_epc_row", pl.UInt32, 0),
            ("_direct_epc_match_address", pl.Utf8, "1 EXAMPLE ROAD"),
            ("_direct_epc_match_postcode", pl.Utf8, "BB11BB"),
            ("_direct_epc_east", pl.Float64, 1020.0),
            ("_direct_epc_north", pl.Float64, 1010.0),
            ("_direct_epc_canonical_property_type", pl.Utf8, "Terraced"),
            ("_direct_epc_address", pl.Utf8, "1, Example Road"),
            ("_direct_current_energy_rating", pl.Utf8, "C"),
            ("_direct_potential_energy_rating", pl.Utf8, "B"),
            ("_direct_total_floor_area", pl.Float64, 101.0),
            ("_direct_number_habitable_rooms", pl.Int16, 4),
            ("_direct_floor_height", pl.Float64, 2.5),
            ("_direct_construction_age_band", pl.UInt16, 1930),
            ("_direct_is_construction_date_approximate", pl.UInt8, 1),
            ("_direct_was_council_house", pl.Utf8, "No"),
        ]
    )

    matches = _match_direct_epc(listing_matches, epc_candidates)

    assert matches.height == 1
    assert matches["_listing_idx"].to_list() == [0]
    assert matches["_direct_epc_address"].to_list() == ["1, Example Road"]
|
||||
|
||||
|
||||
def test_integrate_listings_attaches_overlay_by_matched_property_key(tmp_path) -> None:
    """Only the property row with the listing's address receives the listing URL.

    The wide frame holds two properties in the same postcode. The sample
    listing is for "1 Example Road", so "9 Other Road" must keep a null
    overlay URL.
    """
    listings_path = tmp_path / "listings.parquet"
    arcgis_path = tmp_path / "arcgis.parquet"
    _sample_listings_frame().write_parquet(listings_path)
    _stub_arcgis(arcgis_path)

    wide_schema = {
        "postcode": pl.Utf8,
        "pp_address": pl.Utf8,
        "pp_property_type": pl.Utf8,
        "duration": pl.Utf8,
        "total_floor_area": pl.Float64,
        "number_habitable_rooms": pl.Int16,
        "latest_price": pl.Int64,
        "epc_address": pl.Utf8,
        "current_energy_rating": pl.Utf8,
        "potential_energy_rating": pl.Utf8,
        "floor_height": pl.Float64,
        "construction_age_band": pl.UInt16,
        "is_construction_date_approximate": pl.UInt8,
        "was_council_house": pl.Utf8,
    }
    wide_rows = {
        "postcode": ["SW1A 1AA", "SW1A 1AA"],
        "pp_address": ["9 Other Road", "1 Example Road"],
        "pp_property_type": ["Detached", "Terraced"],
        "duration": ["Freehold", "Freehold"],
        "total_floor_area": [80.0, 90.0],
        "number_habitable_rooms": [3, 4],
        "latest_price": [500_000, 600_000],
        "epc_address": [None, "1 Example Road"],
        "current_energy_rating": [None, "C"],
        "potential_energy_rating": [None, "B"],
        "floor_height": [None, 2.4],
        "construction_age_band": [None, 1930],
        "is_construction_date_approximate": [None, 1],
        "was_council_house": [None, "No"],
    }
    wide = pl.DataFrame(wide_rows, schema=wide_schema)

    lazy_result = _integrate_listings(
        wide.lazy(), listings_path, arcgis_path, epc_path=None
    )
    integrated = lazy_result.collect()

    address = pl.col("pp_address")
    matched = integrated.filter(address == "1 Example Road")
    other = integrated.filter(address == "9 Other Road")
    assert matched["_actual_listing_url"].to_list() == ["https://example.test/abc"]
    assert other["_actual_listing_url"].to_list() == [None]
|
||||
|
||||
|
||||
def test_integrate_listings_rejects_low_confidence_no_number_match(tmp_path) -> None:
    """A similar but different house-name address must not be matched.

    The listing is for "Rose Cottage High Street" and the only existing
    property is "Old Cottage High Street". The existing row must keep a null
    overlay URL, and the listing must show up as its own row with the URL.
    """
    listings_path = tmp_path / "listings.parquet"
    arcgis_path = tmp_path / "arcgis.parquet"

    listing_address = pl.lit("Rose Cottage High Street").alias(
        "Address per Property Register"
    )
    _sample_listings_frame().with_columns(
        listing_address,
    ).write_parquet(listings_path)
    _stub_arcgis(arcgis_path)

    wide_schema = {
        "postcode": pl.Utf8,
        "pp_address": pl.Utf8,
        "pp_property_type": pl.Utf8,
        "duration": pl.Utf8,
        "total_floor_area": pl.Float64,
        "number_habitable_rooms": pl.Int16,
        "latest_price": pl.Int64,
        "epc_address": pl.Utf8,
        "current_energy_rating": pl.Utf8,
        "potential_energy_rating": pl.Utf8,
        "floor_height": pl.Float64,
        "construction_age_band": pl.UInt16,
        "is_construction_date_approximate": pl.UInt8,
        "was_council_house": pl.Utf8,
    }
    wide_rows = {
        "postcode": ["SW1A 1AA"],
        "pp_address": ["Old Cottage High Street"],
        "pp_property_type": ["Terraced"],
        "duration": ["Freehold"],
        "total_floor_area": [120.0],
        "number_habitable_rooms": [4],
        "latest_price": [750_000],
        "epc_address": ["Old Cottage High Street"],
        "current_energy_rating": ["C"],
        "potential_energy_rating": ["B"],
        "floor_height": [2.4],
        "construction_age_band": [1930],
        "is_construction_date_approximate": [1],
        "was_council_house": ["No"],
    }
    wide = pl.DataFrame(wide_rows, schema=wide_schema)

    lazy_result = _integrate_listings(
        wide.lazy(), listings_path, arcgis_path, epc_path=None
    )
    integrated = lazy_result.collect()

    address = pl.col("pp_address")
    existing = integrated.filter(address == "Old Cottage High Street")
    seed = integrated.filter(address == "Rose Cottage High Street")
    assert existing["_actual_listing_url"].to_list() == [None]
    assert seed["_actual_listing_url"].to_list() == ["https://example.test/abc"]
|
||||
|
||||
|
||||
def test_finalize_listings_promotes_overlay_columns_and_filters_to_listing_rows() -> (
    None
):
    """Finalising moves ``_actual_*`` overlay values into the public columns.

    The input has two listing rows. Row 0 also has historical property data
    (EPC address, last transaction, last known price); row 1 has none.
    """
    schema = {
        "Postcode": pl.Utf8,
        "Address per Property Register": pl.Utf8,
        "Address per EPC": pl.Utf8,
        "Date of last transaction": pl.Float64,
        "lat": pl.Float64,
        "lon": pl.Float64,
        "Total floor area (sqm)": pl.Float64,
        "Number of bedrooms & living rooms": pl.Int16,
        "Property type": pl.Utf8,
        "Leasehold/Freehold": pl.Utf8,
        "Last known price": pl.Int64,
        "Street tree density percentile": pl.Float32,
        "_actual_listing_url": pl.Utf8,
        "_actual_asking_price": pl.Int64,
        "_actual_asking_price_per_sqm": pl.Int32,
        "_actual_listing_date": pl.Datetime("us"),
        "_actual_listing_status": pl.Utf8,
        "_actual_listing_features": pl.List(pl.Utf8),
        "_actual_bedrooms": pl.Int32,
        "_actual_bathrooms": pl.Int32,
        "_actual_price_qualifier": pl.Utf8,
        "_actual_property_sub_type": pl.Utf8,
        "_actual_lat": pl.Float64,
        "_actual_lon": pl.Float64,
        "_actual_total_floor_area": pl.Float64,
        "_actual_number_habitable_rooms": pl.Int16,
        "_actual_property_type": pl.Utf8,
        "_actual_leasehold_freehold": pl.Utf8,
    }
    rows = {
        "Postcode": ["SW1A 1AA", "SW1A 1AA"],
        "Address per Property Register": ["1 Example Road", "2 Example Road"],
        "Address per EPC": ["1 Example Road", None],
        "Date of last transaction": [1990.0, None],
        "lat": [51.5, 51.5],
        "lon": [-0.1, -0.1],
        "Total floor area (sqm)": [100.0, 95.0],
        "Number of bedrooms & living rooms": [3, None],
        "Property type": ["Terraced", None],
        "Leasehold/Freehold": ["Leasehold", None],
        "Last known price": [500_000, None],
        "Street tree density percentile": [42.0, 42.0],
        # Overlay columns: row 0 is a listing matched to a historical
        # property, row 1 is an unmatched listing.
        "_actual_listing_url": ["url0", "url1"],
        "_actual_asking_price": [600_000, 700_000],
        "_actual_asking_price_per_sqm": [5_000, None],
        "_actual_listing_date": [None, None],
        "_actual_listing_status": ["For sale", "For sale"],
        "_actual_listing_features": [["Garden"], ["Parking"]],
        "_actual_bedrooms": [3, 4],
        "_actual_bathrooms": [1, 2],
        "_actual_price_qualifier": ["", ""],
        "_actual_property_sub_type": ["Mid-Terrace", "End-Terrace"],
        "_actual_lat": [51.51, 51.52],
        "_actual_lon": [-0.11, -0.12],
        "_actual_total_floor_area": [110.0, None],
        "_actual_number_habitable_rooms": [4, 3],
        "_actual_property_type": ["Terraced", "Flats/Maisonettes"],
        "_actual_leasehold_freehold": ["Freehold", "Leasehold"],
    }
    df = pl.DataFrame(rows, schema=schema)

    finalized = _finalize_listings(df).sort("Address per Property Register")

    assert finalized.height == 2

    # Checked in order; the first mismatch fails the test.
    expected_by_column = {
        "Listing URL": ["url0", "url1"],
        "Asking price": [600_000, 700_000],
        "Asking price per sqm": [5_000, 7_368],
        "Est. price per sqm": [5_000, 7_368],
        "Estimated current price": [600_000, 700_000],
        "Last known price": [500_000, 700_000],
        # Listing's preferred floor area / rooms / property type / tenure.
        "Total floor area (sqm)": [110.0, 95.0],
        "Number of bedrooms & living rooms": [4, 3],
        "Property type": ["Terraced", "Flats/Maisonettes"],
        "Leasehold/Freehold": ["Freehold", "Leasehold"],
        # Postcode-level feature carried through to both matched and unmatched rows.
        "Street tree density percentile": [42.0, 42.0],
        # Match status reflects historical context availability.
        "Historical property match status": ["matched", "unmatched"],
    }
    for column, expected in expected_by_column.items():
        assert finalized[column].to_list() == expected

    # Overlay scaffolding is dropped.
    remaining_columns = finalized.columns
    for source_name, overlay_name, _dtype in _LISTING_OVERLAY_SOURCES:
        assert overlay_name not in remaining_columns, source_name
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue