scraping and data

This commit is contained in:
Andras Schmelczer 2026-05-31 15:36:33 +01:00
parent d98819b569
commit 8688b7475e
43 changed files with 4920 additions and 531 deletions

View file

@@ -26,6 +26,7 @@ MIN_PRICE = 50_000
EPC_SOURCE_COLUMNS = [
"address",
"postcode",
"uprn",
"current_energy_rating",
"potential_energy_rating",
"property_type",
@@ -57,6 +58,8 @@ def _select_epc_columns(raw: pl.LazyFrame) -> pl.LazyFrame:
raw.select(
_clean_string("address").alias("epc_address"),
_clean_string("postcode").str.to_uppercase().alias("epc_postcode"),
# UPRN keys an exact listing->EPC join downstream (~99% populated).
_clean_string("uprn").alias("uprn"),
_clean_string("current_energy_rating")
.str.to_uppercase()
.alias("current_energy_rating"),