scraping and data
This commit is contained in:
parent
d98819b569
commit
8688b7475e
43 changed files with 4920 additions and 531 deletions
|
|
@ -26,6 +26,7 @@ MIN_PRICE = 50_000
|
|||
EPC_SOURCE_COLUMNS = [
|
||||
"address",
|
||||
"postcode",
|
||||
"uprn",
|
||||
"current_energy_rating",
|
||||
"potential_energy_rating",
|
||||
"property_type",
|
||||
|
|
@ -57,6 +58,8 @@ def _select_epc_columns(raw: pl.LazyFrame) -> pl.LazyFrame:
|
|||
raw.select(
|
||||
_clean_string("address").alias("epc_address"),
|
||||
_clean_string("postcode").str.to_uppercase().alias("epc_postcode"),
|
||||
# UPRN keys an exact listing->EPC join downstream (~99% populated).
|
||||
_clean_string("uprn").alias("uprn"),
|
||||
_clean_string("current_energy_rating")
|
||||
.str.to_uppercase()
|
||||
.alias("current_energy_rating"),
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue