This commit is contained in:
Andras Schmelczer 2026-05-28 21:48:35 +01:00
parent 39ef5c6646
commit c995f12f8b
78 changed files with 4830 additions and 1619 deletions

View file

@@ -105,6 +105,24 @@ def write_parquet(properties: list[dict], path: Path) -> None:
"lon": [p["lon"] for p in properties],
"lat": [p["lat"] for p in properties],
"Postcode": [normalize_postcode(p["Postcode"]) for p in properties],
"Postcode source": [p.get("Postcode source", "") for p in properties],
"Extracted postcode": [
normalize_postcode(p["Extracted postcode"])
if p.get("Extracted postcode")
else None
for p in properties
],
"Inferred postcode": [
normalize_postcode(p["Inferred postcode"])
if p.get("Inferred postcode")
else None
for p in properties
],
"Listing raw address": [
p.get("Listing raw address")
or p.get("Address per Property Register", "")
for p in properties
],
"Address per Property Register": [
p["Address per Property Register"] for p in properties
],
@@ -126,6 +144,10 @@ def write_parquet(properties: list[dict], path: Path) -> None:
"lon": pl.Float64,
"lat": pl.Float64,
"Postcode": pl.Utf8,
"Postcode source": pl.Utf8,
"Extracted postcode": pl.Utf8,
"Inferred postcode": pl.Utf8,
"Listing raw address": pl.Utf8,
"Address per Property Register": pl.Utf8,
"Leasehold/Freehold": pl.Utf8,
"Property type": pl.Utf8,