65 lines
2.5 KiB
Python
65 lines
2.5 KiB
Python
import logging
|
|
from pathlib import Path
|
|
|
|
import polars as pl
|
|
|
|
# Module-level logger, registered under the fixed name "rightmove".
log = logging.getLogger("rightmove")
|
|
|
|
|
|
def write_parquet(properties: list[dict], path: Path) -> None:
    """Write a list of property records to a parquet file using Polars.

    Every record must contain each column named in the schema below; a
    record lacking one raises ``KeyError``. Keys that are not part of the
    schema are never read. When ``properties`` is empty, a warning is
    logged and no file is written.

    Args:
        properties: Property records, one dict per output row.
        path: Destination of the parquet file.

    Raises:
        KeyError: If any record is missing one of the schema columns.
    """
    if not properties:
        log.warning("No properties to write to %s", path)
        return

    # Single source of truth for column names, column order and dtypes.
    # The data columns are derived from these keys, so the data and the
    # schema cannot drift apart when a column is added or removed.
    schema = {
        "id": pl.Int64,
        "bedrooms": pl.Int32,
        "bathrooms": pl.Int32,
        "total_rooms": pl.Int32,
        "longitude": pl.Float64,
        "latitude": pl.Float64,
        "postcode": pl.Utf8,
        "address": pl.Utf8,
        "tenure": pl.Utf8,
        "property_type": pl.Utf8,
        "property_sub_type": pl.Utf8,
        "price": pl.Int64,
        "price_frequency": pl.Utf8,
        "price_qualifier": pl.Utf8,
        "floorspace_sqm": pl.Float64,
        "url": pl.Utf8,
        "features": pl.List(pl.Utf8),
        "first_visible_date": pl.Utf8,
        "update_date": pl.Utf8,
        "outcode": pl.Utf8,
        "house_share": pl.Boolean,
    }

    # Strict indexing (record[name], not .get) is deliberate: a record that
    # lacks a column should fail loudly instead of silently writing nulls.
    columns = {
        name: [record[name] for record in properties]
        for name in schema
    }

    df = pl.DataFrame(columns, schema=schema)
    df.write_parquet(path)
    log.info("Wrote %d properties to %s", len(df), path)
|