import logging
from pathlib import Path

import polars as pl

log = logging.getLogger("rightmove")

# Output column name -> Polars dtype, in the order the columns are written.
# The same mapping drives both the column extraction and the frame schema,
# so the two can never drift apart.
_PROPERTY_SCHEMA = {
    "id": pl.Int64,
    "bedrooms": pl.Int32,
    "bathrooms": pl.Int32,
    "total_rooms": pl.Int32,
    "longitude": pl.Float64,
    "latitude": pl.Float64,
    "postcode": pl.Utf8,
    "address": pl.Utf8,
    "tenure": pl.Utf8,
    "property_type": pl.Utf8,
    "property_sub_type": pl.Utf8,
    "price": pl.Int64,
    "price_frequency": pl.Utf8,
    "price_qualifier": pl.Utf8,
    "floorspace_sqm": pl.Float64,
    "url": pl.Utf8,
    "features": pl.List(pl.Utf8),
    "first_visible_date": pl.Utf8,
    "update_date": pl.Utf8,
    "outcode": pl.Utf8,
    "house_share": pl.Boolean,
}


def write_parquet(properties: list[dict], path: Path) -> None:
    """Serialise property records to a parquet file via Polars.

    Each record contributes one row; the columns and their dtypes are
    those listed in ``_PROPERTY_SCHEMA``. When ``properties`` is empty a
    warning is logged and nothing is written.

    Args:
        properties: Records to write. Every record is indexed by each
            column name in ``_PROPERTY_SCHEMA``.
        path: Destination passed to ``DataFrame.write_parquet``.

    Raises:
        KeyError: If a record lacks one of the schema's column names.
    """
    if not properties:
        log.warning("No properties to write to %s", path)
        return

    # Pivot the row-oriented records into one list per column, walking the
    # columns in schema order.
    columns = {
        name: [record[name] for record in properties]
        for name in _PROPERTY_SCHEMA
    }
    frame = pl.DataFrame(columns, schema=_PROPERTY_SCHEMA)

    frame.write_parquet(path)
    log.info("Wrote %d properties to %s", len(frame), path)