More
This commit is contained in:
parent
128b3191e7
commit
03445188ea
54 changed files with 596953 additions and 3577 deletions
65
finder/storage.py
Normal file
65
finder/storage.py
Normal file
|
|
@ -0,0 +1,65 @@
|
|||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
import polars as pl
|
||||
|
||||
# Module-level logger; records from this file are emitted under the "rightmove" logger name.
log = logging.getLogger("rightmove")
|
||||
|
||||
|
||||
def write_parquet(properties: list[dict], path: Path) -> None:
    """Persist property records to a Parquet file via Polars.

    Every record in ``properties`` is indexed by each column name declared
    in the schema below, so a record lacking one of those keys raises
    ``KeyError``. When ``properties`` is empty, a warning is logged and
    nothing is written.
    """
    if not properties:
        log.warning("No properties to write to %s", path)
        return

    # Column name -> Polars dtype. Declared once so the column data below
    # is derived from the same names, in the same order.
    schema = {
        "id": pl.Int64,
        "bedrooms": pl.Int32,
        "bathrooms": pl.Int32,
        "total_rooms": pl.Int32,
        "longitude": pl.Float64,
        "latitude": pl.Float64,
        "postcode": pl.Utf8,
        "address": pl.Utf8,
        "tenure": pl.Utf8,
        "property_type": pl.Utf8,
        "property_sub_type": pl.Utf8,
        "price": pl.Int64,
        "price_frequency": pl.Utf8,
        "price_qualifier": pl.Utf8,
        "floorspace_sqm": pl.Float64,
        "url": pl.Utf8,
        "features": pl.List(pl.Utf8),
        "first_visible_date": pl.Utf8,
        "update_date": pl.Utf8,
        "outcode": pl.Utf8,
        "house_share": pl.Boolean,
    }

    # Pivot the row-oriented records into one list of values per column.
    columns = {
        name: [record[name] for record in properties]
        for name in schema
    }

    frame = pl.DataFrame(columns, schema=schema)
    frame.write_parquet(path)
    log.info("Wrote %d properties to %s", len(frame), path)
|
||||
Loading…
Add table
Add a link
Reference in a new issue