Format python

This commit is contained in:
Andras Schmelczer 2026-01-31 13:07:09 +00:00
parent 85f5770e09
commit 4c258018c3
17 changed files with 348 additions and 248 deletions

View file

@@ -35,7 +35,6 @@ def download_with_progress(url: str, output_path: Path) -> None:
return
def extract_zip(zip_path: Path, extract_path: Path) -> None:
extract_path.mkdir(exist_ok=True)
@@ -44,7 +43,7 @@ def extract_zip(zip_path: Path, extract_path: Path) -> None:
def convert_to_parquet(data_path: Path, parquet_path: Path) -> None:
df = pl.scan_csv(data_path / 'Data/NSPL_MAY_2025_UK.csv', try_parse_dates=True)
df = pl.scan_csv(data_path / "Data/NSPL_MAY_2025_UK.csv", try_parse_dates=True)
print(f"Columns: {df.collect_schema().names()}")
parquet_path.parent.mkdir(parents=True, exist_ok=True)
df.sink_parquet(parquet_path, compression="zstd")
@@ -52,8 +51,12 @@ def convert_to_parquet(data_path: Path, parquet_path: Path) -> None:
def main() -> None:
parser = argparse.ArgumentParser(description="Download and convert ArcGIS postcode data")
parser.add_argument("--output", type=Path, required=True, help="Output parquet file path")
parser = argparse.ArgumentParser(
description="Download and convert ArcGIS postcode data"
)
parser.add_argument(
"--output", type=Path, required=True, help="Output parquet file path"
)
args = parser.parse_args()
with tempfile.TemporaryDirectory() as cache_dir:
@@ -64,5 +67,6 @@ def main() -> None:
extract_zip(download_path, extract_path)
convert_to_parquet(extract_path, args.output)
if __name__ == "__main__":
main()

View file

@@ -41,8 +41,12 @@ def convert_to_parquet(xlsx_path: Path, parquet_path: Path) -> None:
def main() -> None:
parser = argparse.ArgumentParser(description="Download and convert Index of Deprivation data")
parser.add_argument("--output", type=Path, required=True, help="Output parquet file path")
parser = argparse.ArgumentParser(
description="Download and convert Index of Deprivation data"
)
parser.add_argument(
"--output", type=Path, required=True, help="Output parquet file path"
)
args = parser.parse_args()
with tempfile.TemporaryDirectory() as cache_dir:

View file

@@ -8,16 +8,12 @@ import osmium
import polars as pl
from tqdm import tqdm
from pathlib import Path
BATCH_SIZE = 50_000
MIN_OCCURENCE_COUNT = 20
GEOFABRIK_GB_URL = (
"https://download.geofabrik.de/europe/great-britain-latest.osm.pbf"
)
GEOFABRIK_GB_URL = "https://download.geofabrik.de/europe/great-britain-latest.osm.pbf"
UK_BBOX_WEST = -7.57
UK_BBOX_SOUTH = 49.96
@@ -38,7 +34,6 @@ POI_TAG_KEYS: list[str] = [
]
def download_pbf(pbf_file: Path) -> None:
pbf_file.parent.mkdir(parents=True, exist_ok=True)
tmp = pbf_file.with_suffix(".pbf.tmp")
@@ -91,7 +86,12 @@ class POIHandler(osmium.SimpleHandler):
self._batch.clear()
def _add_poi(
self, osm_id: str, tags: osmium.osm.TagList, category: str, lat: float, lng: float
self,
osm_id: str,
tags: osmium.osm.TagList,
category: str,
lat: float,
lng: float,
) -> None:
self._batch.append(
{
@@ -123,8 +123,12 @@ class POIHandler(osmium.SimpleHandler):
def main() -> None:
parser = argparse.ArgumentParser(description="Download and extract POIs from OpenStreetMap")
parser.add_argument("--output", type=Path, required=True, help="Output parquet file path")
parser = argparse.ArgumentParser(
description="Download and extract POIs from OpenStreetMap"
)
parser.add_argument(
"--output", type=Path, required=True, help="Output parquet file path"
)
args = parser.parse_args()
with tempfile.TemporaryDirectory() as cache_dir:

View file

@@ -73,8 +73,12 @@ def convert_to_parquet(csv_path: Path, parquet_path: Path) -> None:
def main() -> None:
parser = argparse.ArgumentParser(description="Download and convert Land Registry price-paid data")
parser.add_argument("--output", type=Path, required=True, help="Output parquet file path")
parser = argparse.ArgumentParser(
description="Download and convert Land Registry price-paid data"
)
parser.add_argument(
"--output", type=Path, required=True, help="Output parquet file path"
)
args = parser.parse_args()
with tempfile.TemporaryDirectory() as cache_dir: