Format python
This commit is contained in:
parent
85f5770e09
commit
4c258018c3
17 changed files with 348 additions and 248 deletions
|
|
@@ -35,7 +35,6 @@ def download_with_progress(url: str, output_path: Path) -> None:
|
|||
return
|
||||
|
||||
|
||||
|
||||
def extract_zip(zip_path: Path, extract_path: Path) -> None:
|
||||
extract_path.mkdir(exist_ok=True)
|
||||
|
||||
|
|
@@ -44,7 +43,7 @@ def extract_zip(zip_path: Path, extract_path: Path) -> None:
|
|||
|
||||
|
||||
def convert_to_parquet(data_path: Path, parquet_path: Path) -> None:
|
||||
df = pl.scan_csv(data_path / 'Data/NSPL_MAY_2025_UK.csv', try_parse_dates=True)
|
||||
df = pl.scan_csv(data_path / "Data/NSPL_MAY_2025_UK.csv", try_parse_dates=True)
|
||||
print(f"Columns: {df.collect_schema().names()}")
|
||||
parquet_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
df.sink_parquet(parquet_path, compression="zstd")
|
||||
|
|
@@ -52,8 +51,12 @@ def convert_to_parquet(data_path: Path, parquet_path: Path) -> None:
|
|||
|
||||
|
||||
def main() -> None:
|
||||
parser = argparse.ArgumentParser(description="Download and convert ArcGIS postcode data")
|
||||
parser.add_argument("--output", type=Path, required=True, help="Output parquet file path")
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Download and convert ArcGIS postcode data"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output", type=Path, required=True, help="Output parquet file path"
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
with tempfile.TemporaryDirectory() as cache_dir:
|
||||
|
|
@@ -64,5 +67,6 @@ def main() -> None:
|
|||
extract_zip(download_path, extract_path)
|
||||
convert_to_parquet(extract_path, args.output)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
|
|||
|
|
@@ -41,8 +41,12 @@ def convert_to_parquet(xlsx_path: Path, parquet_path: Path) -> None:
|
|||
|
||||
|
||||
def main() -> None:
|
||||
parser = argparse.ArgumentParser(description="Download and convert Index of Deprivation data")
|
||||
parser.add_argument("--output", type=Path, required=True, help="Output parquet file path")
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Download and convert Index of Deprivation data"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output", type=Path, required=True, help="Output parquet file path"
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
with tempfile.TemporaryDirectory() as cache_dir:
|
||||
|
|
|
|||
|
|
@@ -8,16 +8,12 @@ import osmium
|
|||
import polars as pl
|
||||
from tqdm import tqdm
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
BATCH_SIZE = 50_000
|
||||
|
||||
MIN_OCCURENCE_COUNT = 20
|
||||
|
||||
GEOFABRIK_GB_URL = (
|
||||
"https://download.geofabrik.de/europe/great-britain-latest.osm.pbf"
|
||||
)
|
||||
GEOFABRIK_GB_URL = "https://download.geofabrik.de/europe/great-britain-latest.osm.pbf"
|
||||
|
||||
UK_BBOX_WEST = -7.57
|
||||
UK_BBOX_SOUTH = 49.96
|
||||
|
|
@@ -38,7 +34,6 @@ POI_TAG_KEYS: list[str] = [
|
|||
]
|
||||
|
||||
|
||||
|
||||
def download_pbf(pbf_file: Path) -> None:
|
||||
pbf_file.parent.mkdir(parents=True, exist_ok=True)
|
||||
tmp = pbf_file.with_suffix(".pbf.tmp")
|
||||
|
|
@@ -91,7 +86,12 @@ class POIHandler(osmium.SimpleHandler):
|
|||
self._batch.clear()
|
||||
|
||||
def _add_poi(
|
||||
self, osm_id: str, tags: osmium.osm.TagList, category: str, lat: float, lng: float
|
||||
self,
|
||||
osm_id: str,
|
||||
tags: osmium.osm.TagList,
|
||||
category: str,
|
||||
lat: float,
|
||||
lng: float,
|
||||
) -> None:
|
||||
self._batch.append(
|
||||
{
|
||||
|
|
@@ -123,8 +123,12 @@ class POIHandler(osmium.SimpleHandler):
|
|||
|
||||
|
||||
def main() -> None:
|
||||
parser = argparse.ArgumentParser(description="Download and extract POIs from OpenStreetMap")
|
||||
parser.add_argument("--output", type=Path, required=True, help="Output parquet file path")
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Download and extract POIs from OpenStreetMap"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output", type=Path, required=True, help="Output parquet file path"
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
with tempfile.TemporaryDirectory() as cache_dir:
|
||||
|
|
|
|||
|
|
@@ -73,8 +73,12 @@ def convert_to_parquet(csv_path: Path, parquet_path: Path) -> None:
|
|||
|
||||
|
||||
def main() -> None:
|
||||
parser = argparse.ArgumentParser(description="Download and convert Land Registry price-paid data")
|
||||
parser.add_argument("--output", type=Path, required=True, help="Output parquet file path")
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Download and convert Land Registry price-paid data"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output", type=Path, required=True, help="Output parquet file path"
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
with tempfile.TemporaryDirectory() as cache_dir:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue