# perfect-postcode/download_pois.py
#
# 54 lines
# 1.5 KiB
# Python

"""Download POI data for the UK from Overture Maps."""
from pathlib import Path
import overturemaps
import pyarrow as pa
import pyarrow.parquet as pq
from tqdm import tqdm
# Extent of the UK passed to Overture Maps, ordered (west, south, east, north).
UK_BBOX = (-8.65, 49.86, 1.77, 60.86)

# Directory and Parquet file that receive the downloaded POIs.
OUTPUT_DIR = Path("data_sources")
OUTPUT_FILE = OUTPUT_DIR.joinpath("uk_pois.parquet")
def main():
    """Download UK places from Overture Maps and save them as one Parquet file.

    Creates ``OUTPUT_DIR`` if needed. If ``OUTPUT_FILE`` already exists, a
    notice is printed and nothing is downloaded. Otherwise every record batch
    of the ``"place"`` type inside ``UK_BBOX`` is read, the batches are
    combined into a single table, and the table is written to ``OUTPUT_FILE``.

    The table is written to a temporary sibling file first and renamed into
    place only after the write has succeeded, so a failed or interrupted run
    does not leave a truncated ``OUTPUT_FILE`` that the existence check above
    would mistake for a finished download on the next run.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    if OUTPUT_FILE.exists():
        print(f"POI file already exists: {OUTPUT_FILE}")
        print("Delete it manually to re-download.")
        return

    print("Downloading UK POI data from Overture Maps...")
    print(f"Bounding box: {UK_BBOX}")
    print("This may take several minutes...")

    reader = overturemaps.record_batch_reader("place", bbox=UK_BBOX)

    # Read all batches, keeping a running row count so the progress display
    # does not re-sum every collected batch on each iteration.
    batches = []
    total_rows = 0
    with tqdm(desc="Downloading batches", unit=" batches") as pbar:
        for batch in reader:
            batches.append(batch)
            total_rows += batch.num_rows
            pbar.update(1)
            pbar.set_postfix(rows=total_rows)

    if not batches:
        print("No data found in bounding box!")
        return

    # Combine batches into a table and write
    table = pa.Table.from_batches(batches, schema=reader.schema)
    print(f"\nWriting {table.num_rows:,} POIs to {OUTPUT_FILE}...")

    # Write next to the target, then rename, so OUTPUT_FILE only ever appears
    # once it is complete.
    tmp_file = OUTPUT_FILE.with_name(OUTPUT_FILE.name + ".part")
    try:
        pq.write_table(table, tmp_file)
        tmp_file.replace(OUTPUT_FILE)
    finally:
        # After a successful rename the temp file is gone; this only removes
        # the leftovers of a failed or interrupted write.
        if tmp_file.exists():
            tmp_file.unlink()

    print(f"Download complete: {OUTPUT_FILE}")
    print(f"File size: {OUTPUT_FILE.stat().st_size / 1024 / 1024:.1f} MB")
# Run the download only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()