"""Download POI data for the UK from Overture Maps."""

from pathlib import Path

import overturemaps
import pyarrow as pa
import pyarrow.parquet as pq
from tqdm import tqdm

# UK bounding box (west, south, east, north)
UK_BBOX = (-8.65, 49.86, 1.77, 60.86)

OUTPUT_DIR = Path("data_sources")
OUTPUT_FILE = OUTPUT_DIR / "uk_pois.parquet"


def _read_all_batches(reader) -> list:
    """Drain *reader* into a list of record batches, showing progress.

    The progress bar's postfix shows the cumulative row count, tracked
    with a running total so earlier batches are not re-summed on every
    iteration.
    """
    batches = []
    total_rows = 0
    with tqdm(desc="Downloading batches", unit=" batches") as pbar:
        for batch in reader:
            batches.append(batch)
            total_rows += batch.num_rows
            pbar.update(1)
            pbar.set_postfix(rows=total_rows)
    return batches


def _write_table_atomically(table, destination: Path) -> None:
    """Write *table* as Parquet to *destination* via a temporary file.

    The data is first written to a sibling ``<name>.tmp`` file and only
    renamed onto *destination* once the write has finished. An interrupted
    or failed write therefore never leaves a truncated file at
    *destination*, which ``main`` would otherwise mistake for a completed
    download on the next run.
    """
    # Same directory as the destination so the rename stays on one filesystem.
    tmp_path = destination.with_name(destination.name + ".tmp")
    try:
        pq.write_table(table, tmp_path)
        tmp_path.replace(destination)
    finally:
        # After a successful replace the temp file is gone; it only still
        # exists here if the write or the rename failed part-way.
        if tmp_path.exists():
            tmp_path.unlink()


def main() -> None:
    """Download all Overture "place" records inside UK_BBOX to OUTPUT_FILE.

    Does nothing (apart from printing a notice) when OUTPUT_FILE already
    exists, and writes no file when the reader yields no batches.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    if OUTPUT_FILE.exists():
        print(f"POI file already exists: {OUTPUT_FILE}")
        print("Delete it manually to re-download.")
        return

    print("Downloading UK POI data from Overture Maps...")
    print(f"Bounding box: {UK_BBOX}")
    print("This may take several minutes...")

    reader = overturemaps.record_batch_reader("place", bbox=UK_BBOX)

    # Read all batches
    batches = _read_all_batches(reader)

    if not batches:
        print("No data found in bounding box!")
        return

    # Combine batches into a table and write
    table = pa.Table.from_batches(batches, schema=reader.schema)

    print(f"\nWriting {table.num_rows:,} POIs to {OUTPUT_FILE}...")
    _write_table_atomically(table, OUTPUT_FILE)

    print(f"Download complete: {OUTPUT_FILE}")
    print(f"File size: {OUTPUT_FILE.stat().st_size / 1024 / 1024:.1f} MB")


if __name__ == "__main__":
    main()