54 lines
1.5 KiB
Python
54 lines
1.5 KiB
Python
"""Download POI data for the UK from Overture Maps."""
|
|
|
|
from pathlib import Path
|
|
|
|
import overturemaps
|
|
import pyarrow as pa
|
|
import pyarrow.parquet as pq
|
|
from tqdm import tqdm
|
|
|
|
# UK bounding box (west, south, east, north).
# Passed unchanged as the ``bbox`` argument of the Overture reader in main().
UK_BBOX = (-8.65, 49.86, 1.77, 60.86)

# Directory that receives the download (relative to the current working
# directory) and the Parquet file written inside it.
OUTPUT_DIR = Path("data_sources")
OUTPUT_FILE = OUTPUT_DIR / "uk_pois.parquet"
|
|
|
|
|
|
def main():
    """Download the "place" records inside ``UK_BBOX`` and save them as Parquet.

    Creates ``OUTPUT_DIR`` if needed, reads every record batch returned by
    ``overturemaps.record_batch_reader("place", bbox=UK_BBOX)``, combines the
    batches into one table and writes it to ``OUTPUT_FILE``.

    Nothing is downloaded when ``OUTPUT_FILE`` already exists, and nothing is
    written when the reader yields no batches.  Progress and results are
    reported with ``print``; the function returns ``None``.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    if OUTPUT_FILE.exists():
        print(f"POI file already exists: {OUTPUT_FILE}")
        print("Delete it manually to re-download.")
        return

    print("Downloading UK POI data from Overture Maps...")
    print(f"Bounding box: {UK_BBOX}")
    print("This may take several minutes...")

    reader = overturemaps.record_batch_reader("place", bbox=UK_BBOX)

    # Read all batches, keeping a running row count so the progress bar does
    # not have to re-sum every batch collected so far on each iteration.
    batches = []
    total_rows = 0
    with tqdm(desc="Downloading batches", unit=" batches") as pbar:
        for batch in reader:
            batches.append(batch)
            total_rows += batch.num_rows
            pbar.update(1)
            pbar.set_postfix(rows=total_rows)

    if not batches:
        print("No data found in bounding box!")
        return

    # Combine batches into a table and write
    table = pa.Table.from_batches(batches, schema=reader.schema)

    print(f"\nWriting {table.num_rows:,} POIs to {OUTPUT_FILE}...")

    # Write to a temporary sibling and rename it into place: an interrupted
    # write must not leave a partial OUTPUT_FILE behind, because the
    # existence check above would treat it as a finished download.
    tmp_file = OUTPUT_FILE.with_name(OUTPUT_FILE.name + ".tmp")
    try:
        pq.write_table(table, tmp_file)
        tmp_file.replace(OUTPUT_FILE)
    finally:
        # After a successful rename the temp path no longer exists; this only
        # removes leftovers from a failed or interrupted write.
        if tmp_file.exists():
            tmp_file.unlink()

    print(f"Download complete: {OUTPUT_FILE}")
    print(f"File size: {OUTPUT_FILE.stat().st_size / 1024 / 1024:.1f} MB")
|
|
|
|
|
|
# Run the download only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|