Move transform logic around
This commit is contained in:
parent
e1b38a1b95
commit
38b0cf1ea1
14 changed files with 1073 additions and 336 deletions
|
|
@ -8,17 +8,35 @@ import osmium
|
|||
import polars as pl
|
||||
from tqdm import tqdm
|
||||
|
||||
from .config import (
|
||||
BATCH_SIZE,
|
||||
GEOFABRIK_GB_URL,
|
||||
MIN_OCCURENCE_COUNT,
|
||||
POI_TAG_KEYS,
|
||||
UK_BBOX_EAST,
|
||||
UK_BBOX_NORTH,
|
||||
UK_BBOX_SOUTH,
|
||||
UK_BBOX_WEST,
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
BATCH_SIZE = 50_000
|
||||
|
||||
MIN_OCCURENCE_COUNT = 20
|
||||
|
||||
GEOFABRIK_GB_URL = (
|
||||
"https://download.geofabrik.de/europe/great-britain-latest.osm.pbf"
|
||||
)
|
||||
|
||||
UK_BBOX_WEST = -7.57
|
||||
UK_BBOX_SOUTH = 49.96
|
||||
UK_BBOX_EAST = 1.68
|
||||
UK_BBOX_NORTH = 58.64
|
||||
|
||||
POI_TAG_KEYS: list[str] = [
|
||||
"amenity",
|
||||
"building",
|
||||
"craft",
|
||||
"emergency",
|
||||
"healthcare",
|
||||
"leisure",
|
||||
"office",
|
||||
"shop",
|
||||
"tourism",
|
||||
"public_transport",
|
||||
]
|
||||
|
||||
|
||||
|
||||
def download_pbf(pbf_file: Path) -> None:
|
||||
|
|
@ -144,10 +162,9 @@ def main() -> None:
|
|||
)
|
||||
df = df.join(valid_categories.select("category"), on="category", how="semi")
|
||||
|
||||
args.output.parent.mkdir(parents=True, exist_ok=True)
|
||||
print(f"Total POIs: {handler.poi_count:,}")
|
||||
df.sink_parquet(args.output)
|
||||
print(f"Saved to {args.output}")
|
||||
print(f"Total POIs: {handler.poi_count:,}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
@ -1,32 +0,0 @@
|
|||
from pathlib import Path
|
||||
|
||||
DATA_DIR = Path("./data_sources")
|
||||
GB_PBF_FILE = DATA_DIR / "great-britain-latest.osm.pbf"
|
||||
OUTPUT_FILE = DATA_DIR / "uk_pois.parquet"
|
||||
|
||||
|
||||
BATCH_SIZE = 50_000
|
||||
|
||||
MIN_OCCURENCE_COUNT = 20
|
||||
|
||||
GEOFABRIK_GB_URL = (
|
||||
"https://download.geofabrik.de/europe/great-britain-latest.osm.pbf"
|
||||
)
|
||||
|
||||
UK_BBOX_WEST = -7.57
|
||||
UK_BBOX_SOUTH = 49.96
|
||||
UK_BBOX_EAST = 1.68
|
||||
UK_BBOX_NORTH = 58.64
|
||||
|
||||
POI_TAG_KEYS: list[str] = [
|
||||
"amenity",
|
||||
"building",
|
||||
"craft",
|
||||
"emergency",
|
||||
"healthcare",
|
||||
"leisure",
|
||||
"office",
|
||||
"shop",
|
||||
"tourism",
|
||||
"public_transport",
|
||||
]
|
||||
Loading…
Add table
Add a link
Reference in a new issue