129 lines
3.9 KiB
Python
129 lines
3.9 KiB
Python
"""Extract park and water body polygons from OSM PBF for postcode boundary trimming.
|
|
|
|
Uses pyosmium's FileProcessor with area assembly to convert OSM ways/relations
|
|
into Shapely polygons, reprojects to BNG (EPSG:27700), and saves as parquet.
|
|
|
|
Reuses the same england-latest.osm.pbf as pois.py.
|
|
"""
|
|
|
|
import argparse
|
|
from pathlib import Path
|
|
|
|
import osmium
|
|
import polars as pl
|
|
from pyproj import Transformer
|
|
from shapely import wkb
|
|
from shapely.geometry import MultiPolygon, Polygon
|
|
from tqdm import tqdm
|
|
|
|
|
|
MIN_AREA_SQM = 5_000 # ~70m x 70m — skip pocket parks and small ponds
|
|
|
|
# OSM tags that indicate non-residential green/water areas
|
|
GREENSPACE_TAGS = {
|
|
"leisure": {"park", "nature_reserve", "garden", "common"},
|
|
"landuse": {"forest"},
|
|
"natural": {"wood", "water", "wetland"},
|
|
"waterway": {"riverbank"},
|
|
}
|
|
|
|
_to_bng = Transformer.from_crs("EPSG:4326", "EPSG:27700", always_xy=True)
|
|
|
|
|
|
def _to_bng_polygon(geom):
|
|
"""Reproject a WGS84 Shapely geometry to BNG (EPSG:27700)."""
|
|
|
|
def transform_ring(coords):
|
|
xs, ys = zip(*coords)
|
|
bng_x, bng_y = _to_bng.transform(list(xs), list(ys))
|
|
return list(zip(bng_x, bng_y))
|
|
|
|
if geom.geom_type == "Polygon":
|
|
exterior = transform_ring(geom.exterior.coords)
|
|
holes = [transform_ring(h.coords) for h in geom.interiors]
|
|
return Polygon(exterior, holes)
|
|
elif geom.geom_type == "MultiPolygon":
|
|
parts = []
|
|
for p in geom.geoms:
|
|
exterior = transform_ring(p.exterior.coords)
|
|
holes = [transform_ring(h.coords) for h in p.interiors]
|
|
parts.append(Polygon(exterior, holes))
|
|
return MultiPolygon(parts)
|
|
return geom
|
|
|
|
|
|
def _matches_tags(tags):
|
|
"""Check if an OSM element's tags match our greenspace/water criteria."""
|
|
for key, values in GREENSPACE_TAGS.items():
|
|
if key in tags and tags[key] in values:
|
|
return True
|
|
return False
|
|
|
|
|
|
class GreenspaceHandler(osmium.SimpleHandler):
|
|
"""Collects area geometries matching greenspace/water tags."""
|
|
|
|
def __init__(self, progress):
|
|
super().__init__()
|
|
self._wkb_factory = osmium.geom.WKBFactory()
|
|
self._progress = progress
|
|
self.geometries = []
|
|
|
|
def area(self, a):
|
|
self._progress.update(1)
|
|
if not _matches_tags(a.tags):
|
|
return
|
|
try:
|
|
wkb_data = self._wkb_factory.create_multipolygon(a)
|
|
geom = wkb.loads(wkb_data, hex=True)
|
|
except Exception:
|
|
return
|
|
|
|
if geom.is_empty or not geom.is_valid:
|
|
return
|
|
|
|
# Reproject to BNG for area calculation
|
|
bng_geom = _to_bng_polygon(geom)
|
|
if bng_geom.area < MIN_AREA_SQM:
|
|
return
|
|
|
|
self.geometries.append(bng_geom.wkb)
|
|
|
|
|
|
def main():
|
|
parser = argparse.ArgumentParser(
|
|
description="Extract park/water polygons from OSM PBF"
|
|
)
|
|
parser.add_argument(
|
|
"--output", type=Path, required=True, help="Output parquet file path"
|
|
)
|
|
parser.add_argument(
|
|
"--pbf", type=Path, required=True, help="Path to existing PBF file"
|
|
)
|
|
args = parser.parse_args()
|
|
|
|
pbf_file = args.pbf
|
|
print("Extracting greenspace/water areas from PBF (two-pass area assembly)...")
|
|
with tqdm(
|
|
unit=" areas", unit_scale=True, desc="Processing", smoothing=0.05
|
|
) as progress:
|
|
handler = GreenspaceHandler(progress)
|
|
handler.apply_file(str(pbf_file), locations=True)
|
|
|
|
print(
|
|
f"Found {len(handler.geometries)} greenspace/water polygons >= {MIN_AREA_SQM} sqm"
|
|
)
|
|
|
|
# Merge overlapping geometries per 10km grid cell for efficiency
|
|
if handler.geometries:
|
|
# Save WKB geometries directly
|
|
df = pl.DataFrame({"geometry": handler.geometries})
|
|
args.output.parent.mkdir(parents=True, exist_ok=True)
|
|
df.write_parquet(args.output)
|
|
print(f"Saved to {args.output}")
|
|
else:
|
|
print("No geometries found — skipping output")
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|