has issues
This commit is contained in:
parent
2e112d7398
commit
c645b0f1d4
96 changed files with 2147083 additions and 5787 deletions
|
|
@ -182,6 +182,19 @@ DROP_CATEGORIES = {
|
|||
"public_transport/platform",
|
||||
"public_transport/station",
|
||||
"public_transport/stop_position",
|
||||
# Education amenities — schools come from GIAS instead. OSM coverage for
|
||||
# tertiary education, tutoring, and childcare is too noisy/incomplete to be
|
||||
# useful on a property-search map.
|
||||
"amenity/school",
|
||||
"amenity/prep_school",
|
||||
"amenity/language_school",
|
||||
"amenity/music_school",
|
||||
"amenity/university",
|
||||
"amenity/college",
|
||||
"building/university",
|
||||
"amenity/kindergarten",
|
||||
"amenity/childcare",
|
||||
"office/tutoring",
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -943,23 +956,10 @@ _CATEGORIES: list[tuple[str, str, str, list[str]]] = [
|
|||
"tourism/chalet",
|
||||
],
|
||||
),
|
||||
(
|
||||
"Education",
|
||||
"School",
|
||||
"🏫",
|
||||
[
|
||||
"amenity/school",
|
||||
"amenity/prep_school",
|
||||
"amenity/language_school",
|
||||
"amenity/music_school",
|
||||
"amenity/university",
|
||||
"amenity/college",
|
||||
"building/university",
|
||||
"amenity/kindergarten",
|
||||
"amenity/childcare",
|
||||
"office/tutoring",
|
||||
],
|
||||
),
|
||||
# Note: schools come from the GIAS register (see transform_gias_schools).
|
||||
# Niche/tertiary education amenities that GIAS does not cover are dropped
|
||||
# rather than mixed in with state-funded schools.
|
||||
|
||||
(
|
||||
"Local Businesses",
|
||||
"Hotel",
|
||||
|
|
@ -1316,11 +1316,45 @@ def transform_grocery_retail_points(
|
|||
).select("id", "name", "category", "icon_category", "group", "lat", "lng", "emoji")
|
||||
|
||||
|
||||
def transform_gias_schools(gias_path: Path) -> pl.LazyFrame:
    """Lazily project the GIAS register parquet onto the POI row layout.

    Args:
        gias_path: Parquet file containing the GIAS schools register.

    Returns:
        A LazyFrame whose leading columns are ``id``, ``name``, ``category``,
        ``icon_category``, ``group``, ``lat``, ``lng`` and ``emoji``, followed
        by the register's metadata columns renamed with a ``school_`` prefix.
    """

    def _school_column(source: str, dtype=None) -> pl.Expr:
        # Expose a register column as ``school_<source>``; when a dtype is
        # given the cast is non-strict.
        expr = pl.col(source)
        if dtype is not None:
            expr = expr.cast(dtype, strict=False)
        return expr.alias(f"school_{source}")

    # Every GIAS row gets the same fixed category/group/emoji; the id is the
    # URN rendered as a string behind a "gias-" prefix.
    poi_columns = [
        pl.concat_str([pl.lit("gias-"), pl.col("urn").cast(pl.String)]).alias("id"),
        pl.col("name"),
        pl.lit("School").alias("category"),
        pl.lit("School").alias("icon_category"),
        pl.lit("Education").alias("group"),
        pl.col("lat").cast(pl.Float64),
        pl.col("lng").cast(pl.Float64),
        pl.lit("🏫").alias("emoji"),
    ]

    # (register column, optional target dtype) in output order.
    metadata_spec = [
        ("phase", None),
        ("type", None),
        ("type_group", None),
        ("age_range", None),
        ("gender", None),
        ("religious_character", None),
        ("admissions_policy", None),
        ("nursery_provision", None),
        ("sixth_form", None),
        ("capacity", pl.Int32),
        ("pupils", pl.Int32),
        ("fsm_percent", pl.Float32),
        ("trust", None),
        ("address", None),
        ("postcode", None),
        ("local_authority", None),
        ("website", None),
        ("telephone", pl.String),
        ("head_name", None),
    ]
    metadata_columns = [
        _school_column(source, dtype) for source, dtype in metadata_spec
    ]

    register = pl.scan_parquet(gias_path)
    return register.select(poi_columns + metadata_columns)
|
||||
|
||||
|
||||
def transform(
|
||||
input_path: Path,
|
||||
naptan_path: Path | None = None,
|
||||
boundary_path: Path | None = None,
|
||||
grocery_retail_points_path: Path | None = None,
|
||||
naptan_path: Path,
|
||||
boundary_path: Path,
|
||||
grocery_retail_points_path: Path,
|
||||
gias_path: Path,
|
||||
) -> pl.LazyFrame:
|
||||
lf = pl.scan_parquet(input_path)
|
||||
|
||||
|
|
@ -1372,24 +1406,21 @@ def transform(
|
|||
)
|
||||
|
||||
naptan_df = pl.scan_parquet(naptan_path).collect()
|
||||
if boundary_path is not None:
|
||||
mask = in_england_mask(
|
||||
boundary_path,
|
||||
naptan_df["lat"].to_numpy(),
|
||||
naptan_df["lng"].to_numpy(),
|
||||
)
|
||||
naptan_df = naptan_df.filter(pl.Series(mask))
|
||||
mask = in_england_mask(
|
||||
boundary_path,
|
||||
naptan_df["lat"].to_numpy(),
|
||||
naptan_df["lng"].to_numpy(),
|
||||
)
|
||||
naptan_df = naptan_df.filter(pl.Series(mask))
|
||||
naptan = naptan_df.lazy().with_columns(
|
||||
pl.col("category").replace_strict(NAPTAN_EMOJIS).alias("emoji"),
|
||||
pl.lit("Public Transport").alias("group"),
|
||||
pl.col("category").alias("icon_category"),
|
||||
)
|
||||
|
||||
frames = [lf, naptan]
|
||||
if grocery_retail_points_path is not None:
|
||||
grocery_df = pl.read_parquet(grocery_retail_points_path)
|
||||
grocery_pois = transform_grocery_retail_points(grocery_df, boundary_path)
|
||||
frames.append(grocery_pois.lazy())
|
||||
grocery_df = pl.read_parquet(grocery_retail_points_path)
|
||||
grocery_pois = transform_grocery_retail_points(grocery_df, boundary_path)
|
||||
frames = [lf, naptan, grocery_pois.lazy(), transform_gias_schools(gias_path)]
|
||||
|
||||
return pl.concat(frames, how="diagonal_relaxed")
|
||||
|
||||
|
|
@ -1413,8 +1444,15 @@ def main():
|
|||
parser.add_argument(
|
||||
"--grocery-retail-points",
|
||||
type=Path,
|
||||
required=True,
|
||||
help="GEOLYTIX Grocery Retail Points parquet",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--gias",
|
||||
type=Path,
|
||||
required=True,
|
||||
help="GIAS schools register parquet (replaces OSM schools)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output", type=Path, required=True, help="Output filtered POIs parquet file"
|
||||
)
|
||||
|
|
@ -1425,6 +1463,7 @@ def main():
|
|||
args.naptan,
|
||||
args.boundary,
|
||||
args.grocery_retail_points,
|
||||
args.gias,
|
||||
).collect(engine="streaming")
|
||||
|
||||
df.write_parquet(args.output)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue