Use NaPTAN POIs
This commit is contained in:
parent
6a42b81a2a
commit
01edde3ebd
1 changed file with 25 additions and 8 deletions
|
|
@ -75,6 +75,11 @@ DROP_CATEGORIES = {
|
|||
"tourism/information",
|
||||
"tourism/village_sign",
|
||||
"tourism/yes",
|
||||
# public transport comes from naptan
|
||||
"public_transport/entrance",
|
||||
"public_transport/platform",
|
||||
"public_transport/station",
|
||||
"public_transport/stop_position",
|
||||
}
|
||||
|
||||
# (friendly_name, emoji) for every category we keep
|
||||
|
|
@ -368,11 +373,6 @@ CATEGORY_MAP: dict[str, tuple[str, str]] = {
|
|||
"office/university": ("University Office", "🎓"),
|
||||
"office/vacant": ("Vacant Office", "🏚️"),
|
||||
"office/web_design": ("Web Design", "🌐"),
|
||||
# public_transport
|
||||
"public_transport/entrance": ("Transport Entrance", "🚪"),
|
||||
"public_transport/platform": ("Platform", "🚉"),
|
||||
"public_transport/station": ("Station", "🚉"),
|
||||
"public_transport/stop_position": ("Stop", "🚏"),
|
||||
# shop
|
||||
"shop/accessories": ("Accessories Shop", "👜"),
|
||||
"shop/agrarian": ("Farm Supply Shop", "🌾"),
|
||||
|
|
@ -573,7 +573,18 @@ CATEGORY_MAP: dict[str, tuple[str, str]] = {
|
|||
}
|
||||
|
||||
|
||||
def transform(input_path: Path) -> pl.LazyFrame:
|
||||
# Emoji for each NaPTAN category string. transform() maps the NaPTAN "category"
# column through this dict with replace_strict and no default, so a category
# missing from this dict presumably raises at collect time -- confirm against
# the polars replace_strict documentation before adding new NaPTAN categories.
NAPTAN_EMOJIS: dict[str, str] = {
|
||||
"Airport": "✈️",
|
||||
"Ferry": "⛴️",
|
||||
"Rail station": "🚆",
|
||||
"Bus stop": "🚏",
|
||||
"Bus station": "🚌",
|
||||
"Taxi rank": "🚕",
|
||||
"Metro or Tram stop": "🚊",
|
||||
}
|
||||
|
||||
|
||||
def transform(input_path: Path, naptan_path: Path | None = None) -> pl.LazyFrame:
|
||||
lf = pl.scan_parquet(input_path)
|
||||
|
||||
# Get all unique categories present in the data
|
||||
|
|
@ -618,7 +629,10 @@ def transform(input_path: Path) -> pl.LazyFrame:
|
|||
pl.col("category").replace_strict(emoji_mapping).alias("emoji"),
|
||||
)
|
||||
|
||||
return lf
|
||||
naptan = pl.scan_parquet(naptan_path).with_columns(
|
||||
pl.col("category").replace_strict(NAPTAN_EMOJIS).alias("emoji"),
|
||||
)
|
||||
return pl.concat([lf, naptan], how="diagonal_relaxed")
|
||||
|
||||
|
||||
def main():
|
||||
|
|
@ -628,12 +642,15 @@ def main():
|
|||
parser.add_argument(
|
||||
"--input", type=Path, required=True, help="Raw POIs parquet file"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--naptan", type=Path, required=True, help="NaPTAN stations parquet file"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output", type=Path, required=True, help="Output filtered POIs parquet file"
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
df = transform(args.input).collect(engine="streaming")
|
||||
df = transform(args.input, args.naptan).collect(engine="streaming")
|
||||
|
||||
df.write_parquet(args.output)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue