scraping and data
This commit is contained in:
parent
d98819b569
commit
8688b7475e
43 changed files with 4920 additions and 531 deletions
|
|
@ -121,6 +121,50 @@ class TestWhitespacePostcodes:
|
|||
loaded_df, _ = load_uprns(path)
|
||||
assert len(loaded_df) == 0
|
||||
|
||||
def test_non_english_oas_excluded(self, tmp_path):
    """Only the row whose OA21CD is ``E00000001`` should survive loading.

    Two UPRN rows are written to a parquet file: one with output-area code
    ``E00000001`` and one with ``W00000001``. After ``load_uprns`` the
    returned offsets and frame must contain the ``E`` row alone.
    """
    columns = {
        "GRIDGB1E": [500010, 300010],
        "GRIDGB1N": [180010, 220010],
        "PCDS": ["AA1 1AA", "CF1 1AA"],
        "OA21CD": ["E00000001", "W00000001"],
    }
    parquet_file = tmp_path / "uprn.parquet"
    pl.DataFrame(columns).write_parquet(parquet_file)

    result_frame, oa_offsets = load_uprns(parquet_file)

    # Iterating the offsets is expected to yield just the one OA code.
    assert set(oa_offsets) == {"E00000001"}
    assert result_frame["PCDS"].to_list() == ["AA1 1AA"]
|
||||
|
||||
def test_terminated_postcodes_are_remapped(self, tmp_path):
    """A UPRN on a terminated postcode should come back under a live one.

    The single UPRN row carries the lowercase postcode ``aa1 1aa``. The
    second parquet file lists ``AA1 1AA`` with a ``doterm`` date and
    ``AA1 1AB`` with ``doterm`` null; the loaded frame is expected to
    report ``AA1 1AB``.
    """
    uprn_columns = {
        "GRIDGB1E": [500010],
        "GRIDGB1N": [180010],
        "PCDS": ["aa1 1aa"],
        "OA21CD": ["E00000001"],
    }
    uprn_path = tmp_path / "uprn.parquet"
    pl.DataFrame(uprn_columns).write_parquet(uprn_path)

    # NOTE(review): presumably load_uprns picks the replacement from the
    # non-terminated rows of this table — confirm against its implementation.
    lookup_columns = {
        "pcds": ["AA1 1AA", "AA1 1AB"],
        "east1m": [500010, 500030],
        "north1m": [180010, 180020],
        "doterm": ["2020-01-01", None],
        "ctry25cd": ["E92000001", "E92000001"],
    }
    arcgis_path = tmp_path / "arcgis.parquet"
    pl.DataFrame(lookup_columns).write_parquet(arcgis_path)

    result_frame, _ = load_uprns(uprn_path, arcgis_path)

    assert result_frame["PCDS"].to_list() == ["AA1 1AB"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Bug 3: Voronoi deduplication is first-seen-wins
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue