All good
This commit is contained in:
parent
6ea544a0f6
commit
6cc7288126
45 changed files with 929 additions and 1043 deletions
|
|
@ -3,6 +3,7 @@ import tempfile
|
|||
import polars as pl
|
||||
from pathlib import Path
|
||||
|
||||
from pipeline.local_temp import local_tmp_dir
|
||||
from pipeline.utils import download, extract_zip
|
||||
|
||||
URL = "https://www.arcgis.com/sharing/rest/content/items/36b718ad00de49afb9ad364f8b815b9e/data"
|
||||
|
|
@ -40,7 +41,7 @@ def main() -> None:
|
|||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
with tempfile.TemporaryDirectory() as cache_dir:
|
||||
with tempfile.TemporaryDirectory(dir=local_tmp_dir()) as cache_dir:
|
||||
download_path = Path(cache_dir) / "arcgis_data.zip"
|
||||
extract_path = Path(cache_dir) / "arcgis_extracted"
|
||||
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ from pathlib import Path
|
|||
|
||||
import httpx
|
||||
|
||||
from pipeline.local_temp import local_tmp_dir
|
||||
from pipeline.utils import download, extract_zip
|
||||
|
||||
# Ofcom Connected Nations 2025 - Fixed broadband performance (output area & local authority level)
|
||||
|
|
@ -84,7 +85,7 @@ def main() -> None:
|
|||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
with tempfile.TemporaryDirectory() as cache_dir:
|
||||
with tempfile.TemporaryDirectory(dir=local_tmp_dir()) as cache_dir:
|
||||
cache = Path(cache_dir)
|
||||
zip_path = cache / "broadband_performance.zip"
|
||||
extract_dir = cache / "extracted"
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ import tempfile
|
|||
import polars as pl
|
||||
from pathlib import Path
|
||||
|
||||
from pipeline.local_temp import local_tmp_dir
|
||||
from pipeline.utils import download
|
||||
|
||||
URL = "https://assets.publishing.service.gov.uk/media/691ded34513046b952c500bd/File_5_IoD2025_Scores_for_the_Indices_of_Deprivation.xlsx"
|
||||
|
|
@ -33,7 +34,7 @@ def main() -> None:
|
|||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
with tempfile.TemporaryDirectory() as cache_dir:
|
||||
with tempfile.TemporaryDirectory(dir=local_tmp_dir()) as cache_dir:
|
||||
xlsx_path = Path(cache_dir) / "IoD2025_Scores.xlsx"
|
||||
download(URL, xlsx_path, timeout=60)
|
||||
convert_to_parquet(xlsx_path, args.output)
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ from zipfile import ZipFile
|
|||
|
||||
import polars as pl
|
||||
|
||||
from pipeline.local_temp import local_tmp_dir
|
||||
from pipeline.utils.download import download
|
||||
|
||||
|
||||
|
|
@ -70,7 +71,9 @@ def download_geolytix_retail_points(output_path: Path) -> None:
|
|||
"""Download the GEOLYTIX ZIP, extract the latest CSV, and write parquet."""
|
||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
with TemporaryDirectory(prefix="geolytix_retail_points_") as tmp:
|
||||
with TemporaryDirectory(
|
||||
prefix="geolytix_retail_points_", dir=local_tmp_dir()
|
||||
) as tmp:
|
||||
zip_path = Path(tmp) / "geolytix_retail_points.zip"
|
||||
download(GEOLYTIX_RETAIL_POINTS_URL, zip_path, timeout=300)
|
||||
df = read_latest_csv(zip_path)
|
||||
|
|
|
|||
|
|
@ -31,6 +31,8 @@ from pyproj import Transformer
|
|||
from rasterio.transform import rowcol
|
||||
from scipy.ndimage import maximum_filter
|
||||
|
||||
from pipeline.local_temp import local_tmp_dir
|
||||
|
||||
# Noise sources:
|
||||
# (label, column_name, WCS base URL, coverage ID, WCS version, allow_missing_tiles)
|
||||
# Road/rail work with WCS 1.0.0; airport requires WCS 2.0.1 and returns 500
|
||||
|
|
@ -437,7 +439,7 @@ def main() -> None:
|
|||
|
||||
result = postcodes.select("postcode")
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmp:
|
||||
with tempfile.TemporaryDirectory(dir=local_tmp_dir()) as tmp:
|
||||
for (
|
||||
label,
|
||||
col_name,
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ import tempfile
|
|||
import polars as pl
|
||||
from pathlib import Path
|
||||
|
||||
from pipeline.local_temp import local_tmp_dir
|
||||
from pipeline.utils import download
|
||||
|
||||
# Management information - state-funded schools - latest inspections (as at 28 Feb 2026)
|
||||
|
|
@ -36,7 +37,7 @@ def main() -> None:
|
|||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
with tempfile.TemporaryDirectory() as cache_dir:
|
||||
with tempfile.TemporaryDirectory(dir=local_tmp_dir()) as cache_dir:
|
||||
csv_path = Path(cache_dir) / "ofsted_latest_inspections.csv"
|
||||
download(URL, csv_path, timeout=60)
|
||||
convert_to_parquet(csv_path, args.output)
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@ from pyproj import Transformer
|
|||
from shapely.errors import GEOSException
|
||||
from shapely.geometry import shape as to_shapely
|
||||
|
||||
from pipeline.local_temp import local_tmp_dir
|
||||
from pipeline.utils.download import download, extract_zip
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
|
@ -171,7 +172,7 @@ def _read_site_centroids(
|
|||
def download_greenspace(output: Path) -> None:
|
||||
output.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
with tempfile.TemporaryDirectory() as cache_dir:
|
||||
with tempfile.TemporaryDirectory(dir=local_tmp_dir()) as cache_dir:
|
||||
zip_path = Path(cache_dir) / "greenspace.zip"
|
||||
extract_dir = Path(cache_dir) / "extracted"
|
||||
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ from shapely.geometry import Point
|
|||
from shapely.wkb import loads as load_wkb
|
||||
from tqdm import tqdm
|
||||
|
||||
from pipeline.local_temp import local_tmp_dir
|
||||
from pipeline.utils.england_geometry import (
|
||||
ENGLAND_BBOX_EAST,
|
||||
ENGLAND_BBOX_NORTH,
|
||||
|
|
@ -184,7 +185,7 @@ def main() -> None:
|
|||
|
||||
england_polygon = load_england_polygon(args.boundary)
|
||||
|
||||
tmp_dir = Path(mkdtemp(prefix="pois_"))
|
||||
tmp_dir = Path(mkdtemp(prefix="pois_", dir=local_tmp_dir()))
|
||||
with tqdm(
|
||||
unit=" elements",
|
||||
unit_scale=True,
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ import tarfile
|
|||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
from pipeline.local_temp import local_tmp_dir
|
||||
from pipeline.utils import download
|
||||
|
||||
URL = "https://postcodes-mapit-static.s3.eu-west-2.amazonaws.com/data/gb-postcodes-v5.tar.bz2"
|
||||
|
|
@ -37,7 +38,7 @@ def main() -> None:
|
|||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
with tempfile.TemporaryDirectory() as cache_dir:
|
||||
with tempfile.TemporaryDirectory(dir=local_tmp_dir()) as cache_dir:
|
||||
cache = Path(cache_dir)
|
||||
archive_path = cache / "gb-postcodes-v5.tar.bz2"
|
||||
extract_dir = cache / "extracted"
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ import tempfile
|
|||
import polars as pl
|
||||
from pathlib import Path
|
||||
|
||||
from pipeline.local_temp import local_tmp_dir
|
||||
from pipeline.utils import download
|
||||
|
||||
URL = "http://prod.publicdata.landregistry.gov.uk.s3-website-eu-west-1.amazonaws.com/pp-complete.csv"
|
||||
|
|
@ -55,7 +56,7 @@ def main() -> None:
|
|||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
with tempfile.TemporaryDirectory() as cache_dir:
|
||||
with tempfile.TemporaryDirectory(dir=local_tmp_dir()) as cache_dir:
|
||||
csv_path = Path(cache_dir) / "price-paid-complete.csv"
|
||||
|
||||
download(URL, csv_path)
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ from pathlib import Path
|
|||
|
||||
import polars as pl
|
||||
|
||||
from pipeline.local_temp import local_tmp_dir
|
||||
from pipeline.utils import download
|
||||
|
||||
URL = "https://www.ons.gov.uk/file?uri=/economy/inflationandpriceindices/datasets/priceindexofprivaterentsukmonthlypricestatistics/25march2026/priceindexofprivaterentsukmonthlypricestatistics.xlsx"
|
||||
|
|
@ -114,7 +115,7 @@ def main() -> None:
|
|||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
with tempfile.TemporaryDirectory() as cache_dir:
|
||||
with tempfile.TemporaryDirectory(dir=local_tmp_dir()) as cache_dir:
|
||||
xlsx_path = Path(cache_dir) / "pipr_monthly.xlsx"
|
||||
download(URL, xlsx_path, timeout=120)
|
||||
convert_to_parquet(xlsx_path, args.output)
|
||||
|
|
|
|||
|
|
@ -36,6 +36,8 @@ from pathlib import Path
|
|||
|
||||
from tqdm import tqdm
|
||||
|
||||
from pipeline.local_temp import local_tmp_dir
|
||||
|
||||
ENGLAND_PBF_URL = (
|
||||
"https://download.geofabrik.de/europe/united-kingdom/england-latest.osm.pbf"
|
||||
)
|
||||
|
|
@ -164,7 +166,10 @@ def clean_gtfs(src: Path, dst: Path) -> None:
|
|||
)
|
||||
|
||||
tmp = tempfile.NamedTemporaryFile(
|
||||
mode="wb", delete=False, suffix=".txt"
|
||||
mode="wb",
|
||||
delete=False,
|
||||
suffix=".txt",
|
||||
dir=local_tmp_dir(),
|
||||
)
|
||||
tmp.write(header)
|
||||
|
||||
|
|
@ -388,7 +393,10 @@ def convert_high_freq_to_frequency_based(
|
|||
trip_id_idx = cols.index("trip_id")
|
||||
|
||||
tmp = tempfile.NamedTemporaryFile(
|
||||
mode="wb", delete=False, suffix=".txt"
|
||||
mode="wb",
|
||||
delete=False,
|
||||
suffix=".txt",
|
||||
dir=local_tmp_dir(),
|
||||
)
|
||||
tmp.write(header)
|
||||
for line in f:
|
||||
|
|
@ -408,7 +416,10 @@ def convert_high_freq_to_frequency_based(
|
|||
trip_id_idx = cols.index("trip_id")
|
||||
|
||||
tmp = tempfile.NamedTemporaryFile(
|
||||
mode="wb", delete=False, suffix=".txt"
|
||||
mode="wb",
|
||||
delete=False,
|
||||
suffix=".txt",
|
||||
dir=local_tmp_dir(),
|
||||
)
|
||||
tmp.write(header)
|
||||
for line in f:
|
||||
|
|
@ -451,8 +462,8 @@ def download_tfl_transxchange(raw_dir: Path) -> Path:
|
|||
|
||||
|
||||
def download_naptan() -> None:
|
||||
"""Download NaPTAN stops to /tmp/Stops.csv (needed by transxchange2gtfs)."""
|
||||
dest = Path("/tmp/Stops.csv")
|
||||
"""Download NaPTAN stops to the local temp dir for transxchange2gtfs."""
|
||||
dest = local_tmp_dir() / "Stops.csv"
|
||||
if dest.exists():
|
||||
print(f"NaPTAN Stops.csv already exists: {dest}")
|
||||
return
|
||||
|
|
@ -661,7 +672,10 @@ def clean_national_rail_gtfs(src: Path, dst: Path) -> None:
|
|||
)
|
||||
|
||||
tmp = tempfile.NamedTemporaryFile(
|
||||
mode="wb", delete=False, suffix=".txt"
|
||||
mode="wb",
|
||||
delete=False,
|
||||
suffix=".txt",
|
||||
dir=local_tmp_dir(),
|
||||
)
|
||||
tmp.write(header)
|
||||
|
||||
|
|
@ -718,7 +732,10 @@ def clean_national_rail_gtfs(src: Path, dst: Path) -> None:
|
|||
lon_idx = cols.index("stop_lon")
|
||||
|
||||
tmp = tempfile.NamedTemporaryFile(
|
||||
mode="wb", delete=False, suffix=".txt"
|
||||
mode="wb",
|
||||
delete=False,
|
||||
suffix=".txt",
|
||||
dir=local_tmp_dir(),
|
||||
)
|
||||
tmp.write(header)
|
||||
|
||||
|
|
@ -749,7 +766,10 @@ def clean_national_rail_gtfs(src: Path, dst: Path) -> None:
|
|||
rt_idx = cols.index("route_type")
|
||||
|
||||
tmp = tempfile.NamedTemporaryFile(
|
||||
mode="wb", delete=False, suffix=".txt"
|
||||
mode="wb",
|
||||
delete=False,
|
||||
suffix=".txt",
|
||||
dir=local_tmp_dir(),
|
||||
)
|
||||
tmp.write(header)
|
||||
|
||||
|
|
@ -774,7 +794,10 @@ def clean_national_rail_gtfs(src: Path, dst: Path) -> None:
|
|||
trip_id_idx = cols.index("trip_id")
|
||||
|
||||
tmp = tempfile.NamedTemporaryFile(
|
||||
mode="wb", delete=False, suffix=".txt"
|
||||
mode="wb",
|
||||
delete=False,
|
||||
suffix=".txt",
|
||||
dir=local_tmp_dir(),
|
||||
)
|
||||
tmp.write(header)
|
||||
|
||||
|
|
@ -797,7 +820,10 @@ def clean_national_rail_gtfs(src: Path, dst: Path) -> None:
|
|||
end_idx = cols.index("end_date")
|
||||
|
||||
tmp = tempfile.NamedTemporaryFile(
|
||||
mode="wb", delete=False, suffix=".txt"
|
||||
mode="wb",
|
||||
delete=False,
|
||||
suffix=".txt",
|
||||
dir=local_tmp_dir(),
|
||||
)
|
||||
tmp.write(header)
|
||||
|
||||
|
|
|
|||
|
|
@ -15,6 +15,16 @@ if (!pkgDirArg || converterArgs.length < 2) {
|
|||
}
|
||||
|
||||
const pkgDir = path.resolve(pkgDirArg);
|
||||
const defaultTmpDir = path.resolve(__dirname, "..", "..", ".tmp");
|
||||
const localTmpDir =
|
||||
process.env.TMPDIR || process.env.TEMP || process.env.TMP || defaultTmpDir;
|
||||
const stopsCsv = path.join(localTmpDir, "Stops.csv");
|
||||
const converterTmpPrefix = path.join(localTmpDir, "transxchange2gtfs_");
|
||||
const converterTmpPatch =
|
||||
`static TMP = ${JSON.stringify(converterTmpPrefix)}` +
|
||||
` + process.pid + ${JSON.stringify(path.sep)};`;
|
||||
|
||||
fs.mkdirSync(localTmpDir, { recursive: true });
|
||||
|
||||
function replaceOnce(relativePath, before, after) {
|
||||
const file = path.join(pkgDir, relativePath);
|
||||
|
|
@ -37,6 +47,26 @@ function replaceOnce(relativePath, before, after) {
|
|||
// GTFS shapes are optional for R5 routing. Clear shape references and omit
|
||||
// shapes.txt so missing route geometry does not drop otherwise usable trips.
|
||||
function patchPackage() {
|
||||
replaceOnce(
|
||||
"dist/Container.js",
|
||||
"static TMP = `/tmp/transxchange2gtfs_${process.pid}/`;",
|
||||
converterTmpPatch,
|
||||
);
|
||||
replaceOnce(
|
||||
"dist/Container.js",
|
||||
'fs.existsSync("/tmp/Stops.csv")',
|
||||
`fs.existsSync(${JSON.stringify(stopsCsv)})`,
|
||||
);
|
||||
replaceOnce(
|
||||
"dist/Container.js",
|
||||
'fs.createReadStream("/tmp/Stops.csv", "utf8")',
|
||||
`fs.createReadStream(${JSON.stringify(stopsCsv)}, "utf8")`,
|
||||
);
|
||||
replaceOnce(
|
||||
"dist/converter/GetStopData.js",
|
||||
'fs.createWriteStream("/tmp/Stops.csv")',
|
||||
`fs.createWriteStream(${JSON.stringify(stopsCsv)})`,
|
||||
);
|
||||
replaceOnce(
|
||||
"dist/transxchange/TransXChangeJourneyStream.js",
|
||||
"distanceSoFarM += routeLink.Distance;",
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ from pathlib import Path
|
|||
|
||||
import polars as pl
|
||||
|
||||
from pipeline.local_temp import local_tmp_dir
|
||||
from pipeline.utils import download, extract_zip
|
||||
|
||||
URL = "https://www.arcgis.com/sharing/rest/content/items/4e0b4b3fbc2540caae27e7be532e61be/data"
|
||||
|
|
@ -62,7 +63,7 @@ def main() -> None:
|
|||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
with tempfile.TemporaryDirectory() as cache_dir:
|
||||
with tempfile.TemporaryDirectory(dir=local_tmp_dir()) as cache_dir:
|
||||
zip_path = Path(cache_dir) / "uprn_lookup.zip"
|
||||
extract_path = Path(cache_dir) / "uprn_extracted"
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue