changes
This commit is contained in:
parent
524580eb25
commit
ffe080adef
82 changed files with 2652 additions and 2956 deletions
|
|
@@ -328,14 +328,19 @@ def forward_fill(index: dict, min_year: int, max_year: int) -> dict:
|
|||
return filled
|
||||
|
||||
|
||||
def build_index(input_path: Path, max_pair_year: int | None = None) -> pl.DataFrame:
|
||||
def build_index(
|
||||
input_path: Path,
|
||||
max_pair_year: int | None = None,
|
||||
postcodes_path: Path | None = None,
|
||||
) -> pl.DataFrame:
|
||||
"""Build the full price index from raw data.
|
||||
|
||||
If max_pair_year is set, only pairs before that year are used (backtesting holdout).
|
||||
The index is still forward-filled to CURRENT_YEAR.
|
||||
postcodes_path: if provided, lat/lon are read from this file instead of input_path.
|
||||
"""
|
||||
pairs = extract_pairs(input_path, max_year2=max_pair_year)
|
||||
centroids = extract_centroids(input_path)
|
||||
centroids = extract_centroids(postcodes_path or input_path)
|
||||
|
||||
min_year = int(pairs["year1"].min())
|
||||
max_year = CURRENT_YEAR
|
||||
|
|
@@ -448,10 +453,12 @@ def main():
|
|||
description="Build improved repeat-sales price index"
|
||||
)
|
||||
parser.add_argument("--input", type=Path, required=True)
|
||||
parser.add_argument("--postcodes", type=Path, required=True,
|
||||
help="Path to postcode.parquet (for lat/lon centroids)")
|
||||
parser.add_argument("--output", type=Path, required=True)
|
||||
args = parser.parse_args()
|
||||
|
||||
result = build_index(args.input)
|
||||
result = build_index(args.input, postcodes_path=args.postcodes)
|
||||
|
||||
result.write_parquet(args.output)
|
||||
size_mb = args.output.stat().st_size / (1024 * 1024)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue