Add more data & fix OOMs
This commit is contained in:
parent
f60fbec9d4
commit
a8cc44ea97
8 changed files with 242 additions and 82 deletions
|
|
@@ -576,7 +576,7 @@ def transform(input_path: Path) -> pl.LazyFrame:
|
|||
lf = pl.scan_parquet(input_path)
|
||||
|
||||
# Get all unique categories present in the data
|
||||
all_categories = lf.select("category").unique().collect().to_series().to_list()
|
||||
all_categories = lf.select("category").unique().collect(engine="streaming").to_series().to_list()
|
||||
|
||||
# Verify every non-dropped category has a mapping
|
||||
unmapped = []
|
||||
|
|
@@ -632,7 +632,7 @@ def main():
|
|||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
df = transform(args.input).collect()
|
||||
df = transform(args.input).collect(engine="streaming")
|
||||
|
||||
df.write_parquet(args.output)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue