109 lines
5.5 KiB
Text
109 lines
5.5 KiB
Text
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"100%|██████████| 3439/3439 [00:00<00:00, 6104.95it/s]\n",
|
|
"100%|██████████| 3439/3439 [00:00<00:00, 6988.74it/s]\n",
|
|
"100%|██████████| 3439/3439 [00:00<00:00, 6957.73it/s]\n",
|
|
"100%|██████████| 3439/3439 [00:00<00:00, 6734.31it/s]\n",
|
|
"100%|██████████| 3439/3439 [00:00<00:00, 7696.85it/s]\n",
|
|
"100%|██████████| 3439/3439 [00:00<00:00, 7331.94it/s]\n",
|
|
"100%|██████████| 3439/3439 [00:00<00:00, 6240.69it/s]\n",
|
|
"100%|██████████| 3439/3439 [00:00<00:00, 7451.37it/s]\n",
|
|
"100%|██████████| 3439/3439 [00:00<00:00, 7135.27it/s]\n",
|
|
"100%|██████████| 3439/3439 [00:00<00:00, 3855.91it/s]\n",
|
|
"100%|██████████| 3439/3439 [00:00<00:00, 3567.51it/s]\n",
|
|
"100%|██████████| 3439/3439 [00:01<00:00, 2853.24it/s]\n",
|
|
"100%|██████████| 3439/3439 [00:00<00:00, 6952.67it/s]\n",
|
|
"100%|██████████| 3439/3439 [00:00<00:00, 6177.45it/s]\n",
|
|
"100%|██████████| 3439/3439 [00:01<00:00, 3130.18it/s]\n",
|
|
"100%|██████████| 3439/3439 [00:01<00:00, 3303.45it/s]\n",
|
|
"100%|██████████| 3439/3439 [00:00<00:00, 3662.39it/s]\n",
|
|
"100%|██████████| 3439/3439 [00:01<00:00, 2754.25it/s]\n",
|
|
"100%|██████████| 3439/3439 [00:00<00:00, 6633.24it/s]\n",
|
|
"100%|██████████| 3439/3439 [00:00<00:00, 6548.62it/s]\n",
|
|
"100%|██████████| 3439/3439 [00:00<00:00, 4601.06it/s]\n",
|
|
"100%|██████████| 3439/3439 [00:01<00:00, 2288.88it/s]\n",
|
|
"100%|██████████| 3439/3439 [00:00<00:00, 3635.54it/s]\n",
|
|
"100%|██████████| 3439/3439 [00:01<00:00, 2179.42it/s]\n",
|
|
"100%|██████████| 3439/3439 [00:00<00:00, 6750.76it/s]\n",
|
|
"100%|██████████| 3439/3439 [00:00<00:00, 6691.62it/s]\n",
|
|
"100%|██████████| 3439/3439 [00:00<00:00, 5768.00it/s]\n",
|
|
"100%|██████████| 3439/3439 [00:00<00:00, 3440.06it/s]\n",
|
|
"100%|██████████| 3439/3439 [00:01<00:00, 2743.69it/s]\n",
|
|
"100%|██████████| 3439/3439 [00:01<00:00, 3034.45it/s]\n",
|
|
"100%|██████████| 3439/3439 [00:02<00:00, 1261.15it/s]\n",
|
|
"100%|██████████| 3439/3439 [00:00<00:00, 6129.07it/s]\n",
|
|
"100%|██████████| 3439/3439 [00:00<00:00, 6573.12it/s]\n",
|
|
"100%|██████████| 3439/3439 [00:00<00:00, 6425.97it/s]\n",
|
|
"100%|██████████| 3439/3439 [00:01<00:00, 2865.05it/s]\n",
|
|
"100%|██████████| 3439/3439 [00:00<00:00, 4130.32it/s]\n",
|
|
"100%|██████████| 3439/3439 [00:01<00:00, 3020.61it/s]\n",
|
|
"100%|██████████| 3439/3439 [00:02<00:00, 1446.82it/s]\n",
|
|
"100%|██████████| 3439/3439 [00:00<00:00, 8095.71it/s]\n",
|
|
"100%|██████████| 3439/3439 [00:00<00:00, 7679.18it/s]\n",
|
|
"100%|██████████| 3439/3439 [00:00<00:00, 7918.50it/s]\n",
|
|
"100%|██████████| 3439/3439 [00:00<00:00, 3519.17it/s]\n",
|
|
"100%|██████████| 3439/3439 [00:01<00:00, 3258.94it/s]\n",
|
|
"100%|██████████| 3439/3439 [00:01<00:00, 2436.68it/s]\n",
|
|
"100%|██████████| 3439/3439 [00:03<00:00, 1000.79it/s]\n",
|
|
"100%|██████████| 3439/3439 [00:00<00:00, 7625.18it/s]\n",
|
|
"100%|██████████| 3439/3439 [00:00<00:00, 7752.86it/s]\n",
|
|
"100%|██████████| 3439/3439 [00:00<00:00, 7538.78it/s]\n",
|
|
"100%|██████████| 3439/3439 [00:01<00:00, 3115.93it/s]\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
"import hashlib\n",
"from pathlib import Path\n",
"\n",
"import pandas as pd\n",
"from tqdm import tqdm\n",
"\n",
"\n",
"# Destination directory for the extracted images; created if it does not exist.\n",
"# NOTE(review): machine-specific absolute path - adjust before running elsewhere.\n",
"DATA_PATH = Path('/mnt/wsl/PHYSICALDRIVE1/data/laion')\n",
"DATA_PATH.mkdir(exist_ok=True, parents=True)\n",
"\n",
"# Directory that is scanned for '*.parquet' shards.\n",
"# NOTE(review): machine-specific absolute path - adjust before running elsewhere.\n",
"LAION_PATH = Path('/home/andras/projects/laion_improved_aesthetics_6.5plus_with_images/data')\n",
"\n",
"\n",
"# Write every embedded image of every shard to DATA_PATH, one file per image.\n",
"for shard in sorted(LAION_PATH.glob('*.parquet')):\n",
"    # Only the 'image' column is read below, so do not load the other columns.\n",
"    images = pd.read_parquet(shard, columns=['image'])['image']\n",
"    # Iterate the column directly; tqdm takes the total from len(images), so there is\n",
"    # no need to materialise list(df.iterrows()) just to get a progress bar.\n",
"    for image in tqdm(images, desc=shard.name):\n",
"        # Named image_bytes so the builtin 'bytes' is not shadowed in the kernel.\n",
"        image_bytes = image['bytes']\n",
"        # The file name is the SHA-1 of the content, so identical images share one file.\n",
"        digest = hashlib.sha1(image_bytes).hexdigest()\n",
"        # NOTE(review): '.jpg' is appended unconditionally; the payload format is not checked.\n",
"        (DATA_PATH / f'{digest}.jpg').write_bytes(image_bytes)"
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "bipolaroid",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.12.2"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|