bipolaroidbipolaroid/src/laion.ipynb
2024-05-09 21:22:28 +01:00

109 lines
5.5 KiB
Text

{
"cells": [
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 3439/3439 [00:00<00:00, 6104.95it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 6988.74it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 6957.73it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 6734.31it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 7696.85it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 7331.94it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 6240.69it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 7451.37it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 7135.27it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 3855.91it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 3567.51it/s]\n",
"100%|██████████| 3439/3439 [00:01<00:00, 2853.24it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 6952.67it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 6177.45it/s]\n",
"100%|██████████| 3439/3439 [00:01<00:00, 3130.18it/s]\n",
"100%|██████████| 3439/3439 [00:01<00:00, 3303.45it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 3662.39it/s]\n",
"100%|██████████| 3439/3439 [00:01<00:00, 2754.25it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 6633.24it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 6548.62it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 4601.06it/s]\n",
"100%|██████████| 3439/3439 [00:01<00:00, 2288.88it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 3635.54it/s]\n",
"100%|██████████| 3439/3439 [00:01<00:00, 2179.42it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 6750.76it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 6691.62it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 5768.00it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 3440.06it/s]\n",
"100%|██████████| 3439/3439 [00:01<00:00, 2743.69it/s]\n",
"100%|██████████| 3439/3439 [00:01<00:00, 3034.45it/s]\n",
"100%|██████████| 3439/3439 [00:02<00:00, 1261.15it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 6129.07it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 6573.12it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 6425.97it/s]\n",
"100%|██████████| 3439/3439 [00:01<00:00, 2865.05it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 4130.32it/s]\n",
"100%|██████████| 3439/3439 [00:01<00:00, 3020.61it/s]\n",
"100%|██████████| 3439/3439 [00:02<00:00, 1446.82it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 8095.71it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 7679.18it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 7918.50it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 3519.17it/s]\n",
"100%|██████████| 3439/3439 [00:01<00:00, 3258.94it/s]\n",
"100%|██████████| 3439/3439 [00:01<00:00, 2436.68it/s]\n",
"100%|██████████| 3439/3439 [00:03<00:00, 1000.79it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 7625.18it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 7752.86it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 7538.78it/s]\n",
"100%|██████████| 3439/3439 [00:01<00:00, 3115.93it/s]\n"
]
}
],
"source": [
"import pandas as pd\n",
"from pathlib import Path\n",
"from tqdm import tqdm\n",
"import hashlib\n",
"\n",
"\n",
"# Destination directory for the extracted images; created on first run.\n",
"DATA_PATH = Path('/mnt/wsl/PHYSICALDRIVE1/data/laion')\n",
"DATA_PATH.mkdir(exist_ok=True, parents=True)\n",
"\n",
"# Directory holding the LAION parquet shards that embed the raw image bytes.\n",
"LAION_PATH = Path('/home/andras/projects/laion_improved_aesthetics_6.5plus_with_images/data')\n",
"\n",
"\n",
"# Unpack every shard: each row's embedded image is written to DATA_PATH under the\n",
"# SHA-1 hex digest of its content, so byte-identical images collapse into one file\n",
"# and re-running the cell simply rewrites the same files.\n",
"# Shards are visited in sorted order so the progress output is reproducible.\n",
"for parquet_file in sorted(LAION_PATH.glob('*.parquet')):\n",
"    # Only the 'image' column is consumed, so avoid loading the rest of the shard.\n",
"    images = pd.read_parquet(parquet_file, columns=['image'])['image']\n",
"    # Iterate the column directly instead of materialising list(df.iterrows()).\n",
"    for image in tqdm(images, desc=parquet_file.name):\n",
"        # Named image_bytes (not `bytes`) so the builtin is not shadowed in the kernel.\n",
"        image_bytes = image['bytes']\n",
"        digest = hashlib.sha1(image_bytes).hexdigest()\n",
"        # NOTE(review): the '.jpg' suffix is applied unconditionally; the payload format is not checked here.\n",
"        (DATA_PATH / f'{digest}.jpg').write_bytes(image_bytes)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "bipolaroid",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}