add notebooks

This commit is contained in:
Andras Schmelczer 2024-04-08 08:02:48 +01:00
parent f5c03db198
commit 44e0c129ec
No known key found for this signature in database
GPG key ID: FC8F2C3D3D1A718C
8 changed files with 229939 additions and 12849 deletions

5
.vscode/settings.json vendored Normal file
View file

@ -0,0 +1,5 @@
{
"files.exclude": {
"**/__pycache__": true
}
}

501
create_edits.ipynb Normal file

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

25748
inference.ipynb Normal file

File diff suppressed because one or more lines are too long

109
laion.ipynb Normal file
View file

@ -0,0 +1,109 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 3439/3439 [00:00<00:00, 6104.95it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 6988.74it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 6957.73it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 6734.31it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 7696.85it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 7331.94it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 6240.69it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 7451.37it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 7135.27it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 3855.91it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 3567.51it/s]\n",
"100%|██████████| 3439/3439 [00:01<00:00, 2853.24it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 6952.67it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 6177.45it/s]\n",
"100%|██████████| 3439/3439 [00:01<00:00, 3130.18it/s]\n",
"100%|██████████| 3439/3439 [00:01<00:00, 3303.45it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 3662.39it/s]\n",
"100%|██████████| 3439/3439 [00:01<00:00, 2754.25it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 6633.24it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 6548.62it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 4601.06it/s]\n",
"100%|██████████| 3439/3439 [00:01<00:00, 2288.88it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 3635.54it/s]\n",
"100%|██████████| 3439/3439 [00:01<00:00, 2179.42it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 6750.76it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 6691.62it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 5768.00it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 3440.06it/s]\n",
"100%|██████████| 3439/3439 [00:01<00:00, 2743.69it/s]\n",
"100%|██████████| 3439/3439 [00:01<00:00, 3034.45it/s]\n",
"100%|██████████| 3439/3439 [00:02<00:00, 1261.15it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 6129.07it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 6573.12it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 6425.97it/s]\n",
"100%|██████████| 3439/3439 [00:01<00:00, 2865.05it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 4130.32it/s]\n",
"100%|██████████| 3439/3439 [00:01<00:00, 3020.61it/s]\n",
"100%|██████████| 3439/3439 [00:02<00:00, 1446.82it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 8095.71it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 7679.18it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 7918.50it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 3519.17it/s]\n",
"100%|██████████| 3439/3439 [00:01<00:00, 3258.94it/s]\n",
"100%|██████████| 3439/3439 [00:01<00:00, 2436.68it/s]\n",
"100%|██████████| 3439/3439 [00:03<00:00, 1000.79it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 7625.18it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 7752.86it/s]\n",
"100%|██████████| 3439/3439 [00:00<00:00, 7538.78it/s]\n",
"100%|██████████| 3439/3439 [00:01<00:00, 3115.93it/s]\n"
]
}
],
"source": [
"import pandas as pd\n",
"from pathlib import Path\n",
"from tqdm import tqdm\n",
"import hashlib\n",
"\n",
"\n",
"DATA_PATH = Path('/mnt/wsl/PHYSICALDRIVE1/data/laion')\n",
"DATA_PATH.mkdir(exist_ok=True, parents=True)\n",
"\n",
"LAION_PATH = Path('/home/andras/projects/laion_improved_aesthetics_6.5plus_with_images/data')\n",
"\n",
"\n",
"for file in LAION_PATH.glob(\"*.parquet\"):\n",
" df = pd.read_parquet(file)\n",
" for row in tqdm(list(df.iterrows())):\n",
" row = row[1]\n",
" bytes = row['image']['bytes']\n",
" digest = hashlib.sha1(bytes).hexdigest()\n",
" with open(DATA_PATH / f\"{digest}.jpg\", 'wb') as f:\n",
" f.write(bytes)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "bipolaroid",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

File diff suppressed because one or more lines are too long

193485
train.ipynb Normal file

File diff suppressed because one or more lines are too long