This commit is contained in:
Andras Schmelczer 2024-06-18 23:04:48 +01:00
parent e50087be55
commit f09296cf78
No known key found for this signature in database
GPG key ID: FC8F2C3D3D1A718C
2 changed files with 9209 additions and 18096 deletions

File diff suppressed because one or more lines are too long

View file

@ -23,6 +23,7 @@
"import logging\n", "import logging\n",
"import os\n", "import os\n",
"from datetime import datetime\n", "from datetime import datetime\n",
"from config import LOGS_PATH\n",
"\n", "\n",
"logging.basicConfig(\n", "logging.basicConfig(\n",
" level=logging.INFO,\n", " level=logging.INFO,\n",
@ -30,7 +31,7 @@
" handlers=[\n", " handlers=[\n",
" logging.StreamHandler(),\n", " logging.StreamHandler(),\n",
" logging.FileHandler(\n", " logging.FileHandler(\n",
" f\"train-{datetime.now().isoformat(timespec='minutes')}.log\"\n", " LOGS_PATH / f\"train-{datetime.now().isoformat(timespec='minutes')}.log\"\n",
" ),\n", " ),\n",
" ],\n", " ],\n",
")\n", ")\n",
@ -51,12 +52,12 @@
"\n", "\n",
"common_hyperparameters = {\n", "common_hyperparameters = {\n",
" \"batch_size\": [64],\n", " \"batch_size\": [64],\n",
" \"edit_count\": [8],\n", " \"edit_count\": [12],\n",
" \"bin_count\": [16, 24, 32],\n", " \"bin_count\": [16],\n",
" \"clip_gradients\": [False],\n", " \"clip_gradients\": [False],\n",
" \"learning_rate\": loguniform(3e-4, 3e-3),\n", " \"learning_rate\": loguniform(1e-4, 5e-3),\n",
" \"scheduler_gamma\": uniform(0.5, 1),\n", " \"scheduler_gamma\": uniform(loc=0.7, scale=0.3),\n",
" \"num_epochs\": [16],\n", " \"num_epochs\": [24],\n",
" # \"num_epochs\": randint(5, 10),\n", " # \"num_epochs\": randint(5, 10),\n",
" \"model_type\": list(MODELS.keys()),\n", " \"model_type\": list(MODELS.keys()),\n",
"}\n", "}\n",
@ -115,8 +116,6 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"Testing model SimpleCNN\n",
"Test passed! Output shape matches input shape.\n",
"Testing model Residual\n", "Testing model Residual\n",
"Test passed! Output shape matches input shape.\n" "Test passed! Output shape matches input shape.\n"
] ]
@ -132,10 +131,9 @@
"from editor.utils import get_next_run_name\n", "from editor.utils import get_next_run_name\n",
"from editor.visualisation import plot_histograms_in_2d\n", "from editor.visualisation import plot_histograms_in_2d\n",
"from editor.models import create_model, test_models\n", "from editor.models import create_model, test_models\n",
"from data import TRAIN_DATA, TEST_DATA\n",
"from datetime import timedelta, datetime\n", "from datetime import timedelta, datetime\n",
"import json\n", "import json\n",
"from config import MODELS_PATH\n", "from config import MODELS_PATH, RUNS_PATH, TRAIN_DATA, TEST_DATA\n",
"\n", "\n",
"\n", "\n",
"test_models()\n", "test_models()\n",
@ -146,7 +144,7 @@
") -> Path:\n", ") -> Path:\n",
" start_time = datetime.now()\n", " start_time = datetime.now()\n",
"\n", "\n",
" log_dir = Path(\"runs\") / get_next_run_name(Path(\"runs\"))\n", " log_dir = RUNS_PATH / get_next_run_name(RUNS_PATH)\n",
" with SummaryWriter(log_dir) as writer:\n", " with SummaryWriter(log_dir) as writer:\n",
" train_data_loader = get_data_loader(TRAIN_DATA, hyperparameters)\n", " train_data_loader = get_data_loader(TRAIN_DATA, hyperparameters)\n",
" test_data_loader = get_data_loader(TEST_DATA, hyperparameters)\n", " test_data_loader = get_data_loader(TEST_DATA, hyperparameters)\n",
@ -315,16 +313,14 @@
"# train(\n", "# train(\n",
"# {\n", "# {\n",
"# \"batch_size\": 64,\n", "# \"batch_size\": 64,\n",
"# \"edit_count\": 25,\n", "# \"edit_count\": 8,\n",
"# \"bin_count\": 32,\n", "# \"bin_count\": 16,\n",
"# \"clip_gradients\": True,\n", "# \"clip_gradients\": False,\n",
"# \"learning_rate\": 0.005,\n", "# \"learning_rate\": 0.0005220900529274365,\n",
"# \"scheduler_gamma\": 0.7,\n", "# \"scheduler_gamma\": 0.5479991284291021,\n",
"# \"num_epochs\": 20,\n", "# \"num_epochs\": 24,\n",
"# \"model_type\": \"NormalisedCNN\",\n", "# \"model_type\": \"Residual\",\n",
"# \"loss\": \"progressive\",\n", "# \"loss\": \"kl\",\n",
"# \"loss_sizes\": [16, 32],\n",
"# \"loss_damping\": 2,\n",
"# }\n", "# }\n",
"# )" "# )"
] ]
@ -338,21 +334,37 @@
"name": "stderr", "name": "stderr",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"2024-06-06 08:19:45,651 - INFO - Starting run_26 with hparams {\n", "2024-06-16 20:21:51,962 - INFO - Starting run_0 with hparams {\n",
" \"batch_size\": 64,\n", " \"batch_size\": 64,\n",
" \"bin_count\": 32,\n", " \"bin_count\": 32,\n",
" \"clip_gradients\": false,\n", " \"clip_gradients\": false,\n",
" \"edit_count\": 8,\n", " \"edit_count\": 8,\n",
" \"learning_rate\": 0.0020871393198725404,\n", " \"learning_rate\": 0.0013249692770052317,\n",
" \"loss\": \"kl\",\n", " \"loss\": \"kl\",\n",
" \"model_type\": \"Residual\",\n", " \"model_type\": \"Residual\",\n",
" \"num_epochs\": 16,\n", " \"num_epochs\": 16,\n",
" \"scheduler_gamma\": 1.2086440138363033\n", " \"scheduler_gamma\": 1.3114281184948258\n",
"}\n", "}\n",
"2024-06-06 08:19:45,768 - INFO - Loaded 22479 original images\n", "2024-06-16 20:21:52,012 - INFO - Loaded 22479 original images\n",
"2024-06-06 08:19:45,778 - INFO - Loaded 2498 original images\n", "2024-06-16 20:21:52,016 - INFO - Loaded 2498 original images\n",
"2024-06-06 08:20:02,104 - INFO - Saving model to /home/andras/projects/bipolaroid/models/run_26.pth\n", "2024-06-16 20:35:43,995 - INFO - Epoch 0 train loss: 6540.840226650238\n",
"2024-06-06 08:20:02,248 - INFO - Interrupted, stopping\n" "2024-06-16 20:36:15,017 - INFO - Epoch 0 test loss: 1531.6546006202698\n",
"2024-06-16 20:49:58,543 - INFO - Epoch 1 train loss: 5763.938045859337\n",
"2024-06-16 20:50:29,893 - INFO - Epoch 1 test loss: 1608.853798866272\n",
"2024-06-16 21:04:13,577 - INFO - Epoch 2 train loss: 5448.952376246452\n",
"2024-06-16 21:04:45,607 - INFO - Epoch 2 test loss: 1465.128571987152\n",
"2024-06-16 21:18:31,962 - INFO - Epoch 3 train loss: 5633.2793600559235\n",
"2024-06-16 21:19:09,149 - INFO - Epoch 3 test loss: 1330.329261302948\n",
"2024-06-16 21:32:58,465 - INFO - Epoch 4 train loss: 5338.784257531166\n",
"2024-06-16 21:33:37,006 - INFO - Epoch 4 test loss: 2083.3998107910156\n",
"2024-06-16 21:47:25,527 - INFO - Epoch 5 train loss: 5321.843332529068\n",
"2024-06-16 21:48:04,110 - INFO - Epoch 5 test loss: 1314.629390001297\n",
"2024-06-16 22:01:51,264 - INFO - Epoch 6 train loss: 5337.748890757561\n",
"2024-06-16 22:02:29,786 - INFO - Epoch 6 test loss: 1290.2974362373352\n",
"2024-06-16 22:16:17,529 - INFO - Epoch 7 train loss: 5167.580719232559\n",
"2024-06-16 22:16:56,284 - INFO - Epoch 7 test loss: 1889.667366027832\n",
"2024-06-16 22:26:15,914 - INFO - Saving model to /home/andras/projects/bipolaroid/models/run_67.pth\n",
"2024-06-16 22:26:16,086 - INFO - Interrupted, stopping\n"
] ]
} }
], ],
@ -369,7 +381,7 @@
" }\n", " }\n",
" key = json.dumps(current_hyperparameters, indent=2, sort_keys=True)\n", " key = json.dumps(current_hyperparameters, indent=2, sort_keys=True)\n",
" logging.info(\n", " logging.info(\n",
" f\"Starting {get_next_run_name(Path(\"runs\"))} with hparams {key}\"\n", " f\"Starting {get_next_run_name(RUNS_PATH)} with hparams {key}\"\n",
" )\n", " )\n",
" try:\n", " try:\n",
" train(current_hyperparameters, max_duration=timedelta(hours=8), use_tqdm=False)\n", " train(current_hyperparameters, max_duration=timedelta(hours=8), use_tqdm=False)\n",
@ -401,7 +413,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.12.2" "version": "3.1.-1"
} }
}, },
"nbformat": 4, "nbformat": 4,