diff --git a/middleout-lattice.ipynb b/middleout-lattice.ipynb new file mode 100644 index 0000000000..3e89513d68 --- /dev/null +++ b/middleout-lattice.ipynb @@ -0,0 +1 @@ +{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.12.12","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"kaggle":{"accelerator":"nvidiaTeslaT4","dataSources":[],"dockerImageVersionId":31329,"isInternetEnabled":true,"language":"python","sourceType":"notebook","isGpuEnabled":true}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":"# Install range-coder with the same interpreter that runs the training script, pinned to 1.1 (the version this run resolved).\n# Training starts only if the install succeeds (&&).\n# NOTE(review): assumes the middleout-lattice repo is already present in the working directory - confirm; nothing in this cell fetches it.\n!python -m pip install range-coder==1.1 && python middleout-lattice/scripts/train_lightning.py --model-config small --epochs 10 --batch-size 32","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2026-05-08T10:21:05.676162Z","iopub.execute_input":"2026-05-08T10:21:05.677050Z"}},"outputs":[{"name":"stdout","text":"Requirement already satisfied: range-coder in /usr/local/lib/python3.12/dist-packages (1.1)\n============================================================\nLIGHTNING AI NEURAL COMPRESSION TRAINING\n============================================================\nData dir: /kaggle/working/middleout-lattice/data\nOutput: /kaggle/working/middleout-lattice/checkpoints/model.pt\nModel config: small\nEpochs: 10\nBatch size: 32\nGutenberg count: 5000\nGPU available: True\nGPU: Tesla T4\nVRAM: 15.6 GB\n============================================================\n\n[Step 1/3] Preparing training data...\n\n============================================================\nDOWNLOADING TRAINING DATA\n============================================================\n\nDownloading 5000 Gutenberg books (50 parallel workers)...\n100%|██████████████████████████████████████| 5000/5000 [00:44<00:00, 112.12it/s]\n✓ Gutenberg: 5000 books\n\nDownloading 35 code repositories (20 parallel 
workers)...\n100%|███████████████████████████████████████████| 35/35 [00:46<00:00, 1.33s/it]\n✓ Code repos: 33 archives\n\nDownloading 10 JSON/CSV/MD datasets (15 parallel workers)...\n100%|███████████████████████████████████████████| 10/10 [00:00<00:00, 34.50it/s]\n✓ JSON/CSV datasets: 8 files\n\nDownloading 5 web files (10 parallel workers)...\n100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 90.10it/s]\n✓ Web files (HTML/CSS/JS): 5 files\n\nDownloading 4 config files (8 parallel workers)...\n100%|█████████████████████████████████████████████| 4/4 [00:00<00:00, 37.41it/s]\n✓ Config files: 3 files\n\nDownloading 9 markdown docs (12 parallel workers)...\n100%|████████████████████████████████████████████| 9/9 [00:00<00:00, 104.56it/s]\n✓ Markdown docs: 6 files\n\nGenerating sample log files...\n✓ Log files: 4 files\n\nGenerating binary test files...\n✓ Binary files: 40 files\n\nGenerating 2000 synthetic files...\n100%|█████████████████████████████████████| 2000/2000 [00:00<00:00, 9318.70it/s]\n✓ Synthetic files: 2000 files\n\nDownloading tinyshakespeare...\n✓ TinyShakespeare: 1 file\n\nExtracting archives...\n100%|███████████████████████████████████████████| 33/33 [02:30<00:00, 4.57s/it]\n 0%| | 0/2 [00:00