2023-10-23 09:01:04,885 [INFO] Start training
batch sizes [[2]]
module.llama_model.base_model.model.model.layers.0.self_attn.q_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.0.self_attn.q_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.0.self_attn.v_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.0.self_attn.v_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.1.self_attn.q_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.1.self_attn.q_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.1.self_attn.v_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.1.self_attn.v_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.2.self_attn.q_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.2.self_attn.q_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.2.self_attn.v_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.2.self_attn.v_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.3.self_attn.q_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.3.self_attn.q_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.3.self_attn.v_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.3.self_attn.v_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.4.self_attn.q_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.4.self_attn.q_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.4.self_attn.v_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.4.self_attn.v_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.5.self_attn.q_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.5.self_attn.q_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.5.self_attn.v_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.5.self_attn.v_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.6.self_attn.q_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.6.self_attn.q_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.6.self_attn.v_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.6.self_attn.v_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.7.self_attn.q_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.7.self_attn.q_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.7.self_attn.v_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.7.self_attn.v_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.8.self_attn.q_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.8.self_attn.q_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.8.self_attn.v_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.8.self_attn.v_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.9.self_attn.q_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.9.self_attn.q_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.9.self_attn.v_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.9.self_attn.v_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.10.self_attn.q_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.10.self_attn.q_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.10.self_attn.v_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.10.self_attn.v_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.11.self_attn.q_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.11.self_attn.q_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.11.self_attn.v_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.11.self_attn.v_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.12.self_attn.q_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.12.self_attn.q_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.12.self_attn.v_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.12.self_attn.v_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.13.self_attn.q_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.13.self_attn.q_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.13.self_attn.v_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.13.self_attn.v_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.14.self_attn.q_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.14.self_attn.q_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.14.self_attn.v_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.14.self_attn.v_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.15.self_attn.q_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.15.self_attn.q_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.15.self_attn.v_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.15.self_attn.v_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.16.self_attn.q_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.16.self_attn.q_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.16.self_attn.v_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.16.self_attn.v_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.17.self_attn.q_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.17.self_attn.q_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.17.self_attn.v_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.17.self_attn.v_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.18.self_attn.q_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.18.self_attn.q_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.18.self_attn.v_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.18.self_attn.v_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.19.self_attn.q_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.19.self_attn.q_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.19.self_attn.v_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.19.self_attn.v_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.20.self_attn.q_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.20.self_attn.q_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.20.self_attn.v_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.20.self_attn.v_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.21.self_attn.q_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.21.self_attn.q_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.21.self_attn.v_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.21.self_attn.v_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.22.self_attn.q_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.22.self_attn.q_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.22.self_attn.v_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.22.self_attn.v_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.23.self_attn.q_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.23.self_attn.q_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.23.self_attn.v_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.23.self_attn.v_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.24.self_attn.q_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.24.self_attn.q_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.24.self_attn.v_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.24.self_attn.v_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.25.self_attn.q_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.25.self_attn.q_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.25.self_attn.v_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.25.self_attn.v_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.26.self_attn.q_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.26.self_attn.q_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.26.self_attn.v_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.26.self_attn.v_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.27.self_attn.q_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.27.self_attn.q_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.27.self_attn.v_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.27.self_attn.v_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.28.self_attn.q_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.28.self_attn.q_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.28.self_attn.v_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.28.self_attn.v_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.29.self_attn.q_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.29.self_attn.q_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.29.self_attn.v_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.29.self_attn.v_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.30.self_attn.q_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.30.self_attn.q_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.30.self_attn.v_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.30.self_attn.v_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.31.self_attn.q_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.31.self_attn.q_proj.lora_B.weight
module.llama_model.base_model.model.model.layers.31.self_attn.v_proj.lora_A.weight
module.llama_model.base_model.model.model.layers.31.self_attn.v_proj.lora_B.weight
module.llama_proj.weight
module.llama_proj.bias
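
Note: the trainable parameters listed above are what wrapping the frozen LLaMA backbone with PEFT LoRA adapters on the attention query and value projections produces, plus the separate vision-to-language projection (llama_proj); the leading "module." prefix comes from DistributedDataParallel. A minimal sketch of such a setup is shown below; the rank, alpha, dropout, and checkpoint path are illustrative assumptions, not values read from this log.

    # Hypothetical sketch: LoRA adapters on q_proj/v_proj of a LLaMA model.
    # With the peft releases of that period this yields parameter names of the form
    #   base_model.model.model.layers.<i>.self_attn.{q,v}_proj.lora_{A,B}.weight
    from peft import LoraConfig, get_peft_model
    from transformers import LlamaForCausalLM

    llama = LlamaForCausalLM.from_pretrained("path/to/llama-2-7b-chat-hf")  # placeholder path
    lora_cfg = LoraConfig(
        r=64,                                 # assumed rank (see the parameter-count check below)
        lora_alpha=16,                        # assumed scaling
        lora_dropout=0.05,                    # assumed dropout
        bias="none",
        task_type="CAUSAL_LM",
        target_modules=["q_proj", "v_proj"],  # matches the q_proj / v_proj names in the log
    )
    llama = get_peft_model(llama, lora_cfg)   # freezes base weights, adds lora_A / lora_B
    llama.print_trainable_parameters()        # only the adapter matrices remain trainable
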
2023-10-23 09:01:07,002 [INFO] dataset_ratios not specified, datasets will be concatenated (map-style datasets) or chained (webdataset.DataPipeline).
2023-10-23 09:01:07,002 [INFO] Loaded 12171 records for train split from the dataset.
2023-10-23 09:01:07,020 [INFO] number of trainable parameters: 56627200
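
Note: the reported 56,627,200 trainable parameters are consistent with LoRA rank 64 on q_proj/v_proj across 32 layers of hidden size 4096, plus a 5632 -> 4096 llama_proj linear layer with bias. Those dimensions are inferred from the total rather than printed in the log, so the breakdown below is a plausibility check, not ground truth.

    # Back-of-the-envelope check of "number of trainable parameters: 56627200".
    # Hidden size, layer count, LoRA rank and the 5632 -> 4096 projection are assumptions.
    hidden, layers, rank = 4096, 32, 64
    lora = layers * 2 * (rank * hidden + hidden * rank)  # q_proj + v_proj, each with lora_A + lora_B
    proj = 5632 * 4096 + 4096                            # llama_proj.weight + llama_proj.bias
    print(lora, proj, lora + proj)                       # 33554432 23072768 56627200
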
2023-10-23 09:01:07,021 [INFO] Start training epoch 0, 1000 iters per inner epoch.
Train: data epoch: [0] [ 0/1000] eta: 0:45:28 lr: 0.000001 loss: 1.3049 time: 2.7288 data: 0.0000 max mem: 33055
/home/chenj0g/anaconda3/envs/eye/lib/python3.9/site-packages/torch/utils/checkpoint.py:31: UserWarning: None of the inputs have requires_grad=True. Gradients will be None
  warnings.warn("None of the inputs have requires_grad=True. Gradients will be None")
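
Note: the UserWarning above is raised by torch.utils.checkpoint when a gradient-checkpointed block receives only tensors with requires_grad=False, which is typical when the embedding layer feeding the checkpointed transformer blocks is frozen. A standalone reproduction (illustrative only, not taken from this repository) is:

    # Minimal reproduction of the checkpoint warning seen in the log.
    import torch
    import torch.nn as nn
    from torch.utils.checkpoint import checkpoint

    layer = nn.Linear(8, 8)
    x = torch.randn(2, 8)     # requires_grad=False, like activations from a frozen module
    y = checkpoint(layer, x)  # emits: "None of the inputs have requires_grad=True. Gradients will be None"
    print(y.requires_grad)    # False: parameters used inside the checkpointed call get no gradients
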
Train: data epoch: [0] [ 50/1000] eta: 0:07:58 lr: 0.000001 loss: 1.2327 time: 0.4508 data: 0.0000 max mem: 38295
Train: data epoch: [0] [ 100/1000] eta: 0:07:10 lr: 0.000002 loss: 1.1049 time: 0.4457 data: 0.0000 max mem: 39575
Train: data epoch: [0] [ 150/1000] eta: 0:06:42 lr: 0.000002 loss: 1.2879 time: 0.4630 data: 0.0000 max mem: 40211
Train: data epoch: [0] [ 200/1000] eta: 0:06:18 lr: 0.000003 loss: 1.0077 time: 0.4527 data: 0.0000 max mem: 40211
Train: data epoch: [0] [ 250/1000] eta: 0:05:52 lr: 0.000003 loss: 1.4790 time: 0.4626 data: 0.0000 max mem: 41450
Train: data epoch: [0] [ 300/1000] eta: 0:05:27 lr: 0.000004 loss: 1.2864 time: 0.4639 data: 0.0000 max mem: 41450
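
Note: the learning rate climbing from 0.000001 toward 0.000004 over the first 300 iterations looks like a linear warmup phase. The sketch below reproduces the printed values under an assumed warmup from 1e-6 to 1e-5 over 1000 steps; the actual schedule and targets are not stated in this log.

    # Assumed linear warmup that happens to match the lr printed every 50 iterations above.
    warmup_lr, init_lr, warmup_steps = 1e-6, 1e-5, 1000

    def warmup(step: int) -> float:
        return warmup_lr + (init_lr - warmup_lr) * min(step, warmup_steps) / warmup_steps

    for step in (0, 100, 200, 300):
        print(step, f"{warmup(step):.6f}")  # 0.000001, 0.000002, 0.000003, 0.000004
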
Traceback (most recent call last):
  File "/ibex/project/c2133/minigpt2_finetune/MiniGPT-4_finetune/train.py", line 118, in <module>
    main()
  File "/ibex/project/c2133/minigpt2_finetune/MiniGPT-4_finetune/train.py", line 114, in main
    runner.train()
  File "/ibex/project/c2133/minigpt2_finetune/MiniGPT-4_finetune/minigpt4/runners/runner_base.py", line 377, in train
    train_stats = self.train_epoch(cur_epoch)
  File "/ibex/project/c2133/minigpt2_finetune/MiniGPT-4_finetune/minigpt4/runners/runner_base.py", line 437, in train_epoch
    return self.task.train_epoch(
  File "/ibex/project/c2133/minigpt2_finetune/MiniGPT-4_finetune/minigpt4/tasks/base_task.py", line 116, in train_epoch
    return self._train_inner_loop(
  File "/ibex/project/c2133/minigpt2_finetune/MiniGPT-4_finetune/minigpt4/tasks/base_task.py", line 225, in _train_inner_loop
    scaler.scale(loss).backward()
  File "/home/chenj0g/anaconda3/envs/eye/lib/python3.9/site-packages/torch/_tensor.py", line 487, in backward
    torch.autograd.backward(
  File "/home/chenj0g/anaconda3/envs/eye/lib/python3.9/site-packages/torch/autograd/__init__.py", line 200, in backward
    Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
KeyboardInterrupt