street-fighter-ai/000_image_stack_ram_based_reward/optuna/tuning_log.txt

8947 lines
370 KiB
Plaintext
Raw Normal View History

2023-03-30 18:10:25 +00:00
| value_loss | 20 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.64e+03 |
| ep_rew_mean | -99 |
| time/ | |
| fps | 353 |
| iterations | 6 |
| time_elapsed | 23 |
| total_timesteps | 8226 |
| train/ | |
| approx_kl | 0.014875706 |
| clip_fraction | 0.148 |
| clip_range | 0.26 |
| entropy_loss | -8.24 |
| explained_variance | 0.151 |
| learning_rate | 8.14e-05 |
| loss | 2.44 |
| n_updates | 50 |
| policy_gradient_loss | -0.00671 |
| value_loss | 10.3 |
-----------------------------------------
---------------------------------------
| rollout/ | |
| ep_len_mean | 2.64e+03 |
| ep_rew_mean | -99 |
| time/ | |
| fps | 350 |
| iterations | 7 |
| time_elapsed | 27 |
| total_timesteps | 9597 |
| train/ | |
| approx_kl | 0.0164865 |
| clip_fraction | 0.162 |
| clip_range | 0.26 |
| entropy_loss | -8.21 |
| explained_variance | -0.272 |
| learning_rate | 8.14e-05 |
| loss | 4.19 |
| n_updates | 60 |
| policy_gradient_loss | -0.0113 |
| value_loss | 16.8 |
---------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.73e+03 |
| ep_rew_mean | -88.5 |
| time/ | |
| fps | 349 |
| iterations | 8 |
| time_elapsed | 31 |
| total_timesteps | 10968 |
| train/ | |
| approx_kl | 0.014885512 |
| clip_fraction | 0.162 |
| clip_range | 0.26 |
| entropy_loss | -8.18 |
| explained_variance | 0.0707 |
| learning_rate | 8.14e-05 |
| loss | 1.46 |
| n_updates | 70 |
| policy_gradient_loss | -0.017 |
| value_loss | 8.77 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.73e+03 |
| ep_rew_mean | -88.5 |
| time/ | |
| fps | 349 |
| iterations | 9 |
| time_elapsed | 35 |
| total_timesteps | 12339 |
| train/ | |
| approx_kl | 0.018109197 |
| clip_fraction | 0.118 |
| clip_range | 0.26 |
| entropy_loss | -8.17 |
| explained_variance | 0.0377 |
| learning_rate | 8.14e-05 |
| loss | 2.68 |
| n_updates | 80 |
| policy_gradient_loss | -0.0208 |
| value_loss | 10 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.47e+03 |
| ep_rew_mean | -121 |
| time/ | |
| fps | 347 |
| iterations | 10 |
| time_elapsed | 39 |
| total_timesteps | 13710 |
| train/ | |
| approx_kl | 0.02112376 |
| clip_fraction | 0.154 |
| clip_range | 0.26 |
| entropy_loss | -8.17 |
| explained_variance | 0.0154 |
| learning_rate | 8.14e-05 |
| loss | 4.45 |
| n_updates | 90 |
| policy_gradient_loss | -0.0182 |
| value_loss | 13.2 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.43e+03 |
| ep_rew_mean | -94 |
| time/ | |
| fps | 347 |
| iterations | 11 |
| time_elapsed | 43 |
| total_timesteps | 15081 |
| train/ | |
| approx_kl | 0.03655843 |
| clip_fraction | 0.271 |
| clip_range | 0.26 |
| entropy_loss | -8.1 |
| explained_variance | -0.0637 |
| learning_rate | 8.14e-05 |
| loss | 2.29 |
| n_updates | 100 |
| policy_gradient_loss | -0.00361 |
| value_loss | 31.6 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.19e+03 |
| ep_rew_mean | -45.3 |
| time/ | |
| fps | 349 |
| iterations | 12 |
| time_elapsed | 47 |
| total_timesteps | 16452 |
| train/ | |
| approx_kl | 0.037120674 |
| clip_fraction | 0.245 |
| clip_range | 0.26 |
| entropy_loss | -8.08 |
| explained_variance | 0.104 |
| learning_rate | 8.14e-05 |
| loss | 7.71 |
| n_updates | 110 |
| policy_gradient_loss | -0.00649 |
| value_loss | 22.7 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.21e+03 |
| ep_rew_mean | -65 |
| time/ | |
| fps | 350 |
| iterations | 13 |
| time_elapsed | 50 |
| total_timesteps | 17823 |
| train/ | |
| approx_kl | 0.027819885 |
| clip_fraction | 0.228 |
| clip_range | 0.26 |
| entropy_loss | -7.98 |
| explained_variance | 0.0436 |
| learning_rate | 8.14e-05 |
| loss | 7.06 |
| n_updates | 120 |
| policy_gradient_loss | 0.00299 |
| value_loss | 59.9 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.21e+03 |
| ep_rew_mean | -65 |
| time/ | |
| fps | 351 |
| iterations | 14 |
| time_elapsed | 54 |
| total_timesteps | 19194 |
| train/ | |
| approx_kl | 0.027972419 |
| clip_fraction | 0.197 |
| clip_range | 0.26 |
| entropy_loss | -7.94 |
| explained_variance | 0.0199 |
| learning_rate | 8.14e-05 |
| loss | 4.1 |
| n_updates | 130 |
| policy_gradient_loss | -0.00526 |
| value_loss | 26.3 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.21e+03 |
| ep_rew_mean | -65 |
| time/ | |
| fps | 352 |
| iterations | 15 |
| time_elapsed | 58 |
| total_timesteps | 20565 |
| train/ | |
| approx_kl | 0.035860673 |
| clip_fraction | 0.236 |
| clip_range | 0.26 |
| entropy_loss | -7.89 |
| explained_variance | -0.13 |
| learning_rate | 8.14e-05 |
| loss | 4.36 |
| n_updates | 140 |
| policy_gradient_loss | -0.0122 |
| value_loss | 10.5 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.3e+03 |
| ep_rew_mean | -57.8 |
| time/ | |
| fps | 352 |
| iterations | 16 |
| time_elapsed | 62 |
| total_timesteps | 21936 |
| train/ | |
| approx_kl | 0.020882078 |
| clip_fraction | 0.207 |
| clip_range | 0.26 |
| entropy_loss | -7.97 |
| explained_variance | -0.266 |
| learning_rate | 8.14e-05 |
| loss | 17.1 |
| n_updates | 150 |
| policy_gradient_loss | -0.00845 |
| value_loss | 12.5 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.32e+03 |
| ep_rew_mean | -76.7 |
| time/ | |
| fps | 353 |
| iterations | 17 |
| time_elapsed | 65 |
| total_timesteps | 23307 |
| train/ | |
| approx_kl | 0.017862184 |
| clip_fraction | 0.112 |
| clip_range | 0.26 |
| entropy_loss | -8.11 |
| explained_variance | -0.152 |
| learning_rate | 8.14e-05 |
| loss | 0.52 |
| n_updates | 160 |
| policy_gradient_loss | -0.014 |
| value_loss | 5 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.32e+03 |
| ep_rew_mean | -76.7 |
| time/ | |
| fps | 354 |
| iterations | 18 |
| time_elapsed | 69 |
| total_timesteps | 24678 |
| train/ | |
| approx_kl | 0.02715041 |
| clip_fraction | 0.179 |
| clip_range | 0.26 |
| entropy_loss | -8.03 |
| explained_variance | 0.0571 |
| learning_rate | 8.14e-05 |
| loss | 6.35 |
| n_updates | 170 |
| policy_gradient_loss | -0.00481 |
| value_loss | 43 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.26e+03 |
| ep_rew_mean | -52.5 |
| time/ | |
| fps | 354 |
| iterations | 19 |
| time_elapsed | 73 |
| total_timesteps | 26049 |
| train/ | |
| approx_kl | 0.020203596 |
| clip_fraction | 0.15 |
| clip_range | 0.26 |
| entropy_loss | -8.08 |
| explained_variance | -0.578 |
| learning_rate | 8.14e-05 |
| loss | 0.888 |
| n_updates | 180 |
| policy_gradient_loss | -0.011 |
| value_loss | 11.7 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.28e+03 |
| ep_rew_mean | -57.9 |
| time/ | |
| fps | 354 |
| iterations | 20 |
| time_elapsed | 77 |
| total_timesteps | 27420 |
| train/ | |
| approx_kl | 0.02579885 |
| clip_fraction | 0.181 |
| clip_range | 0.26 |
| entropy_loss | -7.96 |
| explained_variance | -0.0316 |
| learning_rate | 8.14e-05 |
| loss | 1.58 |
| n_updates | 190 |
| policy_gradient_loss | -0.0102 |
| value_loss | 34.6 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.28e+03 |
| ep_rew_mean | -57.9 |
| time/ | |
| fps | 354 |
| iterations | 21 |
| time_elapsed | 81 |
| total_timesteps | 28791 |
| train/ | |
| approx_kl | 0.016173096 |
| clip_fraction | 0.172 |
| clip_range | 0.26 |
| entropy_loss | -7.84 |
| explained_variance | -0.369 |
| learning_rate | 8.14e-05 |
| loss | 0.632 |
| n_updates | 200 |
| policy_gradient_loss | -0.0133 |
| value_loss | 11.3 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.28e+03 |
| ep_rew_mean | -57.9 |
| time/ | |
| fps | 354 |
| iterations | 22 |
| time_elapsed | 84 |
| total_timesteps | 30162 |
| train/ | |
| approx_kl | 0.018948458 |
| clip_fraction | 0.159 |
| clip_range | 0.26 |
| entropy_loss | -7.89 |
| explained_variance | -0.091 |
| learning_rate | 8.14e-05 |
| loss | 5.72 |
| n_updates | 210 |
| policy_gradient_loss | -0.0145 |
| value_loss | 7.36 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.36e+03 |
| ep_rew_mean | -56.1 |
| time/ | |
| fps | 354 |
| iterations | 23 |
| time_elapsed | 88 |
| total_timesteps | 31533 |
| train/ | |
| approx_kl | 0.018955443 |
| clip_fraction | 0.145 |
| clip_range | 0.26 |
| entropy_loss | -7.97 |
| explained_variance | -0.269 |
| learning_rate | 8.14e-05 |
| loss | 1.12 |
| n_updates | 220 |
| policy_gradient_loss | -0.0227 |
| value_loss | 7.03 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.36e+03 |
| ep_rew_mean | -56.1 |
| time/ | |
| fps | 354 |
| iterations | 24 |
| time_elapsed | 92 |
| total_timesteps | 32904 |
| train/ | |
| approx_kl | 0.017530933 |
| clip_fraction | 0.151 |
| clip_range | 0.26 |
| entropy_loss | -8.05 |
| explained_variance | -0.11 |
| learning_rate | 8.14e-05 |
| loss | 0.575 |
| n_updates | 230 |
| policy_gradient_loss | -0.0203 |
| value_loss | 11 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.36e+03 |
| ep_rew_mean | -49.5 |
| time/ | |
| fps | 354 |
| iterations | 25 |
| time_elapsed | 96 |
| total_timesteps | 34275 |
| train/ | |
| approx_kl | 0.025710236 |
| clip_fraction | 0.166 |
| clip_range | 0.26 |
| entropy_loss | -8.04 |
| explained_variance | 0.00206 |
| learning_rate | 8.14e-05 |
| loss | 1.7 |
| n_updates | 240 |
| policy_gradient_loss | -0.0246 |
| value_loss | 11.2 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.36e+03 |
| ep_rew_mean | -60.1 |
| time/ | |
| fps | 354 |
| iterations | 26 |
| time_elapsed | 100 |
| total_timesteps | 35646 |
| train/ | |
| approx_kl | 0.026275737 |
| clip_fraction | 0.21 |
| clip_range | 0.26 |
| entropy_loss | -8.1 |
| explained_variance | -0.414 |
| learning_rate | 8.14e-05 |
| loss | 0.556 |
| n_updates | 250 |
| policy_gradient_loss | -0.0248 |
| value_loss | 5.13 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.36e+03 |
| ep_rew_mean | -60.1 |
| time/ | |
| fps | 354 |
| iterations | 27 |
| time_elapsed | 104 |
| total_timesteps | 37017 |
| train/ | |
| approx_kl | 0.026121318 |
| clip_fraction | 0.171 |
| clip_range | 0.26 |
| entropy_loss | -8.12 |
| explained_variance | -0.0283 |
| learning_rate | 8.14e-05 |
| loss | 2.91 |
| n_updates | 260 |
| policy_gradient_loss | -0.00495 |
| value_loss | 30 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.35e+03 |
| ep_rew_mean | -60.1 |
| time/ | |
| fps | 354 |
| iterations | 28 |
| time_elapsed | 108 |
| total_timesteps | 38388 |
| train/ | |
| approx_kl | 0.02375033 |
| clip_fraction | 0.177 |
| clip_range | 0.26 |
| entropy_loss | -8.01 |
| explained_variance | -0.12 |
| learning_rate | 8.14e-05 |
| loss | 4.69 |
| n_updates | 270 |
| policy_gradient_loss | -0.0175 |
| value_loss | 11.3 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.35e+03 |
| ep_rew_mean | -60.1 |
| time/ | |
| fps | 354 |
| iterations | 29 |
| time_elapsed | 112 |
| total_timesteps | 39759 |
| train/ | |
| approx_kl | 0.025788946 |
| clip_fraction | 0.206 |
| clip_range | 0.26 |
| entropy_loss | -8.1 |
| explained_variance | 0.122 |
| learning_rate | 8.14e-05 |
| loss | 1.84 |
| n_updates | 280 |
| policy_gradient_loss | -0.0214 |
| value_loss | 13.7 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.35e+03 |
| ep_rew_mean | -57.4 |
| time/ | |
| fps | 354 |
| iterations | 30 |
| time_elapsed | 115 |
| total_timesteps | 41130 |
| train/ | |
| approx_kl | 0.035159614 |
| clip_fraction | 0.199 |
| clip_range | 0.26 |
| entropy_loss | -8.01 |
| explained_variance | -0.0443 |
| learning_rate | 8.14e-05 |
| loss | 0.494 |
| n_updates | 290 |
| policy_gradient_loss | -0.0228 |
| value_loss | 9.62 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.35e+03 |
| ep_rew_mean | -55.4 |
| time/ | |
| fps | 354 |
| iterations | 31 |
| time_elapsed | 119 |
| total_timesteps | 42501 |
| train/ | |
| approx_kl | 0.03578476 |
| clip_fraction | 0.198 |
| clip_range | 0.26 |
| entropy_loss | -7.9 |
| explained_variance | 0.0155 |
| learning_rate | 8.14e-05 |
| loss | 0.58 |
| n_updates | 300 |
| policy_gradient_loss | -0.0134 |
| value_loss | 7.94 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.3e+03 |
| ep_rew_mean | -41.5 |
| time/ | |
| fps | 355 |
| iterations | 32 |
| time_elapsed | 123 |
| total_timesteps | 43872 |
| train/ | |
| approx_kl | 0.027321111 |
| clip_fraction | 0.229 |
| clip_range | 0.26 |
| entropy_loss | -7.84 |
| explained_variance | -0.272 |
| learning_rate | 8.14e-05 |
| loss | 1.25 |
| n_updates | 310 |
| policy_gradient_loss | -0.0237 |
| value_loss | 8.41 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.24e+03 |
| ep_rew_mean | -28.3 |
| time/ | |
| fps | 355 |
| iterations | 33 |
| time_elapsed | 127 |
| total_timesteps | 45243 |
| train/ | |
| approx_kl | 0.032422796 |
| clip_fraction | 0.232 |
| clip_range | 0.26 |
| entropy_loss | -7.83 |
| explained_variance | 0.101 |
| learning_rate | 8.14e-05 |
| loss | 6.39 |
| n_updates | 320 |
| policy_gradient_loss | -0.00398 |
| value_loss | 30.1 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.2e+03 |
| ep_rew_mean | -20.8 |
| time/ | |
| fps | 356 |
| iterations | 34 |
| time_elapsed | 130 |
| total_timesteps | 46614 |
| train/ | |
| approx_kl | 0.031185307 |
| clip_fraction | 0.234 |
| clip_range | 0.26 |
| entropy_loss | -7.86 |
| explained_variance | -0.055 |
| learning_rate | 8.14e-05 |
| loss | 3.37 |
| n_updates | 330 |
| policy_gradient_loss | -0.0114 |
| value_loss | 35.8 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.2e+03 |
| ep_rew_mean | -20.8 |
| time/ | |
| fps | 355 |
| iterations | 35 |
| time_elapsed | 134 |
| total_timesteps | 47985 |
| train/ | |
| approx_kl | 0.030157859 |
| clip_fraction | 0.268 |
| clip_range | 0.26 |
| entropy_loss | -7.92 |
| explained_variance | -0.443 |
| learning_rate | 8.14e-05 |
| loss | 1.93 |
| n_updates | 340 |
| policy_gradient_loss | -0.0119 |
| value_loss | 16.3 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.23e+03 |
| ep_rew_mean | -15.9 |
| time/ | |
| fps | 355 |
| iterations | 36 |
| time_elapsed | 138 |
| total_timesteps | 49356 |
| train/ | |
| approx_kl | 0.028865792 |
| clip_fraction | 0.237 |
| clip_range | 0.26 |
| entropy_loss | -7.59 |
| explained_variance | 0.14 |
| learning_rate | 8.14e-05 |
| loss | 0.794 |
| n_updates | 350 |
| policy_gradient_loss | -0.0105 |
| value_loss | 9.95 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.23e+03 |
| ep_rew_mean | -15.9 |
| time/ | |
| fps | 355 |
| iterations | 37 |
| time_elapsed | 142 |
| total_timesteps | 50727 |
| train/ | |
| approx_kl | 0.02842192 |
| clip_fraction | 0.236 |
| clip_range | 0.26 |
| entropy_loss | -7.84 |
| explained_variance | -0.549 |
| learning_rate | 8.14e-05 |
| loss | 0.895 |
| n_updates | 360 |
| policy_gradient_loss | -0.0193 |
| value_loss | 10.1 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.22e+03 |
| ep_rew_mean | -20.1 |
| time/ | |
| fps | 355 |
| iterations | 38 |
| time_elapsed | 146 |
| total_timesteps | 52098 |
| train/ | |
| approx_kl | 0.03672131 |
| clip_fraction | 0.237 |
| clip_range | 0.26 |
| entropy_loss | -8.07 |
| explained_variance | 0.0648 |
| learning_rate | 8.14e-05 |
| loss | 4.86 |
| n_updates | 370 |
| policy_gradient_loss | -0.0211 |
| value_loss | 6.36 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.22e+03 |
| ep_rew_mean | -30.7 |
| time/ | |
| fps | 355 |
| iterations | 39 |
| time_elapsed | 150 |
| total_timesteps | 53469 |
| train/ | |
| approx_kl | 0.035383318 |
| clip_fraction | 0.218 |
| clip_range | 0.26 |
| entropy_loss | -7.98 |
| explained_variance | 0.0373 |
| learning_rate | 8.14e-05 |
| loss | 0.993 |
| n_updates | 380 |
| policy_gradient_loss | -0.0212 |
| value_loss | 13.9 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.19e+03 |
| ep_rew_mean | -23.8 |
| time/ | |
| fps | 355 |
| iterations | 40 |
| time_elapsed | 154 |
| total_timesteps | 54840 |
| train/ | |
| approx_kl | 0.035663478 |
| clip_fraction | 0.313 |
| clip_range | 0.26 |
| entropy_loss | -7.36 |
| explained_variance | 0.0586 |
| learning_rate | 8.14e-05 |
| loss | 2.23 |
| n_updates | 390 |
| policy_gradient_loss | -0.00997 |
| value_loss | 46.3 |
-----------------------------------------
--------------------------------------
| rollout/ | |
| ep_len_mean | 2.19e+03 |
| ep_rew_mean | -23.8 |
| time/ | |
| fps | 354 |
| iterations | 41 |
| time_elapsed | 158 |
| total_timesteps | 56211 |
| train/ | |
| approx_kl | 0.052496 |
| clip_fraction | 0.338 |
| clip_range | 0.26 |
| entropy_loss | -7.89 |
| explained_variance | -0.49 |
| learning_rate | 8.14e-05 |
| loss | 1.45 |
| n_updates | 400 |
| policy_gradient_loss | -0.00552 |
| value_loss | 23.9 |
--------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.18e+03 |
| ep_rew_mean | -25.5 |
| time/ | |
| fps | 354 |
| iterations | 42 |
| time_elapsed | 162 |
| total_timesteps | 57582 |
| train/ | |
| approx_kl | 0.032533452 |
| clip_fraction | 0.26 |
| clip_range | 0.26 |
| entropy_loss | -7.35 |
| explained_variance | -0.0372 |
| learning_rate | 8.14e-05 |
| loss | 1.11 |
| n_updates | 410 |
| policy_gradient_loss | -0.0189 |
| value_loss | 9.41 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.18e+03 |
| ep_rew_mean | -25.5 |
| time/ | |
| fps | 355 |
| iterations | 43 |
| time_elapsed | 166 |
| total_timesteps | 58953 |
| train/ | |
| approx_kl | 0.033652484 |
| clip_fraction | 0.245 |
| clip_range | 0.26 |
| entropy_loss | -7.66 |
| explained_variance | -0.0541 |
| learning_rate | 8.14e-05 |
| loss | 1.85 |
| n_updates | 420 |
| policy_gradient_loss | -0.013 |
| value_loss | 14.4 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.14e+03 |
| ep_rew_mean | -12.6 |
| time/ | |
| fps | 355 |
| iterations | 44 |
| time_elapsed | 169 |
| total_timesteps | 60324 |
| train/ | |
| approx_kl | 0.029627763 |
| clip_fraction | 0.287 |
| clip_range | 0.26 |
| entropy_loss | -7.78 |
| explained_variance | -0.173 |
| learning_rate | 8.14e-05 |
| loss | 4.15 |
| n_updates | 430 |
| policy_gradient_loss | -0.0145 |
| value_loss | 11.2 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.1e+03 |
| ep_rew_mean | -3.31 |
| time/ | |
| fps | 355 |
| iterations | 45 |
| time_elapsed | 173 |
| total_timesteps | 61695 |
| train/ | |
| approx_kl | 0.053978715 |
| clip_fraction | 0.301 |
| clip_range | 0.26 |
| entropy_loss | -7.78 |
| explained_variance | 0.0145 |
| learning_rate | 8.14e-05 |
| loss | 5.25 |
| n_updates | 440 |
| policy_gradient_loss | -0.00936 |
| value_loss | 83.2 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.1e+03 |
| ep_rew_mean | -3.31 |
| time/ | |
| fps | 355 |
| iterations | 46 |
| time_elapsed | 177 |
| total_timesteps | 63066 |
| train/ | |
| approx_kl | 0.04385848 |
| clip_fraction | 0.309 |
| clip_range | 0.26 |
| entropy_loss | -7.72 |
| explained_variance | -0.142 |
| learning_rate | 8.14e-05 |
| loss | 1.44 |
| n_updates | 450 |
| policy_gradient_loss | -0.00675 |
| value_loss | 32.4 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.12e+03 |
| ep_rew_mean | -1.23 |
| time/ | |
| fps | 356 |
| iterations | 47 |
| time_elapsed | 180 |
| total_timesteps | 64437 |
| train/ | |
| approx_kl | 0.034602597 |
| clip_fraction | 0.301 |
| clip_range | 0.26 |
| entropy_loss | -7.3 |
| explained_variance | -0.657 |
| learning_rate | 8.14e-05 |
| loss | 1.27 |
| n_updates | 460 |
| policy_gradient_loss | -0.0142 |
| value_loss | 9.68 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.12e+03 |
| ep_rew_mean | -1.23 |
| time/ | |
| fps | 356 |
| iterations | 48 |
| time_elapsed | 184 |
| total_timesteps | 65808 |
| train/ | |
| approx_kl | 0.034854636 |
| clip_fraction | 0.264 |
| clip_range | 0.26 |
| entropy_loss | -7.11 |
| explained_variance | 0.0247 |
| learning_rate | 8.14e-05 |
| loss | 11.7 |
| n_updates | 470 |
| policy_gradient_loss | -0.0175 |
| value_loss | 20.1 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.13e+03 |
| ep_rew_mean | -1.26 |
| time/ | |
| fps | 356 |
| iterations | 49 |
| time_elapsed | 188 |
| total_timesteps | 67179 |
| train/ | |
| approx_kl | 0.050826874 |
| clip_fraction | 0.328 |
| clip_range | 0.26 |
| entropy_loss | -7.92 |
| explained_variance | -0.296 |
| learning_rate | 8.14e-05 |
| loss | 5.44 |
| n_updates | 480 |
| policy_gradient_loss | -0.0181 |
| value_loss | 13 |
-----------------------------------------
---------------------------------------
| rollout/ | |
| ep_len_mean | 2.12e+03 |
| ep_rew_mean | -2.72 |
| time/ | |
| fps | 356 |
| iterations | 50 |
| time_elapsed | 192 |
| total_timesteps | 68550 |
| train/ | |
| approx_kl | 0.0387544 |
| clip_fraction | 0.284 |
| clip_range | 0.26 |
| entropy_loss | -7.56 |
| explained_variance | -0.016 |
| learning_rate | 8.14e-05 |
| loss | 1.01 |
| n_updates | 490 |
| policy_gradient_loss | -0.0124 |
| value_loss | 10.5 |
---------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.09e+03 |
| ep_rew_mean | 2.79 |
| time/ | |
| fps | 356 |
| iterations | 51 |
| time_elapsed | 196 |
| total_timesteps | 69921 |
| train/ | |
| approx_kl | 0.033755988 |
| clip_fraction | 0.261 |
| clip_range | 0.26 |
| entropy_loss | -6.82 |
| explained_variance | 0.0437 |
| learning_rate | 8.14e-05 |
| loss | 1.73 |
| n_updates | 500 |
| policy_gradient_loss | -0.019 |
| value_loss | 10.5 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.08e+03 |
| ep_rew_mean | 6.5 |
| time/ | |
| fps | 356 |
| iterations | 52 |
| time_elapsed | 200 |
| total_timesteps | 71292 |
| train/ | |
| approx_kl | 0.028060019 |
| clip_fraction | 0.276 |
| clip_range | 0.26 |
| entropy_loss | -7.04 |
| explained_variance | 0.0647 |
| learning_rate | 8.14e-05 |
| loss | 1.4 |
| n_updates | 510 |
| policy_gradient_loss | -0.00959 |
| value_loss | 33.6 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.08e+03 |
| ep_rew_mean | 6.5 |
| time/ | |
| fps | 355 |
| iterations | 53 |
| time_elapsed | 204 |
| total_timesteps | 72663 |
| train/ | |
| approx_kl | 0.029590033 |
| clip_fraction | 0.232 |
| clip_range | 0.26 |
| entropy_loss | -6.96 |
| explained_variance | 0.174 |
| learning_rate | 8.14e-05 |
| loss | 7.49 |
| n_updates | 520 |
| policy_gradient_loss | -0.00783 |
| value_loss | 14.9 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.08e+03 |
| ep_rew_mean | 6.5 |
| time/ | |
| fps | 355 |
| iterations | 54 |
| time_elapsed | 208 |
| total_timesteps | 74034 |
| train/ | |
| approx_kl | 0.044851318 |
| clip_fraction | 0.327 |
| clip_range | 0.26 |
| entropy_loss | -7.81 |
| explained_variance | 0.106 |
| learning_rate | 8.14e-05 |
| loss | 1.97 |
| n_updates | 530 |
| policy_gradient_loss | -0.00695 |
| value_loss | 14.4 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.12e+03 |
| ep_rew_mean | 6 |
| time/ | |
| fps | 355 |
| iterations | 55 |
| time_elapsed | 212 |
| total_timesteps | 75405 |
| train/ | |
| approx_kl | 0.042934623 |
| clip_fraction | 0.296 |
| clip_range | 0.26 |
| entropy_loss | -7.69 |
| explained_variance | -0.881 |
| learning_rate | 8.14e-05 |
| loss | 0.152 |
| n_updates | 540 |
| policy_gradient_loss | -0.023 |
| value_loss | 3.2 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.12e+03 |
| ep_rew_mean | 6 |
| time/ | |
| fps | 355 |
| iterations | 56 |
| time_elapsed | 216 |
| total_timesteps | 76776 |
| train/ | |
| approx_kl | 0.031715214 |
| clip_fraction | 0.266 |
| clip_range | 0.26 |
| entropy_loss | -7.55 |
| explained_variance | 0.163 |
| learning_rate | 8.14e-05 |
| loss | 0.161 |
| n_updates | 550 |
| policy_gradient_loss | -0.0273 |
| value_loss | 4.01 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.14e+03 |
| ep_rew_mean | 5.56 |
| time/ | |
| fps | 355 |
| iterations | 57 |
| time_elapsed | 219 |
| total_timesteps | 78147 |
| train/ | |
| approx_kl | 0.043580677 |
| clip_fraction | 0.346 |
| clip_range | 0.26 |
| entropy_loss | -7.62 |
| explained_variance | -0.127 |
| learning_rate | 8.14e-05 |
| loss | 1.73 |
| n_updates | 560 |
| policy_gradient_loss | -0.0176 |
| value_loss | 11 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.13e+03 |
| ep_rew_mean | 11.6 |
| time/ | |
| fps | 355 |
| iterations | 58 |
| time_elapsed | 223 |
| total_timesteps | 79518 |
| train/ | |
| approx_kl | 0.038065173 |
| clip_fraction | 0.272 |
| clip_range | 0.26 |
| entropy_loss | -7.52 |
| explained_variance | 0.234 |
| learning_rate | 8.14e-05 |
| loss | 4.84 |
| n_updates | 570 |
| policy_gradient_loss | -0.018 |
| value_loss | 8.44 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.13e+03 |
| ep_rew_mean | 11.6 |
| time/ | |
| fps | 355 |
| iterations | 59 |
| time_elapsed | 227 |
| total_timesteps | 80889 |
| train/ | |
| approx_kl | 0.049862172 |
| clip_fraction | 0.31 |
| clip_range | 0.26 |
| entropy_loss | -7.6 |
| explained_variance | 0.0943 |
| learning_rate | 8.14e-05 |
| loss | 0.524 |
| n_updates | 580 |
| policy_gradient_loss | -0.015 |
| value_loss | 14.5 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.13e+03 |
| ep_rew_mean | 11.2 |
| time/ | |
| fps | 355 |
| iterations | 60 |
| time_elapsed | 231 |
| total_timesteps | 82260 |
| train/ | |
| approx_kl | 0.040924706 |
| clip_fraction | 0.313 |
| clip_range | 0.26 |
| entropy_loss | -7.74 |
| explained_variance | -0.255 |
| learning_rate | 8.14e-05 |
| loss | 0.218 |
| n_updates | 590 |
| policy_gradient_loss | -0.0118 |
| value_loss | 10.8 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.14e+03 |
| ep_rew_mean | 5.38 |
| time/ | |
| fps | 355 |
| iterations | 61 |
| time_elapsed | 234 |
| total_timesteps | 83631 |
| train/ | |
| approx_kl | 0.031327777 |
| clip_fraction | 0.273 |
| clip_range | 0.26 |
| entropy_loss | -7.33 |
| explained_variance | -0.235 |
| learning_rate | 8.14e-05 |
| loss | 1.87 |
| n_updates | 600 |
| policy_gradient_loss | -0.00756 |
| value_loss | 9.36 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.11e+03 |
| ep_rew_mean | 9.3 |
| time/ | |
| fps | 356 |
| iterations | 62 |
| time_elapsed | 238 |
| total_timesteps | 85002 |
| train/ | |
| approx_kl | 0.077066906 |
| clip_fraction | 0.375 |
| clip_range | 0.26 |
| entropy_loss | -7.63 |
| explained_variance | -6.45e-05 |
| learning_rate | 8.14e-05 |
| loss | 4.58 |
| n_updates | 610 |
| policy_gradient_loss | -0.00174 |
| value_loss | 24.8 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.08e+03 |
| ep_rew_mean | 12.6 |
| time/ | |
| fps | 356 |
| iterations | 63 |
| time_elapsed | 242 |
| total_timesteps | 86373 |
| train/ | |
| approx_kl | 0.047113765 |
| clip_fraction | 0.325 |
| clip_range | 0.26 |
| entropy_loss | -7.27 |
| explained_variance | 0.345 |
| learning_rate | 8.14e-05 |
| loss | 2.71 |
| n_updates | 620 |
| policy_gradient_loss | -0.0105 |
| value_loss | 22 |
-----------------------------------------
--------------------------------------
| rollout/ | |
| ep_len_mean | 2.07e+03 |
| ep_rew_mean | 7.55 |
| time/ | |
| fps | 356 |
| iterations | 64 |
| time_elapsed | 246 |
| total_timesteps | 87744 |
| train/ | |
| approx_kl | 0.051026 |
| clip_fraction | 0.328 |
| clip_range | 0.26 |
| entropy_loss | -7.45 |
| explained_variance | -0.0504 |
| learning_rate | 8.14e-05 |
| loss | 8.05 |
| n_updates | 630 |
| policy_gradient_loss | -0.0202 |
| value_loss | 21.4 |
--------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.07e+03 |
| ep_rew_mean | 7.55 |
| time/ | |
| fps | 356 |
| iterations | 65 |
| time_elapsed | 249 |
| total_timesteps | 89115 |
| train/ | |
| approx_kl | 0.08706577 |
| clip_fraction | 0.408 |
| clip_range | 0.26 |
| entropy_loss | -7.05 |
| explained_variance | -0.281 |
| learning_rate | 8.14e-05 |
| loss | 6.06 |
| n_updates | 640 |
| policy_gradient_loss | -0.0151 |
| value_loss | 24.7 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.07e+03 |
| ep_rew_mean | 7.37 |
| time/ | |
| fps | 356 |
| iterations | 66 |
| time_elapsed | 253 |
| total_timesteps | 90486 |
| train/ | |
| approx_kl | 0.060183015 |
| clip_fraction | 0.353 |
| clip_range | 0.26 |
| entropy_loss | -7.27 |
| explained_variance | -0.235 |
| learning_rate | 8.14e-05 |
| loss | 2.65 |
| n_updates | 650 |
| policy_gradient_loss | -0.00283 |
| value_loss | 11.1 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.07e+03 |
| ep_rew_mean | 6.11 |
| time/ | |
| fps | 356 |
| iterations | 67 |
| time_elapsed | 257 |
| total_timesteps | 91857 |
| train/ | |
| approx_kl | 0.03188397 |
| clip_fraction | 0.266 |
| clip_range | 0.26 |
| entropy_loss | -7.28 |
| explained_variance | 0.319 |
| learning_rate | 8.14e-05 |
| loss | 0.867 |
| n_updates | 660 |
| policy_gradient_loss | -0.022 |
| value_loss | 7.59 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.07e+03 |
| ep_rew_mean | 6.11 |
| time/ | |
| fps | 356 |
| iterations | 68 |
| time_elapsed | 261 |
| total_timesteps | 93228 |
| train/ | |
| approx_kl | 0.049166773 |
| clip_fraction | 0.343 |
| clip_range | 0.26 |
| entropy_loss | -7.11 |
| explained_variance | -0.196 |
| learning_rate | 8.14e-05 |
| loss | 0.647 |
| n_updates | 670 |
| policy_gradient_loss | -0.0119 |
| value_loss | 11.7 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.08e+03 |
| ep_rew_mean | 2.13 |
| time/ | |
| fps | 355 |
| iterations | 69 |
| time_elapsed | 265 |
| total_timesteps | 94599 |
| train/ | |
| approx_kl | 0.03328535 |
| clip_fraction | 0.278 |
| clip_range | 0.26 |
| entropy_loss | -6.78 |
| explained_variance | -0.396 |
| learning_rate | 8.14e-05 |
| loss | 6.24 |
| n_updates | 680 |
| policy_gradient_loss | -0.0167 |
| value_loss | 7.49 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.08e+03 |
| ep_rew_mean | 2.13 |
| time/ | |
| fps | 355 |
| iterations | 70 |
| time_elapsed | 269 |
| total_timesteps | 95970 |
| train/ | |
| approx_kl | 0.08318052 |
| clip_fraction | 0.38 |
| clip_range | 0.26 |
| entropy_loss | -7.26 |
| explained_variance | -0.267 |
| learning_rate | 8.14e-05 |
| loss | 4.04 |
| n_updates | 690 |
| policy_gradient_loss | 0.00117 |
| value_loss | 15 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.09e+03 |
| ep_rew_mean | 1.28 |
| time/ | |
| fps | 355 |
| iterations | 71 |
| time_elapsed | 274 |
| total_timesteps | 97341 |
| train/ | |
| approx_kl | 0.05132381 |
| clip_fraction | 0.372 |
| clip_range | 0.26 |
| entropy_loss | -7.08 |
| explained_variance | -0.574 |
| learning_rate | 8.14e-05 |
| loss | 14.8 |
| n_updates | 700 |
| policy_gradient_loss | 0.0229 |
| value_loss | 15.2 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.09e+03 |
| ep_rew_mean | 0.298 |
| time/ | |
| fps | 354 |
| iterations | 72 |
| time_elapsed | 278 |
| total_timesteps | 98712 |
| train/ | |
| approx_kl | 0.049137857 |
| clip_fraction | 0.354 |
| clip_range | 0.26 |
| entropy_loss | -6.89 |
| explained_variance | 0.0478 |
| learning_rate | 8.14e-05 |
| loss | 1.22 |
| n_updates | 710 |
| policy_gradient_loss | -0.0197 |
| value_loss | 7.75 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.09e+03 |
| ep_rew_mean | 0.298 |
| time/ | |
| fps | 354 |
| iterations | 73 |
| time_elapsed | 282 |
| total_timesteps | 100083 |
| train/ | |
| approx_kl | 0.04495397 |
| clip_fraction | 0.278 |
| clip_range | 0.26 |
| entropy_loss | -6.95 |
| explained_variance | 0.0999 |
| learning_rate | 8.14e-05 |
| loss | 0.888 |
| n_updates | 720 |
| policy_gradient_loss | -0.0123 |
| value_loss | 11.4 |
----------------------------------------
[I 2023-03-30 21:45:23,843] Trial 0 finished with value: -347.0 and parameters: {'n_steps': 1371, 'gamma': 0.9373200020810921, 'learning_rate': 8.141042840141496e-05, 'clip_range': 0.2600128459343352, 'gae_lambda': 0.9415709130298376}. Best is trial 0 with value: -347.0.
Using cuda device
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.
C:\ProgramData\Anaconda3\envs\StreetFighterAI\lib\site-packages\stable_baselines3\ppo\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 3294`, after every 51 untruncated mini-batches, there will be a truncated mini-batch of size 30
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=3294 and n_envs=1)
warnings.warn(
Logging to logs/PPO_19
---------------------------------
| rollout/ | |
| ep_len_mean | 2.44e+03 |
| ep_rew_mean | -47 |
| time/ | |
| fps | 550 |
| iterations | 1 |
| time_elapsed | 5 |
| total_timesteps | 3294 |
---------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2e+03 |
| ep_rew_mean | -140 |
| time/ | |
| fps | 417 |
| iterations | 2 |
| time_elapsed | 15 |
| total_timesteps | 6588 |
| train/ | |
| approx_kl | 0.008250391 |
| clip_fraction | 0.13 |
| clip_range | 0.193 |
| entropy_loss | -8.31 |
| explained_variance | -0.00164 |
| learning_rate | 5.11e-05 |
| loss | 0.766 |
| n_updates | 10 |
| policy_gradient_loss | -0.00627 |
| value_loss | 8.03 |
-----------------------------------------
------------------------------------------
| rollout/ | |
| ep_len_mean | 2.31e+03 |
| ep_rew_mean | -95.8 |
| time/ | |
| fps | 392 |
| iterations | 3 |
| time_elapsed | 25 |
| total_timesteps | 9882 |
| train/ | |
| approx_kl | 0.0076712077 |
| clip_fraction | 0.113 |
| clip_range | 0.193 |
| entropy_loss | -8.3 |
| explained_variance | -0.0133 |
| learning_rate | 5.11e-05 |
| loss | 8.2 |
| n_updates | 20 |
| policy_gradient_loss | -0.00919 |
| value_loss | 22.1 |
------------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.32e+03 |
| ep_rew_mean | -75.8 |
| time/ | |
| fps | 379 |
| iterations | 4 |
| time_elapsed | 34 |
| total_timesteps | 13176 |
| train/ | |
| approx_kl | 0.009143725 |
| clip_fraction | 0.1 |
| clip_range | 0.193 |
| entropy_loss | -8.29 |
| explained_variance | -0.0603 |
| learning_rate | 5.11e-05 |
| loss | 2.06 |
| n_updates | 30 |
| policy_gradient_loss | -0.0105 |
| value_loss | 7.27 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.32e+03 |
| ep_rew_mean | -68.2 |
| time/ | |
| fps | 371 |
| iterations | 5 |
| time_elapsed | 44 |
| total_timesteps | 16470 |
| train/ | |
| approx_kl | 0.009427849 |
| clip_fraction | 0.108 |
| clip_range | 0.193 |
| entropy_loss | -8.28 |
| explained_variance | 0.00375 |
| learning_rate | 5.11e-05 |
| loss | 1.5 |
| n_updates | 40 |
| policy_gradient_loss | -0.0116 |
| value_loss | 5.16 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.4e+03 |
| ep_rew_mean | -60.1 |
| time/ | |
| fps | 367 |
| iterations | 6 |
| time_elapsed | 53 |
| total_timesteps | 19764 |
| train/ | |
| approx_kl | 0.008516062 |
| clip_fraction | 0.102 |
| clip_range | 0.193 |
| entropy_loss | -8.27 |
| explained_variance | 0.0201 |
| learning_rate | 5.11e-05 |
| loss | 1.73 |
| n_updates | 50 |
| policy_gradient_loss | -0.0116 |
| value_loss | 7.34 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.44e+03 |
| ep_rew_mean | -73.8 |
| time/ | |
| fps | 364 |
| iterations | 7 |
| time_elapsed | 63 |
| total_timesteps | 23058 |
| train/ | |
| approx_kl | 0.009343307 |
| clip_fraction | 0.105 |
| clip_range | 0.193 |
| entropy_loss | -8.27 |
| explained_variance | 0.0736 |
| learning_rate | 5.11e-05 |
| loss | 2.54 |
| n_updates | 60 |
| policy_gradient_loss | -0.0116 |
| value_loss | 5.97 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.45e+03 |
| ep_rew_mean | -94.5 |
| time/ | |
| fps | 359 |
| iterations | 8 |
| time_elapsed | 73 |
| total_timesteps | 26352 |
| train/ | |
| approx_kl | 0.008755345 |
| clip_fraction | 0.103 |
| clip_range | 0.193 |
| entropy_loss | -8.26 |
| explained_variance | -0.0946 |
| learning_rate | 5.11e-05 |
| loss | 13.2 |
| n_updates | 70 |
| policy_gradient_loss | -0.0106 |
| value_loss | 18.1 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.46e+03 |
| ep_rew_mean | -100 |
| time/ | |
| fps | 357 |
| iterations | 9 |
| time_elapsed | 82 |
| total_timesteps | 29646 |
| train/ | |
| approx_kl | 0.007682183 |
| clip_fraction | 0.0851 |
| clip_range | 0.193 |
| entropy_loss | -8.25 |
| explained_variance | 0.0101 |
| learning_rate | 5.11e-05 |
| loss | 0.305 |
| n_updates | 80 |
| policy_gradient_loss | -0.0103 |
| value_loss | 18.6 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.34e+03 |
| ep_rew_mean | -116 |
| time/ | |
| fps | 355 |
| iterations | 10 |
| time_elapsed | 92 |
| total_timesteps | 32940 |
| train/ | |
| approx_kl | 0.010493592 |
| clip_fraction | 0.12 |
| clip_range | 0.193 |
| entropy_loss | -8.25 |
| explained_variance | -0.0487 |
| learning_rate | 5.11e-05 |
| loss | 0.736 |
| n_updates | 90 |
| policy_gradient_loss | -0.0141 |
| value_loss | 10.8 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.34e+03 |
| ep_rew_mean | -116 |
| time/ | |
| fps | 355 |
| iterations | 11 |
| time_elapsed | 102 |
| total_timesteps | 36234 |
| train/ | |
| approx_kl | 0.014990667 |
| clip_fraction | 0.177 |
| clip_range | 0.193 |
| entropy_loss | -8.24 |
| explained_variance | -0.0262 |
| learning_rate | 5.11e-05 |
| loss | 6.56 |
| n_updates | 100 |
| policy_gradient_loss | -0.00923 |
| value_loss | 17.9 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.42e+03 |
| ep_rew_mean | -95.2 |
| time/ | |
| fps | 355 |
| iterations | 12 |
| time_elapsed | 111 |
| total_timesteps | 39528 |
| train/ | |
| approx_kl | 0.012156485 |
| clip_fraction | 0.158 |
| clip_range | 0.193 |
| entropy_loss | -8.22 |
| explained_variance | 0.0904 |
| learning_rate | 5.11e-05 |
| loss | 11.6 |
| n_updates | 110 |
| policy_gradient_loss | -0.0144 |
| value_loss | 4.6 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.43e+03 |
| ep_rew_mean | -96.6 |
| time/ | |
| fps | 354 |
| iterations | 13 |
| time_elapsed | 120 |
| total_timesteps | 42822 |
| train/ | |
| approx_kl | 0.009144909 |
| clip_fraction | 0.116 |
| clip_range | 0.193 |
| entropy_loss | -8.21 |
| explained_variance | 0.0377 |
| learning_rate | 5.11e-05 |
| loss | 0.243 |
| n_updates | 120 |
| policy_gradient_loss | -0.0126 |
| value_loss | 7.32 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.47e+03 |
| ep_rew_mean | -93.6 |
| time/ | |
| fps | 353 |
| iterations | 14 |
| time_elapsed | 130 |
| total_timesteps | 46116 |
| train/ | |
| approx_kl | 0.010074705 |
| clip_fraction | 0.143 |
| clip_range | 0.193 |
| entropy_loss | -8.21 |
| explained_variance | 0.0431 |
| learning_rate | 5.11e-05 |
| loss | 0.422 |
| n_updates | 130 |
| policy_gradient_loss | -0.0116 |
| value_loss | 8.13 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.46e+03 |
| ep_rew_mean | -89.6 |
| time/ | |
| fps | 352 |
| iterations | 15 |
| time_elapsed | 140 |
| total_timesteps | 49410 |
| train/ | |
| approx_kl | 0.00962226 |
| clip_fraction | 0.141 |
| clip_range | 0.193 |
| entropy_loss | -8.19 |
| explained_variance | -0.127 |
| learning_rate | 5.11e-05 |
| loss | 4.31 |
| n_updates | 140 |
| policy_gradient_loss | -0.0141 |
| value_loss | 7.67 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.51e+03 |
| ep_rew_mean | -95.2 |
| time/ | |
| fps | 352 |
| iterations | 16 |
| time_elapsed | 149 |
| total_timesteps | 52704 |
| train/ | |
| approx_kl | 0.010282748 |
| clip_fraction | 0.149 |
| clip_range | 0.193 |
| entropy_loss | -8.18 |
| explained_variance | 0.0186 |
| learning_rate | 5.11e-05 |
| loss | 0.291 |
| n_updates | 150 |
| policy_gradient_loss | -0.0157 |
| value_loss | 3.42 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.49e+03 |
| ep_rew_mean | -86.3 |
| time/ | |
| fps | 352 |
| iterations | 17 |
| time_elapsed | 158 |
| total_timesteps | 55998 |
| train/ | |
| approx_kl | 0.011693283 |
| clip_fraction | 0.129 |
| clip_range | 0.193 |
| entropy_loss | -8.17 |
| explained_variance | 0.038 |
| learning_rate | 5.11e-05 |
| loss | 2.92 |
| n_updates | 160 |
| policy_gradient_loss | -0.0117 |
| value_loss | 12.6 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.47e+03 |
| ep_rew_mean | -91.1 |
| time/ | |
| fps | 353 |
| iterations | 18 |
| time_elapsed | 167 |
| total_timesteps | 59292 |
| train/ | |
| approx_kl | 0.011810733 |
| clip_fraction | 0.155 |
| clip_range | 0.193 |
| entropy_loss | -8.16 |
| explained_variance | 0.0673 |
| learning_rate | 5.11e-05 |
| loss | 0.93 |
| n_updates | 170 |
| policy_gradient_loss | -0.0134 |
| value_loss | 9.58 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.5e+03 |
| ep_rew_mean | -88.9 |
| time/ | |
| fps | 353 |
| iterations | 19 |
| time_elapsed | 177 |
| total_timesteps | 62586 |
| train/ | |
| approx_kl | 0.012234138 |
| clip_fraction | 0.167 |
| clip_range | 0.193 |
| entropy_loss | -8.17 |
| explained_variance | 0.033 |
| learning_rate | 5.11e-05 |
| loss | 0.243 |
| n_updates | 180 |
| policy_gradient_loss | -0.0139 |
| value_loss | 8.98 |
-----------------------------------------
------------------------------------------
| rollout/ | |
| ep_len_mean | 2.5e+03 |
| ep_rew_mean | -88.9 |
| time/ | |
| fps | 352 |
| iterations | 20 |
| time_elapsed | 186 |
| total_timesteps | 65880 |
| train/ | |
| approx_kl | 0.0135463225 |
| clip_fraction | 0.167 |
| clip_range | 0.193 |
| entropy_loss | -8.15 |
| explained_variance | 0.104 |
| learning_rate | 5.11e-05 |
| loss | 1.94 |
| n_updates | 190 |
| policy_gradient_loss | -0.0105 |
| value_loss | 8.12 |
------------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.59e+03 |
| ep_rew_mean | 114 |
| time/ | |
| fps | 351 |
| iterations | 21 |
| time_elapsed | 196 |
| total_timesteps | 69174 |
| train/ | |
| approx_kl | 0.012275431 |
| clip_fraction | 0.164 |
| clip_range | 0.193 |
| entropy_loss | -8.14 |
| explained_variance | -0.00593 |
| learning_rate | 5.11e-05 |
| loss | 15.6 |
| n_updates | 200 |
| policy_gradient_loss | -0.00313 |
| value_loss | 1.08e+04 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.54e+03 |
| ep_rew_mean | 104 |
| time/ | |
| fps | 351 |
| iterations | 22 |
| time_elapsed | 206 |
| total_timesteps | 72468 |
| train/ | |
| approx_kl | 0.014285704 |
| clip_fraction | 0.189 |
| clip_range | 0.193 |
| entropy_loss | -8.14 |
| explained_variance | 0.172 |
| learning_rate | 5.11e-05 |
| loss | 0.239 |
| n_updates | 210 |
| policy_gradient_loss | -0.0122 |
| value_loss | 8.08 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.53e+03 |
| ep_rew_mean | 91.4 |
| time/ | |
| fps | 351 |
| iterations | 23 |
| time_elapsed | 215 |
| total_timesteps | 75762 |
| train/ | |
| approx_kl | 0.01594875 |
| clip_fraction | 0.212 |
| clip_range | 0.193 |
| entropy_loss | -8.11 |
| explained_variance | -0.0207 |
| learning_rate | 5.11e-05 |
| loss | 19.3 |
| n_updates | 220 |
| policy_gradient_loss | -0.0113 |
| value_loss | 12.8 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.52e+03 |
| ep_rew_mean | 98.8 |
| time/ | |
| fps | 351 |
| iterations | 24 |
| time_elapsed | 225 |
| total_timesteps | 79056 |
| train/ | |
| approx_kl | 0.016312802 |
| clip_fraction | 0.236 |
| clip_range | 0.193 |
| entropy_loss | -8.03 |
| explained_variance | 0.118 |
| learning_rate | 5.11e-05 |
| loss | 0.819 |
| n_updates | 230 |
| policy_gradient_loss | -0.0082 |
| value_loss | 15.8 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.49e+03 |
| ep_rew_mean | 77.5 |
| time/ | |
| fps | 351 |
| iterations | 25 |
| time_elapsed | 234 |
| total_timesteps | 82350 |
| train/ | |
| approx_kl | 0.012337481 |
| clip_fraction | 0.163 |
| clip_range | 0.193 |
| entropy_loss | -8.13 |
| explained_variance | 0.0669 |
| learning_rate | 5.11e-05 |
| loss | 16.1 |
| n_updates | 240 |
| policy_gradient_loss | -0.00899 |
| value_loss | 16.1 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.46e+03 |
| ep_rew_mean | 66.2 |
| time/ | |
| fps | 347 |
| iterations | 26 |
| time_elapsed | 246 |
| total_timesteps | 85644 |
| train/ | |
| approx_kl | 0.018887786 |
| clip_fraction | 0.241 |
| clip_range | 0.193 |
| entropy_loss | -8 |
| explained_variance | 0.107 |
| learning_rate | 5.11e-05 |
| loss | 3.57 |
| n_updates | 250 |
| policy_gradient_loss | -0.00903 |
| value_loss | 26.9 |
-----------------------------------------
---------------------------------------
| rollout/ | |
| ep_len_mean | 2.47e+03 |
| ep_rew_mean | 58 |
| time/ | |
| fps | 346 |
| iterations | 27 |
| time_elapsed | 256 |
| total_timesteps | 88938 |
| train/ | |
| approx_kl | 0.0155593 |
| clip_fraction | 0.219 |
| clip_range | 0.193 |
| entropy_loss | -8.07 |
| explained_variance | 0.0512 |
| learning_rate | 5.11e-05 |
| loss | 3.59 |
| n_updates | 260 |
| policy_gradient_loss | -0.0133 |
| value_loss | 21 |
---------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.44e+03 |
| ep_rew_mean | 54.1 |
| time/ | |
| fps | 346 |
| iterations | 28 |
| time_elapsed | 266 |
| total_timesteps | 92232 |
| train/ | |
| approx_kl | 0.015150225 |
| clip_fraction | 0.198 |
| clip_range | 0.193 |
| entropy_loss | -8.1 |
| explained_variance | 0.149 |
| learning_rate | 5.11e-05 |
| loss | 3.1 |
| n_updates | 270 |
| policy_gradient_loss | -0.00993 |
| value_loss | 9.94 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.47e+03 |
| ep_rew_mean | 52.1 |
| time/ | |
| fps | 346 |
| iterations | 29 |
| time_elapsed | 275 |
| total_timesteps | 95526 |
| train/ | |
| approx_kl | 0.016742641 |
| clip_fraction | 0.223 |
| clip_range | 0.193 |
| entropy_loss | -7.91 |
| explained_variance | 0.276 |
| learning_rate | 5.11e-05 |
| loss | 4.49 |
| n_updates | 280 |
| policy_gradient_loss | -0.015 |
| value_loss | 7.73 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.44e+03 |
| ep_rew_mean | 41.7 |
| time/ | |
| fps | 346 |
| iterations | 30 |
| time_elapsed | 284 |
| total_timesteps | 98820 |
| train/ | |
| approx_kl | 0.016321812 |
| clip_fraction | 0.238 |
| clip_range | 0.193 |
| entropy_loss | -8.08 |
| explained_variance | 0.104 |
| learning_rate | 5.11e-05 |
| loss | 0.345 |
| n_updates | 290 |
| policy_gradient_loss | -0.0153 |
| value_loss | 5.16 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.43e+03 |
| ep_rew_mean | 40 |
| time/ | |
| fps | 346 |
| iterations | 31 |
| time_elapsed | 294 |
| total_timesteps | 102114 |
| train/ | |
| approx_kl | 0.016819764 |
| clip_fraction | 0.22 |
| clip_range | 0.193 |
| entropy_loss | -7.89 |
| explained_variance | -0.0187 |
| learning_rate | 5.11e-05 |
| loss | 1.07 |
| n_updates | 300 |
| policy_gradient_loss | -0.00886 |
| value_loss | 15.6 |
-----------------------------------------
[I 2023-03-30 21:50:56,850] Trial 1 finished with value: -205.0 and parameters: {'n_steps': 3294, 'gamma': 0.9019828232975781, 'learning_rate': 5.112209134805487e-05, 'clip_range': 0.1926590966798606, 'gae_lambda': 0.8670673597089896}. Best is trial 1 with value: -205.0.
Using cuda device
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.
C:\ProgramData\Anaconda3\envs\StreetFighterAI\lib\site-packages\stable_baselines3\ppo\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 1586`, after every 24 untruncated mini-batches, there will be a truncated mini-batch of size 50
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1586 and n_envs=1)
warnings.warn(
Logging to logs/PPO_20
-----------------------------
| time/ | |
| fps | 611 |
| iterations | 1 |
| time_elapsed | 2 |
| total_timesteps | 1586 |
-----------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.1e+03 |
| ep_rew_mean | -13 |
| time/ | |
| fps | 448 |
| iterations | 2 |
| time_elapsed | 7 |
| total_timesteps | 3172 |
| train/ | |
| approx_kl | 0.007479368 |
| clip_fraction | 0.211 |
| clip_range | 0.161 |
| entropy_loss | -8.31 |
| explained_variance | -0.00514 |
| learning_rate | 5.02e-05 |
| loss | 5.13 |
| n_updates | 10 |
| policy_gradient_loss | -0.0115 |
| value_loss | 11.6 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.21e+03 |
| ep_rew_mean | -115 |
| time/ | |
| fps | 413 |
| iterations | 3 |
| time_elapsed | 11 |
| total_timesteps | 4758 |
| train/ | |
| approx_kl | 0.009029714 |
| clip_fraction | 0.168 |
| clip_range | 0.161 |
| entropy_loss | -8.3 |
| explained_variance | 0.0171 |
| learning_rate | 5.02e-05 |
| loss | 1.61 |
| n_updates | 20 |
| policy_gradient_loss | -0.0118 |
| value_loss | 15 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.21e+03 |
| ep_rew_mean | -115 |
| time/ | |
| fps | 395 |
| iterations | 4 |
| time_elapsed | 16 |
| total_timesteps | 6344 |
| train/ | |
| approx_kl | 0.011066959 |
| clip_fraction | 0.22 |
| clip_range | 0.161 |
| entropy_loss | -8.28 |
| explained_variance | 0.138 |
| learning_rate | 5.02e-05 |
| loss | 4.3 |
| n_updates | 30 |
| policy_gradient_loss | -0.0035 |
| value_loss | 26.5 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.13e+03 |
| ep_rew_mean | -64.3 |
| time/ | |
| fps | 385 |
| iterations | 5 |
| time_elapsed | 20 |
| total_timesteps | 7930 |
| train/ | |
| approx_kl | 0.006398973 |
| clip_fraction | 0.123 |
| clip_range | 0.161 |
| entropy_loss | -8.29 |
| explained_variance | 0.0506 |
| learning_rate | 5.02e-05 |
| loss | 7.38 |
| n_updates | 40 |
| policy_gradient_loss | -0.00387 |
| value_loss | 13.7 |
-----------------------------------------
------------------------------------------
| rollout/ | |
| ep_len_mean | 2.07e+03 |
| ep_rew_mean | -124 |
| time/ | |
| fps | 378 |
| iterations | 6 |
| time_elapsed | 25 |
| total_timesteps | 9516 |
| train/ | |
| approx_kl | 0.0068364535 |
| clip_fraction | 0.234 |
| clip_range | 0.161 |
| entropy_loss | -8.28 |
| explained_variance | 0.0926 |
| learning_rate | 5.02e-05 |
| loss | 1.79 |
| n_updates | 50 |
| policy_gradient_loss | -0.00452 |
| value_loss | 8.46 |
------------------------------------------
------------------------------------------
| rollout/ | |
| ep_len_mean | 2.2e+03 |
| ep_rew_mean | -129 |
| time/ | |
| fps | 373 |
| iterations | 7 |
| time_elapsed | 29 |
| total_timesteps | 11102 |
| train/ | |
| approx_kl | 0.0063208304 |
| clip_fraction | 0.139 |
| clip_range | 0.161 |
| entropy_loss | -8.29 |
| explained_variance | 0.225 |
| learning_rate | 5.02e-05 |
| loss | 11 |
| n_updates | 60 |
| policy_gradient_loss | -0.00482 |
| value_loss | 55.3 |
------------------------------------------
------------------------------------------
| rollout/ | |
| ep_len_mean | 2.2e+03 |
| ep_rew_mean | -129 |
| time/ | |
| fps | 369 |
| iterations | 8 |
| time_elapsed | 34 |
| total_timesteps | 12688 |
| train/ | |
| approx_kl | 0.0049916673 |
| clip_fraction | 0.146 |
| clip_range | 0.161 |
| entropy_loss | -8.29 |
| explained_variance | 0.341 |
| learning_rate | 5.02e-05 |
| loss | 4.11 |
| n_updates | 70 |
| policy_gradient_loss | -0.00677 |
| value_loss | 21.2 |
------------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.25e+03 |
| ep_rew_mean | -140 |
| time/ | |
| fps | 368 |
| iterations | 9 |
| time_elapsed | 38 |
| total_timesteps | 14274 |
| train/ | |
| approx_kl | 0.00807819 |
| clip_fraction | 0.222 |
| clip_range | 0.161 |
| entropy_loss | -8.29 |
| explained_variance | 0.368 |
| learning_rate | 5.02e-05 |
| loss | 0.672 |
| n_updates | 80 |
| policy_gradient_loss | -0.00792 |
| value_loss | 6.29 |
----------------------------------------
------------------------------------------
| rollout/ | |
| ep_len_mean | 2.09e+03 |
| ep_rew_mean | -96.1 |
| time/ | |
| fps | 366 |
| iterations | 10 |
| time_elapsed | 43 |
| total_timesteps | 15860 |
| train/ | |
| approx_kl | 0.0066149407 |
| clip_fraction | 0.157 |
| clip_range | 0.161 |
| entropy_loss | -8.29 |
| explained_variance | 0.102 |
| learning_rate | 5.02e-05 |
| loss | 47.7 |
| n_updates | 90 |
| policy_gradient_loss | -0.00711 |
| value_loss | 24.1 |
------------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.09e+03 |
| ep_rew_mean | -96.1 |
| time/ | |
| fps | 366 |
| iterations | 11 |
| time_elapsed | 47 |
| total_timesteps | 17446 |
| train/ | |
| approx_kl | 0.009226098 |
| clip_fraction | 0.162 |
| clip_range | 0.161 |
| entropy_loss | -8.29 |
| explained_variance | 0.103 |
| learning_rate | 5.02e-05 |
| loss | 32.2 |
| n_updates | 100 |
| policy_gradient_loss | -0.00681 |
| value_loss | 30 |
-----------------------------------------
------------------------------------------
| rollout/ | |
| ep_len_mean | 2.23e+03 |
| ep_rew_mean | -84.8 |
| time/ | |
| fps | 365 |
| iterations | 12 |
| time_elapsed | 52 |
| total_timesteps | 19032 |
| train/ | |
| approx_kl | 0.0071024043 |
| clip_fraction | 0.202 |
| clip_range | 0.161 |
| entropy_loss | -8.28 |
| explained_variance | 0.477 |
| learning_rate | 5.02e-05 |
| loss | 1.68 |
| n_updates | 110 |
| policy_gradient_loss | -0.00699 |
| value_loss | 8.59 |
------------------------------------------
------------------------------------------
| rollout/ | |
| ep_len_mean | 2.29e+03 |
| ep_rew_mean | -80.7 |
| time/ | |
| fps | 364 |
| iterations | 13 |
| time_elapsed | 56 |
| total_timesteps | 20618 |
| train/ | |
| approx_kl | 0.0079917265 |
| clip_fraction | 0.177 |
| clip_range | 0.161 |
| entropy_loss | -8.28 |
| explained_variance | 0.499 |
| learning_rate | 5.02e-05 |
| loss | 2.03 |
| n_updates | 120 |
| policy_gradient_loss | -0.00561 |
| value_loss | 9.53 |
------------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.29e+03 |
| ep_rew_mean | -80.7 |
| time/ | |
| fps | 364 |
| iterations | 14 |
| time_elapsed | 60 |
| total_timesteps | 22204 |
| train/ | |
| approx_kl | 0.008118922 |
| clip_fraction | 0.183 |
| clip_range | 0.161 |
| entropy_loss | -8.28 |
| explained_variance | 0.592 |
| learning_rate | 5.02e-05 |
| loss | 1.45 |
| n_updates | 130 |
| policy_gradient_loss | -0.00841 |
| value_loss | 8.69 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.35e+03 |
| ep_rew_mean | -77.8 |
| time/ | |
| fps | 363 |
| iterations | 15 |
| time_elapsed | 65 |
| total_timesteps | 23790 |
| train/ | |
| approx_kl | 0.009747963 |
| clip_fraction | 0.177 |
| clip_range | 0.161 |
| entropy_loss | -8.27 |
| explained_variance | 0.568 |
| learning_rate | 5.02e-05 |
| loss | 3.4 |
| n_updates | 140 |
| policy_gradient_loss | -0.00655 |
| value_loss | 12.6 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.35e+03 |
| ep_rew_mean | -77.8 |
| time/ | |
| fps | 363 |
| iterations | 16 |
| time_elapsed | 69 |
| total_timesteps | 25376 |
| train/ | |
| approx_kl | 0.011058032 |
| clip_fraction | 0.209 |
| clip_range | 0.161 |
| entropy_loss | -8.25 |
| explained_variance | 0.54 |
| learning_rate | 5.02e-05 |
| loss | 0.934 |
| n_updates | 150 |
| policy_gradient_loss | -0.00562 |
| value_loss | 8.9 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.4e+03 |
| ep_rew_mean | -70.5 |
| time/ | |
| fps | 364 |
| iterations | 17 |
| time_elapsed | 74 |
| total_timesteps | 26962 |
| train/ | |
| approx_kl | 0.007287364 |
| clip_fraction | 0.151 |
| clip_range | 0.161 |
| entropy_loss | -8.26 |
| explained_variance | 0.473 |
| learning_rate | 5.02e-05 |
| loss | 0.892 |
| n_updates | 160 |
| policy_gradient_loss | -0.00825 |
| value_loss | 7.3 |
-----------------------------------------
------------------------------------------
| rollout/ | |
| ep_len_mean | 2.4e+03 |
| ep_rew_mean | -70.5 |
| time/ | |
| fps | 364 |
| iterations | 18 |
| time_elapsed | 78 |
| total_timesteps | 28548 |
| train/ | |
| approx_kl | 0.0059617176 |
| clip_fraction | 0.119 |
| clip_range | 0.161 |
| entropy_loss | -8.27 |
| explained_variance | 0.193 |
| learning_rate | 5.02e-05 |
| loss | 5.01 |
| n_updates | 170 |
| policy_gradient_loss | -0.00776 |
| value_loss | 15.3 |
------------------------------------------
------------------------------------------
| rollout/ | |
| ep_len_mean | 2.49e+03 |
| ep_rew_mean | -64.2 |
| time/ | |
| fps | 364 |
| iterations | 19 |
| time_elapsed | 82 |
| total_timesteps | 30134 |
| train/ | |
| approx_kl | 0.0068875425 |
| clip_fraction | 0.143 |
| clip_range | 0.161 |
| entropy_loss | -8.27 |
| explained_variance | 0.208 |
| learning_rate | 5.02e-05 |
| loss | 0.754 |
| n_updates | 180 |
| policy_gradient_loss | -0.00782 |
| value_loss | 8.96 |
------------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.49e+03 |
| ep_rew_mean | -64.2 |
| time/ | |
| fps | 364 |
| iterations | 20 |
| time_elapsed | 86 |
| total_timesteps | 31720 |
| train/ | |
| approx_kl | 0.006500314 |
| clip_fraction | 0.141 |
| clip_range | 0.161 |
| entropy_loss | -8.28 |
| explained_variance | 0.431 |
| learning_rate | 5.02e-05 |
| loss | 0.835 |
| n_updates | 190 |
| policy_gradient_loss | -0.0125 |
| value_loss | 4.6 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.47e+03 |
| ep_rew_mean | -59.9 |
| time/ | |
| fps | 363 |
| iterations | 21 |
| time_elapsed | 91 |
| total_timesteps | 33306 |
| train/ | |
| approx_kl | 0.00925716 |
| clip_fraction | 0.167 |
| clip_range | 0.161 |
| entropy_loss | -8.28 |
| explained_variance | 0.166 |
| learning_rate | 5.02e-05 |
| loss | 4.07 |
| n_updates | 200 |
| policy_gradient_loss | -0.0126 |
| value_loss | 13.9 |
----------------------------------------
------------------------------------------
| rollout/ | |
| ep_len_mean | 2.47e+03 |
| ep_rew_mean | -59.9 |
| time/ | |
| fps | 362 |
| iterations | 22 |
| time_elapsed | 96 |
| total_timesteps | 34892 |
| train/ | |
| approx_kl | 0.0061101955 |
| clip_fraction | 0.121 |
| clip_range | 0.161 |
| entropy_loss | -8.27 |
| explained_variance | 0.192 |
| learning_rate | 5.02e-05 |
| loss | 0.275 |
| n_updates | 210 |
| policy_gradient_loss | -0.0125 |
| value_loss | 6.15 |
------------------------------------------
------------------------------------------
| rollout/ | |
| ep_len_mean | 2.5e+03 |
| ep_rew_mean | -68.9 |
| time/ | |
| fps | 361 |
| iterations | 23 |
| time_elapsed | 100 |
| total_timesteps | 36478 |
| train/ | |
| approx_kl | 0.0070993374 |
| clip_fraction | 0.127 |
| clip_range | 0.161 |
| entropy_loss | -8.29 |
| explained_variance | 0.0764 |
| learning_rate | 5.02e-05 |
| loss | 1.86 |
| n_updates | 220 |
| policy_gradient_loss | -0.0111 |
| value_loss | 11.7 |
------------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.5e+03 |
| ep_rew_mean | -65.4 |
| time/ | |
| fps | 360 |
| iterations | 24 |
| time_elapsed | 105 |
| total_timesteps | 38064 |
| train/ | |
| approx_kl | 0.010024515 |
| clip_fraction | 0.182 |
| clip_range | 0.161 |
| entropy_loss | -8.27 |
| explained_variance | 0.335 |
| learning_rate | 5.02e-05 |
| loss | 30 |
| n_updates | 230 |
| policy_gradient_loss | -0.012 |
| value_loss | 25.5 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.5e+03 |
| ep_rew_mean | -65.4 |
| time/ | |
| fps | 358 |
| iterations | 25 |
| time_elapsed | 110 |
| total_timesteps | 39650 |
| train/ | |
| approx_kl | 0.009245104 |
| clip_fraction | 0.179 |
| clip_range | 0.161 |
| entropy_loss | -8.27 |
| explained_variance | 0.325 |
| learning_rate | 5.02e-05 |
| loss | 9.09 |
| n_updates | 240 |
| policy_gradient_loss | -0.012 |
| value_loss | 9.09 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.5e+03 |
| ep_rew_mean | -73.7 |
| time/ | |
| fps | 355 |
| iterations | 26 |
| time_elapsed | 115 |
| total_timesteps | 41236 |
| train/ | |
| approx_kl | 0.007121284 |
| clip_fraction | 0.155 |
| clip_range | 0.161 |
| entropy_loss | -8.26 |
| explained_variance | 0.297 |
| learning_rate | 5.02e-05 |
| loss | 0.498 |
| n_updates | 250 |
| policy_gradient_loss | -0.00963 |
| value_loss | 5.33 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.48e+03 |
| ep_rew_mean | -70.9 |
| time/ | |
| fps | 353 |
| iterations | 27 |
| time_elapsed | 121 |
| total_timesteps | 42822 |
| train/ | |
| approx_kl | 0.006750791 |
| clip_fraction | 0.22 |
| clip_range | 0.161 |
| entropy_loss | -8.24 |
| explained_variance | 0.0803 |
| learning_rate | 5.02e-05 |
| loss | 11.9 |
| n_updates | 260 |
| policy_gradient_loss | -0.00203 |
| value_loss | 36.9 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.41e+03 |
| ep_rew_mean | -58.2 |
| time/ | |
| fps | 353 |
| iterations | 28 |
| time_elapsed | 125 |
| total_timesteps | 44408 |
| train/ | |
| approx_kl | 0.010176781 |
| clip_fraction | 0.163 |
| clip_range | 0.161 |
| entropy_loss | -8.23 |
| explained_variance | 0.524 |
| learning_rate | 5.02e-05 |
| loss | 3.07 |
| n_updates | 270 |
| policy_gradient_loss | -0.0123 |
| value_loss | 12.7 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.41e+03 |
| ep_rew_mean | -65.8 |
| time/ | |
| fps | 353 |
| iterations | 29 |
| time_elapsed | 130 |
| total_timesteps | 45994 |
| train/ | |
| approx_kl | 0.009089488 |
| clip_fraction | 0.196 |
| clip_range | 0.161 |
| entropy_loss | -8.25 |
| explained_variance | -0.0364 |
| learning_rate | 5.02e-05 |
| loss | 6.55 |
| n_updates | 280 |
| policy_gradient_loss | -0.00565 |
| value_loss | 31.2 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.41e+03 |
| ep_rew_mean | -65.8 |
| time/ | |
| fps | 353 |
| iterations | 30 |
| time_elapsed | 134 |
| total_timesteps | 47580 |
| train/ | |
| approx_kl | 0.010195761 |
| clip_fraction | 0.168 |
| clip_range | 0.161 |
| entropy_loss | -8.21 |
| explained_variance | 0.248 |
| learning_rate | 5.02e-05 |
| loss | 2.92 |
| n_updates | 290 |
| policy_gradient_loss | -0.00979 |
| value_loss | 21.1 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.41e+03 |
| ep_rew_mean | -62.6 |
| time/ | |
| fps | 354 |
| iterations | 31 |
| time_elapsed | 138 |
| total_timesteps | 49166 |
| train/ | |
| approx_kl | 0.00898233 |
| clip_fraction | 0.207 |
| clip_range | 0.161 |
| entropy_loss | -8.21 |
| explained_variance | 0.268 |
| learning_rate | 5.02e-05 |
| loss | 2.67 |
| n_updates | 300 |
| policy_gradient_loss | -0.00882 |
| value_loss | 9.28 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.39e+03 |
| ep_rew_mean | -61.5 |
| time/ | |
| fps | 353 |
| iterations | 32 |
| time_elapsed | 143 |
| total_timesteps | 50752 |
| train/ | |
| approx_kl | 0.011192194 |
| clip_fraction | 0.247 |
| clip_range | 0.161 |
| entropy_loss | -8.2 |
| explained_variance | 0.513 |
| learning_rate | 5.02e-05 |
| loss | 2.01 |
| n_updates | 310 |
| policy_gradient_loss | -0.00966 |
| value_loss | 5.51 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.39e+03 |
| ep_rew_mean | -61.5 |
| time/ | |
| fps | 351 |
| iterations | 33 |
| time_elapsed | 148 |
| total_timesteps | 52338 |
| train/ | |
| approx_kl | 0.010591626 |
| clip_fraction | 0.226 |
| clip_range | 0.161 |
| entropy_loss | -8.2 |
| explained_variance | 0.328 |
| learning_rate | 5.02e-05 |
| loss | 1.02 |
| n_updates | 320 |
| policy_gradient_loss | -0.0127 |
| value_loss | 8.93 |
-----------------------------------------
------------------------------------------
| rollout/ | |
| ep_len_mean | 2.41e+03 |
| ep_rew_mean | -63 |
| time/ | |
| fps | 350 |
| iterations | 34 |
| time_elapsed | 154 |
| total_timesteps | 53924 |
| train/ | |
| approx_kl | 0.0077109425 |
| clip_fraction | 0.228 |
| clip_range | 0.161 |
| entropy_loss | -8.17 |
| explained_variance | 0.17 |
| learning_rate | 5.02e-05 |
| loss | 2.71 |
| n_updates | 330 |
| policy_gradient_loss | -0.00697 |
| value_loss | 8.1 |
------------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.37e+03 |
| ep_rew_mean | -71.1 |
| time/ | |
| fps | 348 |
| iterations | 35 |
| time_elapsed | 159 |
| total_timesteps | 55510 |
| train/ | |
| approx_kl | 0.009447264 |
| clip_fraction | 0.209 |
| clip_range | 0.161 |
| entropy_loss | -8.2 |
| explained_variance | 0.244 |
| learning_rate | 5.02e-05 |
| loss | 13.6 |
| n_updates | 340 |
| policy_gradient_loss | -0.00654 |
| value_loss | 11.1 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.34e+03 |
| ep_rew_mean | -77.4 |
| time/ | |
| fps | 347 |
| iterations | 36 |
| time_elapsed | 164 |
| total_timesteps | 57096 |
| train/ | |
| approx_kl | 0.011554491 |
| clip_fraction | 0.213 |
| clip_range | 0.161 |
| entropy_loss | -8.18 |
| explained_variance | 0.243 |
| learning_rate | 5.02e-05 |
| loss | 1.56 |
| n_updates | 350 |
| policy_gradient_loss | -0.00918 |
| value_loss | 35.2 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.32e+03 |
| ep_rew_mean | -65.5 |
| time/ | |
| fps | 346 |
| iterations | 37 |
| time_elapsed | 169 |
| total_timesteps | 58682 |
| train/ | |
| approx_kl | 0.014401031 |
| clip_fraction | 0.277 |
| clip_range | 0.161 |
| entropy_loss | -8.09 |
| explained_variance | 0.676 |
| learning_rate | 5.02e-05 |
| loss | 3.44 |
| n_updates | 360 |
| policy_gradient_loss | -0.0114 |
| value_loss | 29 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.32e+03 |
| ep_rew_mean | -65.5 |
| time/ | |
| fps | 345 |
| iterations | 38 |
| time_elapsed | 174 |
| total_timesteps | 60268 |
| train/ | |
| approx_kl | 0.01188478 |
| clip_fraction | 0.218 |
| clip_range | 0.161 |
| entropy_loss | -8.03 |
| explained_variance | 0.506 |
| learning_rate | 5.02e-05 |
| loss | 3.72 |
| n_updates | 370 |
| policy_gradient_loss | -0.00428 |
| value_loss | 51.7 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.33e+03 |
| ep_rew_mean | -69 |
| time/ | |
| fps | 346 |
| iterations | 39 |
| time_elapsed | 178 |
| total_timesteps | 61854 |
| train/ | |
| approx_kl | 0.010957578 |
| clip_fraction | 0.247 |
| clip_range | 0.161 |
| entropy_loss | -8.11 |
| explained_variance | 0.534 |
| learning_rate | 5.02e-05 |
| loss | 1.98 |
| n_updates | 380 |
| policy_gradient_loss | -0.0112 |
| value_loss | 8.17 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.34e+03 |
| ep_rew_mean | -74.6 |
| time/ | |
| fps | 346 |
| iterations | 40 |
| time_elapsed | 182 |
| total_timesteps | 63440 |
| train/ | |
| approx_kl | 0.012994195 |
| clip_fraction | 0.245 |
| clip_range | 0.161 |
| entropy_loss | -8.05 |
| explained_variance | 0.572 |
| learning_rate | 5.02e-05 |
| loss | 2.95 |
| n_updates | 390 |
| policy_gradient_loss | -0.0108 |
| value_loss | 13.3 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.34e+03 |
| ep_rew_mean | -74.6 |
| time/ | |
| fps | 337 |
| iterations | 41 |
| time_elapsed | 192 |
| total_timesteps | 65026 |
| train/ | |
| approx_kl | 0.01015701 |
| clip_fraction | 0.236 |
| clip_range | 0.161 |
| entropy_loss | -8.1 |
| explained_variance | 0.263 |
| learning_rate | 5.02e-05 |
| loss | 4.03 |
| n_updates | 400 |
| policy_gradient_loss | -0.012 |
| value_loss | 27.9 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.33e+03 |
| ep_rew_mean | -82 |
| time/ | |
| fps | 338 |
| iterations | 42 |
| time_elapsed | 196 |
| total_timesteps | 66612 |
| train/ | |
| approx_kl | 0.013143239 |
| clip_fraction | 0.255 |
| clip_range | 0.161 |
| entropy_loss | -8.09 |
| explained_variance | 0.672 |
| learning_rate | 5.02e-05 |
| loss | 2.09 |
| n_updates | 410 |
| policy_gradient_loss | -0.0143 |
| value_loss | 3.7 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.31e+03 |
| ep_rew_mean | -78.7 |
| time/ | |
| fps | 338 |
| iterations | 43 |
| time_elapsed | 201 |
| total_timesteps | 68198 |
| train/ | |
| approx_kl | 0.019480813 |
| clip_fraction | 0.326 |
| clip_range | 0.161 |
| entropy_loss | -8.01 |
| explained_variance | 0.28 |
| learning_rate | 5.02e-05 |
| loss | 3.27 |
| n_updates | 420 |
| policy_gradient_loss | -0.000208 |
| value_loss | 31.3 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.31e+03 |
| ep_rew_mean | -78.7 |
| time/ | |
| fps | 339 |
| iterations | 44 |
| time_elapsed | 205 |
| total_timesteps | 69784 |
| train/ | |
| approx_kl | 0.01460914 |
| clip_fraction | 0.273 |
| clip_range | 0.161 |
| entropy_loss | -7.99 |
| explained_variance | 0.66 |
| learning_rate | 5.02e-05 |
| loss | 3.57 |
| n_updates | 430 |
| policy_gradient_loss | -0.0082 |
| value_loss | 15.4 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.35e+03 |
| ep_rew_mean | -76.4 |
| time/ | |
| fps | 338 |
| iterations | 45 |
| time_elapsed | 210 |
| total_timesteps | 71370 |
| train/ | |
| approx_kl | 0.01634773 |
| clip_fraction | 0.303 |
| clip_range | 0.161 |
| entropy_loss | -7.99 |
| explained_variance | 0.729 |
| learning_rate | 5.02e-05 |
| loss | 4.86 |
| n_updates | 440 |
| policy_gradient_loss | -0.00564 |
| value_loss | 13.7 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.34e+03 |
| ep_rew_mean | -79.5 |
| time/ | |
| fps | 337 |
| iterations | 46 |
| time_elapsed | 216 |
| total_timesteps | 72956 |
| train/ | |
| approx_kl | 0.013031598 |
| clip_fraction | 0.306 |
| clip_range | 0.161 |
| entropy_loss | -7.89 |
| explained_variance | 0.513 |
| learning_rate | 5.02e-05 |
| loss | 0.69 |
| n_updates | 450 |
| policy_gradient_loss | -0.00602 |
| value_loss | 7.02 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.34e+03 |
| ep_rew_mean | -79.5 |
| time/ | |
| fps | 336 |
| iterations | 47 |
| time_elapsed | 221 |
| total_timesteps | 74542 |
| train/ | |
| approx_kl | 0.012420004 |
| clip_fraction | 0.261 |
| clip_range | 0.161 |
| entropy_loss | -7.91 |
| explained_variance | 0.314 |
| learning_rate | 5.02e-05 |
| loss | 3.91 |
| n_updates | 460 |
| policy_gradient_loss | -0.00565 |
| value_loss | 20.3 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.36e+03 |
| ep_rew_mean | -76.7 |
| time/ | |
| fps | 335 |
| iterations | 48 |
| time_elapsed | 226 |
| total_timesteps | 76128 |
| train/ | |
| approx_kl | 0.012879474 |
| clip_fraction | 0.28 |
| clip_range | 0.161 |
| entropy_loss | -7.89 |
| explained_variance | 0.787 |
| learning_rate | 5.02e-05 |
| loss | 4.51 |
| n_updates | 470 |
| policy_gradient_loss | -0.0114 |
| value_loss | 7.77 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.36e+03 |
| ep_rew_mean | -76.7 |
| time/ | |
| fps | 334 |
| iterations | 49 |
| time_elapsed | 232 |
| total_timesteps | 77714 |
| train/ | |
| approx_kl | 0.015691841 |
| clip_fraction | 0.268 |
| clip_range | 0.161 |
| entropy_loss | -7.97 |
| explained_variance | 0.792 |
| learning_rate | 5.02e-05 |
| loss | 2.4 |
| n_updates | 480 |
| policy_gradient_loss | -0.00649 |
| value_loss | 13.2 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.36e+03 |
| ep_rew_mean | -75.5 |
| time/ | |
| fps | 334 |
| iterations | 50 |
| time_elapsed | 237 |
| total_timesteps | 79300 |
| train/ | |
| approx_kl | 0.01415793 |
| clip_fraction | 0.271 |
| clip_range | 0.161 |
| entropy_loss | -7.91 |
| explained_variance | 0.663 |
| learning_rate | 5.02e-05 |
| loss | 3.25 |
| n_updates | 490 |
| policy_gradient_loss | -0.0147 |
| value_loss | 10.4 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.34e+03 |
| ep_rew_mean | -76.9 |
| time/ | |
| fps | 333 |
| iterations | 51 |
| time_elapsed | 242 |
| total_timesteps | 80886 |
| train/ | |
| approx_kl | 0.011811551 |
| clip_fraction | 0.309 |
| clip_range | 0.161 |
| entropy_loss | -7.68 |
| explained_variance | 0.634 |
| learning_rate | 5.02e-05 |
| loss | 1.68 |
| n_updates | 500 |
| policy_gradient_loss | -0.00301 |
| value_loss | 10.3 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.34e+03 |
| ep_rew_mean | -83.2 |
| time/ | |
| fps | 332 |
| iterations | 52 |
| time_elapsed | 247 |
| total_timesteps | 82472 |
| train/ | |
| approx_kl | 0.015533115 |
| clip_fraction | 0.25 |
| clip_range | 0.161 |
| entropy_loss | -7.86 |
| explained_variance | 0.652 |
| learning_rate | 5.02e-05 |
| loss | 5.43 |
| n_updates | 510 |
| policy_gradient_loss | -0.0107 |
| value_loss | 10.6 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.33e+03 |
| ep_rew_mean | -87.6 |
| time/ | |
| fps | 333 |
| iterations | 53 |
| time_elapsed | 252 |
| total_timesteps | 84058 |
| train/ | |
| approx_kl | 0.017240252 |
| clip_fraction | 0.316 |
| clip_range | 0.161 |
| entropy_loss | -8 |
| explained_variance | 0.623 |
| learning_rate | 5.02e-05 |
| loss | 14.6 |
| n_updates | 520 |
| policy_gradient_loss | -0.00709 |
| value_loss | 33 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.31e+03 |
| ep_rew_mean | -82.3 |
| time/ | |
| fps | 333 |
| iterations | 54 |
| time_elapsed | 256 |
| total_timesteps | 85644 |
| train/ | |
| approx_kl | 0.015610819 |
| clip_fraction | 0.297 |
| clip_range | 0.161 |
| entropy_loss | -7.9 |
| explained_variance | 0.506 |
| learning_rate | 5.02e-05 |
| loss | 79.8 |
| n_updates | 530 |
| policy_gradient_loss | -0.0053 |
| value_loss | 30.7 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.31e+03 |
| ep_rew_mean | -82.3 |
| time/ | |
| fps | 334 |
| iterations | 55 |
| time_elapsed | 261 |
| total_timesteps | 87230 |
| train/ | |
| approx_kl | 0.01877381 |
| clip_fraction | 0.33 |
| clip_range | 0.161 |
| entropy_loss | -7.88 |
| explained_variance | 0.388 |
| learning_rate | 5.02e-05 |
| loss | 2.4 |
| n_updates | 540 |
| policy_gradient_loss | -0.00414 |
| value_loss | 19.6 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.3e+03 |
| ep_rew_mean | -89.3 |
| time/ | |
| fps | 334 |
| iterations | 56 |
| time_elapsed | 265 |
| total_timesteps | 88816 |
| train/ | |
| approx_kl | 0.018082947 |
| clip_fraction | 0.339 |
| clip_range | 0.161 |
| entropy_loss | -7.79 |
| explained_variance | 0.742 |
| learning_rate | 5.02e-05 |
| loss | 1.75 |
| n_updates | 550 |
| policy_gradient_loss | -0.00747 |
| value_loss | 8.59 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.31e+03 |
| ep_rew_mean | -89.3 |
| time/ | |
| fps | 334 |
| iterations | 57 |
| time_elapsed | 269 |
| total_timesteps | 90402 |
| train/ | |
| approx_kl | 0.033854794 |
| clip_fraction | 0.4 |
| clip_range | 0.161 |
| entropy_loss | -7.85 |
| explained_variance | 0.145 |
| learning_rate | 5.02e-05 |
| loss | 2.04 |
| n_updates | 560 |
| policy_gradient_loss | 0.00031 |
| value_loss | 43.8 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.28e+03 |
| ep_rew_mean | -81.3 |
| time/ | |
| fps | 335 |
| iterations | 58 |
| time_elapsed | 274 |
| total_timesteps | 91988 |
| train/ | |
| approx_kl | 0.02308767 |
| clip_fraction | 0.369 |
| clip_range | 0.161 |
| entropy_loss | -7.65 |
| explained_variance | 0.836 |
| learning_rate | 5.02e-05 |
| loss | 1.59 |
| n_updates | 570 |
| policy_gradient_loss | -0.00735 |
| value_loss | 12.1 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.28e+03 |
| ep_rew_mean | -81.3 |
| time/ | |
| fps | 335 |
| iterations | 59 |
| time_elapsed | 279 |
| total_timesteps | 93574 |
| train/ | |
| approx_kl | 0.020519579 |
| clip_fraction | 0.345 |
| clip_range | 0.161 |
| entropy_loss | -7.8 |
| explained_variance | 0.278 |
| learning_rate | 5.02e-05 |
| loss | 9.11 |
| n_updates | 580 |
| policy_gradient_loss | -0.00419 |
| value_loss | 45 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.28e+03 |
| ep_rew_mean | -81.3 |
| time/ | |
| fps | 335 |
| iterations | 60 |
| time_elapsed | 283 |
| total_timesteps | 95160 |
| train/ | |
| approx_kl | 0.021029348 |
| clip_fraction | 0.365 |
| clip_range | 0.161 |
| entropy_loss | -7.82 |
| explained_variance | 0.839 |
| learning_rate | 5.02e-05 |
| loss | 1.28 |
| n_updates | 590 |
| policy_gradient_loss | -0.00539 |
| value_loss | 4.64 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.28e+03 |
| ep_rew_mean | -81.3 |
| time/ | |
| fps | 335 |
| iterations | 61 |
| time_elapsed | 288 |
| total_timesteps | 96746 |
| train/ | |
| approx_kl | 0.017239623 |
| clip_fraction | 0.328 |
| clip_range | 0.161 |
| entropy_loss | -7.65 |
| explained_variance | 0.762 |
| learning_rate | 5.02e-05 |
| loss | 3.36 |
| n_updates | 600 |
| policy_gradient_loss | -0.00336 |
| value_loss | 13.7 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.39e+03 |
| ep_rew_mean | -84.9 |
| time/ | |
| fps | 335 |
| iterations | 62 |
| time_elapsed | 292 |
| total_timesteps | 98332 |
| train/ | |
| approx_kl | 0.014254608 |
| clip_fraction | 0.358 |
| clip_range | 0.161 |
| entropy_loss | -7.61 |
| explained_variance | 0.69 |
| learning_rate | 5.02e-05 |
| loss | 10.9 |
| n_updates | 610 |
| policy_gradient_loss | -0.00208 |
| value_loss | 5.69 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.39e+03 |
| ep_rew_mean | -84.9 |
| time/ | |
| fps | 336 |
| iterations | 63 |
| time_elapsed | 297 |
| total_timesteps | 99918 |
| train/ | |
| approx_kl | 0.012963827 |
| clip_fraction | 0.302 |
| clip_range | 0.161 |
| entropy_loss | -7.63 |
| explained_variance | 0.322 |
| learning_rate | 5.02e-05 |
| loss | 9.69 |
| n_updates | 620 |
| policy_gradient_loss | -0.00529 |
| value_loss | 29.8 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.41e+03 |
| ep_rew_mean | -83.5 |
| time/ | |
| fps | 336 |
| iterations | 64 |
| time_elapsed | 301 |
| total_timesteps | 101504 |
| train/ | |
| approx_kl | 0.018386848 |
| clip_fraction | 0.319 |
| clip_range | 0.161 |
| entropy_loss | -7.67 |
| explained_variance | 0.638 |
| learning_rate | 5.02e-05 |
| loss | 1.82 |
| n_updates | 630 |
| policy_gradient_loss | -0.00643 |
| value_loss | 6.2 |
-----------------------------------------
[I 2023-03-30 21:56:30,174] Trial 2 finished with value: -348.0 and parameters: {'n_steps': 1586, 'gamma': 0.9956348644941185, 'learning_rate': 5.0170841536324054e-05, 'clip_range': 0.16056638694970846, 'gae_lambda': 0.8608765423049661}. Best is trial 1 with value: -205.0.
Using cuda device
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.
C:\ProgramData\Anaconda3\envs\StreetFighterAI\lib\site-packages\stable_baselines3\ppo\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 3813`, after every 59 untruncated mini-batches, there will be a truncated mini-batch of size 37
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=3813 and n_envs=1)
warnings.warn(
Logging to logs/PPO_21
---------------------------------
| rollout/ | |
| ep_len_mean | 3.42e+03 |
| ep_rew_mean | -8 |
| time/ | |
| fps | 534 |
| iterations | 1 |
| time_elapsed | 7 |
| total_timesteps | 3813 |
---------------------------------
------------------------------------------
| rollout/ | |
| ep_len_mean | 3.23e+03 |
| ep_rew_mean | -32 |
| time/ | |
| fps | 404 |
| iterations | 2 |
| time_elapsed | 18 |
| total_timesteps | 7626 |
| train/ | |
| approx_kl | 0.0036819936 |
| clip_fraction | 0.136 |
| clip_range | 0.123 |
| entropy_loss | -8.31 |
| explained_variance | 0.000731 |
| learning_rate | 5.81e-05 |
| loss | 0.479 |
| n_updates | 10 |
| policy_gradient_loss | -0.00524 |
| value_loss | 9.78 |
------------------------------------------
------------------------------------------
| rollout/ | |
| ep_len_mean | 2.82e+03 |
| ep_rew_mean | -104 |
| time/ | |
| fps | 365 |
| iterations | 3 |
| time_elapsed | 31 |
| total_timesteps | 11439 |
| train/ | |
| approx_kl | 0.0041210777 |
| clip_fraction | 0.136 |
| clip_range | 0.123 |
| entropy_loss | -8.31 |
| explained_variance | -0.119 |
| learning_rate | 5.81e-05 |
| loss | 0.859 |
| n_updates | 20 |
| policy_gradient_loss | -0.0063 |
| value_loss | 8.05 |
------------------------------------------
------------------------------------------
| rollout/ | |
| ep_len_mean | 2.65e+03 |
| ep_rew_mean | -75.4 |
| time/ | |
| fps | 348 |
| iterations | 4 |
| time_elapsed | 43 |
| total_timesteps | 15252 |
| train/ | |
| approx_kl | 0.0068013067 |
| clip_fraction | 0.186 |
| clip_range | 0.123 |
| entropy_loss | -8.3 |
| explained_variance | -0.164 |
| learning_rate | 5.81e-05 |
| loss | 125 |
| n_updates | 30 |
| policy_gradient_loss | -0.00652 |
| value_loss | 27.2 |
------------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.58e+03 |
| ep_rew_mean | -83 |
| time/ | |
| fps | 337 |
| iterations | 5 |
| time_elapsed | 56 |
| total_timesteps | 19065 |
| train/ | |
| approx_kl | 0.00511329 |
| clip_fraction | 0.199 |
| clip_range | 0.123 |
| entropy_loss | -8.3 |
| explained_variance | -0.527 |
| learning_rate | 5.81e-05 |
| loss | 1.5 |
| n_updates | 40 |
| policy_gradient_loss | -0.00346 |
| value_loss | 14.6 |
----------------------------------------
------------------------------------------
| rollout/ | |
| ep_len_mean | 2.54e+03 |
| ep_rew_mean | -92.9 |
| time/ | |
| fps | 336 |
| iterations | 6 |
| time_elapsed | 68 |
| total_timesteps | 22878 |
| train/ | |
| approx_kl | 0.0047720987 |
| clip_fraction | 0.186 |
| clip_range | 0.123 |
| entropy_loss | -8.3 |
| explained_variance | -0.222 |
| learning_rate | 5.81e-05 |
| loss | 3.05 |
| n_updates | 50 |
| policy_gradient_loss | -0.00716 |
| value_loss | 13.6 |
------------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.56e+03 |
| ep_rew_mean | -122 |
| time/ | |
| fps | 336 |
| iterations | 7 |
| time_elapsed | 79 |
| total_timesteps | 26691 |
| train/ | |
| approx_kl | 0.005124747 |
| clip_fraction | 0.208 |
| clip_range | 0.123 |
| entropy_loss | -8.3 |
| explained_variance | -0.166 |
| learning_rate | 5.81e-05 |
| loss | 2.19 |
| n_updates | 60 |
| policy_gradient_loss | -0.00516 |
| value_loss | 15.4 |
-----------------------------------------
------------------------------------------
| rollout/ | |
| ep_len_mean | 2.46e+03 |
| ep_rew_mean | -90 |
| time/ | |
| fps | 331 |
| iterations | 8 |
| time_elapsed | 92 |
| total_timesteps | 30504 |
| train/ | |
| approx_kl | 0.0077587436 |
| clip_fraction | 0.243 |
| clip_range | 0.123 |
| entropy_loss | -8.29 |
| explained_variance | -0.0477 |
| learning_rate | 5.81e-05 |
| loss | 1.42 |
| n_updates | 70 |
| policy_gradient_loss | -0.00297 |
| value_loss | 31.2 |
------------------------------------------
------------------------------------------
| rollout/ | |
| ep_len_mean | 2.46e+03 |
| ep_rew_mean | -78.2 |
| time/ | |
| fps | 327 |
| iterations | 9 |
| time_elapsed | 104 |
| total_timesteps | 34317 |
| train/ | |
| approx_kl | 0.0055853897 |
| clip_fraction | 0.222 |
| clip_range | 0.123 |
| entropy_loss | -8.29 |
| explained_variance | -0.242 |
| learning_rate | 5.81e-05 |
| loss | 0.979 |
| n_updates | 80 |
| policy_gradient_loss | -0.00466 |
| value_loss | 18.7 |
------------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.45e+03 |
| ep_rew_mean | -81.7 |
| time/ | |
| fps | 326 |
| iterations | 10 |
| time_elapsed | 116 |
| total_timesteps | 38130 |
| train/ | |
| approx_kl | 0.005408008 |
| clip_fraction | 0.217 |
| clip_range | 0.123 |
| entropy_loss | -8.29 |
| explained_variance | -1.29 |
| learning_rate | 5.81e-05 |
| loss | 4.61 |
| n_updates | 90 |
| policy_gradient_loss | -0.00827 |
| value_loss | 5.36 |
-----------------------------------------
------------------------------------------
| rollout/ | |
| ep_len_mean | 2.45e+03 |
| ep_rew_mean | -76.2 |
| time/ | |
| fps | 326 |
| iterations | 11 |
| time_elapsed | 128 |
| total_timesteps | 41943 |
| train/ | |
| approx_kl | 0.0057736286 |
| clip_fraction | 0.19 |
| clip_range | 0.123 |
| entropy_loss | -8.28 |
| explained_variance | -0.141 |
| learning_rate | 5.81e-05 |
| loss | 3.36 |
| n_updates | 100 |
| policy_gradient_loss | -0.00708 |
| value_loss | 19.9 |
------------------------------------------
------------------------------------------
| rollout/ | |
| ep_len_mean | 2.6e+03 |
| ep_rew_mean | -88.4 |
| time/ | |
| fps | 326 |
| iterations | 12 |
| time_elapsed | 140 |
| total_timesteps | 45756 |
| train/ | |
| approx_kl | 0.0061197034 |
| clip_fraction | 0.221 |
| clip_range | 0.123 |
| entropy_loss | -8.27 |
| explained_variance | -0.275 |
| learning_rate | 5.81e-05 |
| loss | 1.75 |
| n_updates | 110 |
| policy_gradient_loss | -0.00862 |
| value_loss | 9.48 |
------------------------------------------
------------------------------------------
| rollout/ | |
| ep_len_mean | 2.58e+03 |
| ep_rew_mean | -86.6 |
| time/ | |
| fps | 329 |
| iterations | 13 |
| time_elapsed | 150 |
| total_timesteps | 49569 |
| train/ | |
| approx_kl | 0.0073136846 |
| clip_fraction | 0.233 |
| clip_range | 0.123 |
| entropy_loss | -8.27 |
| explained_variance | -0.0933 |
| learning_rate | 5.81e-05 |
| loss | 0.466 |
| n_updates | 120 |
| policy_gradient_loss | -0.00841 |
| value_loss | 18 |
------------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.8e+03 |
| ep_rew_mean | -18.5 |
| time/ | |
| fps | 330 |
| iterations | 14 |
| time_elapsed | 161 |
| total_timesteps | 53382 |
| train/ | |
| approx_kl | 0.012948585 |
| clip_fraction | 0.348 |
| clip_range | 0.123 |
| entropy_loss | -8.26 |
| explained_variance | 0.0057 |
| learning_rate | 5.81e-05 |
| loss | 2.17 |
| n_updates | 130 |
| policy_gradient_loss | 0.00365 |
| value_loss | 1.38e+03 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.79e+03 |
| ep_rew_mean | -17.8 |
| time/ | |
| fps | 332 |
| iterations | 15 |
| time_elapsed | 172 |
| total_timesteps | 57195 |
| train/ | |
| approx_kl | 0.010128591 |
| clip_fraction | 0.417 |
| clip_range | 0.123 |
| entropy_loss | -8.25 |
| explained_variance | -4.29 |
| learning_rate | 5.81e-05 |
| loss | 3.52 |
| n_updates | 140 |
| policy_gradient_loss | 0.00333 |
| value_loss | 7.59 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.69e+03 |
| ep_rew_mean | -25 |
| time/ | |
| fps | 333 |
| iterations | 16 |
| time_elapsed | 182 |
| total_timesteps | 61008 |
| train/ | |
| approx_kl | 0.009500639 |
| clip_fraction | 0.306 |
| clip_range | 0.123 |
| entropy_loss | -8.27 |
| explained_variance | -0.37 |
| learning_rate | 5.81e-05 |
| loss | 3.36 |
| n_updates | 150 |
| policy_gradient_loss | -0.00701 |
| value_loss | 9.43 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.69e+03 |
| ep_rew_mean | -35.8 |
| time/ | |
| fps | 334 |
| iterations | 17 |
| time_elapsed | 193 |
| total_timesteps | 64821 |
| train/ | |
| approx_kl | 0.013091733 |
| clip_fraction | 0.352 |
| clip_range | 0.123 |
| entropy_loss | -8.25 |
| explained_variance | -0.138 |
| learning_rate | 5.81e-05 |
| loss | 0.699 |
| n_updates | 160 |
| policy_gradient_loss | -0.00203 |
| value_loss | 25.9 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.67e+03 |
| ep_rew_mean | -34 |
| time/ | |
| fps | 335 |
| iterations | 18 |
| time_elapsed | 204 |
| total_timesteps | 68634 |
| train/ | |
| approx_kl | 0.013554989 |
| clip_fraction | 0.341 |
| clip_range | 0.123 |
| entropy_loss | -8.24 |
| explained_variance | -0.24 |
| learning_rate | 5.81e-05 |
| loss | 0.791 |
| n_updates | 170 |
| policy_gradient_loss | -0.00409 |
| value_loss | 18.6 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.64e+03 |
| ep_rew_mean | -52.1 |
| time/ | |
| fps | 335 |
| iterations | 19 |
| time_elapsed | 215 |
| total_timesteps | 72447 |
| train/ | |
| approx_kl | 0.010819951 |
| clip_fraction | 0.347 |
| clip_range | 0.123 |
| entropy_loss | -8.23 |
| explained_variance | -0.575 |
| learning_rate | 5.81e-05 |
| loss | 0.601 |
| n_updates | 180 |
| policy_gradient_loss | -0.00691 |
| value_loss | 9.12 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.63e+03 |
| ep_rew_mean | -60.8 |
| time/ | |
| fps | 333 |
| iterations | 20 |
| time_elapsed | 228 |
| total_timesteps | 76260 |
| train/ | |
| approx_kl | 0.015728015 |
| clip_fraction | 0.39 |
| clip_range | 0.123 |
| entropy_loss | -8.24 |
| explained_variance | -0.0299 |
| learning_rate | 5.81e-05 |
| loss | 1.85 |
| n_updates | 190 |
| policy_gradient_loss | -0.00169 |
| value_loss | 27.5 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.62e+03 |
| ep_rew_mean | -65.9 |
| time/ | |
| fps | 331 |
| iterations | 21 |
| time_elapsed | 241 |
| total_timesteps | 80073 |
| train/ | |
| approx_kl | 0.013783906 |
| clip_fraction | 0.427 |
| clip_range | 0.123 |
| entropy_loss | -8.22 |
| explained_variance | -0.214 |
| learning_rate | 5.81e-05 |
| loss | 1.94 |
| n_updates | 200 |
| policy_gradient_loss | 0.000583 |
| value_loss | 35.7 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.63e+03 |
| ep_rew_mean | -65.4 |
| time/ | |
| fps | 329 |
| iterations | 22 |
| time_elapsed | 254 |
| total_timesteps | 83886 |
| train/ | |
| approx_kl | 0.014645203 |
| clip_fraction | 0.43 |
| clip_range | 0.123 |
| entropy_loss | -8.21 |
| explained_variance | -0.164 |
| learning_rate | 5.81e-05 |
| loss | 2.59 |
| n_updates | 210 |
| policy_gradient_loss | 0.00338 |
| value_loss | 16.1 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.55e+03 |
| ep_rew_mean | -75.3 |
| time/ | |
| fps | 327 |
| iterations | 23 |
| time_elapsed | 267 |
| total_timesteps | 87699 |
| train/ | |
| approx_kl | 0.015013908 |
| clip_fraction | 0.426 |
| clip_range | 0.123 |
| entropy_loss | -8.19 |
| explained_variance | -0.904 |
| learning_rate | 5.81e-05 |
| loss | 1.16 |
| n_updates | 220 |
| policy_gradient_loss | -0.000645 |
| value_loss | 7.32 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.53e+03 |
| ep_rew_mean | -73.4 |
| time/ | |
| fps | 326 |
| iterations | 24 |
| time_elapsed | 280 |
| total_timesteps | 91512 |
| train/ | |
| approx_kl | 0.02647818 |
| clip_fraction | 0.481 |
| clip_range | 0.123 |
| entropy_loss | -8.11 |
| explained_variance | -0.0693 |
| learning_rate | 5.81e-05 |
| loss | 3.85 |
| n_updates | 230 |
| policy_gradient_loss | 0.00305 |
| value_loss | 32.6 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.5e+03 |
| ep_rew_mean | -71.4 |
| time/ | |
| fps | 325 |
| iterations | 25 |
| time_elapsed | 293 |
| total_timesteps | 95325 |
| train/ | |
| approx_kl | 0.019708665 |
| clip_fraction | 0.482 |
| clip_range | 0.123 |
| entropy_loss | -8.12 |
| explained_variance | -0.496 |
| learning_rate | 5.81e-05 |
| loss | 3.13 |
| n_updates | 240 |
| policy_gradient_loss | 0.0037 |
| value_loss | 13.6 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.47e+03 |
| ep_rew_mean | -78.4 |
| time/ | |
| fps | 323 |
| iterations | 26 |
| time_elapsed | 306 |
| total_timesteps | 99138 |
| train/ | |
| approx_kl | 0.017824553 |
| clip_fraction | 0.454 |
| clip_range | 0.123 |
| entropy_loss | -8.11 |
| explained_variance | -0.288 |
| learning_rate | 5.81e-05 |
| loss | 0.948 |
| n_updates | 250 |
| policy_gradient_loss | 0.000339 |
| value_loss | 15.5 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.41e+03 |
| ep_rew_mean | -63.9 |
| time/ | |
| fps | 322 |
| iterations | 27 |
| time_elapsed | 319 |
| total_timesteps | 102951 |
| train/ | |
| approx_kl | 0.01960509 |
| clip_fraction | 0.495 |
| clip_range | 0.123 |
| entropy_loss | -8.1 |
| explained_variance | -0.318 |
| learning_rate | 5.81e-05 |
| loss | 0.768 |
| n_updates | 260 |
| policy_gradient_loss | 0.00504 |
| value_loss | 22.9 |
----------------------------------------
[I 2023-03-30 22:02:13,996] Trial 3 finished with value: -296.0 and parameters: {'n_steps': 3813, 'gamma': 0.904141731391283, 'learning_rate': 5.8079041373677925e-05, 'clip_range': 0.12331848349559273, 'gae_lambda': 0.9741511540746485}. Best is trial 1 with value: -205.0.
Using cuda device
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.
C:\ProgramData\Anaconda3\envs\StreetFighterAI\lib\site-packages\stable_baselines3\ppo\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 2255`, after every 35 untruncated mini-batches, there will be a truncated mini-batch of size 15
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=2255 and n_envs=1)
warnings.warn(
Logging to logs/PPO_22
---------------------------------
| rollout/ | |
| ep_len_mean | 1.96e+03 |
| ep_rew_mean | -277 |
| time/ | |
| fps | 612 |
| iterations | 1 |
| time_elapsed | 3 |
| total_timesteps | 2255 |
---------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 1.76e+03 |
| ep_rew_mean | -312 |
| time/ | |
| fps | 449 |
| iterations | 2 |
| time_elapsed | 10 |
| total_timesteps | 4510 |
| train/ | |
| approx_kl | 0.020538189 |
| clip_fraction | 0.184 |
| clip_range | 0.246 |
| entropy_loss | -8.3 |
| explained_variance | 0.00102 |
| learning_rate | 6.18e-05 |
| loss | 0.312 |
| n_updates | 10 |
| policy_gradient_loss | -0.0141 |
| value_loss | 28.1 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.02e+03 |
| ep_rew_mean | -205 |
| time/ | |
| fps | 397 |
| iterations | 3 |
| time_elapsed | 17 |
| total_timesteps | 6765 |
| train/ | |
| approx_kl | 0.014282044 |
| clip_fraction | 0.149 |
| clip_range | 0.246 |
| entropy_loss | -8.29 |
| explained_variance | 0.0389 |
| learning_rate | 6.18e-05 |
| loss | 3.57 |
| n_updates | 20 |
| policy_gradient_loss | -0.00883 |
| value_loss | 41.2 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.02e+03 |
| ep_rew_mean | -205 |
| time/ | |
| fps | 378 |
| iterations | 4 |
| time_elapsed | 23 |
| total_timesteps | 9020 |
| train/ | |
| approx_kl | 0.010472495 |
| clip_fraction | 0.124 |
| clip_range | 0.246 |
| entropy_loss | -8.27 |
| explained_variance | -0.182 |
| learning_rate | 6.18e-05 |
| loss | 0.526 |
| n_updates | 30 |
| policy_gradient_loss | -0.0058 |
| value_loss | 6.26 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.29e+03 |
| ep_rew_mean | -158 |
| time/ | |
| fps | 368 |
| iterations | 5 |
| time_elapsed | 30 |
| total_timesteps | 11275 |
| train/ | |
| approx_kl | 0.014202305 |
| clip_fraction | 0.116 |
| clip_range | 0.246 |
| entropy_loss | -8.26 |
| explained_variance | -0.0118 |
| learning_rate | 6.18e-05 |
| loss | 0.206 |
| n_updates | 40 |
| policy_gradient_loss | -0.00424 |
| value_loss | 8.23 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.4e+03 |
| ep_rew_mean | -160 |
| time/ | |
| fps | 361 |
| iterations | 6 |
| time_elapsed | 37 |
| total_timesteps | 13530 |
| train/ | |
| approx_kl | 0.009082135 |
| clip_fraction | 0.0682 |
| clip_range | 0.246 |
| entropy_loss | -8.25 |
| explained_variance | 0.0728 |
| learning_rate | 6.18e-05 |
| loss | 3.01 |
| n_updates | 50 |
| policy_gradient_loss | -0.0105 |
| value_loss | 4.79 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.38e+03 |
| ep_rew_mean | -162 |
| time/ | |
| fps | 360 |
| iterations | 7 |
| time_elapsed | 43 |
| total_timesteps | 15785 |
| train/ | |
| approx_kl | 0.016703699 |
| clip_fraction | 0.153 |
| clip_range | 0.246 |
| entropy_loss | -8.22 |
| explained_variance | 0.0113 |
| learning_rate | 6.18e-05 |
| loss | 5.27 |
| n_updates | 60 |
| policy_gradient_loss | -0.0144 |
| value_loss | 13 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.25e+03 |
| ep_rew_mean | -128 |
| time/ | |
| fps | 360 |
| iterations | 8 |
| time_elapsed | 49 |
| total_timesteps | 18040 |
| train/ | |
| approx_kl | 0.024009299 |
| clip_fraction | 0.152 |
| clip_range | 0.246 |
| entropy_loss | -8.2 |
| explained_variance | 0.0536 |
| learning_rate | 6.18e-05 |
| loss | 0.72 |
| n_updates | 70 |
| policy_gradient_loss | -0.0143 |
| value_loss | 15.3 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.25e+03 |
| ep_rew_mean | -128 |
| time/ | |
| fps | 360 |
| iterations | 9 |
| time_elapsed | 56 |
| total_timesteps | 20295 |
| train/ | |
| approx_kl | 0.02027614 |
| clip_fraction | 0.184 |
| clip_range | 0.246 |
| entropy_loss | -8.14 |
| explained_variance | 0.0651 |
| learning_rate | 6.18e-05 |
| loss | 0.961 |
| n_updates | 80 |
| policy_gradient_loss | -0.00954 |
| value_loss | 24.2 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.33e+03 |
| ep_rew_mean | -113 |
| time/ | |
| fps | 360 |
| iterations | 10 |
| time_elapsed | 62 |
| total_timesteps | 22550 |
| train/ | |
| approx_kl | 0.016172899 |
| clip_fraction | 0.143 |
| clip_range | 0.246 |
| entropy_loss | -8.12 |
| explained_variance | 0.00202 |
| learning_rate | 6.18e-05 |
| loss | 19.2 |
| n_updates | 90 |
| policy_gradient_loss | -0.00773 |
| value_loss | 8.99 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.28e+03 |
| ep_rew_mean | -104 |
| time/ | |
| fps | 359 |
| iterations | 11 |
| time_elapsed | 68 |
| total_timesteps | 24805 |
| train/ | |
| approx_kl | 0.012545445 |
| clip_fraction | 0.0956 |
| clip_range | 0.246 |
| entropy_loss | -8.15 |
| explained_variance | 0.114 |
| learning_rate | 6.18e-05 |
| loss | 0.305 |
| n_updates | 100 |
| policy_gradient_loss | -0.0125 |
| value_loss | 8.93 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.26e+03 |
| ep_rew_mean | -81.5 |
| time/ | |
| fps | 358 |
| iterations | 12 |
| time_elapsed | 75 |
| total_timesteps | 27060 |
| train/ | |
| approx_kl | 0.01598395 |
| clip_fraction | 0.135 |
| clip_range | 0.246 |
| entropy_loss | -8.1 |
| explained_variance | 0.102 |
| learning_rate | 6.18e-05 |
| loss | 0.936 |
| n_updates | 110 |
| policy_gradient_loss | -0.0195 |
| value_loss | 7.65 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.26e+03 |
| ep_rew_mean | -81.5 |
| time/ | |
| fps | 358 |
| iterations | 13 |
| time_elapsed | 81 |
| total_timesteps | 29315 |
| train/ | |
| approx_kl | 0.016748266 |
| clip_fraction | 0.138 |
| clip_range | 0.246 |
| entropy_loss | -8.09 |
| explained_variance | 0.0378 |
| learning_rate | 6.18e-05 |
| loss | 1.89 |
| n_updates | 120 |
| policy_gradient_loss | -0.016 |
| value_loss | 16 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.26e+03 |
| ep_rew_mean | -81.5 |
| time/ | |
| fps | 358 |
| iterations | 14 |
| time_elapsed | 88 |
| total_timesteps | 31570 |
| train/ | |
| approx_kl | 0.02423302 |
| clip_fraction | 0.145 |
| clip_range | 0.246 |
| entropy_loss | -8.1 |
| explained_variance | -0.000177 |
| learning_rate | 6.18e-05 |
| loss | 0.226 |
| n_updates | 130 |
| policy_gradient_loss | -0.0147 |
| value_loss | 47.5 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.58e+03 |
| ep_rew_mean | -80.2 |
| time/ | |
| fps | 358 |
| iterations | 15 |
| time_elapsed | 94 |
| total_timesteps | 33825 |
| train/ | |
| approx_kl | 0.016911915 |
| clip_fraction | 0.133 |
| clip_range | 0.246 |
| entropy_loss | -8.12 |
| explained_variance | 0.0574 |
| learning_rate | 6.18e-05 |
| loss | 0.128 |
| n_updates | 140 |
| policy_gradient_loss | -0.0177 |
| value_loss | 5.19 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.53e+03 |
| ep_rew_mean | -95.7 |
| time/ | |
| fps | 357 |
| iterations | 16 |
| time_elapsed | 100 |
| total_timesteps | 36080 |
| train/ | |
| approx_kl | 0.012477045 |
| clip_fraction | 0.0815 |
| clip_range | 0.246 |
| entropy_loss | -8.09 |
| explained_variance | 0.0981 |
| learning_rate | 6.18e-05 |
| loss | 1.54 |
| n_updates | 150 |
| policy_gradient_loss | -0.0104 |
| value_loss | 24.3 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.49e+03 |
| ep_rew_mean | -104 |
| time/ | |
| fps | 357 |
| iterations | 17 |
| time_elapsed | 107 |
| total_timesteps | 38335 |
| train/ | |
| approx_kl | 0.023185179 |
| clip_fraction | 0.145 |
| clip_range | 0.246 |
| entropy_loss | -8.09 |
| explained_variance | 0.0336 |
| learning_rate | 6.18e-05 |
| loss | 0.609 |
| n_updates | 160 |
| policy_gradient_loss | -0.0129 |
| value_loss | 22.4 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.47e+03 |
| ep_rew_mean | -105 |
| time/ | |
| fps | 357 |
| iterations | 18 |
| time_elapsed | 113 |
| total_timesteps | 40590 |
| train/ | |
| approx_kl | 0.015480906 |
| clip_fraction | 0.17 |
| clip_range | 0.246 |
| entropy_loss | -8.08 |
| explained_variance | 0.0682 |
| learning_rate | 6.18e-05 |
| loss | 0.449 |
| n_updates | 170 |
| policy_gradient_loss | -0.0136 |
| value_loss | 18 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.44e+03 |
| ep_rew_mean | -119 |
| time/ | |
| fps | 357 |
| iterations | 19 |
| time_elapsed | 119 |
| total_timesteps | 42845 |
| train/ | |
| approx_kl | 0.01642779 |
| clip_fraction | 0.136 |
| clip_range | 0.246 |
| entropy_loss | -8.03 |
| explained_variance | 0.0203 |
| learning_rate | 6.18e-05 |
| loss | 0.749 |
| n_updates | 180 |
| policy_gradient_loss | -0.00958 |
| value_loss | 13.1 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.48e+03 |
| ep_rew_mean | -113 |
| time/ | |
| fps | 356 |
| iterations | 20 |
| time_elapsed | 126 |
| total_timesteps | 45100 |
| train/ | |
| approx_kl | 0.013520324 |
| clip_fraction | 0.13 |
| clip_range | 0.246 |
| entropy_loss | -8.04 |
| explained_variance | 0.017 |
| learning_rate | 6.18e-05 |
| loss | 0.554 |
| n_updates | 190 |
| policy_gradient_loss | -0.0125 |
| value_loss | 33 |
-----------------------------------------
---------------------------------------
| rollout/ | |
| ep_len_mean | 2.43e+03 |
| ep_rew_mean | -117 |
| time/ | |
| fps | 355 |
| iterations | 21 |
| time_elapsed | 133 |
| total_timesteps | 47355 |
| train/ | |
| approx_kl | 0.0185782 |
| clip_fraction | 0.156 |
| clip_range | 0.246 |
| entropy_loss | -8.02 |
| explained_variance | 0.255 |
| learning_rate | 6.18e-05 |
| loss | 0.618 |
| n_updates | 200 |
| policy_gradient_loss | -0.0175 |
| value_loss | 6.95 |
---------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.43e+03 |
| ep_rew_mean | -117 |
| time/ | |
| fps | 354 |
| iterations | 22 |
| time_elapsed | 139 |
| total_timesteps | 49610 |
| train/ | |
| approx_kl | 0.016632264 |
| clip_fraction | 0.131 |
| clip_range | 0.246 |
| entropy_loss | -8.02 |
| explained_variance | 0.0436 |
| learning_rate | 6.18e-05 |
| loss | 1.05 |
| n_updates | 210 |
| policy_gradient_loss | -0.0174 |
| value_loss | 11 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.43e+03 |
| ep_rew_mean | -117 |
| time/ | |
| fps | 354 |
| iterations | 23 |
| time_elapsed | 146 |
| total_timesteps | 51865 |
| train/ | |
| approx_kl | 0.019239776 |
| clip_fraction | 0.165 |
| clip_range | 0.246 |
| entropy_loss | -7.95 |
| explained_variance | 0.0435 |
| learning_rate | 6.18e-05 |
| loss | 1.27 |
| n_updates | 220 |
| policy_gradient_loss | -0.018 |
| value_loss | 6.75 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.68e+03 |
| ep_rew_mean | -116 |
| time/ | |
| fps | 353 |
| iterations | 24 |
| time_elapsed | 153 |
| total_timesteps | 54120 |
| train/ | |
| approx_kl | 0.022035323 |
| clip_fraction | 0.184 |
| clip_range | 0.246 |
| entropy_loss | -7.93 |
| explained_variance | 0.195 |
| learning_rate | 6.18e-05 |
| loss | 0.162 |
| n_updates | 230 |
| policy_gradient_loss | -0.0164 |
| value_loss | 4.23 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.67e+03 |
| ep_rew_mean | -113 |
| time/ | |
| fps | 352 |
| iterations | 25 |
| time_elapsed | 160 |
| total_timesteps | 56375 |
| train/ | |
| approx_kl | 0.016369374 |
| clip_fraction | 0.141 |
| clip_range | 0.246 |
| entropy_loss | -7.98 |
| explained_variance | 0.0345 |
| learning_rate | 6.18e-05 |
| loss | 0.162 |
| n_updates | 240 |
| policy_gradient_loss | -0.0173 |
| value_loss | 5.37 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.62e+03 |
| ep_rew_mean | -98.4 |
| time/ | |
| fps | 352 |
| iterations | 26 |
| time_elapsed | 166 |
| total_timesteps | 58630 |
| train/ | |
| approx_kl | 0.01639726 |
| clip_fraction | 0.13 |
| clip_range | 0.246 |
| entropy_loss | -7.96 |
| explained_variance | 0.136 |
| learning_rate | 6.18e-05 |
| loss | 0.725 |
| n_updates | 250 |
| policy_gradient_loss | -0.0164 |
| value_loss | 8.18 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.59e+03 |
| ep_rew_mean | -98.9 |
| time/ | |
| fps | 352 |
| iterations | 27 |
| time_elapsed | 172 |
| total_timesteps | 60885 |
| train/ | |
| approx_kl | 0.016931452 |
| clip_fraction | 0.124 |
| clip_range | 0.246 |
| entropy_loss | -7.97 |
| explained_variance | -0.0917 |
| learning_rate | 6.18e-05 |
| loss | 3.31 |
| n_updates | 260 |
| policy_gradient_loss | -0.00691 |
| value_loss | 22.9 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.55e+03 |
| ep_rew_mean | -108 |
| time/ | |
| fps | 353 |
| iterations | 28 |
| time_elapsed | 178 |
| total_timesteps | 63140 |
| train/ | |
| approx_kl | 0.020913824 |
| clip_fraction | 0.157 |
| clip_range | 0.246 |
| entropy_loss | -7.88 |
| explained_variance | 0.215 |
| learning_rate | 6.18e-05 |
| loss | 0.995 |
| n_updates | 270 |
| policy_gradient_loss | -0.0182 |
| value_loss | 7.35 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.55e+03 |
| ep_rew_mean | -103 |
| time/ | |
| fps | 353 |
| iterations | 29 |
| time_elapsed | 184 |
| total_timesteps | 65395 |
| train/ | |
| approx_kl | 0.021241019 |
| clip_fraction | 0.149 |
| clip_range | 0.246 |
| entropy_loss | -7.81 |
| explained_variance | -0.057 |
| learning_rate | 6.18e-05 |
| loss | 0.312 |
| n_updates | 280 |
| policy_gradient_loss | -0.0152 |
| value_loss | 32.1 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.57e+03 |
| ep_rew_mean | -101 |
| time/ | |
| fps | 353 |
| iterations | 30 |
| time_elapsed | 191 |
| total_timesteps | 67650 |
| train/ | |
| approx_kl | 0.022794545 |
| clip_fraction | 0.174 |
| clip_range | 0.246 |
| entropy_loss | -7.92 |
| explained_variance | 0.0735 |
| learning_rate | 6.18e-05 |
| loss | 1.95 |
| n_updates | 290 |
| policy_gradient_loss | -0.0239 |
| value_loss | 4.61 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.57e+03 |
| ep_rew_mean | -101 |
| time/ | |
| fps | 353 |
| iterations | 31 |
| time_elapsed | 197 |
| total_timesteps | 69905 |
| train/ | |
| approx_kl | 0.018762259 |
| clip_fraction | 0.164 |
| clip_range | 0.246 |
| entropy_loss | -7.94 |
| explained_variance | -0.0688 |
| learning_rate | 6.18e-05 |
| loss | 0.732 |
| n_updates | 300 |
| policy_gradient_loss | -0.0192 |
| value_loss | 4.37 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.59e+03 |
| ep_rew_mean | -108 |
| time/ | |
| fps | 353 |
| iterations | 32 |
| time_elapsed | 204 |
| total_timesteps | 72160 |
| train/ | |
| approx_kl | 0.018497027 |
| clip_fraction | 0.166 |
| clip_range | 0.246 |
| entropy_loss | -7.91 |
| explained_variance | 0.197 |
| learning_rate | 6.18e-05 |
| loss | 0.135 |
| n_updates | 310 |
| policy_gradient_loss | -0.0232 |
| value_loss | 3.52 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.56e+03 |
| ep_rew_mean | -113 |
| time/ | |
| fps | 353 |
| iterations | 33 |
| time_elapsed | 210 |
| total_timesteps | 74415 |
| train/ | |
| approx_kl | 0.023638394 |
| clip_fraction | 0.166 |
| clip_range | 0.246 |
| entropy_loss | -7.91 |
| explained_variance | -0.0457 |
| learning_rate | 6.18e-05 |
| loss | 0.139 |
| n_updates | 320 |
| policy_gradient_loss | -0.0173 |
| value_loss | 19.5 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.54e+03 |
| ep_rew_mean | -116 |
| time/ | |
| fps | 354 |
| iterations | 34 |
| time_elapsed | 216 |
| total_timesteps | 76670 |
| train/ | |
| approx_kl | 0.030456556 |
| clip_fraction | 0.245 |
| clip_range | 0.246 |
| entropy_loss | -7.84 |
| explained_variance | 0.0645 |
| learning_rate | 6.18e-05 |
| loss | 1.4 |
| n_updates | 330 |
| policy_gradient_loss | -0.0148 |
| value_loss | 20.3 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.53e+03 |
| ep_rew_mean | -120 |
| time/ | |
| fps | 354 |
| iterations | 35 |
| time_elapsed | 222 |
| total_timesteps | 78925 |
| train/ | |
| approx_kl | 0.023147207 |
| clip_fraction | 0.216 |
| clip_range | 0.246 |
| entropy_loss | -7.9 |
| explained_variance | 0.26 |
| learning_rate | 6.18e-05 |
| loss | 0.602 |
| n_updates | 340 |
| policy_gradient_loss | -0.011 |
| value_loss | 18.6 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.52e+03 |
| ep_rew_mean | -125 |
| time/ | |
| fps | 354 |
| iterations | 36 |
| time_elapsed | 228 |
| total_timesteps | 81180 |
| train/ | |
| approx_kl | 0.027237331 |
| clip_fraction | 0.201 |
| clip_range | 0.246 |
| entropy_loss | -7.88 |
| explained_variance | 0.313 |
| learning_rate | 6.18e-05 |
| loss | 0.175 |
| n_updates | 350 |
| policy_gradient_loss | -0.0156 |
| value_loss | 16.3 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.52e+03 |
| ep_rew_mean | -125 |
| time/ | |
| fps | 354 |
| iterations | 37 |
| time_elapsed | 235 |
| total_timesteps | 83435 |
| train/ | |
| approx_kl | 0.025215741 |
| clip_fraction | 0.196 |
| clip_range | 0.246 |
| entropy_loss | -7.85 |
| explained_variance | -0.0349 |
| learning_rate | 6.18e-05 |
| loss | 328 |
| n_updates | 360 |
| policy_gradient_loss | -0.0175 |
| value_loss | 30.2 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.54e+03 |
| ep_rew_mean | -122 |
| time/ | |
| fps | 354 |
| iterations | 38 |
| time_elapsed | 241 |
| total_timesteps | 85690 |
| train/ | |
| approx_kl | 0.023207983 |
| clip_fraction | 0.192 |
| clip_range | 0.246 |
| entropy_loss | -7.88 |
| explained_variance | 0.167 |
| learning_rate | 6.18e-05 |
| loss | 0.145 |
| n_updates | 370 |
| policy_gradient_loss | -0.0241 |
| value_loss | 4.89 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.53e+03 |
| ep_rew_mean | -124 |
| time/ | |
| fps | 354 |
| iterations | 39 |
| time_elapsed | 247 |
| total_timesteps | 87945 |
| train/ | |
| approx_kl | 0.023880122 |
| clip_fraction | 0.22 |
| clip_range | 0.246 |
| entropy_loss | -7.9 |
| explained_variance | 0.033 |
| learning_rate | 6.18e-05 |
| loss | 0.34 |
| n_updates | 380 |
| policy_gradient_loss | -0.0178 |
| value_loss | 4.61 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.53e+03 |
| ep_rew_mean | -127 |
| time/ | |
| fps | 354 |
| iterations | 40 |
| time_elapsed | 254 |
| total_timesteps | 90200 |
| train/ | |
| approx_kl | 0.033778906 |
| clip_fraction | 0.203 |
| clip_range | 0.246 |
| entropy_loss | -7.82 |
| explained_variance | 0.0941 |
| learning_rate | 6.18e-05 |
| loss | 1.27 |
| n_updates | 390 |
| policy_gradient_loss | -0.0201 |
| value_loss | 10.1 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.53e+03 |
| ep_rew_mean | -124 |
| time/ | |
| fps | 354 |
| iterations | 41 |
| time_elapsed | 260 |
| total_timesteps | 92455 |
| train/ | |
| approx_kl | 0.024718465 |
| clip_fraction | 0.213 |
| clip_range | 0.246 |
| entropy_loss | -7.89 |
| explained_variance | 0.163 |
| learning_rate | 6.18e-05 |
| loss | 1.01 |
| n_updates | 400 |
| policy_gradient_loss | -0.0143 |
| value_loss | 19 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.49e+03 |
| ep_rew_mean | -119 |
| time/ | |
| fps | 354 |
| iterations | 42 |
| time_elapsed | 267 |
| total_timesteps | 94710 |
| train/ | |
| approx_kl | 0.022747982 |
| clip_fraction | 0.191 |
| clip_range | 0.246 |
| entropy_loss | -7.92 |
| explained_variance | 0.308 |
| learning_rate | 6.18e-05 |
| loss | 10.6 |
| n_updates | 410 |
| policy_gradient_loss | -0.0181 |
| value_loss | 7.14 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.49e+03 |
| ep_rew_mean | -119 |
| time/ | |
| fps | 354 |
| iterations | 43 |
| time_elapsed | 273 |
| total_timesteps | 96965 |
| train/ | |
| approx_kl | 0.02743027 |
| clip_fraction | 0.228 |
| clip_range | 0.246 |
| entropy_loss | -7.86 |
| explained_variance | 0.0156 |
| learning_rate | 6.18e-05 |
| loss | 1.72 |
| n_updates | 420 |
| policy_gradient_loss | -0.0158 |
| value_loss | 20.6 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.5e+03 |
| ep_rew_mean | -116 |
| time/ | |
| fps | 353 |
| iterations | 44 |
| time_elapsed | 280 |
| total_timesteps | 99220 |
| train/ | |
| approx_kl | 0.028826194 |
| clip_fraction | 0.233 |
| clip_range | 0.246 |
| entropy_loss | -7.89 |
| explained_variance | 0.253 |
| learning_rate | 6.18e-05 |
| loss | 0.285 |
| n_updates | 430 |
| policy_gradient_loss | -0.0227 |
| value_loss | 5.22 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.47e+03 |
| ep_rew_mean | -117 |
| time/ | |
| fps | 353 |
| iterations | 45 |
| time_elapsed | 286 |
| total_timesteps | 101475 |
| train/ | |
| approx_kl | 0.025009144 |
| clip_fraction | 0.21 |
| clip_range | 0.246 |
| entropy_loss | -7.89 |
| explained_variance | 0.248 |
| learning_rate | 6.18e-05 |
| loss | 7.05 |
| n_updates | 440 |
| policy_gradient_loss | -0.0173 |
| value_loss | 10.1 |
-----------------------------------------
[I 2023-03-30 22:07:30,694] Trial 4 finished with value: -332.0 and parameters: {'n_steps': 2255, 'gamma': 0.9347334629907355, 'learning_rate': 6.175910217498569e-05, 'clip_range': 0.2461965528470431, 'gae_lambda': 0.8597964121436965}. Best is trial 1 with value: -205.0.
Using cuda device
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.
C:\ProgramData\Anaconda3\envs\StreetFighterAI\lib\site-packages\stable_baselines3\ppo\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 7730`, after every 120 untruncated mini-batches, there will be a truncated mini-batch of size 50
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=7730 and n_envs=1)
warnings.warn(
Logging to logs/PPO_23
---------------------------------
| rollout/ | |
| ep_len_mean | 2.49e+03 |
| ep_rew_mean | -20 |
| time/ | |
| fps | 537 |
| iterations | 1 |
| time_elapsed | 14 |
| total_timesteps | 7730 |
---------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.54e+03 |
| ep_rew_mean | -19.2 |
| time/ | |
| fps | 423 |
| iterations | 2 |
| time_elapsed | 36 |
| total_timesteps | 15460 |
| train/ | |
| approx_kl | 0.04281639 |
| clip_fraction | 0.204 |
| clip_range | 0.299 |
| entropy_loss | -8.29 |
| explained_variance | 0.0017 |
| learning_rate | 9.55e-05 |
| loss | 1.29 |
| n_updates | 10 |
| policy_gradient_loss | -0.00636 |
| value_loss | 25.2 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.32e+03 |
| ep_rew_mean | -55.8 |
| time/ | |
| fps | 393 |
| iterations | 3 |
| time_elapsed | 58 |
| total_timesteps | 23190 |
| train/ | |
| approx_kl | 0.029096674 |
| clip_fraction | 0.136 |
| clip_range | 0.299 |
| entropy_loss | -8.28 |
| explained_variance | -0.0552 |
| learning_rate | 9.55e-05 |
| loss | 1.15 |
| n_updates | 20 |
| policy_gradient_loss | -0.00801 |
| value_loss | 29.6 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.41e+03 |
| ep_rew_mean | -61 |
| time/ | |
| fps | 370 |
| iterations | 4 |
| time_elapsed | 83 |
| total_timesteps | 30920 |
| train/ | |
| approx_kl | 0.041381396 |
| clip_fraction | 0.168 |
| clip_range | 0.299 |
| entropy_loss | -8.25 |
| explained_variance | 0.206 |
| learning_rate | 9.55e-05 |
| loss | 2.05 |
| n_updates | 30 |
| policy_gradient_loss | -0.0118 |
| value_loss | 28.6 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.38e+03 |
| ep_rew_mean | -101 |
| time/ | |
| fps | 365 |
| iterations | 5 |
| time_elapsed | 105 |
| total_timesteps | 38650 |
| train/ | |
| approx_kl | 0.03648014 |
| clip_fraction | 0.194 |
| clip_range | 0.299 |
| entropy_loss | -8.25 |
| explained_variance | 0.214 |
| learning_rate | 9.55e-05 |
| loss | 4.98 |
| n_updates | 40 |
| policy_gradient_loss | -0.0153 |
| value_loss | 11.3 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.36e+03 |
| ep_rew_mean | -111 |
| time/ | |
| fps | 360 |
| iterations | 6 |
| time_elapsed | 128 |
| total_timesteps | 46380 |
| train/ | |
| approx_kl | 0.048289824 |
| clip_fraction | 0.203 |
| clip_range | 0.299 |
| entropy_loss | -8.22 |
| explained_variance | 0.161 |
| learning_rate | 9.55e-05 |
| loss | 2.33 |
| n_updates | 50 |
| policy_gradient_loss | -0.0181 |
| value_loss | 27.4 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.37e+03 |
| ep_rew_mean | -102 |
| time/ | |
| fps | 357 |
| iterations | 7 |
| time_elapsed | 151 |
| total_timesteps | 54110 |
| train/ | |
| approx_kl | 0.050513566 |
| clip_fraction | 0.256 |
| clip_range | 0.299 |
| entropy_loss | -8.18 |
| explained_variance | 0.0793 |
| learning_rate | 9.55e-05 |
| loss | 36.3 |
| n_updates | 60 |
| policy_gradient_loss | -0.0118 |
| value_loss | 18.6 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.37e+03 |
| ep_rew_mean | -95.8 |
| time/ | |
| fps | 353 |
| iterations | 8 |
| time_elapsed | 174 |
| total_timesteps | 61840 |
| train/ | |
| approx_kl | 0.052512296 |
| clip_fraction | 0.277 |
| clip_range | 0.299 |
| entropy_loss | -8.11 |
| explained_variance | 0.219 |
| learning_rate | 9.55e-05 |
| loss | 2.76 |
| n_updates | 70 |
| policy_gradient_loss | -0.0174 |
| value_loss | 11.8 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.39e+03 |
| ep_rew_mean | -112 |
| time/ | |
| fps | 350 |
| iterations | 9 |
| time_elapsed | 198 |
| total_timesteps | 69570 |
| train/ | |
| approx_kl | 0.04943707 |
| clip_fraction | 0.256 |
| clip_range | 0.299 |
| entropy_loss | -8.08 |
| explained_variance | 0.254 |
| learning_rate | 9.55e-05 |
| loss | 8.24 |
| n_updates | 80 |
| policy_gradient_loss | -0.0148 |
| value_loss | 17 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.4e+03 |
| ep_rew_mean | -115 |
| time/ | |
| fps | 349 |
| iterations | 10 |
| time_elapsed | 220 |
| total_timesteps | 77300 |
| train/ | |
| approx_kl | 0.07023027 |
| clip_fraction | 0.297 |
| clip_range | 0.299 |
| entropy_loss | -7.99 |
| explained_variance | 0.245 |
| learning_rate | 9.55e-05 |
| loss | 1.51 |
| n_updates | 90 |
| policy_gradient_loss | -0.0157 |
| value_loss | 16.3 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.38e+03 |
| ep_rew_mean | -116 |
| time/ | |
| fps | 348 |
| iterations | 11 |
| time_elapsed | 243 |
| total_timesteps | 85030 |
| train/ | |
| approx_kl | 0.06494863 |
| clip_fraction | 0.324 |
| clip_range | 0.299 |
| entropy_loss | -7.97 |
| explained_variance | 0.308 |
| learning_rate | 9.55e-05 |
| loss | 9.16 |
| n_updates | 100 |
| policy_gradient_loss | -0.0122 |
| value_loss | 16.3 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.41e+03 |
| ep_rew_mean | -126 |
| time/ | |
| fps | 347 |
| iterations | 12 |
| time_elapsed | 266 |
| total_timesteps | 92760 |
| train/ | |
| approx_kl | 0.07837609 |
| clip_fraction | 0.344 |
| clip_range | 0.299 |
| entropy_loss | -7.92 |
| explained_variance | 0.267 |
| learning_rate | 9.55e-05 |
| loss | 2.41 |
| n_updates | 110 |
| policy_gradient_loss | -0.0101 |
| value_loss | 15.8 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.33e+03 |
| ep_rew_mean | -118 |
| time/ | |
| fps | 347 |
| iterations | 13 |
| time_elapsed | 289 |
| total_timesteps | 100490 |
| train/ | |
| approx_kl | 0.078203924 |
| clip_fraction | 0.348 |
| clip_range | 0.299 |
| entropy_loss | -7.78 |
| explained_variance | 0.254 |
| learning_rate | 9.55e-05 |
| loss | 3.22 |
| n_updates | 120 |
| policy_gradient_loss | -0.00835 |
| value_loss | 31 |
-----------------------------------------
[I 2023-03-30 22:13:04,197] Trial 5 finished with value: -314.0 and parameters: {'n_steps': 7730, 'gamma': 0.984052776116379, 'learning_rate': 9.545314010671991e-05, 'clip_range': 0.29860092606270394, 'gae_lambda': 0.9241326779349373}. Best is trial 1 with value: -205.0.
Using cuda device
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.
C:\ProgramData\Anaconda3\envs\StreetFighterAI\lib\site-packages\stable_baselines3\ppo\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 4033`, after every 63 untruncated mini-batches, there will be a truncated mini-batch of size 1
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=4033 and n_envs=1)
warnings.warn(
Logging to logs/PPO_24
---------------------------------
| rollout/ | |
| ep_len_mean | 1.84e+03 |
| ep_rew_mean | -128 |
| time/ | |
| fps | 596 |
| iterations | 1 |
| time_elapsed | 6 |
| total_timesteps | 4033 |
---------------------------------
C:\ProgramData\Anaconda3\envs\StreetFighterAI\lib\site-packages\stable_baselines3\ppo\ppo.py:261: FutureWarning: Non-finite norm encountered in torch.nn.utils.clip_grad_norm_; continuing anyway. Note that the default behavior will change in a future release to error out if a non-finite total norm is encountered. At that point, setting error_if_nonfinite=false will be required to retain the old behavior.
th.nn.utils.clip_grad_norm_(self.policy.parameters(), self.max_grad_norm)
[I 2023-03-30 22:13:11,640] Trial 6 finished with value: -1.0 and parameters: {'n_steps': 4033, 'gamma': 0.9021075358644198, 'learning_rate': 9.226619222512819e-05, 'clip_range': 0.2947845259713344, 'gae_lambda': 0.9634981775867985}. Best is trial 6 with value: -1.0.
Using cuda device
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.
C:\ProgramData\Anaconda3\envs\StreetFighterAI\lib\site-packages\stable_baselines3\ppo\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 1654`, after every 25 untruncated mini-batches, there will be a truncated mini-batch of size 54
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1654 and n_envs=1)
warnings.warn(
Logging to logs/PPO_25
-----------------------------
| time/ | |
| fps | 666 |
| iterations | 1 |
| time_elapsed | 2 |
| total_timesteps | 1654 |
-----------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.24e+03 |
| ep_rew_mean | -183 |
| time/ | |
| fps | 465 |
| iterations | 2 |
| time_elapsed | 7 |
| total_timesteps | 3308 |
| train/ | |
| approx_kl | 0.011272669 |
| clip_fraction | 0.18 |
| clip_range | 0.228 |
| entropy_loss | -8.3 |
| explained_variance | 0.00627 |
| learning_rate | 9.79e-05 |
| loss | 0.815 |
| n_updates | 10 |
| policy_gradient_loss | -0.0141 |
| value_loss | 6.8 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.21e+03 |
| ep_rew_mean | -194 |
| time/ | |
| fps | 425 |
| iterations | 3 |
| time_elapsed | 11 |
| total_timesteps | 4962 |
| train/ | |
| approx_kl | 0.013628463 |
| clip_fraction | 0.114 |
| clip_range | 0.228 |
| entropy_loss | -8.29 |
| explained_variance | 0.0365 |
| learning_rate | 9.79e-05 |
| loss | 40.8 |
| n_updates | 20 |
| policy_gradient_loss | -0.0135 |
| value_loss | 20.9 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.14e+03 |
| ep_rew_mean | -218 |
| time/ | |
| fps | 401 |
| iterations | 4 |
| time_elapsed | 16 |
| total_timesteps | 6616 |
| train/ | |
| approx_kl | 0.023692455 |
| clip_fraction | 0.238 |
| clip_range | 0.228 |
| entropy_loss | -8.26 |
| explained_variance | 0.251 |
| learning_rate | 9.79e-05 |
| loss | 0.977 |
| n_updates | 30 |
| policy_gradient_loss | -0.0152 |
| value_loss | 17.7 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.14e+03 |
| ep_rew_mean | -218 |
| time/ | |
| fps | 389 |
| iterations | 5 |
| time_elapsed | 21 |
| total_timesteps | 8270 |
| train/ | |
| approx_kl | 0.02734942 |
| clip_fraction | 0.272 |
| clip_range | 0.228 |
| entropy_loss | -8.25 |
| explained_variance | -0.035 |
| learning_rate | 9.79e-05 |
| loss | 0.673 |
| n_updates | 40 |
| policy_gradient_loss | 0.00113 |
| value_loss | 33.6 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.29e+03 |
| ep_rew_mean | -176 |
| time/ | |
| fps | 382 |
| iterations | 6 |
| time_elapsed | 25 |
| total_timesteps | 9924 |
| train/ | |
| approx_kl | 0.01778004 |
| clip_fraction | 0.247 |
| clip_range | 0.228 |
| entropy_loss | -8.26 |
| explained_variance | 0.369 |
| learning_rate | 9.79e-05 |
| loss | 1.41 |
| n_updates | 50 |
| policy_gradient_loss | -0.0143 |
| value_loss | 4.89 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.29e+03 |
| ep_rew_mean | -176 |
| time/ | |
| fps | 375 |
| iterations | 7 |
| time_elapsed | 30 |
| total_timesteps | 11578 |
| train/ | |
| approx_kl | 0.016047975 |
| clip_fraction | 0.174 |
| clip_range | 0.228 |
| entropy_loss | -8.25 |
| explained_variance | 0.164 |
| learning_rate | 9.79e-05 |
| loss | 1.49 |
| n_updates | 60 |
| policy_gradient_loss | -0.0128 |
| value_loss | 7.98 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.57e+03 |
| ep_rew_mean | -148 |
| time/ | |
| fps | 363 |
| iterations | 8 |
| time_elapsed | 36 |
| total_timesteps | 13232 |
| train/ | |
| approx_kl | 0.016572453 |
| clip_fraction | 0.174 |
| clip_range | 0.228 |
| entropy_loss | -8.25 |
| explained_variance | -0.193 |
| learning_rate | 9.79e-05 |
| loss | 0.18 |
| n_updates | 70 |
| policy_gradient_loss | -0.0197 |
| value_loss | 3.7 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.42e+03 |
| ep_rew_mean | -152 |
| time/ | |
| fps | 354 |
| iterations | 9 |
| time_elapsed | 42 |
| total_timesteps | 14886 |
| train/ | |
| approx_kl | 0.018261585 |
| clip_fraction | 0.163 |
| clip_range | 0.228 |
| entropy_loss | -8.23 |
| explained_variance | 0.0612 |
| learning_rate | 9.79e-05 |
| loss | 1.23 |
| n_updates | 80 |
| policy_gradient_loss | -0.017 |
| value_loss | 8.05 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.42e+03 |
| ep_rew_mean | -152 |
| time/ | |
| fps | 347 |
| iterations | 10 |
| time_elapsed | 47 |
| total_timesteps | 16540 |
| train/ | |
| approx_kl | 0.025278179 |
| clip_fraction | 0.233 |
| clip_range | 0.228 |
| entropy_loss | -8.19 |
| explained_variance | 0.00951 |
| learning_rate | 9.79e-05 |
| loss | 1.36 |
| n_updates | 90 |
| policy_gradient_loss | -0.0161 |
| value_loss | 14.8 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.53e+03 |
| ep_rew_mean | -141 |
| time/ | |
| fps | 342 |
| iterations | 11 |
| time_elapsed | 53 |
| total_timesteps | 18194 |
| train/ | |
| approx_kl | 0.019890858 |
| clip_fraction | 0.243 |
| clip_range | 0.228 |
| entropy_loss | -8.16 |
| explained_variance | 0.32 |
| learning_rate | 9.79e-05 |
| loss | 1.63 |
| n_updates | 100 |
| policy_gradient_loss | -0.0133 |
| value_loss | 5.37 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.39e+03 |
| ep_rew_mean | -163 |
| time/ | |
| fps | 339 |
| iterations | 12 |
| time_elapsed | 58 |
| total_timesteps | 19848 |
| train/ | |
| approx_kl | 0.027444609 |
| clip_fraction | 0.273 |
| clip_range | 0.228 |
| entropy_loss | -8.1 |
| explained_variance | 0.173 |
| learning_rate | 9.79e-05 |
| loss | 1.03 |
| n_updates | 110 |
| policy_gradient_loss | -0.0161 |
| value_loss | 10.9 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.39e+03 |
| ep_rew_mean | -163 |
| time/ | |
| fps | 336 |
| iterations | 13 |
| time_elapsed | 63 |
| total_timesteps | 21502 |
| train/ | |
| approx_kl | 0.029858373 |
| clip_fraction | 0.226 |
| clip_range | 0.228 |
| entropy_loss | -8.15 |
| explained_variance | 0.124 |
| learning_rate | 9.79e-05 |
| loss | 87.4 |
| n_updates | 120 |
| policy_gradient_loss | -0.0143 |
| value_loss | 37.1 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.4e+03 |
| ep_rew_mean | -168 |
| time/ | |
| fps | 333 |
| iterations | 14 |
| time_elapsed | 69 |
| total_timesteps | 23156 |
| train/ | |
| approx_kl | 0.02500601 |
| clip_fraction | 0.272 |
| clip_range | 0.228 |
| entropy_loss | -8.14 |
| explained_variance | 0.227 |
| learning_rate | 9.79e-05 |
| loss | 1.26 |
| n_updates | 130 |
| policy_gradient_loss | -0.0173 |
| value_loss | 6.66 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.47e+03 |
| ep_rew_mean | -161 |
| time/ | |
| fps | 330 |
| iterations | 15 |
| time_elapsed | 75 |
| total_timesteps | 24810 |
| train/ | |
| approx_kl | 0.025755124 |
| clip_fraction | 0.234 |
| clip_range | 0.228 |
| entropy_loss | -8.14 |
| explained_variance | 0.238 |
| learning_rate | 9.79e-05 |
| loss | 1.24 |
| n_updates | 140 |
| policy_gradient_loss | -0.016 |
| value_loss | 13.3 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.47e+03 |
| ep_rew_mean | -161 |
| time/ | |
| fps | 328 |
| iterations | 16 |
| time_elapsed | 80 |
| total_timesteps | 26464 |
| train/ | |
| approx_kl | 0.021200689 |
| clip_fraction | 0.234 |
| clip_range | 0.228 |
| entropy_loss | -8.07 |
| explained_variance | 0.176 |
| learning_rate | 9.79e-05 |
| loss | 0.559 |
| n_updates | 150 |
| policy_gradient_loss | -0.0132 |
| value_loss | 9.71 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.47e+03 |
| ep_rew_mean | -161 |
| time/ | |
| fps | 326 |
| iterations | 17 |
| time_elapsed | 86 |
| total_timesteps | 28118 |
| train/ | |
| approx_kl | 0.02932891 |
| clip_fraction | 0.27 |
| clip_range | 0.228 |
| entropy_loss | -8.08 |
| explained_variance | 0.302 |
| learning_rate | 9.79e-05 |
| loss | 2.16 |
| n_updates | 160 |
| policy_gradient_loss | -0.0225 |
| value_loss | 4.34 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.47e+03 |
| ep_rew_mean | -161 |
| time/ | |
| fps | 324 |
| iterations | 18 |
| time_elapsed | 91 |
| total_timesteps | 29772 |
| train/ | |
| approx_kl | 0.016725304 |
| clip_fraction | 0.212 |
| clip_range | 0.228 |
| entropy_loss | -8.06 |
| explained_variance | 0.0835 |
| learning_rate | 9.79e-05 |
| loss | 0.398 |
| n_updates | 170 |
| policy_gradient_loss | -0.0136 |
| value_loss | 5.97 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.84e+03 |
| ep_rew_mean | -164 |
| time/ | |
| fps | 322 |
| iterations | 19 |
| time_elapsed | 97 |
| total_timesteps | 31426 |
| train/ | |
| approx_kl | 0.020989887 |
| clip_fraction | 0.216 |
| clip_range | 0.228 |
| entropy_loss | -8.08 |
| explained_variance | 0.364 |
| learning_rate | 9.79e-05 |
| loss | 0.103 |
| n_updates | 180 |
| policy_gradient_loss | -0.0259 |
| value_loss | 1.02 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.84e+03 |
| ep_rew_mean | -164 |
| time/ | |
| fps | 321 |
| iterations | 20 |
| time_elapsed | 102 |
| total_timesteps | 33080 |
| train/ | |
| approx_kl | 0.02685814 |
| clip_fraction | 0.201 |
| clip_range | 0.228 |
| entropy_loss | -8.03 |
| explained_variance | 0.114 |
| learning_rate | 9.79e-05 |
| loss | 1.95 |
| n_updates | 190 |
| policy_gradient_loss | -0.0192 |
| value_loss | 16.3 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.84e+03 |
| ep_rew_mean | -164 |
| time/ | |
| fps | 320 |
| iterations | 21 |
| time_elapsed | 108 |
| total_timesteps | 34734 |
| train/ | |
| approx_kl | 0.02518316 |
| clip_fraction | 0.22 |
| clip_range | 0.228 |
| entropy_loss | -8.03 |
| explained_variance | 0.293 |
| learning_rate | 9.79e-05 |
| loss | 1.66 |
| n_updates | 200 |
| policy_gradient_loss | -0.0237 |
| value_loss | 6.07 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.92e+03 |
| ep_rew_mean | -148 |
| time/ | |
| fps | 319 |
| iterations | 22 |
| time_elapsed | 114 |
| total_timesteps | 36388 |
| train/ | |
| approx_kl | 0.020095803 |
| clip_fraction | 0.191 |
| clip_range | 0.228 |
| entropy_loss | -8.04 |
| explained_variance | 0.242 |
| learning_rate | 9.79e-05 |
| loss | 0.859 |
| n_updates | 210 |
| policy_gradient_loss | -0.0166 |
| value_loss | 5.24 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.87e+03 |
| ep_rew_mean | -164 |
| time/ | |
| fps | 317 |
| iterations | 23 |
| time_elapsed | 119 |
| total_timesteps | 38042 |
| train/ | |
| approx_kl | 0.023355601 |
| clip_fraction | 0.246 |
| clip_range | 0.228 |
| entropy_loss | -7.97 |
| explained_variance | 0.0157 |
| learning_rate | 9.79e-05 |
| loss | 0.642 |
| n_updates | 220 |
| policy_gradient_loss | -0.0248 |
| value_loss | 4.15 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.87e+03 |
| ep_rew_mean | -164 |
| time/ | |
| fps | 317 |
| iterations | 24 |
| time_elapsed | 125 |
| total_timesteps | 39696 |
| train/ | |
| approx_kl | 0.034935288 |
| clip_fraction | 0.267 |
| clip_range | 0.228 |
| entropy_loss | -7.95 |
| explained_variance | 0.0452 |
| learning_rate | 9.79e-05 |
| loss | 201 |
| n_updates | 230 |
| policy_gradient_loss | -0.0162 |
| value_loss | 46.4 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.92e+03 |
| ep_rew_mean | -152 |
| time/ | |
| fps | 316 |
| iterations | 25 |
| time_elapsed | 130 |
| total_timesteps | 41350 |
| train/ | |
| approx_kl | 0.025743902 |
| clip_fraction | 0.265 |
| clip_range | 0.228 |
| entropy_loss | -7.94 |
| explained_variance | 0.214 |
| learning_rate | 9.79e-05 |
| loss | 1.29 |
| n_updates | 240 |
| policy_gradient_loss | -0.0199 |
| value_loss | 7.38 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.86e+03 |
| ep_rew_mean | -162 |
| time/ | |
| fps | 315 |
| iterations | 26 |
| time_elapsed | 136 |
| total_timesteps | 43004 |
| train/ | |
| approx_kl | 0.03637928 |
| clip_fraction | 0.312 |
| clip_range | 0.228 |
| entropy_loss | -7.93 |
| explained_variance | 0.491 |
| learning_rate | 9.79e-05 |
| loss | 0.508 |
| n_updates | 250 |
| policy_gradient_loss | -0.0222 |
| value_loss | 2.89 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.86e+03 |
| ep_rew_mean | -162 |
| time/ | |
| fps | 315 |
| iterations | 27 |
| time_elapsed | 141 |
| total_timesteps | 44658 |
| train/ | |
| approx_kl | 0.056980833 |
| clip_fraction | 0.394 |
| clip_range | 0.228 |
| entropy_loss | -7.87 |
| explained_variance | 0.109 |
| learning_rate | 9.79e-05 |
| loss | 1.26 |
| n_updates | 260 |
| policy_gradient_loss | -0.00769 |
| value_loss | 19.5 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.86e+03 |
| ep_rew_mean | -156 |
| time/ | |
| fps | 316 |
| iterations | 28 |
| time_elapsed | 146 |
| total_timesteps | 46312 |
| train/ | |
| approx_kl | 0.03702618 |
| clip_fraction | 0.365 |
| clip_range | 0.228 |
| entropy_loss | -7.85 |
| explained_variance | 0.291 |
| learning_rate | 9.79e-05 |
| loss | 0.537 |
| n_updates | 270 |
| policy_gradient_loss | -0.0122 |
| value_loss | 4.75 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.86e+03 |
| ep_rew_mean | -156 |
| time/ | |
| fps | 318 |
| iterations | 29 |
| time_elapsed | 150 |
| total_timesteps | 47966 |
| train/ | |
| approx_kl | 0.033755746 |
| clip_fraction | 0.299 |
| clip_range | 0.228 |
| entropy_loss | -7.77 |
| explained_variance | 0.31 |
| learning_rate | 9.79e-05 |
| loss | 7.27 |
| n_updates | 280 |
| policy_gradient_loss | -0.016 |
| value_loss | 7.63 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.83e+03 |
| ep_rew_mean | -159 |
| time/ | |
| fps | 319 |
| iterations | 30 |
| time_elapsed | 155 |
| total_timesteps | 49620 |
| train/ | |
| approx_kl | 0.03693611 |
| clip_fraction | 0.286 |
| clip_range | 0.228 |
| entropy_loss | -7.81 |
| explained_variance | 0.343 |
| learning_rate | 9.79e-05 |
| loss | 0.828 |
| n_updates | 290 |
| policy_gradient_loss | -0.0217 |
| value_loss | 4.62 |
----------------------------------------
---------------------------------------
| rollout/ | |
| ep_len_mean | 2.81e+03 |
| ep_rew_mean | -150 |
| time/ | |
| fps | 319 |
| iterations | 31 |
| time_elapsed | 160 |
| total_timesteps | 51274 |
| train/ | |
| approx_kl | 0.0407202 |
| clip_fraction | 0.331 |
| clip_range | 0.228 |
| entropy_loss | -7.73 |
| explained_variance | 0.164 |
| learning_rate | 9.79e-05 |
| loss | 19.9 |
| n_updates | 300 |
| policy_gradient_loss | -0.00744 |
| value_loss | 25.4 |
---------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.81e+03 |
| ep_rew_mean | -150 |
| time/ | |
| fps | 321 |
| iterations | 32 |
| time_elapsed | 164 |
| total_timesteps | 52928 |
| train/ | |
| approx_kl | 0.049017448 |
| clip_fraction | 0.356 |
| clip_range | 0.228 |
| entropy_loss | -7.8 |
| explained_variance | -0.0167 |
| learning_rate | 9.79e-05 |
| loss | 0.897 |
| n_updates | 310 |
| policy_gradient_loss | -0.0214 |
| value_loss | 4.18 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.8e+03 |
| ep_rew_mean | -156 |
| time/ | |
| fps | 322 |
| iterations | 33 |
| time_elapsed | 169 |
| total_timesteps | 54582 |
| train/ | |
| approx_kl | 0.035868283 |
| clip_fraction | 0.323 |
| clip_range | 0.228 |
| entropy_loss | -7.79 |
| explained_variance | 0.177 |
| learning_rate | 9.79e-05 |
| loss | 0.298 |
| n_updates | 320 |
| policy_gradient_loss | -0.0128 |
| value_loss | 2.9 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.76e+03 |
| ep_rew_mean | -161 |
| time/ | |
| fps | 322 |
| iterations | 34 |
| time_elapsed | 174 |
| total_timesteps | 56236 |
| train/ | |
| approx_kl | 0.040655132 |
| clip_fraction | 0.321 |
| clip_range | 0.228 |
| entropy_loss | -7.6 |
| explained_variance | 0.0977 |
| learning_rate | 9.79e-05 |
| loss | 3.28 |
| n_updates | 330 |
| policy_gradient_loss | -0.0114 |
| value_loss | 29.4 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.73e+03 |
| ep_rew_mean | -162 |
| time/ | |
| fps | 323 |
| iterations | 35 |
| time_elapsed | 178 |
| total_timesteps | 57890 |
| train/ | |
| approx_kl | 0.049799267 |
| clip_fraction | 0.364 |
| clip_range | 0.228 |
| entropy_loss | -7.61 |
| explained_variance | 0.0564 |
| learning_rate | 9.79e-05 |
| loss | 0.473 |
| n_updates | 340 |
| policy_gradient_loss | -0.0171 |
| value_loss | 12.4 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.65e+03 |
| ep_rew_mean | -141 |
| time/ | |
| fps | 323 |
| iterations | 36 |
| time_elapsed | 183 |
| total_timesteps | 59544 |
| train/ | |
| approx_kl | 0.051947072 |
| clip_fraction | 0.352 |
| clip_range | 0.228 |
| entropy_loss | -7.51 |
| explained_variance | 0.194 |
| learning_rate | 9.79e-05 |
| loss | 2.48 |
| n_updates | 350 |
| policy_gradient_loss | -0.0138 |
| value_loss | 17.2 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.65e+03 |
| ep_rew_mean | -141 |
| time/ | |
| fps | 324 |
| iterations | 37 |
| time_elapsed | 188 |
| total_timesteps | 61198 |
| train/ | |
| approx_kl | 0.04901576 |
| clip_fraction | 0.4 |
| clip_range | 0.228 |
| entropy_loss | -7.55 |
| explained_variance | -0.0663 |
| learning_rate | 9.79e-05 |
| loss | 7.66 |
| n_updates | 360 |
| policy_gradient_loss | -0.00211 |
| value_loss | 28.4 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.67e+03 |
| ep_rew_mean | -132 |
| time/ | |
| fps | 326 |
| iterations | 38 |
| time_elapsed | 192 |
| total_timesteps | 62852 |
| train/ | |
| approx_kl | 0.04695523 |
| clip_fraction | 0.388 |
| clip_range | 0.228 |
| entropy_loss | -7.6 |
| explained_variance | 0.29 |
| learning_rate | 9.79e-05 |
| loss | 0.643 |
| n_updates | 370 |
| policy_gradient_loss | -0.0192 |
| value_loss | 4.24 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.63e+03 |
| ep_rew_mean | -141 |
| time/ | |
| fps | 326 |
| iterations | 39 |
| time_elapsed | 197 |
| total_timesteps | 64506 |
| train/ | |
| approx_kl | 0.046350323 |
| clip_fraction | 0.355 |
| clip_range | 0.228 |
| entropy_loss | -7.45 |
| explained_variance | 0.121 |
| learning_rate | 9.79e-05 |
| loss | 2.98 |
| n_updates | 380 |
| policy_gradient_loss | -0.0267 |
| value_loss | 5.78 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.63e+03 |
| ep_rew_mean | -145 |
| time/ | |
| fps | 326 |
| iterations | 40 |
| time_elapsed | 202 |
| total_timesteps | 66160 |
| train/ | |
| approx_kl | 0.042303674 |
| clip_fraction | 0.365 |
| clip_range | 0.228 |
| entropy_loss | -7.55 |
| explained_variance | 0.0295 |
| learning_rate | 9.79e-05 |
| loss | 2.42 |
| n_updates | 390 |
| policy_gradient_loss | -0.0111 |
| value_loss | 49.1 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.63e+03 |
| ep_rew_mean | -145 |
| time/ | |
| fps | 327 |
| iterations | 41 |
| time_elapsed | 207 |
| total_timesteps | 67814 |
| train/ | |
| approx_kl | 0.06833778 |
| clip_fraction | 0.442 |
| clip_range | 0.228 |
| entropy_loss | -7.26 |
| explained_variance | 0.0526 |
| learning_rate | 9.79e-05 |
| loss | 1.06 |
| n_updates | 400 |
| policy_gradient_loss | -0.00992 |
| value_loss | 13.1 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.63e+03 |
| ep_rew_mean | -148 |
| time/ | |
| fps | 327 |
| iterations | 42 |
| time_elapsed | 212 |
| total_timesteps | 69468 |
| train/ | |
| approx_kl | 0.051798023 |
| clip_fraction | 0.393 |
| clip_range | 0.228 |
| entropy_loss | -7.56 |
| explained_variance | 0.304 |
| learning_rate | 9.79e-05 |
| loss | 0.899 |
| n_updates | 410 |
| policy_gradient_loss | -0.0254 |
| value_loss | 6.58 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.62e+03 |
| ep_rew_mean | -136 |
| time/ | |
| fps | 326 |
| iterations | 43 |
| time_elapsed | 217 |
| total_timesteps | 71122 |
| train/ | |
| approx_kl | 0.04322006 |
| clip_fraction | 0.383 |
| clip_range | 0.228 |
| entropy_loss | -7.52 |
| explained_variance | 0.288 |
| learning_rate | 9.79e-05 |
| loss | 0.502 |
| n_updates | 420 |
| policy_gradient_loss | -0.0132 |
| value_loss | 15.5 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.58e+03 |
| ep_rew_mean | -141 |
| time/ | |
| fps | 326 |
| iterations | 44 |
| time_elapsed | 223 |
| total_timesteps | 72776 |
| train/ | |
| approx_kl | 0.051466085 |
| clip_fraction | 0.397 |
| clip_range | 0.228 |
| entropy_loss | -7.23 |
| explained_variance | 0.217 |
| learning_rate | 9.79e-05 |
| loss | 1.78 |
| n_updates | 430 |
| policy_gradient_loss | -0.0112 |
| value_loss | 15.6 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.58e+03 |
| ep_rew_mean | -141 |
| time/ | |
| fps | 325 |
| iterations | 45 |
| time_elapsed | 228 |
| total_timesteps | 74430 |
| train/ | |
| approx_kl | 0.06645863 |
| clip_fraction | 0.421 |
| clip_range | 0.228 |
| entropy_loss | -7.28 |
| explained_variance | 0.279 |
| learning_rate | 9.79e-05 |
| loss | 1.55 |
| n_updates | 440 |
| policy_gradient_loss | -0.0173 |
| value_loss | 15.6 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.58e+03 |
| ep_rew_mean | -139 |
| time/ | |
| fps | 324 |
| iterations | 46 |
| time_elapsed | 234 |
| total_timesteps | 76084 |
| train/ | |
| approx_kl | 0.042081438 |
| clip_fraction | 0.38 |
| clip_range | 0.228 |
| entropy_loss | -7.37 |
| explained_variance | 0.2 |
| learning_rate | 9.79e-05 |
| loss | 5.59 |
| n_updates | 450 |
| policy_gradient_loss | -0.016 |
| value_loss | 5.48 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.56e+03 |
| ep_rew_mean | -137 |
| time/ | |
| fps | 324 |
| iterations | 47 |
| time_elapsed | 239 |
| total_timesteps | 77738 |
| train/ | |
| approx_kl | 0.040408526 |
| clip_fraction | 0.36 |
| clip_range | 0.228 |
| entropy_loss | -7.2 |
| explained_variance | 0.439 |
| learning_rate | 9.79e-05 |
| loss | 1.19 |
| n_updates | 460 |
| policy_gradient_loss | -0.019 |
| value_loss | 8.17 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.56e+03 |
| ep_rew_mean | -132 |
| time/ | |
| fps | 323 |
| iterations | 48 |
| time_elapsed | 245 |
| total_timesteps | 79392 |
| train/ | |
| approx_kl | 0.047893133 |
| clip_fraction | 0.399 |
| clip_range | 0.228 |
| entropy_loss | -7.24 |
| explained_variance | 0.29 |
| learning_rate | 9.79e-05 |
| loss | 9.79 |
| n_updates | 470 |
| policy_gradient_loss | -0.023 |
| value_loss | 9.88 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.56e+03 |
| ep_rew_mean | -132 |
| time/ | |
| fps | 323 |
| iterations | 49 |
| time_elapsed | 250 |
| total_timesteps | 81046 |
| train/ | |
| approx_kl | 0.050825655 |
| clip_fraction | 0.409 |
| clip_range | 0.228 |
| entropy_loss | -7.27 |
| explained_variance | 0.125 |
| learning_rate | 9.79e-05 |
| loss | 0.481 |
| n_updates | 480 |
| policy_gradient_loss | -0.0222 |
| value_loss | 5.85 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.53e+03 |
| ep_rew_mean | -137 |
| time/ | |
| fps | 322 |
| iterations | 50 |
| time_elapsed | 256 |
| total_timesteps | 82700 |
| train/ | |
| approx_kl | 0.045231882 |
| clip_fraction | 0.368 |
| clip_range | 0.228 |
| entropy_loss | -7.47 |
| explained_variance | 0.378 |
| learning_rate | 9.79e-05 |
| loss | 0.404 |
| n_updates | 490 |
| policy_gradient_loss | -0.0237 |
| value_loss | 5.36 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.48e+03 |
| ep_rew_mean | -121 |
| time/ | |
| fps | 322 |
| iterations | 51 |
| time_elapsed | 261 |
| total_timesteps | 84354 |
| train/ | |
| approx_kl | 0.08339866 |
| clip_fraction | 0.483 |
| clip_range | 0.228 |
| entropy_loss | -7.08 |
| explained_variance | 0.0398 |
| learning_rate | 9.79e-05 |
| loss | 1.24 |
| n_updates | 500 |
| policy_gradient_loss | -0.0024 |
| value_loss | 26.8 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.48e+03 |
| ep_rew_mean | -121 |
| time/ | |
| fps | 321 |
| iterations | 52 |
| time_elapsed | 267 |
| total_timesteps | 86008 |
| train/ | |
| approx_kl | 0.07751507 |
| clip_fraction | 0.455 |
| clip_range | 0.228 |
| entropy_loss | -6.97 |
| explained_variance | 0.102 |
| learning_rate | 9.79e-05 |
| loss | 13.2 |
| n_updates | 510 |
| policy_gradient_loss | 9.07e-05 |
| value_loss | 26.6 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.49e+03 |
| ep_rew_mean | -116 |
| time/ | |
| fps | 321 |
| iterations | 53 |
| time_elapsed | 272 |
| total_timesteps | 87662 |
| train/ | |
| approx_kl | 0.050660215 |
| clip_fraction | 0.407 |
| clip_range | 0.228 |
| entropy_loss | -7.09 |
| explained_variance | 0.558 |
| learning_rate | 9.79e-05 |
| loss | 1.27 |
| n_updates | 520 |
| policy_gradient_loss | -0.028 |
| value_loss | 5.08 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.45e+03 |
| ep_rew_mean | -118 |
| time/ | |
| fps | 321 |
| iterations | 54 |
| time_elapsed | 278 |
| total_timesteps | 89316 |
| train/ | |
| approx_kl | 0.059903584 |
| clip_fraction | 0.435 |
| clip_range | 0.228 |
| entropy_loss | -7 |
| explained_variance | 0.34 |
| learning_rate | 9.79e-05 |
| loss | 2.36 |
| n_updates | 530 |
| policy_gradient_loss | -0.0178 |
| value_loss | 7.52 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.43e+03 |
| ep_rew_mean | -124 |
| time/ | |
| fps | 320 |
| iterations | 55 |
| time_elapsed | 283 |
| total_timesteps | 90970 |
| train/ | |
| approx_kl | 0.05906586 |
| clip_fraction | 0.397 |
| clip_range | 0.228 |
| entropy_loss | -6.97 |
| explained_variance | 0.411 |
| learning_rate | 9.79e-05 |
| loss | 3.49 |
| n_updates | 540 |
| policy_gradient_loss | -0.0154 |
| value_loss | 15.3 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.42e+03 |
| ep_rew_mean | -129 |
| time/ | |
| fps | 320 |
| iterations | 56 |
| time_elapsed | 289 |
| total_timesteps | 92624 |
| train/ | |
| approx_kl | 0.06667252 |
| clip_fraction | 0.446 |
| clip_range | 0.228 |
| entropy_loss | -7.01 |
| explained_variance | 0.12 |
| learning_rate | 9.79e-05 |
| loss | 2.8 |
| n_updates | 550 |
| policy_gradient_loss | -0.00396 |
| value_loss | 49.1 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.41e+03 |
| ep_rew_mean | -128 |
| time/ | |
| fps | 319 |
| iterations | 57 |
| time_elapsed | 295 |
| total_timesteps | 94278 |
| train/ | |
| approx_kl | 0.086899824 |
| clip_fraction | 0.46 |
| clip_range | 0.228 |
| entropy_loss | -6.97 |
| explained_variance | 0.37 |
| learning_rate | 9.79e-05 |
| loss | 0.64 |
| n_updates | 560 |
| policy_gradient_loss | -0.0243 |
| value_loss | 20.1 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.41e+03 |
| ep_rew_mean | -128 |
| time/ | |
| fps | 318 |
| iterations | 58 |
| time_elapsed | 300 |
| total_timesteps | 95932 |
| train/ | |
| approx_kl | 0.07774362 |
| clip_fraction | 0.49 |
| clip_range | 0.228 |
| entropy_loss | -6.63 |
| explained_variance | 0.221 |
| learning_rate | 9.79e-05 |
| loss | 3.45 |
| n_updates | 570 |
| policy_gradient_loss | -0.00944 |
| value_loss | 8.12 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.42e+03 |
| ep_rew_mean | -130 |
| time/ | |
| fps | 318 |
| iterations | 59 |
| time_elapsed | 306 |
| total_timesteps | 97586 |
| train/ | |
| approx_kl | 0.055749163 |
| clip_fraction | 0.422 |
| clip_range | 0.228 |
| entropy_loss | -6.97 |
| explained_variance | 0.27 |
| learning_rate | 9.79e-05 |
| loss | 0.485 |
| n_updates | 580 |
| policy_gradient_loss | -0.0239 |
| value_loss | 3.6 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.4e+03 |
| ep_rew_mean | -134 |
| time/ | |
| fps | 317 |
| iterations | 60 |
| time_elapsed | 312 |
| total_timesteps | 99240 |
| train/ | |
| approx_kl | 0.05942291 |
| clip_fraction | 0.465 |
| clip_range | 0.228 |
| entropy_loss | -7.15 |
| explained_variance | 0.522 |
| learning_rate | 9.79e-05 |
| loss | 1.21 |
| n_updates | 590 |
| policy_gradient_loss | -0.0177 |
| value_loss | 12.3 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.4e+03 |
| ep_rew_mean | -134 |
| time/ | |
| fps | 317 |
| iterations | 61 |
| time_elapsed | 317 |
| total_timesteps | 100894 |
| train/ | |
| approx_kl | 0.090608686 |
| clip_fraction | 0.468 |
| clip_range | 0.228 |
| entropy_loss | -7.05 |
| explained_variance | 0.342 |
| learning_rate | 9.79e-05 |
| loss | 4.04 |
| n_updates | 600 |
| policy_gradient_loss | 0.000938 |
| value_loss | 25.8 |
-----------------------------------------
[I 2023-03-30 22:18:58,250] Trial 7 finished with value: -352.0 and parameters: {'n_steps': 1654, 'gamma': 0.9631671321909901, 'learning_rate': 9.790024836371174e-05, 'clip_range': 0.22794548657535632, 'gae_lambda': 0.8643034328071537}. Best is trial 6 with value: -1.0.
Using cuda device
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.
C:\ProgramData\Anaconda3\envs\StreetFighterAI\lib\site-packages\stable_baselines3\ppo\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 1146`, after every 17 untruncated mini-batches, there will be a truncated mini-batch of size 58
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1146 and n_envs=1)
warnings.warn(
Logging to logs/PPO_26
-----------------------------
| time/ | |
| fps | 624 |
| iterations | 1 |
| time_elapsed | 1 |
| total_timesteps | 1146 |
-----------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 1.96e+03 |
| ep_rew_mean | 1 |
| time/ | |
| fps | 453 |
| iterations | 2 |
| time_elapsed | 5 |
| total_timesteps | 2292 |
| train/ | |
| approx_kl | 0.04023962 |
| clip_fraction | 0.18 |
| clip_range | 0.332 |
| entropy_loss | -8.29 |
| explained_variance | 0.00577 |
| learning_rate | 8.88e-05 |
| loss | 2.31 |
| n_updates | 10 |
| policy_gradient_loss | -0.0237 |
| value_loss | 11.7 |
----------------------------------------
---------------------------------------
| rollout/ | |
| ep_len_mean | 1.96e+03 |
| ep_rew_mean | 1 |
| time/ | |
| fps | 419 |
| iterations | 3 |
| time_elapsed | 8 |
| total_timesteps | 3438 |
| train/ | |
| approx_kl | 0.0303674 |
| clip_fraction | 0.155 |
| clip_range | 0.332 |
| entropy_loss | -8.25 |
| explained_variance | -0.03 |
| learning_rate | 8.88e-05 |
| loss | 1.87 |
| n_updates | 20 |
| policy_gradient_loss | -0.0214 |
| value_loss | 5.96 |
---------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 1.9e+03 |
| ep_rew_mean | 138 |
| time/ | |
| fps | 406 |
| iterations | 4 |
| time_elapsed | 11 |
| total_timesteps | 4584 |
| train/ | |
| approx_kl | 0.029227091 |
| clip_fraction | 0.13 |
| clip_range | 0.332 |
| entropy_loss | -8.25 |
| explained_variance | 0.0631 |
| learning_rate | 8.88e-05 |
| loss | 0.793 |
| n_updates | 30 |
| policy_gradient_loss | -0.0214 |
| value_loss | 9.91 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 1.9e+03 |
| ep_rew_mean | 138 |
| time/ | |
| fps | 399 |
| iterations | 5 |
| time_elapsed | 14 |
| total_timesteps | 5730 |
| train/ | |
| approx_kl | 0.04213173 |
| clip_fraction | 0.147 |
| clip_range | 0.332 |
| entropy_loss | -8.21 |
| explained_variance | 0.0443 |
| learning_rate | 8.88e-05 |
| loss | 2.83 |
| n_updates | 40 |
| policy_gradient_loss | -0.0103 |
| value_loss | 34.6 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.1e+03 |
| ep_rew_mean | 68 |
| time/ | |
| fps | 393 |
| iterations | 6 |
| time_elapsed | 17 |
| total_timesteps | 6876 |
| train/ | |
| approx_kl | 0.02153559 |
| clip_fraction | 0.143 |
| clip_range | 0.332 |
| entropy_loss | -8.14 |
| explained_variance | -0.0437 |
| learning_rate | 8.88e-05 |
| loss | 0.602 |
| n_updates | 50 |
| policy_gradient_loss | -0.0157 |
| value_loss | 11.8 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 1.92e+03 |
| ep_rew_mean | 22.2 |
| time/ | |
| fps | 391 |
| iterations | 7 |
| time_elapsed | 20 |
| total_timesteps | 8022 |
| train/ | |
| approx_kl | 0.040259663 |
| clip_fraction | 0.2 |
| clip_range | 0.332 |
| entropy_loss | -8.05 |
| explained_variance | -0.161 |
| learning_rate | 8.88e-05 |
| loss | 13.2 |
| n_updates | 60 |
| policy_gradient_loss | -0.025 |
| value_loss | 15.7 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 1.92e+03 |
| ep_rew_mean | 22.2 |
| time/ | |
| fps | 388 |
| iterations | 8 |
| time_elapsed | 23 |
| total_timesteps | 9168 |
| train/ | |
| approx_kl | 0.029950712 |
| clip_fraction | 0.155 |
| clip_range | 0.332 |
| entropy_loss | -8.02 |
| explained_variance | 0.0118 |
| learning_rate | 8.88e-05 |
| loss | 6.02 |
| n_updates | 70 |
| policy_gradient_loss | -0.028 |
| value_loss | 16.9 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 1.89e+03 |
| ep_rew_mean | -28.4 |
| time/ | |
| fps | 386 |
| iterations | 9 |
| time_elapsed | 26 |
| total_timesteps | 10314 |
| train/ | |
| approx_kl | 0.034316827 |
| clip_fraction | 0.127 |
| clip_range | 0.332 |
| entropy_loss | -8.03 |
| explained_variance | -0.00279 |
| learning_rate | 8.88e-05 |
| loss | 0.944 |
| n_updates | 80 |
| policy_gradient_loss | -0.0281 |
| value_loss | 5.72 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 1.89e+03 |
| ep_rew_mean | -28.4 |
| time/ | |
| fps | 381 |
| iterations | 10 |
| time_elapsed | 30 |
| total_timesteps | 11460 |
| train/ | |
| approx_kl | 0.047511037 |
| clip_fraction | 0.184 |
| clip_range | 0.332 |
| entropy_loss | -7.96 |
| explained_variance | 0.0175 |
| learning_rate | 8.88e-05 |
| loss | 1.72 |
| n_updates | 90 |
| policy_gradient_loss | -0.0188 |
| value_loss | 24.5 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 1.89e+03 |
| ep_rew_mean | -28.4 |
| time/ | |
| fps | 380 |
| iterations | 11 |
| time_elapsed | 33 |
| total_timesteps | 12606 |
| train/ | |
| approx_kl | 0.03660329 |
| clip_fraction | 0.183 |
| clip_range | 0.332 |
| entropy_loss | -7.83 |
| explained_variance | -0.00303 |
| learning_rate | 8.88e-05 |
| loss | 2.56 |
| n_updates | 100 |
| policy_gradient_loss | -0.017 |
| value_loss | 6.9 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.12e+03 |
| ep_rew_mean | -27.5 |
| time/ | |
| fps | 378 |
| iterations | 12 |
| time_elapsed | 36 |
| total_timesteps | 13752 |
| train/ | |
| approx_kl | 0.030439496 |
| clip_fraction | 0.146 |
| clip_range | 0.332 |
| entropy_loss | -7.91 |
| explained_variance | 0.0077 |
| learning_rate | 8.88e-05 |
| loss | 9.06 |
| n_updates | 110 |
| policy_gradient_loss | -0.0198 |
| value_loss | 6.01 |
-----------------------------------------
---------------------------------------
| rollout/ | |
| ep_len_mean | 2.05e+03 |
| ep_rew_mean | 3 |
| time/ | |
| fps | 376 |
| iterations | 13 |
| time_elapsed | 39 |
| total_timesteps | 14898 |
| train/ | |
| approx_kl | 0.0319912 |
| clip_fraction | 0.138 |
| clip_range | 0.332 |
| entropy_loss | -7.87 |
| explained_variance | -0.236 |
| learning_rate | 8.88e-05 |
| loss | 0.376 |
| n_updates | 120 |
| policy_gradient_loss | -0.0228 |
| value_loss | 5.92 |
---------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.05e+03 |
| ep_rew_mean | 3 |
| time/ | |
| fps | 374 |
| iterations | 14 |
| time_elapsed | 42 |
| total_timesteps | 16044 |
| train/ | |
| approx_kl | 0.029652404 |
| clip_fraction | 0.153 |
| clip_range | 0.332 |
| entropy_loss | -7.91 |
| explained_variance | -0.0174 |
| learning_rate | 8.88e-05 |
| loss | 0.688 |
| n_updates | 130 |
| policy_gradient_loss | -0.0226 |
| value_loss | 23.6 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.09e+03 |
| ep_rew_mean | -8.75 |
| time/ | |
| fps | 372 |
| iterations | 15 |
| time_elapsed | 46 |
| total_timesteps | 17190 |
| train/ | |
| approx_kl | 0.034059085 |
| clip_fraction | 0.172 |
| clip_range | 0.332 |
| entropy_loss | -7.86 |
| explained_variance | -0.0714 |
| learning_rate | 8.88e-05 |
| loss | 0.296 |
| n_updates | 140 |
| policy_gradient_loss | -0.0245 |
| value_loss | 7.12 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.02e+03 |
| ep_rew_mean | -40.6 |
| time/ | |
| fps | 372 |
| iterations | 16 |
| time_elapsed | 49 |
| total_timesteps | 18336 |
| train/ | |
| approx_kl | 0.032943897 |
| clip_fraction | 0.121 |
| clip_range | 0.332 |
| entropy_loss | -7.86 |
| explained_variance | -0.16 |
| learning_rate | 8.88e-05 |
| loss | 8.37 |
| n_updates | 150 |
| policy_gradient_loss | -0.0113 |
| value_loss | 8.67 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.02e+03 |
| ep_rew_mean | -40.6 |
| time/ | |
| fps | 371 |
| iterations | 17 |
| time_elapsed | 52 |
| total_timesteps | 19482 |
| train/ | |
| approx_kl | 0.03772574 |
| clip_fraction | 0.124 |
| clip_range | 0.332 |
| entropy_loss | -7.87 |
| explained_variance | 0.0207 |
| learning_rate | 8.88e-05 |
| loss | 2.78 |
| n_updates | 160 |
| policy_gradient_loss | -0.0135 |
| value_loss | 53.4 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.02e+03 |
| ep_rew_mean | -40.6 |
| time/ | |
| fps | 370 |
| iterations | 18 |
| time_elapsed | 55 |
| total_timesteps | 20628 |
| train/ | |
| approx_kl | 0.024023427 |
| clip_fraction | 0.16 |
| clip_range | 0.332 |
| entropy_loss | -7.73 |
| explained_variance | 0.0897 |
| learning_rate | 8.88e-05 |
| loss | 0.491 |
| n_updates | 170 |
| policy_gradient_loss | -0.0294 |
| value_loss | 4.74 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.11e+03 |
| ep_rew_mean | -28.9 |
| time/ | |
| fps | 370 |
| iterations | 19 |
| time_elapsed | 58 |
| total_timesteps | 21774 |
| train/ | |
| approx_kl | 0.030223705 |
| clip_fraction | 0.123 |
| clip_range | 0.332 |
| entropy_loss | -7.82 |
| explained_variance | -0.0303 |
| learning_rate | 8.88e-05 |
| loss | 1.69 |
| n_updates | 180 |
| policy_gradient_loss | -0.0151 |
| value_loss | 5.96 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.05e+03 |
| ep_rew_mean | -46 |
| time/ | |
| fps | 371 |
| iterations | 20 |
| time_elapsed | 61 |
| total_timesteps | 22920 |
| train/ | |
| approx_kl | 0.03593646 |
| clip_fraction | 0.194 |
| clip_range | 0.332 |
| entropy_loss | -7.64 |
| explained_variance | 0.161 |
| learning_rate | 8.88e-05 |
| loss | 4.26 |
| n_updates | 190 |
| policy_gradient_loss | -0.0292 |
| value_loss | 8.23 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 1.96e+03 |
| ep_rew_mean | -21.8 |
| time/ | |
| fps | 371 |
| iterations | 21 |
| time_elapsed | 64 |
| total_timesteps | 24066 |
| train/ | |
| approx_kl | 0.023679743 |
| clip_fraction | 0.111 |
| clip_range | 0.332 |
| entropy_loss | -7.71 |
| explained_variance | 0.126 |
| learning_rate | 8.88e-05 |
| loss | 4.02 |
| n_updates | 200 |
| policy_gradient_loss | -0.0215 |
| value_loss | 24.6 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 1.96e+03 |
| ep_rew_mean | -21.8 |
| time/ | |
| fps | 371 |
| iterations | 22 |
| time_elapsed | 67 |
| total_timesteps | 25212 |
| train/ | |
| approx_kl | 0.026022209 |
| clip_fraction | 0.178 |
| clip_range | 0.332 |
| entropy_loss | -7.65 |
| explained_variance | -0.2 |
| learning_rate | 8.88e-05 |
| loss | 2.98 |
| n_updates | 210 |
| policy_gradient_loss | -0.0145 |
| value_loss | 44.2 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 1.97e+03 |
| ep_rew_mean | -21.3 |
| time/ | |
| fps | 372 |
| iterations | 23 |
| time_elapsed | 70 |
| total_timesteps | 26358 |
| train/ | |
| approx_kl | 0.02791216 |
| clip_fraction | 0.104 |
| clip_range | 0.332 |
| entropy_loss | -7.58 |
| explained_variance | 0.0539 |
| learning_rate | 8.88e-05 |
| loss | 1.41 |
| n_updates | 220 |
| policy_gradient_loss | -0.0185 |
| value_loss | 6.16 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 1.95e+03 |
| ep_rew_mean | -21.6 |
| time/ | |
| fps | 372 |
| iterations | 24 |
| time_elapsed | 73 |
| total_timesteps | 27504 |
| train/ | |
| approx_kl | 0.03433499 |
| clip_fraction | 0.138 |
| clip_range | 0.332 |
| entropy_loss | -7.69 |
| explained_variance | -0.0641 |
| learning_rate | 8.88e-05 |
| loss | 4.67 |
| n_updates | 230 |
| policy_gradient_loss | -0.0315 |
| value_loss | 4.38 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 1.88e+03 |
| ep_rew_mean | -42.7 |
| time/ | |
| fps | 372 |
| iterations | 25 |
| time_elapsed | 76 |
| total_timesteps | 28650 |
| train/ | |
| approx_kl | 0.03307491 |
| clip_fraction | 0.157 |
| clip_range | 0.332 |
| entropy_loss | -7.54 |
| explained_variance | 0.074 |
| learning_rate | 8.88e-05 |
| loss | 2.61 |
| n_updates | 240 |
| policy_gradient_loss | -0.0279 |
| value_loss | 9.81 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 1.84e+03 |
| ep_rew_mean | -59.4 |
| time/ | |
| fps | 372 |
| iterations | 26 |
| time_elapsed | 79 |
| total_timesteps | 29796 |
| train/ | |
| approx_kl | 0.041843403 |
| clip_fraction | 0.136 |
| clip_range | 0.332 |
| entropy_loss | -7.47 |
| explained_variance | -0.00451 |
| learning_rate | 8.88e-05 |
| loss | 4.38 |
| n_updates | 250 |
| policy_gradient_loss | -0.0115 |
| value_loss | 71.8 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 1.84e+03 |
| ep_rew_mean | -59.4 |
| time/ | |
| fps | 372 |
| iterations | 27 |
| time_elapsed | 83 |
| total_timesteps | 30942 |
| train/ | |
| approx_kl | 0.036084294 |
| clip_fraction | 0.154 |
| clip_range | 0.332 |
| entropy_loss | -7.33 |
| explained_variance | 0.0532 |
| learning_rate | 8.88e-05 |
| loss | 3.64 |
| n_updates | 260 |
| policy_gradient_loss | -0.0072 |
| value_loss | 58.6 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 1.86e+03 |
| ep_rew_mean | -42.5 |
| time/ | |
| fps | 372 |
| iterations | 28 |
| time_elapsed | 86 |
| total_timesteps | 32088 |
| train/ | |
| approx_kl | 0.036504086 |
| clip_fraction | 0.141 |
| clip_range | 0.332 |
| entropy_loss | -7.55 |
| explained_variance | 0.148 |
| learning_rate | 8.88e-05 |
| loss | 1.76 |
| n_updates | 270 |
| policy_gradient_loss | -0.0167 |
| value_loss | 12.2 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 1.86e+03 |
| ep_rew_mean | -42.5 |
| time/ | |
| fps | 370 |
| iterations | 29 |
| time_elapsed | 89 |
| total_timesteps | 33234 |
| train/ | |
| approx_kl | 0.034296088 |
| clip_fraction | 0.187 |
| clip_range | 0.332 |
| entropy_loss | -7.61 |
| explained_variance | 0.0593 |
| learning_rate | 8.88e-05 |
| loss | 12.4 |
| n_updates | 280 |
| policy_gradient_loss | -0.0195 |
| value_loss | 31.1 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 1.88e+03 |
| ep_rew_mean | -54.7 |
| time/ | |
| fps | 368 |
| iterations | 30 |
| time_elapsed | 93 |
| total_timesteps | 34380 |
| train/ | |
| approx_kl | 0.029918602 |
| clip_fraction | 0.173 |
| clip_range | 0.332 |
| entropy_loss | -7.44 |
| explained_variance | 0.178 |
| learning_rate | 8.88e-05 |
| loss | 2.16 |
| n_updates | 290 |
| policy_gradient_loss | -0.0263 |
| value_loss | 5.47 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 1.88e+03 |
| ep_rew_mean | -54.7 |
| time/ | |
| fps | 367 |
| iterations | 31 |
| time_elapsed | 96 |
| total_timesteps | 35526 |
| train/ | |
| approx_kl | 0.04137721 |
| clip_fraction | 0.132 |
| clip_range | 0.332 |
| entropy_loss | -7.45 |
| explained_variance | 0.00431 |
| learning_rate | 8.88e-05 |
| loss | 1.78 |
| n_updates | 300 |
| policy_gradient_loss | -0.0145 |
| value_loss | 39.8 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 1.92e+03 |
| ep_rew_mean | -53.7 |
| time/ | |
| fps | 367 |
| iterations | 32 |
| time_elapsed | 99 |
| total_timesteps | 36672 |
| train/ | |
| approx_kl | 0.030837413 |
| clip_fraction | 0.134 |
| clip_range | 0.332 |
| entropy_loss | -7.38 |
| explained_variance | 0.0959 |
| learning_rate | 8.88e-05 |
| loss | 0.065 |
| n_updates | 310 |
| policy_gradient_loss | -0.031 |
| value_loss | 3.25 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 1.92e+03 |
| ep_rew_mean | -53.7 |
| time/ | |
| fps | 365 |
| iterations | 33 |
| time_elapsed | 103 |
| total_timesteps | 37818 |
| train/ | |
| approx_kl | 0.029948711 |
| clip_fraction | 0.139 |
| clip_range | 0.332 |
| entropy_loss | -7.3 |
| explained_variance | 0.16 |
| learning_rate | 8.88e-05 |
| loss | 2.53 |
| n_updates | 320 |
| policy_gradient_loss | -0.021 |
| value_loss | 7.3 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 1.92e+03 |
| ep_rew_mean | -62.9 |
| time/ | |
| fps | 364 |
| iterations | 34 |
| time_elapsed | 106 |
| total_timesteps | 38964 |
| train/ | |
| approx_kl | 0.03208603 |
| clip_fraction | 0.163 |
| clip_range | 0.332 |
| entropy_loss | -7.35 |
| explained_variance | 0.152 |
| learning_rate | 8.88e-05 |
| loss | 0.512 |
| n_updates | 330 |
| policy_gradient_loss | -0.0206 |
| value_loss | 6.39 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 1.91e+03 |
| ep_rew_mean | -62.5 |
| time/ | |
| fps | 363 |
| iterations | 35 |
| time_elapsed | 110 |
| total_timesteps | 40110 |
| train/ | |
| approx_kl | 0.028910978 |
| clip_fraction | 0.148 |
| clip_range | 0.332 |
| entropy_loss | -7.23 |
| explained_variance | -0.049 |
| learning_rate | 8.88e-05 |
| loss | 1.4 |
| n_updates | 340 |
| policy_gradient_loss | -0.0218 |
| value_loss | 28.6 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 1.91e+03 |
| ep_rew_mean | -62.5 |
| time/ | |
| fps | 362 |
| iterations | 36 |
| time_elapsed | 113 |
| total_timesteps | 41256 |
| train/ | |
| approx_kl | 0.034623235 |
| clip_fraction | 0.122 |
| clip_range | 0.332 |
| entropy_loss | -7.21 |
| explained_variance | 0.00912 |
| learning_rate | 8.88e-05 |
| loss | 4.85 |
| n_updates | 350 |
| policy_gradient_loss | -0.0166 |
| value_loss | 17.1 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 1.88e+03 |
| ep_rew_mean | -53.5 |
| time/ | |
| fps | 362 |
| iterations | 37 |
| time_elapsed | 117 |
| total_timesteps | 42402 |
| train/ | |
| approx_kl | 0.025375202 |
| clip_fraction | 0.146 |
| clip_range | 0.332 |
| entropy_loss | -7.14 |
| explained_variance | 0.27 |
| learning_rate | 8.88e-05 |
| loss | 4.6 |
| n_updates | 360 |
| policy_gradient_loss | -0.0222 |
| value_loss | 7.8 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 1.87e+03 |
| ep_rew_mean | -66.1 |
| time/ | |
| fps | 361 |
| iterations | 38 |
| time_elapsed | 120 |
| total_timesteps | 43548 |
| train/ | |
| approx_kl | 0.042737268 |
| clip_fraction | 0.15 |
| clip_range | 0.332 |
| entropy_loss | -7.07 |
| explained_variance | 0.0585 |
| learning_rate | 8.88e-05 |
| loss | 3.26 |
| n_updates | 370 |
| policy_gradient_loss | -0.0223 |
| value_loss | 15.6 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 1.85e+03 |
| ep_rew_mean | -74.1 |
| time/ | |
| fps | 361 |
| iterations | 39 |
| time_elapsed | 123 |
| total_timesteps | 44694 |
| train/ | |
| approx_kl | 0.027932568 |
| clip_fraction | 0.129 |
| clip_range | 0.332 |
| entropy_loss | -6.9 |
| explained_variance | 0.0755 |
| learning_rate | 8.88e-05 |
| loss | 2.24 |
| n_updates | 380 |
| policy_gradient_loss | -0.0163 |
| value_loss | 49.9 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 1.85e+03 |
| ep_rew_mean | -74.1 |
| time/ | |
| fps | 362 |
| iterations | 40 |
| time_elapsed | 126 |
| total_timesteps | 45840 |
| train/ | |
| approx_kl | 0.030873783 |
| clip_fraction | 0.0904 |
| clip_range | 0.332 |
| entropy_loss | -6.99 |
| explained_variance | 0.152 |
| learning_rate | 8.88e-05 |
| loss | 2.15 |
| n_updates | 390 |
| policy_gradient_loss | -0.0199 |
| value_loss | 42.7 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 1.85e+03 |
| ep_rew_mean | -82.7 |
| time/ | |
| fps | 362 |
| iterations | 41 |
| time_elapsed | 129 |
| total_timesteps | 46986 |
| train/ | |
| approx_kl | 0.026348379 |
| clip_fraction | 0.126 |
| clip_range | 0.332 |
| entropy_loss | -7.23 |
| explained_variance | 0.0323 |
| learning_rate | 8.88e-05 |
| loss | 0.438 |
| n_updates | 400 |
| policy_gradient_loss | -0.0228 |
| value_loss | 6.85 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 1.85e+03 |
| ep_rew_mean | -82.7 |
| time/ | |
| fps | 362 |
| iterations | 42 |
| time_elapsed | 132 |
| total_timesteps | 48132 |
| train/ | |
| approx_kl | 0.04224583 |
| clip_fraction | 0.206 |
| clip_range | 0.332 |
| entropy_loss | -7.16 |
| explained_variance | 0.173 |
| learning_rate | 8.88e-05 |
| loss | 1.07 |
| n_updates | 410 |
| policy_gradient_loss | -0.0242 |
| value_loss | 38 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 1.86e+03 |
| ep_rew_mean | -75.8 |
| time/ | |
| fps | 362 |
| iterations | 43 |
| time_elapsed | 136 |
| total_timesteps | 49278 |
| train/ | |
| approx_kl | 0.03634002 |
| clip_fraction | 0.164 |
| clip_range | 0.332 |
| entropy_loss | -7.21 |
| explained_variance | 0.28 |
| learning_rate | 8.88e-05 |
| loss | 3.7 |
| n_updates | 420 |
| policy_gradient_loss | -0.032 |
| value_loss | 9.29 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 1.86e+03 |
| ep_rew_mean | -73.5 |
| time/ | |
| fps | 362 |
| iterations | 44 |
| time_elapsed | 139 |
| total_timesteps | 50424 |
| train/ | |
| approx_kl | 0.03677476 |
| clip_fraction | 0.153 |
| clip_range | 0.332 |
| entropy_loss | -7.17 |
| explained_variance | 0.11 |
| learning_rate | 8.88e-05 |
| loss | 0.533 |
| n_updates | 430 |
| policy_gradient_loss | -0.0277 |
| value_loss | 9.42 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 1.86e+03 |
| ep_rew_mean | -73.5 |
| time/ | |
| fps | 362 |
| iterations | 45 |
| time_elapsed | 142 |
| total_timesteps | 51570 |
| train/ | |
| approx_kl | 0.034621768 |
| clip_fraction | 0.154 |
| clip_range | 0.332 |
| entropy_loss | -7.17 |
| explained_variance | 0.368 |
| learning_rate | 8.88e-05 |
| loss | 5.39 |
| n_updates | 440 |
| policy_gradient_loss | -0.0276 |
| value_loss | 8.99 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 1.85e+03 |
| ep_rew_mean | -64.9 |
| time/ | |
| fps | 362 |
| iterations | 46 |
| time_elapsed | 145 |
| total_timesteps | 52716 |
| train/ | |
| approx_kl | 0.036459163 |
| clip_fraction | 0.165 |
| clip_range | 0.332 |
| entropy_loss | -7.21 |
| explained_variance | 0.374 |
| learning_rate | 8.88e-05 |
| loss | 12.4 |
| n_updates | 450 |
| policy_gradient_loss | -0.037 |
| value_loss | 9.29 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 1.85e+03 |
| ep_rew_mean | -60.5 |
| time/ | |
| fps | 361 |
| iterations | 47 |
| time_elapsed | 148 |
| total_timesteps | 53862 |
| train/ | |
| approx_kl | 0.03863145 |
| clip_fraction | 0.144 |
| clip_range | 0.332 |
| entropy_loss | -7.03 |
| explained_variance | 0.36 |
| learning_rate | 8.88e-05 |
| loss | 0.534 |
| n_updates | 460 |
| policy_gradient_loss | -0.0239 |
| value_loss | 10.8 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 1.85e+03 |
| ep_rew_mean | -60.5 |
| time/ | |
| fps | 361 |
| iterations | 48 |
| time_elapsed | 152 |
| total_timesteps | 55008 |
| train/ | |
| approx_kl | 0.049923413 |
| clip_fraction | 0.157 |
| clip_range | 0.332 |
| entropy_loss | -7.04 |
| explained_variance | 0.0693 |
| learning_rate | 8.88e-05 |
| loss | 5.03 |
| n_updates | 470 |
| policy_gradient_loss | -0.0285 |
| value_loss | 11.7 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 1.86e+03 |
| ep_rew_mean | -64.5 |
| time/ | |
| fps | 361 |
| iterations | 49 |
| time_elapsed | 155 |
| total_timesteps | 56154 |
| train/ | |
| approx_kl | 0.03346165 |
| clip_fraction | 0.145 |
| clip_range | 0.332 |
| entropy_loss | -7 |
| explained_variance | -0.0154 |
| learning_rate | 8.88e-05 |
| loss | 2.6 |
| n_updates | 480 |
| policy_gradient_loss | -0.0256 |
| value_loss | 2.62 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 1.86e+03 |
| ep_rew_mean | -64.5 |
| time/ | |
| fps | 361 |
| iterations | 50 |
| time_elapsed | 158 |
| total_timesteps | 57300 |
| train/ | |
| approx_kl | 0.041344777 |
| clip_fraction | 0.162 |
| clip_range | 0.332 |
| entropy_loss | -7.07 |
| explained_variance | 0.133 |
| learning_rate | 8.88e-05 |
| loss | 5.4 |
| n_updates | 490 |
| policy_gradient_loss | -0.0249 |
| value_loss | 20.1 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 1.88e+03 |
| ep_rew_mean | -64.6 |
| time/ | |
| fps | 360 |
| iterations | 51 |
| time_elapsed | 161 |
| total_timesteps | 58446 |
| train/ | |
| approx_kl | 0.03568868 |
| clip_fraction | 0.159 |
| clip_range | 0.332 |
| entropy_loss | -6.96 |
| explained_variance | 0.364 |
| learning_rate | 8.88e-05 |
| loss | 1.36 |
| n_updates | 500 |
| policy_gradient_loss | -0.0308 |
| value_loss | 4.16 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 1.88e+03 |
| ep_rew_mean | -64.6 |
| time/ | |
| fps | 360 |
| iterations | 52 |
| time_elapsed | 165 |
| total_timesteps | 59592 |
| train/ | |
| approx_kl | 0.032623842 |
| clip_fraction | 0.145 |
| clip_range | 0.332 |
| entropy_loss | -6.74 |
| explained_variance | 0.407 |
| learning_rate | 8.88e-05 |
| loss | 13.7 |
| n_updates | 510 |
| policy_gradient_loss | -0.0268 |
| value_loss | 7.94 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 1.87e+03 |
| ep_rew_mean | -68.9 |
| time/ | |
| fps | 359 |
| iterations | 53 |
| time_elapsed | 168 |
| total_timesteps | 60738 |
| train/ | |
| approx_kl | 0.027725061 |
| clip_fraction | 0.126 |
| clip_range | 0.332 |
| entropy_loss | -6.87 |
| explained_variance | 0.437 |
| learning_rate | 8.88e-05 |
| loss | 0.632 |
| n_updates | 520 |
| policy_gradient_loss | -0.0337 |
| value_loss | 4.87 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 1.87e+03 |
| ep_rew_mean | -68.9 |
| time/ | |
| fps | 359 |
| iterations | 54 |
| time_elapsed | 172 |
| total_timesteps | 61884 |
| train/ | |
| approx_kl | 0.06401909 |
| clip_fraction | 0.228 |
| clip_range | 0.332 |
| entropy_loss | -6.97 |
| explained_variance | 0.0456 |
| learning_rate | 8.88e-05 |
| loss | 1.16 |
| n_updates | 530 |
| policy_gradient_loss | -0.0282 |
| value_loss | 24.5 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 1.88e+03 |
| ep_rew_mean | -75.2 |
| time/ | |
| fps | 359 |
| iterations | 55 |
| time_elapsed | 175 |
| total_timesteps | 63030 |
| train/ | |
| approx_kl | 0.06221285 |
| clip_fraction | 0.215 |
| clip_range | 0.332 |
| entropy_loss | -7.02 |
| explained_variance | 0.025 |
| learning_rate | 8.88e-05 |
| loss | 4.39 |
| n_updates | 540 |
| policy_gradient_loss | -0.0343 |
| value_loss | 5.32 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 1.89e+03 |
| ep_rew_mean | -80.6 |
| time/ | |
| fps | 359 |
| iterations | 56 |
| time_elapsed | 178 |
| total_timesteps | 64176 |
| train/ | |
| approx_kl | 0.042404637 |
| clip_fraction | 0.201 |
| clip_range | 0.332 |
| entropy_loss | -6.95 |
| explained_variance | -0.0445 |
| learning_rate | 8.88e-05 |
| loss | 9.84 |
| n_updates | 550 |
| policy_gradient_loss | -0.0255 |
| value_loss | 32.7 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 1.89e+03 |
| ep_rew_mean | -80.6 |
| time/ | |
| fps | 359 |
| iterations | 57 |
| time_elapsed | 181 |
| total_timesteps | 65322 |
| train/ | |
| approx_kl | 0.04397238 |
| clip_fraction | 0.151 |
| clip_range | 0.332 |
| entropy_loss | -6.98 |
| explained_variance | 0.0972 |
| learning_rate | 8.88e-05 |
| loss | 26.8 |
| n_updates | 560 |
| policy_gradient_loss | -0.0251 |
| value_loss | 38.3 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 1.88e+03 |
| ep_rew_mean | -83 |
| time/ | |
| fps | 360 |
| iterations | 58 |
| time_elapsed | 184 |
| total_timesteps | 66468 |
| train/ | |
| approx_kl | 0.044086635 |
| clip_fraction | 0.197 |
| clip_range | 0.332 |
| entropy_loss | -7.2 |
| explained_variance | 0.118 |
| learning_rate | 8.88e-05 |
| loss | 1.07 |
| n_updates | 570 |
| policy_gradient_loss | -0.0239 |
| value_loss | 5.21 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 1.88e+03 |
| ep_rew_mean | -83 |
| time/ | |
| fps | 360 |
| iterations | 59 |
| time_elapsed | 187 |
| total_timesteps | 67614 |
| train/ | |
| approx_kl | 0.052704122 |
| clip_fraction | 0.203 |
| clip_range | 0.332 |
| entropy_loss | -6.97 |
| explained_variance | 0.0489 |
| learning_rate | 8.88e-05 |
| loss | 1.06 |
| n_updates | 580 |
| policy_gradient_loss | -0.0281 |
| value_loss | 17.1 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 1.9e+03 |
| ep_rew_mean | -89.1 |
| time/ | |
| fps | 360 |
| iterations | 60 |
| time_elapsed | 190 |
| total_timesteps | 68760 |
| train/ | |
| approx_kl | 0.046419837 |
| clip_fraction | 0.196 |
| clip_range | 0.332 |
| entropy_loss | -7.05 |
| explained_variance | -0.0384 |
| learning_rate | 8.88e-05 |
| loss | 2.11 |
| n_updates | 590 |
| policy_gradient_loss | -0.027 |
| value_loss | 2.89 |
-----------------------------------------
---------------------------------------
| rollout/ | |
| ep_len_mean | 1.86e+03 |
| ep_rew_mean | -79.1 |
| time/ | |
| fps | 360 |
| iterations | 61 |
| time_elapsed | 193 |
| total_timesteps | 69906 |
| train/ | |
| approx_kl | 0.0403891 |
| clip_fraction | 0.169 |
| clip_range | 0.332 |
| entropy_loss | -7.04 |
| explained_variance | 0.142 |
| learning_rate | 8.88e-05 |
| loss | 18.5 |
| n_updates | 600 |
| policy_gradient_loss | -0.026 |
| value_loss | 43.4 |
---------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 1.85e+03 |
| ep_rew_mean | -85.6 |
| time/ | |
| fps | 360 |
| iterations | 62 |
| time_elapsed | 196 |
| total_timesteps | 71052 |
| train/ | |
| approx_kl | 0.04120052 |
| clip_fraction | 0.175 |
| clip_range | 0.332 |
| entropy_loss | -7.16 |
| explained_variance | 0.0448 |
| learning_rate | 8.88e-05 |
| loss | 1.63 |
| n_updates | 610 |
| policy_gradient_loss | -0.0246 |
| value_loss | 42 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 1.85e+03 |
| ep_rew_mean | -85.6 |
| time/ | |
| fps | 361 |
| iterations | 63 |
| time_elapsed | 199 |
| total_timesteps | 72198 |
| train/ | |
| approx_kl | 0.04098662 |
| clip_fraction | 0.184 |
| clip_range | 0.332 |
| entropy_loss | -6.94 |
| explained_variance | 0.137 |
| learning_rate | 8.88e-05 |
| loss | 5.39 |
| n_updates | 620 |
| policy_gradient_loss | -0.0287 |
| value_loss | 44 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 1.87e+03 |
| ep_rew_mean | -88 |
| time/ | |
| fps | 361 |
| iterations | 64 |
| time_elapsed | 202 |
| total_timesteps | 73344 |
| train/ | |
| approx_kl | 0.056530125 |
| clip_fraction | 0.226 |
| clip_range | 0.332 |
| entropy_loss | -7.3 |
| explained_variance | 0.307 |
| learning_rate | 8.88e-05 |
| loss | 1.07 |
| n_updates | 630 |
| policy_gradient_loss | -0.0322 |
| value_loss | 8.53 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 1.86e+03 |
| ep_rew_mean | -93.7 |
| time/ | |
| fps | 361 |
| iterations | 65 |
| time_elapsed | 205 |
| total_timesteps | 74490 |
| train/ | |
| approx_kl | 0.05633619 |
| clip_fraction | 0.226 |
| clip_range | 0.332 |
| entropy_loss | -7.17 |
| explained_variance | 0.406 |
| learning_rate | 8.88e-05 |
| loss | 10.5 |
| n_updates | 640 |
| policy_gradient_loss | -0.0258 |
| value_loss | 15 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 1.86e+03 |
| ep_rew_mean | -93.7 |
| time/ | |
| fps | 361 |
| iterations | 66 |
| time_elapsed | 209 |
| total_timesteps | 75636 |
| train/ | |
| approx_kl | 0.08828102 |
| clip_fraction | 0.324 |
| clip_range | 0.332 |
| entropy_loss | -6.98 |
| explained_variance | 0.132 |
| learning_rate | 8.88e-05 |
| loss | 3 |
| n_updates | 650 |
| policy_gradient_loss | -0.0222 |
| value_loss | 37.7 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 1.85e+03 |
| ep_rew_mean | -95.9 |
| time/ | |
| fps | 360 |
| iterations | 67 |
| time_elapsed | 213 |
| total_timesteps | 76782 |
| train/ | |
| approx_kl | 0.042576507 |
| clip_fraction | 0.216 |
| clip_range | 0.332 |
| entropy_loss | -6.86 |
| explained_variance | 0.0496 |
| learning_rate | 8.88e-05 |
| loss | 6.04 |
| n_updates | 660 |
| policy_gradient_loss | -0.0288 |
| value_loss | 8.43 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 1.85e+03 |
| ep_rew_mean | -91.6 |
| time/ | |
| fps | 359 |
| iterations | 68 |
| time_elapsed | 216 |
| total_timesteps | 77928 |
| train/ | |
| approx_kl | 0.04540308 |
| clip_fraction | 0.229 |
| clip_range | 0.332 |
| entropy_loss | -7.07 |
| explained_variance | 0.249 |
| learning_rate | 8.88e-05 |
| loss | 5.47 |
| n_updates | 670 |
| policy_gradient_loss | -0.0288 |
| value_loss | 16 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 1.85e+03 |
| ep_rew_mean | -91.6 |
| time/ | |
| fps | 359 |
| iterations | 69 |
| time_elapsed | 219 |
| total_timesteps | 79074 |
| train/ | |
| approx_kl | 0.040512584 |
| clip_fraction | 0.211 |
| clip_range | 0.332 |
| entropy_loss | -7.02 |
| explained_variance | -0.00908 |
| learning_rate | 8.88e-05 |
| loss | 1.86 |
| n_updates | 680 |
| policy_gradient_loss | -0.0131 |
| value_loss | 13.4 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 1.85e+03 |
| ep_rew_mean | -91.6 |
| time/ | |
| fps | 359 |
| iterations | 70 |
| time_elapsed | 223 |
| total_timesteps | 80220 |
| train/ | |
| approx_kl | 0.05254833 |
| clip_fraction | 0.188 |
| clip_range | 0.332 |
| entropy_loss | -7.14 |
| explained_variance | 0.295 |
| learning_rate | 8.88e-05 |
| loss | 0.321 |
| n_updates | 690 |
| policy_gradient_loss | -0.0257 |
| value_loss | 8.67 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 1.88e+03 |
| ep_rew_mean | -94 |
| time/ | |
| fps | 358 |
| iterations | 71 |
| time_elapsed | 226 |
| total_timesteps | 81366 |
| train/ | |
| approx_kl | 0.04479891 |
| clip_fraction | 0.224 |
| clip_range | 0.332 |
| entropy_loss | -7.18 |
| explained_variance | -0.0191 |
| learning_rate | 8.88e-05 |
| loss | 0.529 |
| n_updates | 700 |
| policy_gradient_loss | -0.0383 |
| value_loss | 1.7 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 1.88e+03 |
| ep_rew_mean | -94 |
| time/ | |
| fps | 358 |
| iterations | 72 |
| time_elapsed | 230 |
| total_timesteps | 82512 |
| train/ | |
| approx_kl | 0.05337418 |
| clip_fraction | 0.207 |
| clip_range | 0.332 |
| entropy_loss | -7.21 |
| explained_variance | 0.168 |
| learning_rate | 8.88e-05 |
| loss | 0.286 |
| n_updates | 710 |
| policy_gradient_loss | -0.034 |
| value_loss | 10.8 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 1.9e+03 |
| ep_rew_mean | -95.4 |
| time/ | |
| fps | 358 |
| iterations | 73 |
| time_elapsed | 233 |
| total_timesteps | 83658 |
| train/ | |
| approx_kl | 0.03758472 |
| clip_fraction | 0.184 |
| clip_range | 0.332 |
| entropy_loss | -7.42 |
| explained_variance | -0.0312 |
| learning_rate | 8.88e-05 |
| loss | 0.699 |
| n_updates | 720 |
| policy_gradient_loss | -0.0247 |
| value_loss | 6.12 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 1.9e+03 |
| ep_rew_mean | -95.4 |
| time/ | |
| fps | 358 |
| iterations | 74 |
| time_elapsed | 236 |
| total_timesteps | 84804 |
| train/ | |
| approx_kl | 0.058067992 |
| clip_fraction | 0.221 |
| clip_range | 0.332 |
| entropy_loss | -7.14 |
| explained_variance | 0.0152 |
| learning_rate | 8.88e-05 |
| loss | 0.904 |
| n_updates | 730 |
| policy_gradient_loss | -0.0133 |
| value_loss | 13.2 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 1.91e+03 |
| ep_rew_mean | -97.7 |
| time/ | |
| fps | 358 |
| iterations | 75 |
| time_elapsed | 239 |
| total_timesteps | 85950 |
| train/ | |
| approx_kl | 0.035781853 |
| clip_fraction | 0.175 |
| clip_range | 0.332 |
| entropy_loss | -7.28 |
| explained_variance | 0.0129 |
| learning_rate | 8.88e-05 |
| loss | 0.581 |
| n_updates | 740 |
| policy_gradient_loss | -0.019 |
| value_loss | 4.61 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 1.91e+03 |
| ep_rew_mean | -97.7 |
| time/ | |
| fps | 358 |
| iterations | 76 |
| time_elapsed | 242 |
| total_timesteps | 87096 |
| train/ | |
| approx_kl | 0.062611975 |
| clip_fraction | 0.322 |
| clip_range | 0.332 |
| entropy_loss | -7.08 |
| explained_variance | 0.06 |
| learning_rate | 8.88e-05 |
| loss | 1.4 |
| n_updates | 750 |
| policy_gradient_loss | -0.0256 |
| value_loss | 22.8 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 1.91e+03 |
| ep_rew_mean | -97.7 |
| time/ | |
| fps | 358 |
| iterations | 77 |
| time_elapsed | 246 |
| total_timesteps | 88242 |
| train/ | |
| approx_kl | 0.05377618 |
| clip_fraction | 0.227 |
| clip_range | 0.332 |
| entropy_loss | -7.17 |
| explained_variance | 0.19 |
| learning_rate | 8.88e-05 |
| loss | 0.718 |
| n_updates | 760 |
| policy_gradient_loss | -0.0249 |
| value_loss | 8.35 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 1.92e+03 |
| ep_rew_mean | -95.3 |
| time/ | |
| fps | 358 |
| iterations | 78 |
| time_elapsed | 249 |
| total_timesteps | 89388 |
| train/ | |
| approx_kl | 0.050721783 |
| clip_fraction | 0.225 |
| clip_range | 0.332 |
| entropy_loss | -6.94 |
| explained_variance | -0.0205 |
| learning_rate | 8.88e-05 |
| loss | 0.33 |
| n_updates | 770 |
| policy_gradient_loss | -0.0391 |
| value_loss | 2.79 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 1.92e+03 |
| ep_rew_mean | -97.8 |
| time/ | |
| fps | 358 |
| iterations | 79 |
| time_elapsed | 252 |
| total_timesteps | 90534 |
| train/ | |
| approx_kl | 0.046041932 |
| clip_fraction | 0.215 |
| clip_range | 0.332 |
| entropy_loss | -7.21 |
| explained_variance | -0.076 |
| learning_rate | 8.88e-05 |
| loss | 2.6 |
| n_updates | 780 |
| policy_gradient_loss | -0.0325 |
| value_loss | 5.97 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 1.9e+03 |
| ep_rew_mean | -90.9 |
| time/ | |
| fps | 359 |
| iterations | 80 |
| time_elapsed | 255 |
| total_timesteps | 91680 |
| train/ | |
| approx_kl | 0.07306535 |
| clip_fraction | 0.309 |
| clip_range | 0.332 |
| entropy_loss | -7.15 |
| explained_variance | 0.0302 |
| learning_rate | 8.88e-05 |
| loss | 1.4 |
| n_updates | 790 |
| policy_gradient_loss | -0.0324 |
| value_loss | 18.2 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 1.9e+03 |
| ep_rew_mean | -90.9 |
| time/ | |
| fps | 359 |
| iterations | 81 |
| time_elapsed | 258 |
| total_timesteps | 92826 |
| train/ | |
| approx_kl | 0.06419113 |
| clip_fraction | 0.28 |
| clip_range | 0.332 |
| entropy_loss | -6.92 |
| explained_variance | -0.326 |
| learning_rate | 8.88e-05 |
| loss | 1.45 |
| n_updates | 800 |
| policy_gradient_loss | 0.00325 |
| value_loss | 24.5 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 1.9e+03 |
| ep_rew_mean | -90.9 |
| time/ | |
| fps | 359 |
| iterations | 82 |
| time_elapsed | 261 |
| total_timesteps | 93972 |
| train/ | |
| approx_kl | 0.047866795 |
| clip_fraction | 0.237 |
| clip_range | 0.332 |
| entropy_loss | -7.02 |
| explained_variance | 0.135 |
| learning_rate | 8.88e-05 |
| loss | 0.433 |
| n_updates | 810 |
| policy_gradient_loss | -0.0285 |
| value_loss | 5.26 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 1.92e+03 |
| ep_rew_mean | -88.8 |
| time/ | |
| fps | 359 |
| iterations | 83 |
| time_elapsed | 264 |
| total_timesteps | 95118 |
| train/ | |
| approx_kl | 0.066993006 |
| clip_fraction | 0.28 |
| clip_range | 0.332 |
| entropy_loss | -7.12 |
| explained_variance | -0.127 |
| learning_rate | 8.88e-05 |
| loss | 4.38 |
| n_updates | 820 |
| policy_gradient_loss | -0.0312 |
| value_loss | 9.15 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 1.91e+03 |
| ep_rew_mean | -91.8 |
| time/ | |
| fps | 359 |
| iterations | 84 |
| time_elapsed | 267 |
| total_timesteps | 96264 |
| train/ | |
| approx_kl | 0.05563952 |
| clip_fraction | 0.247 |
| clip_range | 0.332 |
| entropy_loss | -7.08 |
| explained_variance | 0.154 |
| learning_rate | 8.88e-05 |
| loss | 0.568 |
| n_updates | 830 |
| policy_gradient_loss | -0.0269 |
| value_loss | 8.24 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 1.91e+03 |
| ep_rew_mean | -91.8 |
| time/ | |
| fps | 359 |
| iterations | 85 |
| time_elapsed | 271 |
| total_timesteps | 97410 |
| train/ | |
| approx_kl | 0.05921689 |
| clip_fraction | 0.293 |
| clip_range | 0.332 |
| entropy_loss | -6.99 |
| explained_variance | 0.0197 |
| learning_rate | 8.88e-05 |
| loss | 1.75 |
| n_updates | 840 |
| policy_gradient_loss | -0.0203 |
| value_loss | 33 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 1.92e+03 |
| ep_rew_mean | -93.6 |
| time/ | |
| fps | 359 |
| iterations | 86 |
| time_elapsed | 274 |
| total_timesteps | 98556 |
| train/ | |
| approx_kl | 0.048403326 |
| clip_fraction | 0.219 |
| clip_range | 0.332 |
| entropy_loss | -6.95 |
| explained_variance | 0.00879 |
| learning_rate | 8.88e-05 |
| loss | 0.85 |
| n_updates | 850 |
| policy_gradient_loss | -0.0288 |
| value_loss | 6.54 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 1.92e+03 |
| ep_rew_mean | -93.6 |
| time/ | |
| fps | 358 |
| iterations | 87 |
| time_elapsed | 277 |
| total_timesteps | 99702 |
| train/ | |
| approx_kl | 0.04475287 |
| clip_fraction | 0.178 |
| clip_range | 0.332 |
| entropy_loss | -7.01 |
| explained_variance | 0.204 |
| learning_rate | 8.88e-05 |
| loss | 0.859 |
| n_updates | 860 |
| policy_gradient_loss | -0.0151 |
| value_loss | 17.5 |
----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 1.93e+03 |
| ep_rew_mean | -93.8 |
| time/ | |
| fps | 358 |
| iterations | 88 |
| time_elapsed | 280 |
| total_timesteps | 100848 |
| train/ | |
| approx_kl | 0.06419406 |
| clip_fraction | 0.244 |
| clip_range | 0.332 |
| entropy_loss | -7 |
| explained_variance | -0.22 |
| learning_rate | 8.88e-05 |
| loss | 0.243 |
| n_updates | 870 |
| policy_gradient_loss | -0.025 |
| value_loss | 1.64 |
----------------------------------------
[I 2023-03-30 22:24:11,258] Trial 8 finished with value: -343.0 and parameters: {'n_steps': 1146, 'gamma': 0.9192032939378013, 'learning_rate': 8.879233904874816e-05, 'clip_range': 0.3323467236435492, 'gae_lambda': 0.8683502580240515}. Best is trial 6 with value: -1.0.
Using cuda device
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.
C:\ProgramData\Anaconda3\envs\StreetFighterAI\lib\site-packages\stable_baselines3\ppo\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 3539`, after every 55 untruncated mini-batches, there will be a truncated mini-batch of size 19
We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=3539 and n_envs=1)
warnings.warn(
Logging to logs/PPO_27
---------------------------------
| rollout/ | |
| ep_len_mean | 2.15e+03 |
| ep_rew_mean | -50 |
| time/ | |
| fps | 587 |
| iterations | 1 |
| time_elapsed | 6 |
| total_timesteps | 3539 |
---------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.52e+03 |
| ep_rew_mean | -45 |
| time/ | |
| fps | 444 |
| iterations | 2 |
| time_elapsed | 15 |
| total_timesteps | 7078 |
| train/ | |
| approx_kl | 0.006164868 |
| clip_fraction | 0.12 |
| clip_range | 0.155 |
| entropy_loss | -8.31 |
| explained_variance | -0.00174 |
| learning_rate | 5.95e-05 |
| loss | 0.546 |
| n_updates | 10 |
| policy_gradient_loss | -0.00714 |
| value_loss | 8.14 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.56e+03 |
| ep_rew_mean | -109 |
| time/ | |
| fps | 404 |
| iterations | 3 |
| time_elapsed | 26 |
| total_timesteps | 10617 |
| train/ | |
| approx_kl | 0.00478289 |
| clip_fraction | 0.0976 |
| clip_range | 0.155 |
| entropy_loss | -8.31 |
| explained_variance | 0.014 |
| learning_rate | 5.95e-05 |
| loss | 0.361 |
| n_updates | 20 |
| policy_gradient_loss | -0.00716 |
| value_loss | 6.41 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.6e+03 |
| ep_rew_mean | -129 |
| time/ | |
| fps | 385 |
| iterations | 4 |
| time_elapsed | 36 |
| total_timesteps | 14156 |
| train/ | |
| approx_kl | 0.007813611 |
| clip_fraction | 0.146 |
| clip_range | 0.155 |
| entropy_loss | -8.3 |
| explained_variance | 0.0131 |
| learning_rate | 5.95e-05 |
| loss | 2.84 |
| n_updates | 30 |
| policy_gradient_loss | -0.00689 |
| value_loss | 14 |
-----------------------------------------
------------------------------------------
| rollout/ | |
| ep_len_mean | 2.52e+03 |
| ep_rew_mean | -111 |
| time/ | |
| fps | 377 |
| iterations | 5 |
| time_elapsed | 46 |
| total_timesteps | 17695 |
| train/ | |
| approx_kl | 0.0056522703 |
| clip_fraction | 0.0913 |
| clip_range | 0.155 |
| entropy_loss | -8.3 |
| explained_variance | 0.0146 |
| learning_rate | 5.95e-05 |
| loss | 0.276 |
| n_updates | 40 |
| policy_gradient_loss | -0.00467 |
| value_loss | 13.4 |
------------------------------------------
------------------------------------------
| rollout/ | |
| ep_len_mean | 2.47e+03 |
| ep_rew_mean | -126 |
| time/ | |
| fps | 373 |
| iterations | 6 |
| time_elapsed | 56 |
| total_timesteps | 21234 |
| train/ | |
| approx_kl | 0.0062621506 |
| clip_fraction | 0.123 |
| clip_range | 0.155 |
| entropy_loss | -8.29 |
| explained_variance | 0.0869 |
| learning_rate | 5.95e-05 |
| loss | 18.6 |
| n_updates | 50 |
| policy_gradient_loss | -0.00766 |
| value_loss | 7.72 |
------------------------------------------
------------------------------------------
| rollout/ | |
| ep_len_mean | 2.34e+03 |
| ep_rew_mean | -129 |
| time/ | |
| fps | 371 |
| iterations | 7 |
| time_elapsed | 66 |
| total_timesteps | 24773 |
| train/ | |
| approx_kl | 0.0071724947 |
| clip_fraction | 0.159 |
| clip_range | 0.155 |
| entropy_loss | -8.28 |
| explained_variance | 0.176 |
| learning_rate | 5.95e-05 |
| loss | 0.872 |
| n_updates | 60 |
| policy_gradient_loss | -0.00702 |
| value_loss | 11.1 |
------------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.35e+03 |
| ep_rew_mean | -106 |
| time/ | |
| fps | 369 |
| iterations | 8 |
| time_elapsed | 76 |
| total_timesteps | 28312 |
| train/ | |
| approx_kl | 0.007235888 |
| clip_fraction | 0.147 |
| clip_range | 0.155 |
| entropy_loss | -8.28 |
| explained_variance | 0.0603 |
| learning_rate | 5.95e-05 |
| loss | 0.535 |
| n_updates | 70 |
| policy_gradient_loss | -0.00766 |
| value_loss | 20.2 |
-----------------------------------------
------------------------------------------
| rollout/ | |
| ep_len_mean | 2.41e+03 |
| ep_rew_mean | -113 |
| time/ | |
| fps | 365 |
| iterations | 9 |
| time_elapsed | 87 |
| total_timesteps | 31851 |
| train/ | |
| approx_kl | 0.0057272953 |
| clip_fraction | 0.133 |
| clip_range | 0.155 |
| entropy_loss | -8.27 |
| explained_variance | 0.0634 |
| learning_rate | 5.95e-05 |
| loss | 0.31 |
| n_updates | 80 |
| policy_gradient_loss | -0.00732 |
| value_loss | 7.16 |
------------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.72e+03 |
| ep_rew_mean | 198 |
| time/ | |
| fps | 362 |
| iterations | 10 |
| time_elapsed | 97 |
| total_timesteps | 35390 |
| train/ | |
| approx_kl | 0.006537366 |
| clip_fraction | 0.154 |
| clip_range | 0.155 |
| entropy_loss | -8.26 |
| explained_variance | -0.00102 |
| learning_rate | 5.95e-05 |
| loss | 9.6 |
| n_updates | 90 |
| policy_gradient_loss | -0.0035 |
| value_loss | 5.98e+03 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.66e+03 |
| ep_rew_mean | 176 |
| time/ | |
| fps | 361 |
| iterations | 11 |
| time_elapsed | 107 |
| total_timesteps | 38929 |
| train/ | |
| approx_kl | 0.007428738 |
| clip_fraction | 0.147 |
| clip_range | 0.155 |
| entropy_loss | -8.25 |
| explained_variance | 0.275 |
| learning_rate | 5.95e-05 |
| loss | 0.198 |
| n_updates | 100 |
| policy_gradient_loss | -0.0124 |
| value_loss | 4.22 |
-----------------------------------------
------------------------------------------
| rollout/ | |
| ep_len_mean | 2.59e+03 |
| ep_rew_mean | 148 |
| time/ | |
| fps | 361 |
| iterations | 12 |
| time_elapsed | 117 |
| total_timesteps | 42468 |
| train/ | |
| approx_kl | 0.0071307733 |
| clip_fraction | 0.139 |
| clip_range | 0.155 |
| entropy_loss | -8.25 |
| explained_variance | 0.0965 |
| learning_rate | 5.95e-05 |
| loss | 0.68 |
| n_updates | 110 |
| policy_gradient_loss | -0.0106 |
| value_loss | 8.22 |
------------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.55e+03 |
| ep_rew_mean | 114 |
| time/ | |
| fps | 360 |
| iterations | 13 |
| time_elapsed | 127 |
| total_timesteps | 46007 |
| train/ | |
| approx_kl | 0.007813596 |
| clip_fraction | 0.141 |
| clip_range | 0.155 |
| entropy_loss | -8.24 |
| explained_variance | 0.0396 |
| learning_rate | 5.95e-05 |
| loss | 0.861 |
| n_updates | 120 |
| policy_gradient_loss | -0.00828 |
| value_loss | 26.4 |
-----------------------------------------
------------------------------------------
| rollout/ | |
| ep_len_mean | 2.49e+03 |
| ep_rew_mean | 89.8 |
| time/ | |
| fps | 358 |
| iterations | 14 |
| time_elapsed | 138 |
| total_timesteps | 49546 |
| train/ | |
| approx_kl | 0.0077910186 |
| clip_fraction | 0.148 |
| clip_range | 0.155 |
| entropy_loss | -8.23 |
| explained_variance | 0.0748 |
| learning_rate | 5.95e-05 |
| loss | 1.18 |
| n_updates | 130 |
| policy_gradient_loss | -0.00939 |
| value_loss | 16.7 |
------------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.5e+03 |
| ep_rew_mean | 74 |
| time/ | |
| fps | 356 |
| iterations | 15 |
| time_elapsed | 149 |
| total_timesteps | 53085 |
| train/ | |
| approx_kl | 0.009532078 |
| clip_fraction | 0.184 |
| clip_range | 0.155 |
| entropy_loss | -8.23 |
| explained_variance | 0.191 |
| learning_rate | 5.95e-05 |
| loss | 1.42 |
| n_updates | 140 |
| policy_gradient_loss | -0.011 |
| value_loss | 20.1 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.52e+03 |
| ep_rew_mean | 61.8 |
| time/ | |
| fps | 353 |
| iterations | 16 |
| time_elapsed | 160 |
| total_timesteps | 56624 |
| train/ | |
| approx_kl | 0.008085081 |
| clip_fraction | 0.178 |
| clip_range | 0.155 |
| entropy_loss | -8.23 |
| explained_variance | 0.185 |
| learning_rate | 5.95e-05 |
| loss | 0.266 |
| n_updates | 150 |
| policy_gradient_loss | -0.0106 |
| value_loss | 10.2 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.54e+03 |
| ep_rew_mean | 61.8 |
| time/ | |
| fps | 354 |
| iterations | 17 |
| time_elapsed | 169 |
| total_timesteps | 60163 |
| train/ | |
| approx_kl | 0.008233994 |
| clip_fraction | 0.181 |
| clip_range | 0.155 |
| entropy_loss | -8.22 |
| explained_variance | 0.143 |
| learning_rate | 5.95e-05 |
| loss | 0.381 |
| n_updates | 160 |
| policy_gradient_loss | -0.0085 |
| value_loss | 6.92 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.53e+03 |
| ep_rew_mean | 58.3 |
| time/ | |
| fps | 354 |
| iterations | 18 |
| time_elapsed | 179 |
| total_timesteps | 63702 |
| train/ | |
| approx_kl | 0.00866387 |
| clip_fraction | 0.173 |
| clip_range | 0.155 |
| entropy_loss | -8.21 |
| explained_variance | 0.21 |
| learning_rate | 5.95e-05 |
| loss | 0.428 |
| n_updates | 170 |
| policy_gradient_loss | -0.0136 |
| value_loss | 4.9 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.58e+03 |
| ep_rew_mean | 54.2 |
| time/ | |
| fps | 354 |
| iterations | 19 |
| time_elapsed | 189 |
| total_timesteps | 67241 |
| train/ | |
| approx_kl | 0.008494033 |
| clip_fraction | 0.195 |
| clip_range | 0.155 |
| entropy_loss | -8.2 |
| explained_variance | 0.0444 |
| learning_rate | 5.95e-05 |
| loss | 24.3 |
| n_updates | 180 |
| policy_gradient_loss | -0.00716 |
| value_loss | 16.3 |
-----------------------------------------
----------------------------------------
| rollout/ | |
| ep_len_mean | 2.6e+03 |
| ep_rew_mean | 44.4 |
| time/ | |
| fps | 353 |
| iterations | 20 |
| time_elapsed | 200 |
| total_timesteps | 70780 |
| train/ | |
| approx_kl | 0.00970717 |
| clip_fraction | 0.195 |
| clip_range | 0.155 |
| entropy_loss | -8.17 |
| explained_variance | 0.282 |
| learning_rate | 5.95e-05 |
| loss | 0.367 |
| n_updates | 190 |
| policy_gradient_loss | -0.0132 |
| value_loss | 4.43 |
----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.58e+03 |
| ep_rew_mean | 39.4 |
| time/ | |
| fps | 352 |
| iterations | 21 |
| time_elapsed | 211 |
| total_timesteps | 74319 |
| train/ | |
| approx_kl | 0.008659723 |
| clip_fraction | 0.206 |
| clip_range | 0.155 |
| entropy_loss | -8.18 |
| explained_variance | 0.214 |
| learning_rate | 5.95e-05 |
| loss | 0.268 |
| n_updates | 200 |
| policy_gradient_loss | -0.014 |
| value_loss | 6.89 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.59e+03 |
| ep_rew_mean | 39.8 |
| time/ | |
| fps | 352 |
| iterations | 22 |
| time_elapsed | 220 |
| total_timesteps | 77858 |
| train/ | |
| approx_kl | 0.011562935 |
| clip_fraction | 0.223 |
| clip_range | 0.155 |
| entropy_loss | -8.21 |
| explained_variance | 0.16 |
| learning_rate | 5.95e-05 |
| loss | 5.91 |
| n_updates | 210 |
| policy_gradient_loss | -0.00776 |
| value_loss | 8.23 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.58e+03 |
| ep_rew_mean | 25.9 |
| time/ | |
| fps | 352 |
| iterations | 23 |
| time_elapsed | 230 |
| total_timesteps | 81397 |
| train/ | |
| approx_kl | 0.011082681 |
| clip_fraction | 0.22 |
| clip_range | 0.155 |
| entropy_loss | -8.15 |
| explained_variance | 0.438 |
| learning_rate | 5.95e-05 |
| loss | 0.259 |
| n_updates | 220 |
| policy_gradient_loss | -0.017 |
| value_loss | 3.91 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.58e+03 |
| ep_rew_mean | 23.9 |
| time/ | |
| fps | 353 |
| iterations | 24 |
| time_elapsed | 240 |
| total_timesteps | 84936 |
| train/ | |
| approx_kl | 0.010984284 |
| clip_fraction | 0.237 |
| clip_range | 0.155 |
| entropy_loss | -8.15 |
| explained_variance | 0.0983 |
| learning_rate | 5.95e-05 |
| loss | 4.34 |
| n_updates | 230 |
| policy_gradient_loss | -0.0112 |
| value_loss | 14.1 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.59e+03 |
| ep_rew_mean | 18.8 |
| time/ | |
| fps | 353 |
| iterations | 25 |
| time_elapsed | 250 |
| total_timesteps | 88475 |
| train/ | |
| approx_kl | 0.012365894 |
| clip_fraction | 0.254 |
| clip_range | 0.155 |
| entropy_loss | -8.14 |
| explained_variance | 0.179 |
| learning_rate | 5.95e-05 |
| loss | 3.71 |
| n_updates | 240 |
| policy_gradient_loss | -0.0117 |
| value_loss | 7.45 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.56e+03 |
| ep_rew_mean | 9.77 |
| time/ | |
| fps | 352 |
| iterations | 26 |
| time_elapsed | 260 |
| total_timesteps | 92014 |
| train/ | |
| approx_kl | 0.010001008 |
| clip_fraction | 0.231 |
| clip_range | 0.155 |
| entropy_loss | -8.2 |
| explained_variance | 0.146 |
| learning_rate | 5.95e-05 |
| loss | 19 |
| n_updates | 250 |
| policy_gradient_loss | -0.011 |
| value_loss | 5.73 |
-----------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.55e+03 |
| ep_rew_mean | -3.51 |
| time/ | |
| fps | 351 |
| iterations | 27 |
| time_elapsed | 271 |
| total_timesteps | 95553 |
| train/ | |
| approx_kl | 0.011729387 |
| clip_fraction | 0.264 |
| clip_range | 0.155 |
| entropy_loss | -8.15 |
| explained_variance | 0.0695 |
| learning_rate | 5.95e-05 |
| loss | 0.242 |
| n_updates | 260 |
| policy_gradient_loss | -0.0102 |
| value_loss | 16.5 |
-----------------------------------------
------------------------------------------
| rollout/ | |
| ep_len_mean | 2.5e+03 |
| ep_rew_mean | -3.13 |
| time/ | |
| fps | 350 |
| iterations | 28 |
| time_elapsed | 282 |
| total_timesteps | 99092 |
| train/ | |
| approx_kl | 0.0153510645 |
| clip_fraction | 0.298 |
| clip_range | 0.155 |
| entropy_loss | -8.12 |
| explained_variance | 0.0587 |
| learning_rate | 5.95e-05 |
| loss | 3.32 |
| n_updates | 270 |
| policy_gradient_loss | -0.00956 |
| value_loss | 15.8 |
------------------------------------------
-----------------------------------------
| rollout/ | |
| ep_len_mean | 2.49e+03 |
| ep_rew_mean | -3.24 |
| time/ | |
| fps | 350 |
| iterations | 29 |
| time_elapsed | 292 |
| total_timesteps | 102631 |
| train/ | |
| approx_kl | 0.018420441 |
| clip_fraction | 0.324 |
| clip_range | 0.155 |
| entropy_loss | -8.1 |
| explained_variance | -0.00146 |
| learning_rate | 5.95e-05 |
| loss | 0.384 |
| n_updates | 280 |
| policy_gradient_loss | -0.00777 |
| value_loss | 13.8 |
-----------------------------------------
[I 2023-03-30 22:29:37,908] Trial 9 finished with value: -345.0 and parameters: {'n_steps': 3539, 'gamma': 0.9483166689072441, 'learning_rate': 5.947863028406936e-05, 'clip_range': 0.15487331840468324, 'gae_lambda': 0.8132195074364921}. Best is trial 6 with value: -1.0.
{'n_steps': 4033, 'gamma': 0.9021075358644198, 'learning_rate': 9.226619222512819e-05, 'clip_range': 0.2947845259713344, 'gae_lambda': 0.9634981775867985}
FrozenTrial(number=6, state=TrialState.COMPLETE, values=[-1.0], datetime_start=datetime.datetime(2023, 3, 30, 22, 13, 4, 197753), datetime_complete=datetime.datetime(2023, 3, 30, 22, 13, 11, 640374), params={'n_steps': 4033, 'gamma': 0.9021075358644198, 'learning_rate': 9.226619222512819e-05, 'clip_range': 0.2947845259713344, 'gae_lambda': 0.9634981775867985}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'n_steps': IntDistribution(high=8192, log=True, low=1024, step=1), 'gamma': FloatDistribution(high=0.9999, log=False, low=0.9, step=None), 'learning_rate': FloatDistribution(high=0.0001, log=True, low=5e-05, step=None), 'clip_range': FloatDistribution(high=0.4, log=False, low=0.1, step=None), 'gae_lambda': FloatDistribution(high=0.99, log=False, low=0.8, step=None)}, trial_id=6, value=None)