diff --git a/000_image_stack_ram_based_reward/.ipynb_checkpoints/street_fighter_notebook-checkpoint.ipynb b/000_image_stack_ram_based_reward/.ipynb_checkpoints/street_fighter_notebook-checkpoint.ipynb
new file mode 100644
index 0000000..ccf2f64
--- /dev/null
+++ b/000_image_stack_ram_based_reward/.ipynb_checkpoints/street_fighter_notebook-checkpoint.ipynb
@@ -0,0 +1,234 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "10d267bb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import retro"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "1ef8ff20",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "game = \"StreetFighterIISpecialChampionEdition-Genesis\"\n",
+    "state = \"Champion.Level1.ChunLiVsGuile\"\n",
+    "env = retro.make(game=game, state=state)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "5ce656b8",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1], dtype=int8)"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "env.action_space.sample()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "8c3f0a4d",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(200, 256, 3)"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "env.observation_space.sample().shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "46db7b05",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(200, 256, 3)\n",
+      "{'enemy_matches_won': 0, 'score': 0, 'matches_won': 0, 'continuetimer': 0, 'enemy_health': 176, 'health': 176}\n"
+     ]
+    }
+   ],
+   "source": [
+    "observation = env.reset()\n",
+    "print(observation.shape)\n",
+    "\n",
+    "action = env.action_space.sample()\n",
+    "obs, rewards, done, info = env.step(action)\n",
+    "print(info)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "09f0c6b0",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "MultiBinary(12)\n"
+     ]
+    }
+   ],
+   "source": [
+    "from gym.spaces import Box, MultiBinary\n",
+    "\n",
+    "print(MultiBinary(12))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "id": "97df18cf",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import gym\n",
+    "import numpy as np\n",
+    "from gym.spaces import Box, MultiBinary\n",
+    "\n",
+    "class StreetFighter(gym.Env):\n",
+    "    # Gym environment over the gym-retro SF2 game: 84x84 grayscale frames, health-based reward.\n",
+    "    def __init__(self):\n",
+    "        super().__init__()\n",
+    "        self.observation_space = Box(low=0, high=255, shape=(84, 84), dtype=np.uint8)\n",
+    "        self.action_space = MultiBinary(12)\n",
+    "        self.game = retro.make(game=\"StreetFighterIISpecialChampionEdition-Genesis\", use_restricted_actions=retro.Actions.FILTERED)\n",
+    "        \n",
+    "        self.full_hp = 176\n",
+    "        self.player_health = self.full_hp\n",
+    "        self.oppont_health = self.full_hp\n",
+    "        self.score = 0\n",
+    "        \n",
+    "    def __preprocess(self, observation):\n",
+    "        import cv2  # imported locally: no visible cell of this notebook imports cv2\n",
+    "        gray = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY)\n",
+    "        return cv2.resize(gray, (84,84), interpolation=cv2.INTER_CUBIC)\n",
+    "\n",
+    "    def step(self, action):\n",
+    "        # Step the emulator, then replace its reward with one derived from health changes.\n",
+    "        obs, reward, done, info = self.game.step(action)\n",
+    "        custom_obs = self.__preprocess(obs) # It's just frame, not frame_delta\n",
+    "\n",
+    "        # During fighting, either player or opponent has positive health points.\n",
+    "        if info['health'] > 0 or info['enemy_health'] > 0:\n",
+    "\n",
+    "            # Player Loses (NOTE(review): relies on HP being reported below zero at KO - confirm)\n",
+    "            if info['health'] < 0 and info['health'] != self.player_health and info['enemy_health'] != 0:\n",
+    "                reward = (-self.full_hp) * info['enemy_health']\n",
+    "\n",
+    "            # Player Wins\n",
+    "            elif info['enemy_health'] < 0 and info['enemy_health'] != self.oppont_health and info['health'] != 0:\n",
+    "                reward = self.full_hp * info['health']\n",
+    "\n",
+    "            # During Fighting\n",
+    "            else:\n",
+    "                reward = (self.oppont_health - info['enemy_health']) - (self.player_health - info['health'])\n",
+    "        # If both HP values are <= 0 the emulator's own reward is kept as-is.\n",
+    "        self.player_health = info['health']\n",
+    "        self.oppont_health = info['enemy_health']\n",
+    "        \n",
+    "        return custom_obs, reward, done, info\n",
+    "    \n",
+    "    def render(self, *args, **kwargs):\n",
+    "        self.game.render()\n",
+    "        \n",
+    "    def reset(self):\n",
+    "        obs = self.game.reset()\n",
+    "        custom_obs = self.__preprocess(obs)\n",
+    "        self.previous_frame = obs\n",
+    "        # New episode: restore both tracked health values to full.\n",
+    "        self.player_health = self.full_hp\n",
+    "        self.oppont_health = self.full_hp\n",
+    "        return custom_obs\n",
+    "\n",
+    "    def close(self):\n",
+    "        self.game.close()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "id": "0b137b88",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(84, 84, 1)\n"
+     ]
+    }
+   ],
+   "source": [
+    "env.close()\n",
+    "env = StreetFighter()\n",
+    "print(env.observation_space.shape)\n",
+    "env.close()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2da50dbc",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/000_image_stack_ram_based_reward/__pycache__/custom_cnn.cpython-38.pyc b/000_image_stack_ram_based_reward/__pycache__/custom_cnn.cpython-38.pyc
new file mode 100644
index 0000000..617ab55
Binary files /dev/null and b/000_image_stack_ram_based_reward/__pycache__/custom_cnn.cpython-38.pyc differ
diff --git a/000_image_stack_ram_based_reward/__pycache__/rmsprop_optim.cpython-38.pyc b/000_image_stack_ram_based_reward/__pycache__/rmsprop_optim.cpython-38.pyc
new file mode 100644
index 0000000..9a61bfd
Binary files /dev/null and b/000_image_stack_ram_based_reward/__pycache__/rmsprop_optim.cpython-38.pyc differ
diff --git a/000_image_stack_ram_based_reward/__pycache__/street_fighter_custom_wrapper.cpython-38.pyc b/000_image_stack_ram_based_reward/__pycache__/street_fighter_custom_wrapper.cpython-38.pyc
new file mode 100644
index 0000000..5d8bbb4
Binary files /dev/null and b/000_image_stack_ram_based_reward/__pycache__/street_fighter_custom_wrapper.cpython-38.pyc differ
diff --git a/000_image_stack_ram_based_reward/check_reward.py b/000_image_stack_ram_based_reward/check_reward.py
new file mode 100644
index 0000000..7f46495
--- /dev/null
+++ b/000_image_stack_ram_based_reward/check_reward.py
@@ -0,0 +1,46 @@
+import time 
+
+import retro
+from stable_baselines3 import PPO
+from stable_baselines3.common.monitor import Monitor
+from stable_baselines3.common.vec_env import DummyVecEnv, VecFrameStack
+
+from street_fighter_custom_wrapper import StreetFighterCustomWrapper
+    
+def make_env(game, state):
+    """Return a zero-argument factory that builds the wrapped retro environment."""
+    def _build():
+        base_env = retro.make(
+            game=game,
+            state=state,
+            use_restricted_actions=retro.Actions.FILTERED,
+            obs_type=retro.Observations.IMAGE,
+        )
+        return StreetFighterCustomWrapper(base_env)
+    return _build
+
+game = "StreetFighterIISpecialChampionEdition-Genesis"
+state = "Champion.Level1.ChunLiVsGuile"  # alternative: "ChampionX.Level1.ChunLiVsKen"
+
+env = make_env(game, state)()
+env = Monitor(env, 'logs/')
+
+# Play random actions for a fixed number of episodes to get a baseline reward.
+num_episodes = 30
+episode_reward_sum = 0
+try:
+    for _ in range(num_episodes):
+        done = False
+        env.reset()
+        total_reward = 0
+        while not done:
+            _, reward, done, info = env.step(env.action_space.sample())
+            if reward != 0:
+                total_reward += reward
+                print(f"Reward: {reward}, playerHP: {info['health']}, enemyHP:{info['enemy_health']}")
+            env.render()
+        print(f"Total reward: {total_reward}")
+        episode_reward_sum += total_reward
+finally:
+    env.close()  # release the emulator even if the loop is interrupted
+print(f"Average reward for random strategy: {episode_reward_sum/num_episodes}")
diff --git a/001_image_stack/custom_cnn.py b/000_image_stack_ram_based_reward/custom_cnn.py
similarity index 100%
rename from 001_image_stack/custom_cnn.py
rename to 000_image_stack_ram_based_reward/custom_cnn.py
diff --git a/000_image_stack_ram_based_reward/evaluate.py b/000_image_stack_ram_based_reward/evaluate.py
new file mode 100644
index 0000000..c435f08
--- /dev/null
+++ b/000_image_stack_ram_based_reward/evaluate.py
@@ -0,0 +1,52 @@
+import retro
+
+from stable_baselines3 import PPO
+from stable_baselines3.common.vec_env import DummyVecEnv
+from stable_baselines3.common.monitor import Monitor
+from stable_baselines3.common.evaluation import evaluate_policy
+
+from custom_cnn import CustomCNN
+from street_fighter_custom_wrapper import StreetFighterCustomWrapper
+
+def make_env(game, state):
+    """Return a zero-argument factory that builds the wrapped retro environment."""
+    def _build():
+        base_env = retro.make(
+            game=game,
+            state=state,
+            use_restricted_actions=retro.Actions.FILTERED,
+            obs_type=retro.Observations.IMAGE,
+        )
+        return StreetFighterCustomWrapper(base_env)
+    return _build
+
+game = "StreetFighterIISpecialChampionEdition-Genesis"
+state_stages = [
+    "Champion.Level1.ChunLiVsGuile",
+    "Champion.Level2.ChunLiVsKen",
+    "Champion.Level3.ChunLiVsChunLi",
+    "Champion.Level4.ChunLiVsZangief",
+    "Champion.Level5.ChunLiVsDhalsim",
+    "Champion.Level6.ChunLiVsRyu",
+    "Champion.Level7.ChunLiVsEHonda",
+    "Champion.Level8.ChunLiVsBlanka",
+    "Champion.Level9.ChunLiVsBalrog",
+    "Champion.Level10.ChunLiVsVega",
+    "Champion.Level11.ChunLiVsSagat",
+    "Champion.Level12.ChunLiVsBison"
+    # Add other stages as necessary
+]
+
+env = make_env(game, state_stages[0])()
+
+# Wrap the environment
+# env = Monitor(env, 'logs/')
+
+policy_kwargs = {'features_extractor_class': CustomCNN}
+model = PPO("CnnPolicy", env, policy_kwargs=policy_kwargs)
+# NOTE(review): the model built above is discarded; PPO.load returns a new model.
+model = PPO.load(r"dummy_model_ppo_chunli")
+# model.load(r"trained_models/ppo_chunli_864000_steps")
+# return_episode_rewards stays at its default (False) so scalar mean/std are returned, not per-episode lists.
+mean_reward, std_reward = evaluate_policy(model, env, render=True, n_eval_episodes=10, deterministic=False)
+print(f"Mean reward: {mean_reward:.2f} +/- {std_reward:.2f}")
diff --git a/000_image_stack_ram_based_reward/logs/PPO_1/events.out.tfevents.1680176551.DESKTOP-9E17TO7.25984.0 b/000_image_stack_ram_based_reward/logs/PPO_1/events.out.tfevents.1680176551.DESKTOP-9E17TO7.25984.0
new file mode 100644
index 0000000..9096b7c
Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_1/events.out.tfevents.1680176551.DESKTOP-9E17TO7.25984.0 differ
diff --git a/000_image_stack_ram_based_reward/logs/PPO_10/events.out.tfevents.1680180303.DESKTOP-9E17TO7.35284.0 b/000_image_stack_ram_based_reward/logs/PPO_10/events.out.tfevents.1680180303.DESKTOP-9E17TO7.35284.0
new file mode 100644
index 0000000..106794b
Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_10/events.out.tfevents.1680180303.DESKTOP-9E17TO7.35284.0 differ
diff --git a/000_image_stack_ram_based_reward/logs/PPO_11/events.out.tfevents.1680180514.DESKTOP-9E17TO7.11796.0 b/000_image_stack_ram_based_reward/logs/PPO_11/events.out.tfevents.1680180514.DESKTOP-9E17TO7.11796.0
new file mode 100644
index 0000000..6fab041
Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_11/events.out.tfevents.1680180514.DESKTOP-9E17TO7.11796.0 differ
diff --git a/000_image_stack_ram_based_reward/logs/PPO_12/events.out.tfevents.1680180894.DESKTOP-9E17TO7.20548.0 b/000_image_stack_ram_based_reward/logs/PPO_12/events.out.tfevents.1680180894.DESKTOP-9E17TO7.20548.0
new file mode 100644
index 0000000..8ac9b2f
Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_12/events.out.tfevents.1680180894.DESKTOP-9E17TO7.20548.0 differ
diff --git a/000_image_stack_ram_based_reward/logs/PPO_13/events.out.tfevents.1680182153.DESKTOP-9E17TO7.30948.0 b/000_image_stack_ram_based_reward/logs/PPO_13/events.out.tfevents.1680182153.DESKTOP-9E17TO7.30948.0
new file mode 100644
index 0000000..74c786c
Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_13/events.out.tfevents.1680182153.DESKTOP-9E17TO7.30948.0 differ
diff --git a/000_image_stack_ram_based_reward/logs/PPO_14/events.out.tfevents.1680182468.DESKTOP-9E17TO7.30948.1 b/000_image_stack_ram_based_reward/logs/PPO_14/events.out.tfevents.1680182468.DESKTOP-9E17TO7.30948.1
new file mode 100644
index 0000000..39f8c7f
Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_14/events.out.tfevents.1680182468.DESKTOP-9E17TO7.30948.1 differ
diff --git a/000_image_stack_ram_based_reward/logs/PPO_15/events.out.tfevents.1680182795.DESKTOP-9E17TO7.30948.2 b/000_image_stack_ram_based_reward/logs/PPO_15/events.out.tfevents.1680182795.DESKTOP-9E17TO7.30948.2
new file mode 100644
index 0000000..23d3259
Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_15/events.out.tfevents.1680182795.DESKTOP-9E17TO7.30948.2 differ
diff --git a/000_image_stack_ram_based_reward/logs/PPO_16/events.out.tfevents.1680183136.DESKTOP-9E17TO7.30948.3 b/000_image_stack_ram_based_reward/logs/PPO_16/events.out.tfevents.1680183136.DESKTOP-9E17TO7.30948.3
new file mode 100644
index 0000000..f357854
Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_16/events.out.tfevents.1680183136.DESKTOP-9E17TO7.30948.3 differ
diff --git a/000_image_stack_ram_based_reward/logs/PPO_17/events.out.tfevents.1680183432.DESKTOP-9E17TO7.30948.4 b/000_image_stack_ram_based_reward/logs/PPO_17/events.out.tfevents.1680183432.DESKTOP-9E17TO7.30948.4
new file mode 100644
index 0000000..ac83234
Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_17/events.out.tfevents.1680183432.DESKTOP-9E17TO7.30948.4 differ
diff --git a/000_image_stack_ram_based_reward/logs/PPO_18/events.out.tfevents.1680183612.DESKTOP-9E17TO7.32692.0 b/000_image_stack_ram_based_reward/logs/PPO_18/events.out.tfevents.1680183612.DESKTOP-9E17TO7.32692.0
new file mode 100644
index 0000000..8e7b54d
Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_18/events.out.tfevents.1680183612.DESKTOP-9E17TO7.32692.0 differ
diff --git a/000_image_stack_ram_based_reward/logs/PPO_19/events.out.tfevents.1680183923.DESKTOP-9E17TO7.32692.1 b/000_image_stack_ram_based_reward/logs/PPO_19/events.out.tfevents.1680183923.DESKTOP-9E17TO7.32692.1
new file mode 100644
index 0000000..4627ee2
Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_19/events.out.tfevents.1680183923.DESKTOP-9E17TO7.32692.1 differ
diff --git a/000_image_stack_ram_based_reward/logs/PPO_2/events.out.tfevents.1680177234.DESKTOP-9E17TO7.2364.0 b/000_image_stack_ram_based_reward/logs/PPO_2/events.out.tfevents.1680177234.DESKTOP-9E17TO7.2364.0
new file mode 100644
index 0000000..9b22664
Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_2/events.out.tfevents.1680177234.DESKTOP-9E17TO7.2364.0 differ
diff --git a/000_image_stack_ram_based_reward/logs/PPO_20/events.out.tfevents.1680184256.DESKTOP-9E17TO7.32692.2 b/000_image_stack_ram_based_reward/logs/PPO_20/events.out.tfevents.1680184256.DESKTOP-9E17TO7.32692.2
new file mode 100644
index 0000000..5a2dda3
Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_20/events.out.tfevents.1680184256.DESKTOP-9E17TO7.32692.2 differ
diff --git a/000_image_stack_ram_based_reward/logs/PPO_21/events.out.tfevents.1680184590.DESKTOP-9E17TO7.32692.3 b/000_image_stack_ram_based_reward/logs/PPO_21/events.out.tfevents.1680184590.DESKTOP-9E17TO7.32692.3
new file mode 100644
index 0000000..55c5bef
Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_21/events.out.tfevents.1680184590.DESKTOP-9E17TO7.32692.3 differ
diff --git a/000_image_stack_ram_based_reward/logs/PPO_22/events.out.tfevents.1680184934.DESKTOP-9E17TO7.32692.4 b/000_image_stack_ram_based_reward/logs/PPO_22/events.out.tfevents.1680184934.DESKTOP-9E17TO7.32692.4
new file mode 100644
index 0000000..0210587
Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_22/events.out.tfevents.1680184934.DESKTOP-9E17TO7.32692.4 differ
diff --git a/000_image_stack_ram_based_reward/logs/PPO_23/events.out.tfevents.1680185250.DESKTOP-9E17TO7.32692.5 b/000_image_stack_ram_based_reward/logs/PPO_23/events.out.tfevents.1680185250.DESKTOP-9E17TO7.32692.5
new file mode 100644
index 0000000..528228a
Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_23/events.out.tfevents.1680185250.DESKTOP-9E17TO7.32692.5 differ
diff --git a/000_image_stack_ram_based_reward/logs/PPO_24/events.out.tfevents.1680185584.DESKTOP-9E17TO7.32692.6 b/000_image_stack_ram_based_reward/logs/PPO_24/events.out.tfevents.1680185584.DESKTOP-9E17TO7.32692.6
new file mode 100644
index 0000000..c4c9faa
Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_24/events.out.tfevents.1680185584.DESKTOP-9E17TO7.32692.6 differ
diff --git a/000_image_stack_ram_based_reward/logs/PPO_25/events.out.tfevents.1680185591.DESKTOP-9E17TO7.32692.7 b/000_image_stack_ram_based_reward/logs/PPO_25/events.out.tfevents.1680185591.DESKTOP-9E17TO7.32692.7
new file mode 100644
index 0000000..a1a8e5d
Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_25/events.out.tfevents.1680185591.DESKTOP-9E17TO7.32692.7 differ
diff --git a/000_image_stack_ram_based_reward/logs/PPO_26/events.out.tfevents.1680185938.DESKTOP-9E17TO7.32692.8 b/000_image_stack_ram_based_reward/logs/PPO_26/events.out.tfevents.1680185938.DESKTOP-9E17TO7.32692.8
new file mode 100644
index 0000000..37dacc8
Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_26/events.out.tfevents.1680185938.DESKTOP-9E17TO7.32692.8 differ
diff --git a/000_image_stack_ram_based_reward/logs/PPO_27/events.out.tfevents.1680186251.DESKTOP-9E17TO7.32692.9 b/000_image_stack_ram_based_reward/logs/PPO_27/events.out.tfevents.1680186251.DESKTOP-9E17TO7.32692.9
new file mode 100644
index 0000000..4299711
Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_27/events.out.tfevents.1680186251.DESKTOP-9E17TO7.32692.9 differ
diff --git a/000_image_stack_ram_based_reward/logs/PPO_3/events.out.tfevents.1680177334.DESKTOP-9E17TO7.35060.0 b/000_image_stack_ram_based_reward/logs/PPO_3/events.out.tfevents.1680177334.DESKTOP-9E17TO7.35060.0
new file mode 100644
index 0000000..a51dfe5
Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_3/events.out.tfevents.1680177334.DESKTOP-9E17TO7.35060.0 differ
diff --git a/000_image_stack_ram_based_reward/logs/PPO_4/events.out.tfevents.1680177771.DESKTOP-9E17TO7.35060.1 b/000_image_stack_ram_based_reward/logs/PPO_4/events.out.tfevents.1680177771.DESKTOP-9E17TO7.35060.1
new file mode 100644
index 0000000..4fee189
Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_4/events.out.tfevents.1680177771.DESKTOP-9E17TO7.35060.1 differ
diff --git a/000_image_stack_ram_based_reward/logs/PPO_5/events.out.tfevents.1680178207.DESKTOP-9E17TO7.35060.2 b/000_image_stack_ram_based_reward/logs/PPO_5/events.out.tfevents.1680178207.DESKTOP-9E17TO7.35060.2
new file mode 100644
index 0000000..6741459
Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_5/events.out.tfevents.1680178207.DESKTOP-9E17TO7.35060.2 differ
diff --git a/000_image_stack_ram_based_reward/logs/PPO_6/events.out.tfevents.1680178663.DESKTOP-9E17TO7.35060.3 b/000_image_stack_ram_based_reward/logs/PPO_6/events.out.tfevents.1680178663.DESKTOP-9E17TO7.35060.3
new file mode 100644
index 0000000..ddc6b02
Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_6/events.out.tfevents.1680178663.DESKTOP-9E17TO7.35060.3 differ
diff --git a/000_image_stack_ram_based_reward/logs/PPO_7/events.out.tfevents.1680179100.DESKTOP-9E17TO7.35060.4 b/000_image_stack_ram_based_reward/logs/PPO_7/events.out.tfevents.1680179100.DESKTOP-9E17TO7.35060.4
new file mode 100644
index 0000000..f1b8b23
Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_7/events.out.tfevents.1680179100.DESKTOP-9E17TO7.35060.4 differ
diff --git a/000_image_stack_ram_based_reward/logs/PPO_8/events.out.tfevents.1680179576.DESKTOP-9E17TO7.35060.5 b/000_image_stack_ram_based_reward/logs/PPO_8/events.out.tfevents.1680179576.DESKTOP-9E17TO7.35060.5
new file mode 100644
index 0000000..89b6ec0
Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_8/events.out.tfevents.1680179576.DESKTOP-9E17TO7.35060.5 differ
diff --git a/000_image_stack_ram_based_reward/logs/PPO_9/events.out.tfevents.1680180040.DESKTOP-9E17TO7.35060.6 b/000_image_stack_ram_based_reward/logs/PPO_9/events.out.tfevents.1680180040.DESKTOP-9E17TO7.35060.6
new file mode 100644
index 0000000..755d4cc
Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_9/events.out.tfevents.1680180040.DESKTOP-9E17TO7.35060.6 differ
diff --git a/000_image_stack_ram_based_reward/logs/monitor.csv b/000_image_stack_ram_based_reward/logs/monitor.csv
new file mode 100644
index 0000000..a7eb38b
--- /dev/null
+++ b/000_image_stack_ram_based_reward/logs/monitor.csv
@@ -0,0 +1,53 @@
+#{"t_start": 1680186251.3110938, "env_id": null}
+r,l,t
+-50,2150,3.695703
+-40,2886,12.564373
+-128,2196,20.599987
+-217,3000,25.620172
+-210,2753,34.631877
+27,2177,42.807461
+-161,2502,46.870715
+-227,2122,54.492589
+-289,1567,61.321581
+1,2075,64.463465
+130,2465,72.662509
+-192,3007,82.093462
+3927.0,6468,97.611361
+-109,1823,104.996175
+200,1820,112.333123
+-300,2478,116.020238
+-42,2351,124.010789
+-263,1990,127.212089
+-351,1486,134.405471
+-225,2611,143.112158
+-56,3290,153.69294
+-65,2138,157.640509
+62,3161,167.244644
+-189,2652,175.720904
+224,2138,179.193385
+-48,3706,189.4923
+-209,3172,199.319699
+-98,2059,207.148574
+51,2787,216.523835
+-88,3218,225.952495
+-263,1828,228.707771
+-38,2328,236.642072
+7,3179,245.83899
+-133,2421,249.558141
+-296,1684,256.702009
+-211,2881,266.1996
+-261,1710,269.33675
+-176,1974,277.229695
+184,1310,279.58493
+218,2222,288.236686
+-229,2460,291.904952
+-345,2510,299.876746
+-345,2510,302.781091
+-345,2510,305.701696
+-345,2510,308.687105
+-345,2510,311.624716
+-345,2510,314.566203
+-345,2510,317.608539
+-345,2510,320.618201
+-345,2510,323.649133
+-345,2510,326.561072
diff --git a/000_image_stack_ram_based_reward/optuna/tuning_log.txt b/000_image_stack_ram_based_reward/optuna/tuning_log.txt
new file mode 100644
index 0000000..9a55088
--- /dev/null
+++ b/000_image_stack_ram_based_reward/optuna/tuning_log.txt
@@ -0,0 +1,8947 @@
+|    value_loss           | 20          |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.64e+03    |
+|    ep_rew_mean          | -99         |
+| time/                   |             |
+|    fps                  | 353         |
+|    iterations           | 6           |
+|    time_elapsed         | 23          |
+|    total_timesteps      | 8226        |
+| train/                  |             |
+|    approx_kl            | 0.014875706 |
+|    clip_fraction        | 0.148       |
+|    clip_range           | 0.26        |
+|    entropy_loss         | -8.24       |
+|    explained_variance   | 0.151       |
+|    learning_rate        | 8.14e-05    |
+|    loss                 | 2.44        |
+|    n_updates            | 50          |
+|    policy_gradient_loss | -0.00671    |
+|    value_loss           | 10.3        |
+-----------------------------------------
+---------------------------------------
+| rollout/                |           |
+|    ep_len_mean          | 2.64e+03  |
+|    ep_rew_mean          | -99       |
+| time/                   |           |
+|    fps                  | 350       |
+|    iterations           | 7         |
+|    time_elapsed         | 27        |
+|    total_timesteps      | 9597      |
+| train/                  |           |
+|    approx_kl            | 0.0164865 |
+|    clip_fraction        | 0.162     |
+|    clip_range           | 0.26      |
+|    entropy_loss         | -8.21     |
+|    explained_variance   | -0.272    |
+|    learning_rate        | 8.14e-05  |
+|    loss                 | 4.19      |
+|    n_updates            | 60        |
+|    policy_gradient_loss | -0.0113   |
+|    value_loss           | 16.8      |
+---------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.73e+03    |
+|    ep_rew_mean          | -88.5       |
+| time/                   |             |
+|    fps                  | 349         |
+|    iterations           | 8           |
+|    time_elapsed         | 31          |
+|    total_timesteps      | 10968       |
+| train/                  |             |
+|    approx_kl            | 0.014885512 |
+|    clip_fraction        | 0.162       |
+|    clip_range           | 0.26        |
+|    entropy_loss         | -8.18       |
+|    explained_variance   | 0.0707      |
+|    learning_rate        | 8.14e-05    |
+|    loss                 | 1.46        |
+|    n_updates            | 70          |
+|    policy_gradient_loss | -0.017      |
+|    value_loss           | 8.77        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.73e+03    |
+|    ep_rew_mean          | -88.5       |
+| time/                   |             |
+|    fps                  | 349         |
+|    iterations           | 9           |
+|    time_elapsed         | 35          |
+|    total_timesteps      | 12339       |
+| train/                  |             |
+|    approx_kl            | 0.018109197 |
+|    clip_fraction        | 0.118       |
+|    clip_range           | 0.26        |
+|    entropy_loss         | -8.17       |
+|    explained_variance   | 0.0377      |
+|    learning_rate        | 8.14e-05    |
+|    loss                 | 2.68        |
+|    n_updates            | 80          |
+|    policy_gradient_loss | -0.0208     |
+|    value_loss           | 10          |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.47e+03   |
+|    ep_rew_mean          | -121       |
+| time/                   |            |
+|    fps                  | 347        |
+|    iterations           | 10         |
+|    time_elapsed         | 39         |
+|    total_timesteps      | 13710      |
+| train/                  |            |
+|    approx_kl            | 0.02112376 |
+|    clip_fraction        | 0.154      |
+|    clip_range           | 0.26       |
+|    entropy_loss         | -8.17      |
+|    explained_variance   | 0.0154     |
+|    learning_rate        | 8.14e-05   |
+|    loss                 | 4.45       |
+|    n_updates            | 90         |
+|    policy_gradient_loss | -0.0182    |
+|    value_loss           | 13.2       |
+----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.43e+03   |
+|    ep_rew_mean          | -94        |
+| time/                   |            |
+|    fps                  | 347        |
+|    iterations           | 11         |
+|    time_elapsed         | 43         |
+|    total_timesteps      | 15081      |
+| train/                  |            |
+|    approx_kl            | 0.03655843 |
+|    clip_fraction        | 0.271      |
+|    clip_range           | 0.26       |
+|    entropy_loss         | -8.1       |
+|    explained_variance   | -0.0637    |
+|    learning_rate        | 8.14e-05   |
+|    loss                 | 2.29       |
+|    n_updates            | 100        |
+|    policy_gradient_loss | -0.00361   |
+|    value_loss           | 31.6       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.19e+03    |
+|    ep_rew_mean          | -45.3       |
+| time/                   |             |
+|    fps                  | 349         |
+|    iterations           | 12          |
+|    time_elapsed         | 47          |
+|    total_timesteps      | 16452       |
+| train/                  |             |
+|    approx_kl            | 0.037120674 |
+|    clip_fraction        | 0.245       |
+|    clip_range           | 0.26        |
+|    entropy_loss         | -8.08       |
+|    explained_variance   | 0.104       |
+|    learning_rate        | 8.14e-05    |
+|    loss                 | 7.71        |
+|    n_updates            | 110         |
+|    policy_gradient_loss | -0.00649    |
+|    value_loss           | 22.7        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.21e+03    |
+|    ep_rew_mean          | -65         |
+| time/                   |             |
+|    fps                  | 350         |
+|    iterations           | 13          |
+|    time_elapsed         | 50          |
+|    total_timesteps      | 17823       |
+| train/                  |             |
+|    approx_kl            | 0.027819885 |
+|    clip_fraction        | 0.228       |
+|    clip_range           | 0.26        |
+|    entropy_loss         | -7.98       |
+|    explained_variance   | 0.0436      |
+|    learning_rate        | 8.14e-05    |
+|    loss                 | 7.06        |
+|    n_updates            | 120         |
+|    policy_gradient_loss | 0.00299     |
+|    value_loss           | 59.9        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.21e+03    |
+|    ep_rew_mean          | -65         |
+| time/                   |             |
+|    fps                  | 351         |
+|    iterations           | 14          |
+|    time_elapsed         | 54          |
+|    total_timesteps      | 19194       |
+| train/                  |             |
+|    approx_kl            | 0.027972419 |
+|    clip_fraction        | 0.197       |
+|    clip_range           | 0.26        |
+|    entropy_loss         | -7.94       |
+|    explained_variance   | 0.0199      |
+|    learning_rate        | 8.14e-05    |
+|    loss                 | 4.1         |
+|    n_updates            | 130         |
+|    policy_gradient_loss | -0.00526    |
+|    value_loss           | 26.3        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.21e+03    |
+|    ep_rew_mean          | -65         |
+| time/                   |             |
+|    fps                  | 352         |
+|    iterations           | 15          |
+|    time_elapsed         | 58          |
+|    total_timesteps      | 20565       |
+| train/                  |             |
+|    approx_kl            | 0.035860673 |
+|    clip_fraction        | 0.236       |
+|    clip_range           | 0.26        |
+|    entropy_loss         | -7.89       |
+|    explained_variance   | -0.13       |
+|    learning_rate        | 8.14e-05    |
+|    loss                 | 4.36        |
+|    n_updates            | 140         |
+|    policy_gradient_loss | -0.0122     |
+|    value_loss           | 10.5        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.3e+03     |
+|    ep_rew_mean          | -57.8       |
+| time/                   |             |
+|    fps                  | 352         |
+|    iterations           | 16          |
+|    time_elapsed         | 62          |
+|    total_timesteps      | 21936       |
+| train/                  |             |
+|    approx_kl            | 0.020882078 |
+|    clip_fraction        | 0.207       |
+|    clip_range           | 0.26        |
+|    entropy_loss         | -7.97       |
+|    explained_variance   | -0.266      |
+|    learning_rate        | 8.14e-05    |
+|    loss                 | 17.1        |
+|    n_updates            | 150         |
+|    policy_gradient_loss | -0.00845    |
+|    value_loss           | 12.5        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.32e+03    |
+|    ep_rew_mean          | -76.7       |
+| time/                   |             |
+|    fps                  | 353         |
+|    iterations           | 17          |
+|    time_elapsed         | 65          |
+|    total_timesteps      | 23307       |
+| train/                  |             |
+|    approx_kl            | 0.017862184 |
+|    clip_fraction        | 0.112       |
+|    clip_range           | 0.26        |
+|    entropy_loss         | -8.11       |
+|    explained_variance   | -0.152      |
+|    learning_rate        | 8.14e-05    |
+|    loss                 | 0.52        |
+|    n_updates            | 160         |
+|    policy_gradient_loss | -0.014      |
+|    value_loss           | 5           |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.32e+03   |
+|    ep_rew_mean          | -76.7      |
+| time/                   |            |
+|    fps                  | 354        |
+|    iterations           | 18         |
+|    time_elapsed         | 69         |
+|    total_timesteps      | 24678      |
+| train/                  |            |
+|    approx_kl            | 0.02715041 |
+|    clip_fraction        | 0.179      |
+|    clip_range           | 0.26       |
+|    entropy_loss         | -8.03      |
+|    explained_variance   | 0.0571     |
+|    learning_rate        | 8.14e-05   |
+|    loss                 | 6.35       |
+|    n_updates            | 170        |
+|    policy_gradient_loss | -0.00481   |
+|    value_loss           | 43         |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.26e+03    |
+|    ep_rew_mean          | -52.5       |
+| time/                   |             |
+|    fps                  | 354         |
+|    iterations           | 19          |
+|    time_elapsed         | 73          |
+|    total_timesteps      | 26049       |
+| train/                  |             |
+|    approx_kl            | 0.020203596 |
+|    clip_fraction        | 0.15        |
+|    clip_range           | 0.26        |
+|    entropy_loss         | -8.08       |
+|    explained_variance   | -0.578      |
+|    learning_rate        | 8.14e-05    |
+|    loss                 | 0.888       |
+|    n_updates            | 180         |
+|    policy_gradient_loss | -0.011      |
+|    value_loss           | 11.7        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.28e+03   |
+|    ep_rew_mean          | -57.9      |
+| time/                   |            |
+|    fps                  | 354        |
+|    iterations           | 20         |
+|    time_elapsed         | 77         |
+|    total_timesteps      | 27420      |
+| train/                  |            |
+|    approx_kl            | 0.02579885 |
+|    clip_fraction        | 0.181      |
+|    clip_range           | 0.26       |
+|    entropy_loss         | -7.96      |
+|    explained_variance   | -0.0316    |
+|    learning_rate        | 8.14e-05   |
+|    loss                 | 1.58       |
+|    n_updates            | 190        |
+|    policy_gradient_loss | -0.0102    |
+|    value_loss           | 34.6       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.28e+03    |
+|    ep_rew_mean          | -57.9       |
+| time/                   |             |
+|    fps                  | 354         |
+|    iterations           | 21          |
+|    time_elapsed         | 81          |
+|    total_timesteps      | 28791       |
+| train/                  |             |
+|    approx_kl            | 0.016173096 |
+|    clip_fraction        | 0.172       |
+|    clip_range           | 0.26        |
+|    entropy_loss         | -7.84       |
+|    explained_variance   | -0.369      |
+|    learning_rate        | 8.14e-05    |
+|    loss                 | 0.632       |
+|    n_updates            | 200         |
+|    policy_gradient_loss | -0.0133     |
+|    value_loss           | 11.3        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.28e+03    |
+|    ep_rew_mean          | -57.9       |
+| time/                   |             |
+|    fps                  | 354         |
+|    iterations           | 22          |
+|    time_elapsed         | 84          |
+|    total_timesteps      | 30162       |
+| train/                  |             |
+|    approx_kl            | 0.018948458 |
+|    clip_fraction        | 0.159       |
+|    clip_range           | 0.26        |
+|    entropy_loss         | -7.89       |
+|    explained_variance   | -0.091      |
+|    learning_rate        | 8.14e-05    |
+|    loss                 | 5.72        |
+|    n_updates            | 210         |
+|    policy_gradient_loss | -0.0145     |
+|    value_loss           | 7.36        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.36e+03    |
+|    ep_rew_mean          | -56.1       |
+| time/                   |             |
+|    fps                  | 354         |
+|    iterations           | 23          |
+|    time_elapsed         | 88          |
+|    total_timesteps      | 31533       |
+| train/                  |             |
+|    approx_kl            | 0.018955443 |
+|    clip_fraction        | 0.145       |
+|    clip_range           | 0.26        |
+|    entropy_loss         | -7.97       |
+|    explained_variance   | -0.269      |
+|    learning_rate        | 8.14e-05    |
+|    loss                 | 1.12        |
+|    n_updates            | 220         |
+|    policy_gradient_loss | -0.0227     |
+|    value_loss           | 7.03        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.36e+03    |
+|    ep_rew_mean          | -56.1       |
+| time/                   |             |
+|    fps                  | 354         |
+|    iterations           | 24          |
+|    time_elapsed         | 92          |
+|    total_timesteps      | 32904       |
+| train/                  |             |
+|    approx_kl            | 0.017530933 |
+|    clip_fraction        | 0.151       |
+|    clip_range           | 0.26        |
+|    entropy_loss         | -8.05       |
+|    explained_variance   | -0.11       |
+|    learning_rate        | 8.14e-05    |
+|    loss                 | 0.575       |
+|    n_updates            | 230         |
+|    policy_gradient_loss | -0.0203     |
+|    value_loss           | 11          |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.36e+03    |
+|    ep_rew_mean          | -49.5       |
+| time/                   |             |
+|    fps                  | 354         |
+|    iterations           | 25          |
+|    time_elapsed         | 96          |
+|    total_timesteps      | 34275       |
+| train/                  |             |
+|    approx_kl            | 0.025710236 |
+|    clip_fraction        | 0.166       |
+|    clip_range           | 0.26        |
+|    entropy_loss         | -8.04       |
+|    explained_variance   | 0.00206     |
+|    learning_rate        | 8.14e-05    |
+|    loss                 | 1.7         |
+|    n_updates            | 240         |
+|    policy_gradient_loss | -0.0246     |
+|    value_loss           | 11.2        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.36e+03    |
+|    ep_rew_mean          | -60.1       |
+| time/                   |             |
+|    fps                  | 354         |
+|    iterations           | 26          |
+|    time_elapsed         | 100         |
+|    total_timesteps      | 35646       |
+| train/                  |             |
+|    approx_kl            | 0.026275737 |
+|    clip_fraction        | 0.21        |
+|    clip_range           | 0.26        |
+|    entropy_loss         | -8.1        |
+|    explained_variance   | -0.414      |
+|    learning_rate        | 8.14e-05    |
+|    loss                 | 0.556       |
+|    n_updates            | 250         |
+|    policy_gradient_loss | -0.0248     |
+|    value_loss           | 5.13        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.36e+03    |
+|    ep_rew_mean          | -60.1       |
+| time/                   |             |
+|    fps                  | 354         |
+|    iterations           | 27          |
+|    time_elapsed         | 104         |
+|    total_timesteps      | 37017       |
+| train/                  |             |
+|    approx_kl            | 0.026121318 |
+|    clip_fraction        | 0.171       |
+|    clip_range           | 0.26        |
+|    entropy_loss         | -8.12       |
+|    explained_variance   | -0.0283     |
+|    learning_rate        | 8.14e-05    |
+|    loss                 | 2.91        |
+|    n_updates            | 260         |
+|    policy_gradient_loss | -0.00495    |
+|    value_loss           | 30          |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.35e+03   |
+|    ep_rew_mean          | -60.1      |
+| time/                   |            |
+|    fps                  | 354        |
+|    iterations           | 28         |
+|    time_elapsed         | 108        |
+|    total_timesteps      | 38388      |
+| train/                  |            |
+|    approx_kl            | 0.02375033 |
+|    clip_fraction        | 0.177      |
+|    clip_range           | 0.26       |
+|    entropy_loss         | -8.01      |
+|    explained_variance   | -0.12      |
+|    learning_rate        | 8.14e-05   |
+|    loss                 | 4.69       |
+|    n_updates            | 270        |
+|    policy_gradient_loss | -0.0175    |
+|    value_loss           | 11.3       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.35e+03    |
+|    ep_rew_mean          | -60.1       |
+| time/                   |             |
+|    fps                  | 354         |
+|    iterations           | 29          |
+|    time_elapsed         | 112         |
+|    total_timesteps      | 39759       |
+| train/                  |             |
+|    approx_kl            | 0.025788946 |
+|    clip_fraction        | 0.206       |
+|    clip_range           | 0.26        |
+|    entropy_loss         | -8.1        |
+|    explained_variance   | 0.122       |
+|    learning_rate        | 8.14e-05    |
+|    loss                 | 1.84        |
+|    n_updates            | 280         |
+|    policy_gradient_loss | -0.0214     |
+|    value_loss           | 13.7        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.35e+03    |
+|    ep_rew_mean          | -57.4       |
+| time/                   |             |
+|    fps                  | 354         |
+|    iterations           | 30          |
+|    time_elapsed         | 115         |
+|    total_timesteps      | 41130       |
+| train/                  |             |
+|    approx_kl            | 0.035159614 |
+|    clip_fraction        | 0.199       |
+|    clip_range           | 0.26        |
+|    entropy_loss         | -8.01       |
+|    explained_variance   | -0.0443     |
+|    learning_rate        | 8.14e-05    |
+|    loss                 | 0.494       |
+|    n_updates            | 290         |
+|    policy_gradient_loss | -0.0228     |
+|    value_loss           | 9.62        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.35e+03   |
+|    ep_rew_mean          | -55.4      |
+| time/                   |            |
+|    fps                  | 354        |
+|    iterations           | 31         |
+|    time_elapsed         | 119        |
+|    total_timesteps      | 42501      |
+| train/                  |            |
+|    approx_kl            | 0.03578476 |
+|    clip_fraction        | 0.198      |
+|    clip_range           | 0.26       |
+|    entropy_loss         | -7.9       |
+|    explained_variance   | 0.0155     |
+|    learning_rate        | 8.14e-05   |
+|    loss                 | 0.58       |
+|    n_updates            | 300        |
+|    policy_gradient_loss | -0.0134    |
+|    value_loss           | 7.94       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.3e+03     |
+|    ep_rew_mean          | -41.5       |
+| time/                   |             |
+|    fps                  | 355         |
+|    iterations           | 32          |
+|    time_elapsed         | 123         |
+|    total_timesteps      | 43872       |
+| train/                  |             |
+|    approx_kl            | 0.027321111 |
+|    clip_fraction        | 0.229       |
+|    clip_range           | 0.26        |
+|    entropy_loss         | -7.84       |
+|    explained_variance   | -0.272      |
+|    learning_rate        | 8.14e-05    |
+|    loss                 | 1.25        |
+|    n_updates            | 310         |
+|    policy_gradient_loss | -0.0237     |
+|    value_loss           | 8.41        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.24e+03    |
+|    ep_rew_mean          | -28.3       |
+| time/                   |             |
+|    fps                  | 355         |
+|    iterations           | 33          |
+|    time_elapsed         | 127         |
+|    total_timesteps      | 45243       |
+| train/                  |             |
+|    approx_kl            | 0.032422796 |
+|    clip_fraction        | 0.232       |
+|    clip_range           | 0.26        |
+|    entropy_loss         | -7.83       |
+|    explained_variance   | 0.101       |
+|    learning_rate        | 8.14e-05    |
+|    loss                 | 6.39        |
+|    n_updates            | 320         |
+|    policy_gradient_loss | -0.00398    |
+|    value_loss           | 30.1        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.2e+03     |
+|    ep_rew_mean          | -20.8       |
+| time/                   |             |
+|    fps                  | 356         |
+|    iterations           | 34          |
+|    time_elapsed         | 130         |
+|    total_timesteps      | 46614       |
+| train/                  |             |
+|    approx_kl            | 0.031185307 |
+|    clip_fraction        | 0.234       |
+|    clip_range           | 0.26        |
+|    entropy_loss         | -7.86       |
+|    explained_variance   | -0.055      |
+|    learning_rate        | 8.14e-05    |
+|    loss                 | 3.37        |
+|    n_updates            | 330         |
+|    policy_gradient_loss | -0.0114     |
+|    value_loss           | 35.8        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.2e+03     |
+|    ep_rew_mean          | -20.8       |
+| time/                   |             |
+|    fps                  | 355         |
+|    iterations           | 35          |
+|    time_elapsed         | 134         |
+|    total_timesteps      | 47985       |
+| train/                  |             |
+|    approx_kl            | 0.030157859 |
+|    clip_fraction        | 0.268       |
+|    clip_range           | 0.26        |
+|    entropy_loss         | -7.92       |
+|    explained_variance   | -0.443      |
+|    learning_rate        | 8.14e-05    |
+|    loss                 | 1.93        |
+|    n_updates            | 340         |
+|    policy_gradient_loss | -0.0119     |
+|    value_loss           | 16.3        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.23e+03    |
+|    ep_rew_mean          | -15.9       |
+| time/                   |             |
+|    fps                  | 355         |
+|    iterations           | 36          |
+|    time_elapsed         | 138         |
+|    total_timesteps      | 49356       |
+| train/                  |             |
+|    approx_kl            | 0.028865792 |
+|    clip_fraction        | 0.237       |
+|    clip_range           | 0.26        |
+|    entropy_loss         | -7.59       |
+|    explained_variance   | 0.14        |
+|    learning_rate        | 8.14e-05    |
+|    loss                 | 0.794       |
+|    n_updates            | 350         |
+|    policy_gradient_loss | -0.0105     |
+|    value_loss           | 9.95        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.23e+03   |
+|    ep_rew_mean          | -15.9      |
+| time/                   |            |
+|    fps                  | 355        |
+|    iterations           | 37         |
+|    time_elapsed         | 142        |
+|    total_timesteps      | 50727      |
+| train/                  |            |
+|    approx_kl            | 0.02842192 |
+|    clip_fraction        | 0.236      |
+|    clip_range           | 0.26       |
+|    entropy_loss         | -7.84      |
+|    explained_variance   | -0.549     |
+|    learning_rate        | 8.14e-05   |
+|    loss                 | 0.895      |
+|    n_updates            | 360        |
+|    policy_gradient_loss | -0.0193    |
+|    value_loss           | 10.1       |
+----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.22e+03   |
+|    ep_rew_mean          | -20.1      |
+| time/                   |            |
+|    fps                  | 355        |
+|    iterations           | 38         |
+|    time_elapsed         | 146        |
+|    total_timesteps      | 52098      |
+| train/                  |            |
+|    approx_kl            | 0.03672131 |
+|    clip_fraction        | 0.237      |
+|    clip_range           | 0.26       |
+|    entropy_loss         | -8.07      |
+|    explained_variance   | 0.0648     |
+|    learning_rate        | 8.14e-05   |
+|    loss                 | 4.86       |
+|    n_updates            | 370        |
+|    policy_gradient_loss | -0.0211    |
+|    value_loss           | 6.36       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.22e+03    |
+|    ep_rew_mean          | -30.7       |
+| time/                   |             |
+|    fps                  | 355         |
+|    iterations           | 39          |
+|    time_elapsed         | 150         |
+|    total_timesteps      | 53469       |
+| train/                  |             |
+|    approx_kl            | 0.035383318 |
+|    clip_fraction        | 0.218       |
+|    clip_range           | 0.26        |
+|    entropy_loss         | -7.98       |
+|    explained_variance   | 0.0373      |
+|    learning_rate        | 8.14e-05    |
+|    loss                 | 0.993       |
+|    n_updates            | 380         |
+|    policy_gradient_loss | -0.0212     |
+|    value_loss           | 13.9        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.19e+03    |
+|    ep_rew_mean          | -23.8       |
+| time/                   |             |
+|    fps                  | 355         |
+|    iterations           | 40          |
+|    time_elapsed         | 154         |
+|    total_timesteps      | 54840       |
+| train/                  |             |
+|    approx_kl            | 0.035663478 |
+|    clip_fraction        | 0.313       |
+|    clip_range           | 0.26        |
+|    entropy_loss         | -7.36       |
+|    explained_variance   | 0.0586      |
+|    learning_rate        | 8.14e-05    |
+|    loss                 | 2.23        |
+|    n_updates            | 390         |
+|    policy_gradient_loss | -0.00997    |
+|    value_loss           | 46.3        |
+-----------------------------------------
+--------------------------------------
+| rollout/                |          |
+|    ep_len_mean          | 2.19e+03 |
+|    ep_rew_mean          | -23.8    |
+| time/                   |          |
+|    fps                  | 354      |
+|    iterations           | 41       |
+|    time_elapsed         | 158      |
+|    total_timesteps      | 56211    |
+| train/                  |          |
+|    approx_kl            | 0.052496 |
+|    clip_fraction        | 0.338    |
+|    clip_range           | 0.26     |
+|    entropy_loss         | -7.89    |
+|    explained_variance   | -0.49    |
+|    learning_rate        | 8.14e-05 |
+|    loss                 | 1.45     |
+|    n_updates            | 400      |
+|    policy_gradient_loss | -0.00552 |
+|    value_loss           | 23.9     |
+--------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.18e+03    |
+|    ep_rew_mean          | -25.5       |
+| time/                   |             |
+|    fps                  | 354         |
+|    iterations           | 42          |
+|    time_elapsed         | 162         |
+|    total_timesteps      | 57582       |
+| train/                  |             |
+|    approx_kl            | 0.032533452 |
+|    clip_fraction        | 0.26        |
+|    clip_range           | 0.26        |
+|    entropy_loss         | -7.35       |
+|    explained_variance   | -0.0372     |
+|    learning_rate        | 8.14e-05    |
+|    loss                 | 1.11        |
+|    n_updates            | 410         |
+|    policy_gradient_loss | -0.0189     |
+|    value_loss           | 9.41        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.18e+03    |
+|    ep_rew_mean          | -25.5       |
+| time/                   |             |
+|    fps                  | 355         |
+|    iterations           | 43          |
+|    time_elapsed         | 166         |
+|    total_timesteps      | 58953       |
+| train/                  |             |
+|    approx_kl            | 0.033652484 |
+|    clip_fraction        | 0.245       |
+|    clip_range           | 0.26        |
+|    entropy_loss         | -7.66       |
+|    explained_variance   | -0.0541     |
+|    learning_rate        | 8.14e-05    |
+|    loss                 | 1.85        |
+|    n_updates            | 420         |
+|    policy_gradient_loss | -0.013      |
+|    value_loss           | 14.4        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.14e+03    |
+|    ep_rew_mean          | -12.6       |
+| time/                   |             |
+|    fps                  | 355         |
+|    iterations           | 44          |
+|    time_elapsed         | 169         |
+|    total_timesteps      | 60324       |
+| train/                  |             |
+|    approx_kl            | 0.029627763 |
+|    clip_fraction        | 0.287       |
+|    clip_range           | 0.26        |
+|    entropy_loss         | -7.78       |
+|    explained_variance   | -0.173      |
+|    learning_rate        | 8.14e-05    |
+|    loss                 | 4.15        |
+|    n_updates            | 430         |
+|    policy_gradient_loss | -0.0145     |
+|    value_loss           | 11.2        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.1e+03     |
+|    ep_rew_mean          | -3.31       |
+| time/                   |             |
+|    fps                  | 355         |
+|    iterations           | 45          |
+|    time_elapsed         | 173         |
+|    total_timesteps      | 61695       |
+| train/                  |             |
+|    approx_kl            | 0.053978715 |
+|    clip_fraction        | 0.301       |
+|    clip_range           | 0.26        |
+|    entropy_loss         | -7.78       |
+|    explained_variance   | 0.0145      |
+|    learning_rate        | 8.14e-05    |
+|    loss                 | 5.25        |
+|    n_updates            | 440         |
+|    policy_gradient_loss | -0.00936    |
+|    value_loss           | 83.2        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.1e+03    |
+|    ep_rew_mean          | -3.31      |
+| time/                   |            |
+|    fps                  | 355        |
+|    iterations           | 46         |
+|    time_elapsed         | 177        |
+|    total_timesteps      | 63066      |
+| train/                  |            |
+|    approx_kl            | 0.04385848 |
+|    clip_fraction        | 0.309      |
+|    clip_range           | 0.26       |
+|    entropy_loss         | -7.72      |
+|    explained_variance   | -0.142     |
+|    learning_rate        | 8.14e-05   |
+|    loss                 | 1.44       |
+|    n_updates            | 450        |
+|    policy_gradient_loss | -0.00675   |
+|    value_loss           | 32.4       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.12e+03    |
+|    ep_rew_mean          | -1.23       |
+| time/                   |             |
+|    fps                  | 356         |
+|    iterations           | 47          |
+|    time_elapsed         | 180         |
+|    total_timesteps      | 64437       |
+| train/                  |             |
+|    approx_kl            | 0.034602597 |
+|    clip_fraction        | 0.301       |
+|    clip_range           | 0.26        |
+|    entropy_loss         | -7.3        |
+|    explained_variance   | -0.657      |
+|    learning_rate        | 8.14e-05    |
+|    loss                 | 1.27        |
+|    n_updates            | 460         |
+|    policy_gradient_loss | -0.0142     |
+|    value_loss           | 9.68        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.12e+03    |
+|    ep_rew_mean          | -1.23       |
+| time/                   |             |
+|    fps                  | 356         |
+|    iterations           | 48          |
+|    time_elapsed         | 184         |
+|    total_timesteps      | 65808       |
+| train/                  |             |
+|    approx_kl            | 0.034854636 |
+|    clip_fraction        | 0.264       |
+|    clip_range           | 0.26        |
+|    entropy_loss         | -7.11       |
+|    explained_variance   | 0.0247      |
+|    learning_rate        | 8.14e-05    |
+|    loss                 | 11.7        |
+|    n_updates            | 470         |
+|    policy_gradient_loss | -0.0175     |
+|    value_loss           | 20.1        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.13e+03    |
+|    ep_rew_mean          | -1.26       |
+| time/                   |             |
+|    fps                  | 356         |
+|    iterations           | 49          |
+|    time_elapsed         | 188         |
+|    total_timesteps      | 67179       |
+| train/                  |             |
+|    approx_kl            | 0.050826874 |
+|    clip_fraction        | 0.328       |
+|    clip_range           | 0.26        |
+|    entropy_loss         | -7.92       |
+|    explained_variance   | -0.296      |
+|    learning_rate        | 8.14e-05    |
+|    loss                 | 5.44        |
+|    n_updates            | 480         |
+|    policy_gradient_loss | -0.0181     |
+|    value_loss           | 13          |
+-----------------------------------------
+---------------------------------------
+| rollout/                |           |
+|    ep_len_mean          | 2.12e+03  |
+|    ep_rew_mean          | -2.72     |
+| time/                   |           |
+|    fps                  | 356       |
+|    iterations           | 50        |
+|    time_elapsed         | 192       |
+|    total_timesteps      | 68550     |
+| train/                  |           |
+|    approx_kl            | 0.0387544 |
+|    clip_fraction        | 0.284     |
+|    clip_range           | 0.26      |
+|    entropy_loss         | -7.56     |
+|    explained_variance   | -0.016    |
+|    learning_rate        | 8.14e-05  |
+|    loss                 | 1.01      |
+|    n_updates            | 490       |
+|    policy_gradient_loss | -0.0124   |
+|    value_loss           | 10.5      |
+---------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.09e+03    |
+|    ep_rew_mean          | 2.79        |
+| time/                   |             |
+|    fps                  | 356         |
+|    iterations           | 51          |
+|    time_elapsed         | 196         |
+|    total_timesteps      | 69921       |
+| train/                  |             |
+|    approx_kl            | 0.033755988 |
+|    clip_fraction        | 0.261       |
+|    clip_range           | 0.26        |
+|    entropy_loss         | -6.82       |
+|    explained_variance   | 0.0437      |
+|    learning_rate        | 8.14e-05    |
+|    loss                 | 1.73        |
+|    n_updates            | 500         |
+|    policy_gradient_loss | -0.019      |
+|    value_loss           | 10.5        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.08e+03    |
+|    ep_rew_mean          | 6.5         |
+| time/                   |             |
+|    fps                  | 356         |
+|    iterations           | 52          |
+|    time_elapsed         | 200         |
+|    total_timesteps      | 71292       |
+| train/                  |             |
+|    approx_kl            | 0.028060019 |
+|    clip_fraction        | 0.276       |
+|    clip_range           | 0.26        |
+|    entropy_loss         | -7.04       |
+|    explained_variance   | 0.0647      |
+|    learning_rate        | 8.14e-05    |
+|    loss                 | 1.4         |
+|    n_updates            | 510         |
+|    policy_gradient_loss | -0.00959    |
+|    value_loss           | 33.6        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.08e+03    |
+|    ep_rew_mean          | 6.5         |
+| time/                   |             |
+|    fps                  | 355         |
+|    iterations           | 53          |
+|    time_elapsed         | 204         |
+|    total_timesteps      | 72663       |
+| train/                  |             |
+|    approx_kl            | 0.029590033 |
+|    clip_fraction        | 0.232       |
+|    clip_range           | 0.26        |
+|    entropy_loss         | -6.96       |
+|    explained_variance   | 0.174       |
+|    learning_rate        | 8.14e-05    |
+|    loss                 | 7.49        |
+|    n_updates            | 520         |
+|    policy_gradient_loss | -0.00783    |
+|    value_loss           | 14.9        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.08e+03    |
+|    ep_rew_mean          | 6.5         |
+| time/                   |             |
+|    fps                  | 355         |
+|    iterations           | 54          |
+|    time_elapsed         | 208         |
+|    total_timesteps      | 74034       |
+| train/                  |             |
+|    approx_kl            | 0.044851318 |
+|    clip_fraction        | 0.327       |
+|    clip_range           | 0.26        |
+|    entropy_loss         | -7.81       |
+|    explained_variance   | 0.106       |
+|    learning_rate        | 8.14e-05    |
+|    loss                 | 1.97        |
+|    n_updates            | 530         |
+|    policy_gradient_loss | -0.00695    |
+|    value_loss           | 14.4        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.12e+03    |
+|    ep_rew_mean          | 6           |
+| time/                   |             |
+|    fps                  | 355         |
+|    iterations           | 55          |
+|    time_elapsed         | 212         |
+|    total_timesteps      | 75405       |
+| train/                  |             |
+|    approx_kl            | 0.042934623 |
+|    clip_fraction        | 0.296       |
+|    clip_range           | 0.26        |
+|    entropy_loss         | -7.69       |
+|    explained_variance   | -0.881      |
+|    learning_rate        | 8.14e-05    |
+|    loss                 | 0.152       |
+|    n_updates            | 540         |
+|    policy_gradient_loss | -0.023      |
+|    value_loss           | 3.2         |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.12e+03    |
+|    ep_rew_mean          | 6           |
+| time/                   |             |
+|    fps                  | 355         |
+|    iterations           | 56          |
+|    time_elapsed         | 216         |
+|    total_timesteps      | 76776       |
+| train/                  |             |
+|    approx_kl            | 0.031715214 |
+|    clip_fraction        | 0.266       |
+|    clip_range           | 0.26        |
+|    entropy_loss         | -7.55       |
+|    explained_variance   | 0.163       |
+|    learning_rate        | 8.14e-05    |
+|    loss                 | 0.161       |
+|    n_updates            | 550         |
+|    policy_gradient_loss | -0.0273     |
+|    value_loss           | 4.01        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.14e+03    |
+|    ep_rew_mean          | 5.56        |
+| time/                   |             |
+|    fps                  | 355         |
+|    iterations           | 57          |
+|    time_elapsed         | 219         |
+|    total_timesteps      | 78147       |
+| train/                  |             |
+|    approx_kl            | 0.043580677 |
+|    clip_fraction        | 0.346       |
+|    clip_range           | 0.26        |
+|    entropy_loss         | -7.62       |
+|    explained_variance   | -0.127      |
+|    learning_rate        | 8.14e-05    |
+|    loss                 | 1.73        |
+|    n_updates            | 560         |
+|    policy_gradient_loss | -0.0176     |
+|    value_loss           | 11          |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.13e+03    |
+|    ep_rew_mean          | 11.6        |
+| time/                   |             |
+|    fps                  | 355         |
+|    iterations           | 58          |
+|    time_elapsed         | 223         |
+|    total_timesteps      | 79518       |
+| train/                  |             |
+|    approx_kl            | 0.038065173 |
+|    clip_fraction        | 0.272       |
+|    clip_range           | 0.26        |
+|    entropy_loss         | -7.52       |
+|    explained_variance   | 0.234       |
+|    learning_rate        | 8.14e-05    |
+|    loss                 | 4.84        |
+|    n_updates            | 570         |
+|    policy_gradient_loss | -0.018      |
+|    value_loss           | 8.44        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.13e+03    |
+|    ep_rew_mean          | 11.6        |
+| time/                   |             |
+|    fps                  | 355         |
+|    iterations           | 59          |
+|    time_elapsed         | 227         |
+|    total_timesteps      | 80889       |
+| train/                  |             |
+|    approx_kl            | 0.049862172 |
+|    clip_fraction        | 0.31        |
+|    clip_range           | 0.26        |
+|    entropy_loss         | -7.6        |
+|    explained_variance   | 0.0943      |
+|    learning_rate        | 8.14e-05    |
+|    loss                 | 0.524       |
+|    n_updates            | 580         |
+|    policy_gradient_loss | -0.015      |
+|    value_loss           | 14.5        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.13e+03    |
+|    ep_rew_mean          | 11.2        |
+| time/                   |             |
+|    fps                  | 355         |
+|    iterations           | 60          |
+|    time_elapsed         | 231         |
+|    total_timesteps      | 82260       |
+| train/                  |             |
+|    approx_kl            | 0.040924706 |
+|    clip_fraction        | 0.313       |
+|    clip_range           | 0.26        |
+|    entropy_loss         | -7.74       |
+|    explained_variance   | -0.255      |
+|    learning_rate        | 8.14e-05    |
+|    loss                 | 0.218       |
+|    n_updates            | 590         |
+|    policy_gradient_loss | -0.0118     |
+|    value_loss           | 10.8        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.14e+03    |
+|    ep_rew_mean          | 5.38        |
+| time/                   |             |
+|    fps                  | 355         |
+|    iterations           | 61          |
+|    time_elapsed         | 234         |
+|    total_timesteps      | 83631       |
+| train/                  |             |
+|    approx_kl            | 0.031327777 |
+|    clip_fraction        | 0.273       |
+|    clip_range           | 0.26        |
+|    entropy_loss         | -7.33       |
+|    explained_variance   | -0.235      |
+|    learning_rate        | 8.14e-05    |
+|    loss                 | 1.87        |
+|    n_updates            | 600         |
+|    policy_gradient_loss | -0.00756    |
+|    value_loss           | 9.36        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.11e+03    |
+|    ep_rew_mean          | 9.3         |
+| time/                   |             |
+|    fps                  | 356         |
+|    iterations           | 62          |
+|    time_elapsed         | 238         |
+|    total_timesteps      | 85002       |
+| train/                  |             |
+|    approx_kl            | 0.077066906 |
+|    clip_fraction        | 0.375       |
+|    clip_range           | 0.26        |
+|    entropy_loss         | -7.63       |
+|    explained_variance   | -6.45e-05   |
+|    learning_rate        | 8.14e-05    |
+|    loss                 | 4.58        |
+|    n_updates            | 610         |
+|    policy_gradient_loss | -0.00174    |
+|    value_loss           | 24.8        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.08e+03    |
+|    ep_rew_mean          | 12.6        |
+| time/                   |             |
+|    fps                  | 356         |
+|    iterations           | 63          |
+|    time_elapsed         | 242         |
+|    total_timesteps      | 86373       |
+| train/                  |             |
+|    approx_kl            | 0.047113765 |
+|    clip_fraction        | 0.325       |
+|    clip_range           | 0.26        |
+|    entropy_loss         | -7.27       |
+|    explained_variance   | 0.345       |
+|    learning_rate        | 8.14e-05    |
+|    loss                 | 2.71        |
+|    n_updates            | 620         |
+|    policy_gradient_loss | -0.0105     |
+|    value_loss           | 22          |
+-----------------------------------------
+--------------------------------------
+| rollout/                |          |
+|    ep_len_mean          | 2.07e+03 |
+|    ep_rew_mean          | 7.55     |
+| time/                   |          |
+|    fps                  | 356      |
+|    iterations           | 64       |
+|    time_elapsed         | 246      |
+|    total_timesteps      | 87744    |
+| train/                  |          |
+|    approx_kl            | 0.051026 |
+|    clip_fraction        | 0.328    |
+|    clip_range           | 0.26     |
+|    entropy_loss         | -7.45    |
+|    explained_variance   | -0.0504  |
+|    learning_rate        | 8.14e-05 |
+|    loss                 | 8.05     |
+|    n_updates            | 630      |
+|    policy_gradient_loss | -0.0202  |
+|    value_loss           | 21.4     |
+--------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.07e+03   |
+|    ep_rew_mean          | 7.55       |
+| time/                   |            |
+|    fps                  | 356        |
+|    iterations           | 65         |
+|    time_elapsed         | 249        |
+|    total_timesteps      | 89115      |
+| train/                  |            |
+|    approx_kl            | 0.08706577 |
+|    clip_fraction        | 0.408      |
+|    clip_range           | 0.26       |
+|    entropy_loss         | -7.05      |
+|    explained_variance   | -0.281     |
+|    learning_rate        | 8.14e-05   |
+|    loss                 | 6.06       |
+|    n_updates            | 640        |
+|    policy_gradient_loss | -0.0151    |
+|    value_loss           | 24.7       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.07e+03    |
+|    ep_rew_mean          | 7.37        |
+| time/                   |             |
+|    fps                  | 356         |
+|    iterations           | 66          |
+|    time_elapsed         | 253         |
+|    total_timesteps      | 90486       |
+| train/                  |             |
+|    approx_kl            | 0.060183015 |
+|    clip_fraction        | 0.353       |
+|    clip_range           | 0.26        |
+|    entropy_loss         | -7.27       |
+|    explained_variance   | -0.235      |
+|    learning_rate        | 8.14e-05    |
+|    loss                 | 2.65        |
+|    n_updates            | 650         |
+|    policy_gradient_loss | -0.00283    |
+|    value_loss           | 11.1        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.07e+03   |
+|    ep_rew_mean          | 6.11       |
+| time/                   |            |
+|    fps                  | 356        |
+|    iterations           | 67         |
+|    time_elapsed         | 257        |
+|    total_timesteps      | 91857      |
+| train/                  |            |
+|    approx_kl            | 0.03188397 |
+|    clip_fraction        | 0.266      |
+|    clip_range           | 0.26       |
+|    entropy_loss         | -7.28      |
+|    explained_variance   | 0.319      |
+|    learning_rate        | 8.14e-05   |
+|    loss                 | 0.867      |
+|    n_updates            | 660        |
+|    policy_gradient_loss | -0.022     |
+|    value_loss           | 7.59       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.07e+03    |
+|    ep_rew_mean          | 6.11        |
+| time/                   |             |
+|    fps                  | 356         |
+|    iterations           | 68          |
+|    time_elapsed         | 261         |
+|    total_timesteps      | 93228       |
+| train/                  |             |
+|    approx_kl            | 0.049166773 |
+|    clip_fraction        | 0.343       |
+|    clip_range           | 0.26        |
+|    entropy_loss         | -7.11       |
+|    explained_variance   | -0.196      |
+|    learning_rate        | 8.14e-05    |
+|    loss                 | 0.647       |
+|    n_updates            | 670         |
+|    policy_gradient_loss | -0.0119     |
+|    value_loss           | 11.7        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.08e+03   |
+|    ep_rew_mean          | 2.13       |
+| time/                   |            |
+|    fps                  | 355        |
+|    iterations           | 69         |
+|    time_elapsed         | 265        |
+|    total_timesteps      | 94599      |
+| train/                  |            |
+|    approx_kl            | 0.03328535 |
+|    clip_fraction        | 0.278      |
+|    clip_range           | 0.26       |
+|    entropy_loss         | -6.78      |
+|    explained_variance   | -0.396     |
+|    learning_rate        | 8.14e-05   |
+|    loss                 | 6.24       |
+|    n_updates            | 680        |
+|    policy_gradient_loss | -0.0167    |
+|    value_loss           | 7.49       |
+----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.08e+03   |
+|    ep_rew_mean          | 2.13       |
+| time/                   |            |
+|    fps                  | 355        |
+|    iterations           | 70         |
+|    time_elapsed         | 269        |
+|    total_timesteps      | 95970      |
+| train/                  |            |
+|    approx_kl            | 0.08318052 |
+|    clip_fraction        | 0.38       |
+|    clip_range           | 0.26       |
+|    entropy_loss         | -7.26      |
+|    explained_variance   | -0.267     |
+|    learning_rate        | 8.14e-05   |
+|    loss                 | 4.04       |
+|    n_updates            | 690        |
+|    policy_gradient_loss | 0.00117    |
+|    value_loss           | 15         |
+----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.09e+03   |
+|    ep_rew_mean          | 1.28       |
+| time/                   |            |
+|    fps                  | 355        |
+|    iterations           | 71         |
+|    time_elapsed         | 274        |
+|    total_timesteps      | 97341      |
+| train/                  |            |
+|    approx_kl            | 0.05132381 |
+|    clip_fraction        | 0.372      |
+|    clip_range           | 0.26       |
+|    entropy_loss         | -7.08      |
+|    explained_variance   | -0.574     |
+|    learning_rate        | 8.14e-05   |
+|    loss                 | 14.8       |
+|    n_updates            | 700        |
+|    policy_gradient_loss | 0.0229     |
+|    value_loss           | 15.2       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.09e+03    |
+|    ep_rew_mean          | 0.298       |
+| time/                   |             |
+|    fps                  | 354         |
+|    iterations           | 72          |
+|    time_elapsed         | 278         |
+|    total_timesteps      | 98712       |
+| train/                  |             |
+|    approx_kl            | 0.049137857 |
+|    clip_fraction        | 0.354       |
+|    clip_range           | 0.26        |
+|    entropy_loss         | -6.89       |
+|    explained_variance   | 0.0478      |
+|    learning_rate        | 8.14e-05    |
+|    loss                 | 1.22        |
+|    n_updates            | 710         |
+|    policy_gradient_loss | -0.0197     |
+|    value_loss           | 7.75        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.09e+03   |
+|    ep_rew_mean          | 0.298      |
+| time/                   |            |
+|    fps                  | 354        |
+|    iterations           | 73         |
+|    time_elapsed         | 282        |
+|    total_timesteps      | 100083     |
+| train/                  |            |
+|    approx_kl            | 0.04495397 |
+|    clip_fraction        | 0.278      |
+|    clip_range           | 0.26       |
+|    entropy_loss         | -6.95      |
+|    explained_variance   | 0.0999     |
+|    learning_rate        | 8.14e-05   |
+|    loss                 | 0.888      |
+|    n_updates            | 720        |
+|    policy_gradient_loss | -0.0123    |
+|    value_loss           | 11.4       |
+----------------------------------------
+[I 2023-03-30 21:45:23,843] Trial 0 finished with value: -347.0 and parameters: {'n_steps': 1371, 'gamma': 0.9373200020810921, 'learning_rate': 8.141042840141496e-05, 'clip_range': 0.2600128459343352, 'gae_lambda': 0.9415709130298376}. Best is trial 0 with value: -347.0.
+Using cuda device
+Wrapping the env in a DummyVecEnv.
+Wrapping the env in a VecTransposeImage.
+C:\ProgramData\Anaconda3\envs\StreetFighterAI\lib\site-packages\stable_baselines3\ppo\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 3294`, after every 51 untruncated mini-batches, there will be a truncated mini-batch of size 30
+We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
+Info: (n_steps=3294 and n_envs=1)
+  warnings.warn(
+Logging to logs/PPO_19
+---------------------------------
+| rollout/           |          |
+|    ep_len_mean     | 2.44e+03 |
+|    ep_rew_mean     | -47      |
+| time/              |          |
+|    fps             | 550      |
+|    iterations      | 1        |
+|    time_elapsed    | 5        |
+|    total_timesteps | 3294     |
+---------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2e+03       |
+|    ep_rew_mean          | -140        |
+| time/                   |             |
+|    fps                  | 417         |
+|    iterations           | 2           |
+|    time_elapsed         | 15          |
+|    total_timesteps      | 6588        |
+| train/                  |             |
+|    approx_kl            | 0.008250391 |
+|    clip_fraction        | 0.13        |
+|    clip_range           | 0.193       |
+|    entropy_loss         | -8.31       |
+|    explained_variance   | -0.00164    |
+|    learning_rate        | 5.11e-05    |
+|    loss                 | 0.766       |
+|    n_updates            | 10          |
+|    policy_gradient_loss | -0.00627    |
+|    value_loss           | 8.03        |
+-----------------------------------------
+------------------------------------------
+| rollout/                |              |
+|    ep_len_mean          | 2.31e+03     |
+|    ep_rew_mean          | -95.8        |
+| time/                   |              |
+|    fps                  | 392          |
+|    iterations           | 3            |
+|    time_elapsed         | 25           |
+|    total_timesteps      | 9882         |
+| train/                  |              |
+|    approx_kl            | 0.0076712077 |
+|    clip_fraction        | 0.113        |
+|    clip_range           | 0.193        |
+|    entropy_loss         | -8.3         |
+|    explained_variance   | -0.0133      |
+|    learning_rate        | 5.11e-05     |
+|    loss                 | 8.2          |
+|    n_updates            | 20           |
+|    policy_gradient_loss | -0.00919     |
+|    value_loss           | 22.1         |
+------------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.32e+03    |
+|    ep_rew_mean          | -75.8       |
+| time/                   |             |
+|    fps                  | 379         |
+|    iterations           | 4           |
+|    time_elapsed         | 34          |
+|    total_timesteps      | 13176       |
+| train/                  |             |
+|    approx_kl            | 0.009143725 |
+|    clip_fraction        | 0.1         |
+|    clip_range           | 0.193       |
+|    entropy_loss         | -8.29       |
+|    explained_variance   | -0.0603     |
+|    learning_rate        | 5.11e-05    |
+|    loss                 | 2.06        |
+|    n_updates            | 30          |
+|    policy_gradient_loss | -0.0105     |
+|    value_loss           | 7.27        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.32e+03    |
+|    ep_rew_mean          | -68.2       |
+| time/                   |             |
+|    fps                  | 371         |
+|    iterations           | 5           |
+|    time_elapsed         | 44          |
+|    total_timesteps      | 16470       |
+| train/                  |             |
+|    approx_kl            | 0.009427849 |
+|    clip_fraction        | 0.108       |
+|    clip_range           | 0.193       |
+|    entropy_loss         | -8.28       |
+|    explained_variance   | 0.00375     |
+|    learning_rate        | 5.11e-05    |
+|    loss                 | 1.5         |
+|    n_updates            | 40          |
+|    policy_gradient_loss | -0.0116     |
+|    value_loss           | 5.16        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.4e+03     |
+|    ep_rew_mean          | -60.1       |
+| time/                   |             |
+|    fps                  | 367         |
+|    iterations           | 6           |
+|    time_elapsed         | 53          |
+|    total_timesteps      | 19764       |
+| train/                  |             |
+|    approx_kl            | 0.008516062 |
+|    clip_fraction        | 0.102       |
+|    clip_range           | 0.193       |
+|    entropy_loss         | -8.27       |
+|    explained_variance   | 0.0201      |
+|    learning_rate        | 5.11e-05    |
+|    loss                 | 1.73        |
+|    n_updates            | 50          |
+|    policy_gradient_loss | -0.0116     |
+|    value_loss           | 7.34        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.44e+03    |
+|    ep_rew_mean          | -73.8       |
+| time/                   |             |
+|    fps                  | 364         |
+|    iterations           | 7           |
+|    time_elapsed         | 63          |
+|    total_timesteps      | 23058       |
+| train/                  |             |
+|    approx_kl            | 0.009343307 |
+|    clip_fraction        | 0.105       |
+|    clip_range           | 0.193       |
+|    entropy_loss         | -8.27       |
+|    explained_variance   | 0.0736      |
+|    learning_rate        | 5.11e-05    |
+|    loss                 | 2.54        |
+|    n_updates            | 60          |
+|    policy_gradient_loss | -0.0116     |
+|    value_loss           | 5.97        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.45e+03    |
+|    ep_rew_mean          | -94.5       |
+| time/                   |             |
+|    fps                  | 359         |
+|    iterations           | 8           |
+|    time_elapsed         | 73          |
+|    total_timesteps      | 26352       |
+| train/                  |             |
+|    approx_kl            | 0.008755345 |
+|    clip_fraction        | 0.103       |
+|    clip_range           | 0.193       |
+|    entropy_loss         | -8.26       |
+|    explained_variance   | -0.0946     |
+|    learning_rate        | 5.11e-05    |
+|    loss                 | 13.2        |
+|    n_updates            | 70          |
+|    policy_gradient_loss | -0.0106     |
+|    value_loss           | 18.1        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.46e+03    |
+|    ep_rew_mean          | -100        |
+| time/                   |             |
+|    fps                  | 357         |
+|    iterations           | 9           |
+|    time_elapsed         | 82          |
+|    total_timesteps      | 29646       |
+| train/                  |             |
+|    approx_kl            | 0.007682183 |
+|    clip_fraction        | 0.0851      |
+|    clip_range           | 0.193       |
+|    entropy_loss         | -8.25       |
+|    explained_variance   | 0.0101      |
+|    learning_rate        | 5.11e-05    |
+|    loss                 | 0.305       |
+|    n_updates            | 80          |
+|    policy_gradient_loss | -0.0103     |
+|    value_loss           | 18.6        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.34e+03    |
+|    ep_rew_mean          | -116        |
+| time/                   |             |
+|    fps                  | 355         |
+|    iterations           | 10          |
+|    time_elapsed         | 92          |
+|    total_timesteps      | 32940       |
+| train/                  |             |
+|    approx_kl            | 0.010493592 |
+|    clip_fraction        | 0.12        |
+|    clip_range           | 0.193       |
+|    entropy_loss         | -8.25       |
+|    explained_variance   | -0.0487     |
+|    learning_rate        | 5.11e-05    |
+|    loss                 | 0.736       |
+|    n_updates            | 90          |
+|    policy_gradient_loss | -0.0141     |
+|    value_loss           | 10.8        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.34e+03    |
+|    ep_rew_mean          | -116        |
+| time/                   |             |
+|    fps                  | 355         |
+|    iterations           | 11          |
+|    time_elapsed         | 102         |
+|    total_timesteps      | 36234       |
+| train/                  |             |
+|    approx_kl            | 0.014990667 |
+|    clip_fraction        | 0.177       |
+|    clip_range           | 0.193       |
+|    entropy_loss         | -8.24       |
+|    explained_variance   | -0.0262     |
+|    learning_rate        | 5.11e-05    |
+|    loss                 | 6.56        |
+|    n_updates            | 100         |
+|    policy_gradient_loss | -0.00923    |
+|    value_loss           | 17.9        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.42e+03    |
+|    ep_rew_mean          | -95.2       |
+| time/                   |             |
+|    fps                  | 355         |
+|    iterations           | 12          |
+|    time_elapsed         | 111         |
+|    total_timesteps      | 39528       |
+| train/                  |             |
+|    approx_kl            | 0.012156485 |
+|    clip_fraction        | 0.158       |
+|    clip_range           | 0.193       |
+|    entropy_loss         | -8.22       |
+|    explained_variance   | 0.0904      |
+|    learning_rate        | 5.11e-05    |
+|    loss                 | 11.6        |
+|    n_updates            | 110         |
+|    policy_gradient_loss | -0.0144     |
+|    value_loss           | 4.6         |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.43e+03    |
+|    ep_rew_mean          | -96.6       |
+| time/                   |             |
+|    fps                  | 354         |
+|    iterations           | 13          |
+|    time_elapsed         | 120         |
+|    total_timesteps      | 42822       |
+| train/                  |             |
+|    approx_kl            | 0.009144909 |
+|    clip_fraction        | 0.116       |
+|    clip_range           | 0.193       |
+|    entropy_loss         | -8.21       |
+|    explained_variance   | 0.0377      |
+|    learning_rate        | 5.11e-05    |
+|    loss                 | 0.243       |
+|    n_updates            | 120         |
+|    policy_gradient_loss | -0.0126     |
+|    value_loss           | 7.32        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.47e+03    |
+|    ep_rew_mean          | -93.6       |
+| time/                   |             |
+|    fps                  | 353         |
+|    iterations           | 14          |
+|    time_elapsed         | 130         |
+|    total_timesteps      | 46116       |
+| train/                  |             |
+|    approx_kl            | 0.010074705 |
+|    clip_fraction        | 0.143       |
+|    clip_range           | 0.193       |
+|    entropy_loss         | -8.21       |
+|    explained_variance   | 0.0431      |
+|    learning_rate        | 5.11e-05    |
+|    loss                 | 0.422       |
+|    n_updates            | 130         |
+|    policy_gradient_loss | -0.0116     |
+|    value_loss           | 8.13        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.46e+03   |
+|    ep_rew_mean          | -89.6      |
+| time/                   |            |
+|    fps                  | 352        |
+|    iterations           | 15         |
+|    time_elapsed         | 140        |
+|    total_timesteps      | 49410      |
+| train/                  |            |
+|    approx_kl            | 0.00962226 |
+|    clip_fraction        | 0.141      |
+|    clip_range           | 0.193      |
+|    entropy_loss         | -8.19      |
+|    explained_variance   | -0.127     |
+|    learning_rate        | 5.11e-05   |
+|    loss                 | 4.31       |
+|    n_updates            | 140        |
+|    policy_gradient_loss | -0.0141    |
+|    value_loss           | 7.67       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.51e+03    |
+|    ep_rew_mean          | -95.2       |
+| time/                   |             |
+|    fps                  | 352         |
+|    iterations           | 16          |
+|    time_elapsed         | 149         |
+|    total_timesteps      | 52704       |
+| train/                  |             |
+|    approx_kl            | 0.010282748 |
+|    clip_fraction        | 0.149       |
+|    clip_range           | 0.193       |
+|    entropy_loss         | -8.18       |
+|    explained_variance   | 0.0186      |
+|    learning_rate        | 5.11e-05    |
+|    loss                 | 0.291       |
+|    n_updates            | 150         |
+|    policy_gradient_loss | -0.0157     |
+|    value_loss           | 3.42        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.49e+03    |
+|    ep_rew_mean          | -86.3       |
+| time/                   |             |
+|    fps                  | 352         |
+|    iterations           | 17          |
+|    time_elapsed         | 158         |
+|    total_timesteps      | 55998       |
+| train/                  |             |
+|    approx_kl            | 0.011693283 |
+|    clip_fraction        | 0.129       |
+|    clip_range           | 0.193       |
+|    entropy_loss         | -8.17       |
+|    explained_variance   | 0.038       |
+|    learning_rate        | 5.11e-05    |
+|    loss                 | 2.92        |
+|    n_updates            | 160         |
+|    policy_gradient_loss | -0.0117     |
+|    value_loss           | 12.6        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.47e+03    |
+|    ep_rew_mean          | -91.1       |
+| time/                   |             |
+|    fps                  | 353         |
+|    iterations           | 18          |
+|    time_elapsed         | 167         |
+|    total_timesteps      | 59292       |
+| train/                  |             |
+|    approx_kl            | 0.011810733 |
+|    clip_fraction        | 0.155       |
+|    clip_range           | 0.193       |
+|    entropy_loss         | -8.16       |
+|    explained_variance   | 0.0673      |
+|    learning_rate        | 5.11e-05    |
+|    loss                 | 0.93        |
+|    n_updates            | 170         |
+|    policy_gradient_loss | -0.0134     |
+|    value_loss           | 9.58        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.5e+03     |
+|    ep_rew_mean          | -88.9       |
+| time/                   |             |
+|    fps                  | 353         |
+|    iterations           | 19          |
+|    time_elapsed         | 177         |
+|    total_timesteps      | 62586       |
+| train/                  |             |
+|    approx_kl            | 0.012234138 |
+|    clip_fraction        | 0.167       |
+|    clip_range           | 0.193       |
+|    entropy_loss         | -8.17       |
+|    explained_variance   | 0.033       |
+|    learning_rate        | 5.11e-05    |
+|    loss                 | 0.243       |
+|    n_updates            | 180         |
+|    policy_gradient_loss | -0.0139     |
+|    value_loss           | 8.98        |
+-----------------------------------------
+------------------------------------------
+| rollout/                |              |
+|    ep_len_mean          | 2.5e+03      |
+|    ep_rew_mean          | -88.9        |
+| time/                   |              |
+|    fps                  | 352          |
+|    iterations           | 20           |
+|    time_elapsed         | 186          |
+|    total_timesteps      | 65880        |
+| train/                  |              |
+|    approx_kl            | 0.0135463225 |
+|    clip_fraction        | 0.167        |
+|    clip_range           | 0.193        |
+|    entropy_loss         | -8.15        |
+|    explained_variance   | 0.104        |
+|    learning_rate        | 5.11e-05     |
+|    loss                 | 1.94         |
+|    n_updates            | 190          |
+|    policy_gradient_loss | -0.0105      |
+|    value_loss           | 8.12         |
+------------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.59e+03    |
+|    ep_rew_mean          | 114         |
+| time/                   |             |
+|    fps                  | 351         |
+|    iterations           | 21          |
+|    time_elapsed         | 196         |
+|    total_timesteps      | 69174       |
+| train/                  |             |
+|    approx_kl            | 0.012275431 |
+|    clip_fraction        | 0.164       |
+|    clip_range           | 0.193       |
+|    entropy_loss         | -8.14       |
+|    explained_variance   | -0.00593    |
+|    learning_rate        | 5.11e-05    |
+|    loss                 | 15.6        |
+|    n_updates            | 200         |
+|    policy_gradient_loss | -0.00313    |
+|    value_loss           | 1.08e+04    |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.54e+03    |
+|    ep_rew_mean          | 104         |
+| time/                   |             |
+|    fps                  | 351         |
+|    iterations           | 22          |
+|    time_elapsed         | 206         |
+|    total_timesteps      | 72468       |
+| train/                  |             |
+|    approx_kl            | 0.014285704 |
+|    clip_fraction        | 0.189       |
+|    clip_range           | 0.193       |
+|    entropy_loss         | -8.14       |
+|    explained_variance   | 0.172       |
+|    learning_rate        | 5.11e-05    |
+|    loss                 | 0.239       |
+|    n_updates            | 210         |
+|    policy_gradient_loss | -0.0122     |
+|    value_loss           | 8.08        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.53e+03   |
+|    ep_rew_mean          | 91.4       |
+| time/                   |            |
+|    fps                  | 351        |
+|    iterations           | 23         |
+|    time_elapsed         | 215        |
+|    total_timesteps      | 75762      |
+| train/                  |            |
+|    approx_kl            | 0.01594875 |
+|    clip_fraction        | 0.212      |
+|    clip_range           | 0.193      |
+|    entropy_loss         | -8.11      |
+|    explained_variance   | -0.0207    |
+|    learning_rate        | 5.11e-05   |
+|    loss                 | 19.3       |
+|    n_updates            | 220        |
+|    policy_gradient_loss | -0.0113    |
+|    value_loss           | 12.8       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.52e+03    |
+|    ep_rew_mean          | 98.8        |
+| time/                   |             |
+|    fps                  | 351         |
+|    iterations           | 24          |
+|    time_elapsed         | 225         |
+|    total_timesteps      | 79056       |
+| train/                  |             |
+|    approx_kl            | 0.016312802 |
+|    clip_fraction        | 0.236       |
+|    clip_range           | 0.193       |
+|    entropy_loss         | -8.03       |
+|    explained_variance   | 0.118       |
+|    learning_rate        | 5.11e-05    |
+|    loss                 | 0.819       |
+|    n_updates            | 230         |
+|    policy_gradient_loss | -0.0082     |
+|    value_loss           | 15.8        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.49e+03    |
+|    ep_rew_mean          | 77.5        |
+| time/                   |             |
+|    fps                  | 351         |
+|    iterations           | 25          |
+|    time_elapsed         | 234         |
+|    total_timesteps      | 82350       |
+| train/                  |             |
+|    approx_kl            | 0.012337481 |
+|    clip_fraction        | 0.163       |
+|    clip_range           | 0.193       |
+|    entropy_loss         | -8.13       |
+|    explained_variance   | 0.0669      |
+|    learning_rate        | 5.11e-05    |
+|    loss                 | 16.1        |
+|    n_updates            | 240         |
+|    policy_gradient_loss | -0.00899    |
+|    value_loss           | 16.1        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.46e+03    |
+|    ep_rew_mean          | 66.2        |
+| time/                   |             |
+|    fps                  | 347         |
+|    iterations           | 26          |
+|    time_elapsed         | 246         |
+|    total_timesteps      | 85644       |
+| train/                  |             |
+|    approx_kl            | 0.018887786 |
+|    clip_fraction        | 0.241       |
+|    clip_range           | 0.193       |
+|    entropy_loss         | -8          |
+|    explained_variance   | 0.107       |
+|    learning_rate        | 5.11e-05    |
+|    loss                 | 3.57        |
+|    n_updates            | 250         |
+|    policy_gradient_loss | -0.00903    |
+|    value_loss           | 26.9        |
+-----------------------------------------
+---------------------------------------
+| rollout/                |           |
+|    ep_len_mean          | 2.47e+03  |
+|    ep_rew_mean          | 58        |
+| time/                   |           |
+|    fps                  | 346       |
+|    iterations           | 27        |
+|    time_elapsed         | 256       |
+|    total_timesteps      | 88938     |
+| train/                  |           |
+|    approx_kl            | 0.0155593 |
+|    clip_fraction        | 0.219     |
+|    clip_range           | 0.193     |
+|    entropy_loss         | -8.07     |
+|    explained_variance   | 0.0512    |
+|    learning_rate        | 5.11e-05  |
+|    loss                 | 3.59      |
+|    n_updates            | 260       |
+|    policy_gradient_loss | -0.0133   |
+|    value_loss           | 21        |
+---------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.44e+03    |
+|    ep_rew_mean          | 54.1        |
+| time/                   |             |
+|    fps                  | 346         |
+|    iterations           | 28          |
+|    time_elapsed         | 266         |
+|    total_timesteps      | 92232       |
+| train/                  |             |
+|    approx_kl            | 0.015150225 |
+|    clip_fraction        | 0.198       |
+|    clip_range           | 0.193       |
+|    entropy_loss         | -8.1        |
+|    explained_variance   | 0.149       |
+|    learning_rate        | 5.11e-05    |
+|    loss                 | 3.1         |
+|    n_updates            | 270         |
+|    policy_gradient_loss | -0.00993    |
+|    value_loss           | 9.94        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.47e+03    |
+|    ep_rew_mean          | 52.1        |
+| time/                   |             |
+|    fps                  | 346         |
+|    iterations           | 29          |
+|    time_elapsed         | 275         |
+|    total_timesteps      | 95526       |
+| train/                  |             |
+|    approx_kl            | 0.016742641 |
+|    clip_fraction        | 0.223       |
+|    clip_range           | 0.193       |
+|    entropy_loss         | -7.91       |
+|    explained_variance   | 0.276       |
+|    learning_rate        | 5.11e-05    |
+|    loss                 | 4.49        |
+|    n_updates            | 280         |
+|    policy_gradient_loss | -0.015      |
+|    value_loss           | 7.73        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.44e+03    |
+|    ep_rew_mean          | 41.7        |
+| time/                   |             |
+|    fps                  | 346         |
+|    iterations           | 30          |
+|    time_elapsed         | 284         |
+|    total_timesteps      | 98820       |
+| train/                  |             |
+|    approx_kl            | 0.016321812 |
+|    clip_fraction        | 0.238       |
+|    clip_range           | 0.193       |
+|    entropy_loss         | -8.08       |
+|    explained_variance   | 0.104       |
+|    learning_rate        | 5.11e-05    |
+|    loss                 | 0.345       |
+|    n_updates            | 290         |
+|    policy_gradient_loss | -0.0153     |
+|    value_loss           | 5.16        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.43e+03    |
+|    ep_rew_mean          | 40          |
+| time/                   |             |
+|    fps                  | 346         |
+|    iterations           | 31          |
+|    time_elapsed         | 294         |
+|    total_timesteps      | 102114      |
+| train/                  |             |
+|    approx_kl            | 0.016819764 |
+|    clip_fraction        | 0.22        |
+|    clip_range           | 0.193       |
+|    entropy_loss         | -7.89       |
+|    explained_variance   | -0.0187     |
+|    learning_rate        | 5.11e-05    |
+|    loss                 | 1.07        |
+|    n_updates            | 300         |
+|    policy_gradient_loss | -0.00886    |
+|    value_loss           | 15.6        |
+-----------------------------------------
+[I 2023-03-30 21:50:56,850] Trial 1 finished with value: -205.0 and parameters: {'n_steps': 3294, 'gamma': 0.9019828232975781, 'learning_rate': 5.112209134805487e-05, 'clip_range': 0.1926590966798606, 'gae_lambda': 0.8670673597089896}. Best is trial 1 with value: -205.0.
+Using cuda device
+Wrapping the env in a DummyVecEnv.
+Wrapping the env in a VecTransposeImage.
+C:\ProgramData\Anaconda3\envs\StreetFighterAI\lib\site-packages\stable_baselines3\ppo\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 1586`, after every 24 untruncated mini-batches, there will be a truncated mini-batch of size 50
+We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
+Info: (n_steps=1586 and n_envs=1)
+  warnings.warn(
+Logging to logs/PPO_20
+-----------------------------
+| time/              |      |
+|    fps             | 611  |
+|    iterations      | 1    |
+|    time_elapsed    | 2    |
+|    total_timesteps | 1586 |
+-----------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.1e+03     |
+|    ep_rew_mean          | -13         |
+| time/                   |             |
+|    fps                  | 448         |
+|    iterations           | 2           |
+|    time_elapsed         | 7           |
+|    total_timesteps      | 3172        |
+| train/                  |             |
+|    approx_kl            | 0.007479368 |
+|    clip_fraction        | 0.211       |
+|    clip_range           | 0.161       |
+|    entropy_loss         | -8.31       |
+|    explained_variance   | -0.00514    |
+|    learning_rate        | 5.02e-05    |
+|    loss                 | 5.13        |
+|    n_updates            | 10          |
+|    policy_gradient_loss | -0.0115     |
+|    value_loss           | 11.6        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.21e+03    |
+|    ep_rew_mean          | -115        |
+| time/                   |             |
+|    fps                  | 413         |
+|    iterations           | 3           |
+|    time_elapsed         | 11          |
+|    total_timesteps      | 4758        |
+| train/                  |             |
+|    approx_kl            | 0.009029714 |
+|    clip_fraction        | 0.168       |
+|    clip_range           | 0.161       |
+|    entropy_loss         | -8.3        |
+|    explained_variance   | 0.0171      |
+|    learning_rate        | 5.02e-05    |
+|    loss                 | 1.61        |
+|    n_updates            | 20          |
+|    policy_gradient_loss | -0.0118     |
+|    value_loss           | 15          |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.21e+03    |
+|    ep_rew_mean          | -115        |
+| time/                   |             |
+|    fps                  | 395         |
+|    iterations           | 4           |
+|    time_elapsed         | 16          |
+|    total_timesteps      | 6344        |
+| train/                  |             |
+|    approx_kl            | 0.011066959 |
+|    clip_fraction        | 0.22        |
+|    clip_range           | 0.161       |
+|    entropy_loss         | -8.28       |
+|    explained_variance   | 0.138       |
+|    learning_rate        | 5.02e-05    |
+|    loss                 | 4.3         |
+|    n_updates            | 30          |
+|    policy_gradient_loss | -0.0035     |
+|    value_loss           | 26.5        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.13e+03    |
+|    ep_rew_mean          | -64.3       |
+| time/                   |             |
+|    fps                  | 385         |
+|    iterations           | 5           |
+|    time_elapsed         | 20          |
+|    total_timesteps      | 7930        |
+| train/                  |             |
+|    approx_kl            | 0.006398973 |
+|    clip_fraction        | 0.123       |
+|    clip_range           | 0.161       |
+|    entropy_loss         | -8.29       |
+|    explained_variance   | 0.0506      |
+|    learning_rate        | 5.02e-05    |
+|    loss                 | 7.38        |
+|    n_updates            | 40          |
+|    policy_gradient_loss | -0.00387    |
+|    value_loss           | 13.7        |
+-----------------------------------------
+------------------------------------------
+| rollout/                |              |
+|    ep_len_mean          | 2.07e+03     |
+|    ep_rew_mean          | -124         |
+| time/                   |              |
+|    fps                  | 378          |
+|    iterations           | 6            |
+|    time_elapsed         | 25           |
+|    total_timesteps      | 9516         |
+| train/                  |              |
+|    approx_kl            | 0.0068364535 |
+|    clip_fraction        | 0.234        |
+|    clip_range           | 0.161        |
+|    entropy_loss         | -8.28        |
+|    explained_variance   | 0.0926       |
+|    learning_rate        | 5.02e-05     |
+|    loss                 | 1.79         |
+|    n_updates            | 50           |
+|    policy_gradient_loss | -0.00452     |
+|    value_loss           | 8.46         |
+------------------------------------------
+------------------------------------------
+| rollout/                |              |
+|    ep_len_mean          | 2.2e+03      |
+|    ep_rew_mean          | -129         |
+| time/                   |              |
+|    fps                  | 373          |
+|    iterations           | 7            |
+|    time_elapsed         | 29           |
+|    total_timesteps      | 11102        |
+| train/                  |              |
+|    approx_kl            | 0.0063208304 |
+|    clip_fraction        | 0.139        |
+|    clip_range           | 0.161        |
+|    entropy_loss         | -8.29        |
+|    explained_variance   | 0.225        |
+|    learning_rate        | 5.02e-05     |
+|    loss                 | 11           |
+|    n_updates            | 60           |
+|    policy_gradient_loss | -0.00482     |
+|    value_loss           | 55.3         |
+------------------------------------------
+------------------------------------------
+| rollout/                |              |
+|    ep_len_mean          | 2.2e+03      |
+|    ep_rew_mean          | -129         |
+| time/                   |              |
+|    fps                  | 369          |
+|    iterations           | 8            |
+|    time_elapsed         | 34           |
+|    total_timesteps      | 12688        |
+| train/                  |              |
+|    approx_kl            | 0.0049916673 |
+|    clip_fraction        | 0.146        |
+|    clip_range           | 0.161        |
+|    entropy_loss         | -8.29        |
+|    explained_variance   | 0.341        |
+|    learning_rate        | 5.02e-05     |
+|    loss                 | 4.11         |
+|    n_updates            | 70           |
+|    policy_gradient_loss | -0.00677     |
+|    value_loss           | 21.2         |
+------------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.25e+03   |
+|    ep_rew_mean          | -140       |
+| time/                   |            |
+|    fps                  | 368        |
+|    iterations           | 9          |
+|    time_elapsed         | 38         |
+|    total_timesteps      | 14274      |
+| train/                  |            |
+|    approx_kl            | 0.00807819 |
+|    clip_fraction        | 0.222      |
+|    clip_range           | 0.161      |
+|    entropy_loss         | -8.29      |
+|    explained_variance   | 0.368      |
+|    learning_rate        | 5.02e-05   |
+|    loss                 | 0.672      |
+|    n_updates            | 80         |
+|    policy_gradient_loss | -0.00792   |
+|    value_loss           | 6.29       |
+----------------------------------------
+------------------------------------------
+| rollout/                |              |
+|    ep_len_mean          | 2.09e+03     |
+|    ep_rew_mean          | -96.1        |
+| time/                   |              |
+|    fps                  | 366          |
+|    iterations           | 10           |
+|    time_elapsed         | 43           |
+|    total_timesteps      | 15860        |
+| train/                  |              |
+|    approx_kl            | 0.0066149407 |
+|    clip_fraction        | 0.157        |
+|    clip_range           | 0.161        |
+|    entropy_loss         | -8.29        |
+|    explained_variance   | 0.102        |
+|    learning_rate        | 5.02e-05     |
+|    loss                 | 47.7         |
+|    n_updates            | 90           |
+|    policy_gradient_loss | -0.00711     |
+|    value_loss           | 24.1         |
+------------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.09e+03    |
+|    ep_rew_mean          | -96.1       |
+| time/                   |             |
+|    fps                  | 366         |
+|    iterations           | 11          |
+|    time_elapsed         | 47          |
+|    total_timesteps      | 17446       |
+| train/                  |             |
+|    approx_kl            | 0.009226098 |
+|    clip_fraction        | 0.162       |
+|    clip_range           | 0.161       |
+|    entropy_loss         | -8.29       |
+|    explained_variance   | 0.103       |
+|    learning_rate        | 5.02e-05    |
+|    loss                 | 32.2        |
+|    n_updates            | 100         |
+|    policy_gradient_loss | -0.00681    |
+|    value_loss           | 30          |
+-----------------------------------------
+------------------------------------------
+| rollout/                |              |
+|    ep_len_mean          | 2.23e+03     |
+|    ep_rew_mean          | -84.8        |
+| time/                   |              |
+|    fps                  | 365          |
+|    iterations           | 12           |
+|    time_elapsed         | 52           |
+|    total_timesteps      | 19032        |
+| train/                  |              |
+|    approx_kl            | 0.0071024043 |
+|    clip_fraction        | 0.202        |
+|    clip_range           | 0.161        |
+|    entropy_loss         | -8.28        |
+|    explained_variance   | 0.477        |
+|    learning_rate        | 5.02e-05     |
+|    loss                 | 1.68         |
+|    n_updates            | 110          |
+|    policy_gradient_loss | -0.00699     |
+|    value_loss           | 8.59         |
+------------------------------------------
+------------------------------------------
+| rollout/                |              |
+|    ep_len_mean          | 2.29e+03     |
+|    ep_rew_mean          | -80.7        |
+| time/                   |              |
+|    fps                  | 364          |
+|    iterations           | 13           |
+|    time_elapsed         | 56           |
+|    total_timesteps      | 20618        |
+| train/                  |              |
+|    approx_kl            | 0.0079917265 |
+|    clip_fraction        | 0.177        |
+|    clip_range           | 0.161        |
+|    entropy_loss         | -8.28        |
+|    explained_variance   | 0.499        |
+|    learning_rate        | 5.02e-05     |
+|    loss                 | 2.03         |
+|    n_updates            | 120          |
+|    policy_gradient_loss | -0.00561     |
+|    value_loss           | 9.53         |
+------------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.29e+03    |
+|    ep_rew_mean          | -80.7       |
+| time/                   |             |
+|    fps                  | 364         |
+|    iterations           | 14          |
+|    time_elapsed         | 60          |
+|    total_timesteps      | 22204       |
+| train/                  |             |
+|    approx_kl            | 0.008118922 |
+|    clip_fraction        | 0.183       |
+|    clip_range           | 0.161       |
+|    entropy_loss         | -8.28       |
+|    explained_variance   | 0.592       |
+|    learning_rate        | 5.02e-05    |
+|    loss                 | 1.45        |
+|    n_updates            | 130         |
+|    policy_gradient_loss | -0.00841    |
+|    value_loss           | 8.69        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.35e+03    |
+|    ep_rew_mean          | -77.8       |
+| time/                   |             |
+|    fps                  | 363         |
+|    iterations           | 15          |
+|    time_elapsed         | 65          |
+|    total_timesteps      | 23790       |
+| train/                  |             |
+|    approx_kl            | 0.009747963 |
+|    clip_fraction        | 0.177       |
+|    clip_range           | 0.161       |
+|    entropy_loss         | -8.27       |
+|    explained_variance   | 0.568       |
+|    learning_rate        | 5.02e-05    |
+|    loss                 | 3.4         |
+|    n_updates            | 140         |
+|    policy_gradient_loss | -0.00655    |
+|    value_loss           | 12.6        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.35e+03    |
+|    ep_rew_mean          | -77.8       |
+| time/                   |             |
+|    fps                  | 363         |
+|    iterations           | 16          |
+|    time_elapsed         | 69          |
+|    total_timesteps      | 25376       |
+| train/                  |             |
+|    approx_kl            | 0.011058032 |
+|    clip_fraction        | 0.209       |
+|    clip_range           | 0.161       |
+|    entropy_loss         | -8.25       |
+|    explained_variance   | 0.54        |
+|    learning_rate        | 5.02e-05    |
+|    loss                 | 0.934       |
+|    n_updates            | 150         |
+|    policy_gradient_loss | -0.00562    |
+|    value_loss           | 8.9         |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.4e+03     |
+|    ep_rew_mean          | -70.5       |
+| time/                   |             |
+|    fps                  | 364         |
+|    iterations           | 17          |
+|    time_elapsed         | 74          |
+|    total_timesteps      | 26962       |
+| train/                  |             |
+|    approx_kl            | 0.007287364 |
+|    clip_fraction        | 0.151       |
+|    clip_range           | 0.161       |
+|    entropy_loss         | -8.26       |
+|    explained_variance   | 0.473       |
+|    learning_rate        | 5.02e-05    |
+|    loss                 | 0.892       |
+|    n_updates            | 160         |
+|    policy_gradient_loss | -0.00825    |
+|    value_loss           | 7.3         |
+-----------------------------------------
+------------------------------------------
+| rollout/                |              |
+|    ep_len_mean          | 2.4e+03      |
+|    ep_rew_mean          | -70.5        |
+| time/                   |              |
+|    fps                  | 364          |
+|    iterations           | 18           |
+|    time_elapsed         | 78           |
+|    total_timesteps      | 28548        |
+| train/                  |              |
+|    approx_kl            | 0.0059617176 |
+|    clip_fraction        | 0.119        |
+|    clip_range           | 0.161        |
+|    entropy_loss         | -8.27        |
+|    explained_variance   | 0.193        |
+|    learning_rate        | 5.02e-05     |
+|    loss                 | 5.01         |
+|    n_updates            | 170          |
+|    policy_gradient_loss | -0.00776     |
+|    value_loss           | 15.3         |
+------------------------------------------
+------------------------------------------
+| rollout/                |              |
+|    ep_len_mean          | 2.49e+03     |
+|    ep_rew_mean          | -64.2        |
+| time/                   |              |
+|    fps                  | 364          |
+|    iterations           | 19           |
+|    time_elapsed         | 82           |
+|    total_timesteps      | 30134        |
+| train/                  |              |
+|    approx_kl            | 0.0068875425 |
+|    clip_fraction        | 0.143        |
+|    clip_range           | 0.161        |
+|    entropy_loss         | -8.27        |
+|    explained_variance   | 0.208        |
+|    learning_rate        | 5.02e-05     |
+|    loss                 | 0.754        |
+|    n_updates            | 180          |
+|    policy_gradient_loss | -0.00782     |
+|    value_loss           | 8.96         |
+------------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.49e+03    |
+|    ep_rew_mean          | -64.2       |
+| time/                   |             |
+|    fps                  | 364         |
+|    iterations           | 20          |
+|    time_elapsed         | 86          |
+|    total_timesteps      | 31720       |
+| train/                  |             |
+|    approx_kl            | 0.006500314 |
+|    clip_fraction        | 0.141       |
+|    clip_range           | 0.161       |
+|    entropy_loss         | -8.28       |
+|    explained_variance   | 0.431       |
+|    learning_rate        | 5.02e-05    |
+|    loss                 | 0.835       |
+|    n_updates            | 190         |
+|    policy_gradient_loss | -0.0125     |
+|    value_loss           | 4.6         |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.47e+03   |
+|    ep_rew_mean          | -59.9      |
+| time/                   |            |
+|    fps                  | 363        |
+|    iterations           | 21         |
+|    time_elapsed         | 91         |
+|    total_timesteps      | 33306      |
+| train/                  |            |
+|    approx_kl            | 0.00925716 |
+|    clip_fraction        | 0.167      |
+|    clip_range           | 0.161      |
+|    entropy_loss         | -8.28      |
+|    explained_variance   | 0.166      |
+|    learning_rate        | 5.02e-05   |
+|    loss                 | 4.07       |
+|    n_updates            | 200        |
+|    policy_gradient_loss | -0.0126    |
+|    value_loss           | 13.9       |
+----------------------------------------
+------------------------------------------
+| rollout/                |              |
+|    ep_len_mean          | 2.47e+03     |
+|    ep_rew_mean          | -59.9        |
+| time/                   |              |
+|    fps                  | 362          |
+|    iterations           | 22           |
+|    time_elapsed         | 96           |
+|    total_timesteps      | 34892        |
+| train/                  |              |
+|    approx_kl            | 0.0061101955 |
+|    clip_fraction        | 0.121        |
+|    clip_range           | 0.161        |
+|    entropy_loss         | -8.27        |
+|    explained_variance   | 0.192        |
+|    learning_rate        | 5.02e-05     |
+|    loss                 | 0.275        |
+|    n_updates            | 210          |
+|    policy_gradient_loss | -0.0125      |
+|    value_loss           | 6.15         |
+------------------------------------------
+------------------------------------------
+| rollout/                |              |
+|    ep_len_mean          | 2.5e+03      |
+|    ep_rew_mean          | -68.9        |
+| time/                   |              |
+|    fps                  | 361          |
+|    iterations           | 23           |
+|    time_elapsed         | 100          |
+|    total_timesteps      | 36478        |
+| train/                  |              |
+|    approx_kl            | 0.0070993374 |
+|    clip_fraction        | 0.127        |
+|    clip_range           | 0.161        |
+|    entropy_loss         | -8.29        |
+|    explained_variance   | 0.0764       |
+|    learning_rate        | 5.02e-05     |
+|    loss                 | 1.86         |
+|    n_updates            | 220          |
+|    policy_gradient_loss | -0.0111      |
+|    value_loss           | 11.7         |
+------------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.5e+03     |
+|    ep_rew_mean          | -65.4       |
+| time/                   |             |
+|    fps                  | 360         |
+|    iterations           | 24          |
+|    time_elapsed         | 105         |
+|    total_timesteps      | 38064       |
+| train/                  |             |
+|    approx_kl            | 0.010024515 |
+|    clip_fraction        | 0.182       |
+|    clip_range           | 0.161       |
+|    entropy_loss         | -8.27       |
+|    explained_variance   | 0.335       |
+|    learning_rate        | 5.02e-05    |
+|    loss                 | 30          |
+|    n_updates            | 230         |
+|    policy_gradient_loss | -0.012      |
+|    value_loss           | 25.5        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.5e+03     |
+|    ep_rew_mean          | -65.4       |
+| time/                   |             |
+|    fps                  | 358         |
+|    iterations           | 25          |
+|    time_elapsed         | 110         |
+|    total_timesteps      | 39650       |
+| train/                  |             |
+|    approx_kl            | 0.009245104 |
+|    clip_fraction        | 0.179       |
+|    clip_range           | 0.161       |
+|    entropy_loss         | -8.27       |
+|    explained_variance   | 0.325       |
+|    learning_rate        | 5.02e-05    |
+|    loss                 | 9.09        |
+|    n_updates            | 240         |
+|    policy_gradient_loss | -0.012      |
+|    value_loss           | 9.09        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.5e+03     |
+|    ep_rew_mean          | -73.7       |
+| time/                   |             |
+|    fps                  | 355         |
+|    iterations           | 26          |
+|    time_elapsed         | 115         |
+|    total_timesteps      | 41236       |
+| train/                  |             |
+|    approx_kl            | 0.007121284 |
+|    clip_fraction        | 0.155       |
+|    clip_range           | 0.161       |
+|    entropy_loss         | -8.26       |
+|    explained_variance   | 0.297       |
+|    learning_rate        | 5.02e-05    |
+|    loss                 | 0.498       |
+|    n_updates            | 250         |
+|    policy_gradient_loss | -0.00963    |
+|    value_loss           | 5.33        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.48e+03    |
+|    ep_rew_mean          | -70.9       |
+| time/                   |             |
+|    fps                  | 353         |
+|    iterations           | 27          |
+|    time_elapsed         | 121         |
+|    total_timesteps      | 42822       |
+| train/                  |             |
+|    approx_kl            | 0.006750791 |
+|    clip_fraction        | 0.22        |
+|    clip_range           | 0.161       |
+|    entropy_loss         | -8.24       |
+|    explained_variance   | 0.0803      |
+|    learning_rate        | 5.02e-05    |
+|    loss                 | 11.9        |
+|    n_updates            | 260         |
+|    policy_gradient_loss | -0.00203    |
+|    value_loss           | 36.9        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.41e+03    |
+|    ep_rew_mean          | -58.2       |
+| time/                   |             |
+|    fps                  | 353         |
+|    iterations           | 28          |
+|    time_elapsed         | 125         |
+|    total_timesteps      | 44408       |
+| train/                  |             |
+|    approx_kl            | 0.010176781 |
+|    clip_fraction        | 0.163       |
+|    clip_range           | 0.161       |
+|    entropy_loss         | -8.23       |
+|    explained_variance   | 0.524       |
+|    learning_rate        | 5.02e-05    |
+|    loss                 | 3.07        |
+|    n_updates            | 270         |
+|    policy_gradient_loss | -0.0123     |
+|    value_loss           | 12.7        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.41e+03    |
+|    ep_rew_mean          | -65.8       |
+| time/                   |             |
+|    fps                  | 353         |
+|    iterations           | 29          |
+|    time_elapsed         | 130         |
+|    total_timesteps      | 45994       |
+| train/                  |             |
+|    approx_kl            | 0.009089488 |
+|    clip_fraction        | 0.196       |
+|    clip_range           | 0.161       |
+|    entropy_loss         | -8.25       |
+|    explained_variance   | -0.0364     |
+|    learning_rate        | 5.02e-05    |
+|    loss                 | 6.55        |
+|    n_updates            | 280         |
+|    policy_gradient_loss | -0.00565    |
+|    value_loss           | 31.2        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.41e+03    |
+|    ep_rew_mean          | -65.8       |
+| time/                   |             |
+|    fps                  | 353         |
+|    iterations           | 30          |
+|    time_elapsed         | 134         |
+|    total_timesteps      | 47580       |
+| train/                  |             |
+|    approx_kl            | 0.010195761 |
+|    clip_fraction        | 0.168       |
+|    clip_range           | 0.161       |
+|    entropy_loss         | -8.21       |
+|    explained_variance   | 0.248       |
+|    learning_rate        | 5.02e-05    |
+|    loss                 | 2.92        |
+|    n_updates            | 290         |
+|    policy_gradient_loss | -0.00979    |
+|    value_loss           | 21.1        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.41e+03   |
+|    ep_rew_mean          | -62.6      |
+| time/                   |            |
+|    fps                  | 354        |
+|    iterations           | 31         |
+|    time_elapsed         | 138        |
+|    total_timesteps      | 49166      |
+| train/                  |            |
+|    approx_kl            | 0.00898233 |
+|    clip_fraction        | 0.207      |
+|    clip_range           | 0.161      |
+|    entropy_loss         | -8.21      |
+|    explained_variance   | 0.268      |
+|    learning_rate        | 5.02e-05   |
+|    loss                 | 2.67       |
+|    n_updates            | 300        |
+|    policy_gradient_loss | -0.00882   |
+|    value_loss           | 9.28       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.39e+03    |
+|    ep_rew_mean          | -61.5       |
+| time/                   |             |
+|    fps                  | 353         |
+|    iterations           | 32          |
+|    time_elapsed         | 143         |
+|    total_timesteps      | 50752       |
+| train/                  |             |
+|    approx_kl            | 0.011192194 |
+|    clip_fraction        | 0.247       |
+|    clip_range           | 0.161       |
+|    entropy_loss         | -8.2        |
+|    explained_variance   | 0.513       |
+|    learning_rate        | 5.02e-05    |
+|    loss                 | 2.01        |
+|    n_updates            | 310         |
+|    policy_gradient_loss | -0.00966    |
+|    value_loss           | 5.51        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.39e+03    |
+|    ep_rew_mean          | -61.5       |
+| time/                   |             |
+|    fps                  | 351         |
+|    iterations           | 33          |
+|    time_elapsed         | 148         |
+|    total_timesteps      | 52338       |
+| train/                  |             |
+|    approx_kl            | 0.010591626 |
+|    clip_fraction        | 0.226       |
+|    clip_range           | 0.161       |
+|    entropy_loss         | -8.2        |
+|    explained_variance   | 0.328       |
+|    learning_rate        | 5.02e-05    |
+|    loss                 | 1.02        |
+|    n_updates            | 320         |
+|    policy_gradient_loss | -0.0127     |
+|    value_loss           | 8.93        |
+-----------------------------------------
+------------------------------------------
+| rollout/                |              |
+|    ep_len_mean          | 2.41e+03     |
+|    ep_rew_mean          | -63          |
+| time/                   |              |
+|    fps                  | 350          |
+|    iterations           | 34           |
+|    time_elapsed         | 154          |
+|    total_timesteps      | 53924        |
+| train/                  |              |
+|    approx_kl            | 0.0077109425 |
+|    clip_fraction        | 0.228        |
+|    clip_range           | 0.161        |
+|    entropy_loss         | -8.17        |
+|    explained_variance   | 0.17         |
+|    learning_rate        | 5.02e-05     |
+|    loss                 | 2.71         |
+|    n_updates            | 330          |
+|    policy_gradient_loss | -0.00697     |
+|    value_loss           | 8.1          |
+------------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.37e+03    |
+|    ep_rew_mean          | -71.1       |
+| time/                   |             |
+|    fps                  | 348         |
+|    iterations           | 35          |
+|    time_elapsed         | 159         |
+|    total_timesteps      | 55510       |
+| train/                  |             |
+|    approx_kl            | 0.009447264 |
+|    clip_fraction        | 0.209       |
+|    clip_range           | 0.161       |
+|    entropy_loss         | -8.2        |
+|    explained_variance   | 0.244       |
+|    learning_rate        | 5.02e-05    |
+|    loss                 | 13.6        |
+|    n_updates            | 340         |
+|    policy_gradient_loss | -0.00654    |
+|    value_loss           | 11.1        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.34e+03    |
+|    ep_rew_mean          | -77.4       |
+| time/                   |             |
+|    fps                  | 347         |
+|    iterations           | 36          |
+|    time_elapsed         | 164         |
+|    total_timesteps      | 57096       |
+| train/                  |             |
+|    approx_kl            | 0.011554491 |
+|    clip_fraction        | 0.213       |
+|    clip_range           | 0.161       |
+|    entropy_loss         | -8.18       |
+|    explained_variance   | 0.243       |
+|    learning_rate        | 5.02e-05    |
+|    loss                 | 1.56        |
+|    n_updates            | 350         |
+|    policy_gradient_loss | -0.00918    |
+|    value_loss           | 35.2        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.32e+03    |
+|    ep_rew_mean          | -65.5       |
+| time/                   |             |
+|    fps                  | 346         |
+|    iterations           | 37          |
+|    time_elapsed         | 169         |
+|    total_timesteps      | 58682       |
+| train/                  |             |
+|    approx_kl            | 0.014401031 |
+|    clip_fraction        | 0.277       |
+|    clip_range           | 0.161       |
+|    entropy_loss         | -8.09       |
+|    explained_variance   | 0.676       |
+|    learning_rate        | 5.02e-05    |
+|    loss                 | 3.44        |
+|    n_updates            | 360         |
+|    policy_gradient_loss | -0.0114     |
+|    value_loss           | 29          |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.32e+03   |
+|    ep_rew_mean          | -65.5      |
+| time/                   |            |
+|    fps                  | 345        |
+|    iterations           | 38         |
+|    time_elapsed         | 174        |
+|    total_timesteps      | 60268      |
+| train/                  |            |
+|    approx_kl            | 0.01188478 |
+|    clip_fraction        | 0.218      |
+|    clip_range           | 0.161      |
+|    entropy_loss         | -8.03      |
+|    explained_variance   | 0.506      |
+|    learning_rate        | 5.02e-05   |
+|    loss                 | 3.72       |
+|    n_updates            | 370        |
+|    policy_gradient_loss | -0.00428   |
+|    value_loss           | 51.7       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.33e+03    |
+|    ep_rew_mean          | -69         |
+| time/                   |             |
+|    fps                  | 346         |
+|    iterations           | 39          |
+|    time_elapsed         | 178         |
+|    total_timesteps      | 61854       |
+| train/                  |             |
+|    approx_kl            | 0.010957578 |
+|    clip_fraction        | 0.247       |
+|    clip_range           | 0.161       |
+|    entropy_loss         | -8.11       |
+|    explained_variance   | 0.534       |
+|    learning_rate        | 5.02e-05    |
+|    loss                 | 1.98        |
+|    n_updates            | 380         |
+|    policy_gradient_loss | -0.0112     |
+|    value_loss           | 8.17        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.34e+03    |
+|    ep_rew_mean          | -74.6       |
+| time/                   |             |
+|    fps                  | 346         |
+|    iterations           | 40          |
+|    time_elapsed         | 182         |
+|    total_timesteps      | 63440       |
+| train/                  |             |
+|    approx_kl            | 0.012994195 |
+|    clip_fraction        | 0.245       |
+|    clip_range           | 0.161       |
+|    entropy_loss         | -8.05       |
+|    explained_variance   | 0.572       |
+|    learning_rate        | 5.02e-05    |
+|    loss                 | 2.95        |
+|    n_updates            | 390         |
+|    policy_gradient_loss | -0.0108     |
+|    value_loss           | 13.3        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.34e+03   |
+|    ep_rew_mean          | -74.6      |
+| time/                   |            |
+|    fps                  | 337        |
+|    iterations           | 41         |
+|    time_elapsed         | 192        |
+|    total_timesteps      | 65026      |
+| train/                  |            |
+|    approx_kl            | 0.01015701 |
+|    clip_fraction        | 0.236      |
+|    clip_range           | 0.161      |
+|    entropy_loss         | -8.1       |
+|    explained_variance   | 0.263      |
+|    learning_rate        | 5.02e-05   |
+|    loss                 | 4.03       |
+|    n_updates            | 400        |
+|    policy_gradient_loss | -0.012     |
+|    value_loss           | 27.9       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.33e+03    |
+|    ep_rew_mean          | -82         |
+| time/                   |             |
+|    fps                  | 338         |
+|    iterations           | 42          |
+|    time_elapsed         | 196         |
+|    total_timesteps      | 66612       |
+| train/                  |             |
+|    approx_kl            | 0.013143239 |
+|    clip_fraction        | 0.255       |
+|    clip_range           | 0.161       |
+|    entropy_loss         | -8.09       |
+|    explained_variance   | 0.672       |
+|    learning_rate        | 5.02e-05    |
+|    loss                 | 2.09        |
+|    n_updates            | 410         |
+|    policy_gradient_loss | -0.0143     |
+|    value_loss           | 3.7         |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.31e+03    |
+|    ep_rew_mean          | -78.7       |
+| time/                   |             |
+|    fps                  | 338         |
+|    iterations           | 43          |
+|    time_elapsed         | 201         |
+|    total_timesteps      | 68198       |
+| train/                  |             |
+|    approx_kl            | 0.019480813 |
+|    clip_fraction        | 0.326       |
+|    clip_range           | 0.161       |
+|    entropy_loss         | -8.01       |
+|    explained_variance   | 0.28        |
+|    learning_rate        | 5.02e-05    |
+|    loss                 | 3.27        |
+|    n_updates            | 420         |
+|    policy_gradient_loss | -0.000208   |
+|    value_loss           | 31.3        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.31e+03   |
+|    ep_rew_mean          | -78.7      |
+| time/                   |            |
+|    fps                  | 339        |
+|    iterations           | 44         |
+|    time_elapsed         | 205        |
+|    total_timesteps      | 69784      |
+| train/                  |            |
+|    approx_kl            | 0.01460914 |
+|    clip_fraction        | 0.273      |
+|    clip_range           | 0.161      |
+|    entropy_loss         | -7.99      |
+|    explained_variance   | 0.66       |
+|    learning_rate        | 5.02e-05   |
+|    loss                 | 3.57       |
+|    n_updates            | 430        |
+|    policy_gradient_loss | -0.0082    |
+|    value_loss           | 15.4       |
+----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.35e+03   |
+|    ep_rew_mean          | -76.4      |
+| time/                   |            |
+|    fps                  | 338        |
+|    iterations           | 45         |
+|    time_elapsed         | 210        |
+|    total_timesteps      | 71370      |
+| train/                  |            |
+|    approx_kl            | 0.01634773 |
+|    clip_fraction        | 0.303      |
+|    clip_range           | 0.161      |
+|    entropy_loss         | -7.99      |
+|    explained_variance   | 0.729      |
+|    learning_rate        | 5.02e-05   |
+|    loss                 | 4.86       |
+|    n_updates            | 440        |
+|    policy_gradient_loss | -0.00564   |
+|    value_loss           | 13.7       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.34e+03    |
+|    ep_rew_mean          | -79.5       |
+| time/                   |             |
+|    fps                  | 337         |
+|    iterations           | 46          |
+|    time_elapsed         | 216         |
+|    total_timesteps      | 72956       |
+| train/                  |             |
+|    approx_kl            | 0.013031598 |
+|    clip_fraction        | 0.306       |
+|    clip_range           | 0.161       |
+|    entropy_loss         | -7.89       |
+|    explained_variance   | 0.513       |
+|    learning_rate        | 5.02e-05    |
+|    loss                 | 0.69        |
+|    n_updates            | 450         |
+|    policy_gradient_loss | -0.00602    |
+|    value_loss           | 7.02        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.34e+03    |
+|    ep_rew_mean          | -79.5       |
+| time/                   |             |
+|    fps                  | 336         |
+|    iterations           | 47          |
+|    time_elapsed         | 221         |
+|    total_timesteps      | 74542       |
+| train/                  |             |
+|    approx_kl            | 0.012420004 |
+|    clip_fraction        | 0.261       |
+|    clip_range           | 0.161       |
+|    entropy_loss         | -7.91       |
+|    explained_variance   | 0.314       |
+|    learning_rate        | 5.02e-05    |
+|    loss                 | 3.91        |
+|    n_updates            | 460         |
+|    policy_gradient_loss | -0.00565    |
+|    value_loss           | 20.3        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.36e+03    |
+|    ep_rew_mean          | -76.7       |
+| time/                   |             |
+|    fps                  | 335         |
+|    iterations           | 48          |
+|    time_elapsed         | 226         |
+|    total_timesteps      | 76128       |
+| train/                  |             |
+|    approx_kl            | 0.012879474 |
+|    clip_fraction        | 0.28        |
+|    clip_range           | 0.161       |
+|    entropy_loss         | -7.89       |
+|    explained_variance   | 0.787       |
+|    learning_rate        | 5.02e-05    |
+|    loss                 | 4.51        |
+|    n_updates            | 470         |
+|    policy_gradient_loss | -0.0114     |
+|    value_loss           | 7.77        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.36e+03    |
+|    ep_rew_mean          | -76.7       |
+| time/                   |             |
+|    fps                  | 334         |
+|    iterations           | 49          |
+|    time_elapsed         | 232         |
+|    total_timesteps      | 77714       |
+| train/                  |             |
+|    approx_kl            | 0.015691841 |
+|    clip_fraction        | 0.268       |
+|    clip_range           | 0.161       |
+|    entropy_loss         | -7.97       |
+|    explained_variance   | 0.792       |
+|    learning_rate        | 5.02e-05    |
+|    loss                 | 2.4         |
+|    n_updates            | 480         |
+|    policy_gradient_loss | -0.00649    |
+|    value_loss           | 13.2        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.36e+03   |
+|    ep_rew_mean          | -75.5      |
+| time/                   |            |
+|    fps                  | 334        |
+|    iterations           | 50         |
+|    time_elapsed         | 237        |
+|    total_timesteps      | 79300      |
+| train/                  |            |
+|    approx_kl            | 0.01415793 |
+|    clip_fraction        | 0.271      |
+|    clip_range           | 0.161      |
+|    entropy_loss         | -7.91      |
+|    explained_variance   | 0.663      |
+|    learning_rate        | 5.02e-05   |
+|    loss                 | 3.25       |
+|    n_updates            | 490        |
+|    policy_gradient_loss | -0.0147    |
+|    value_loss           | 10.4       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.34e+03    |
+|    ep_rew_mean          | -76.9       |
+| time/                   |             |
+|    fps                  | 333         |
+|    iterations           | 51          |
+|    time_elapsed         | 242         |
+|    total_timesteps      | 80886       |
+| train/                  |             |
+|    approx_kl            | 0.011811551 |
+|    clip_fraction        | 0.309       |
+|    clip_range           | 0.161       |
+|    entropy_loss         | -7.68       |
+|    explained_variance   | 0.634       |
+|    learning_rate        | 5.02e-05    |
+|    loss                 | 1.68        |
+|    n_updates            | 500         |
+|    policy_gradient_loss | -0.00301    |
+|    value_loss           | 10.3        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.34e+03    |
+|    ep_rew_mean          | -83.2       |
+| time/                   |             |
+|    fps                  | 332         |
+|    iterations           | 52          |
+|    time_elapsed         | 247         |
+|    total_timesteps      | 82472       |
+| train/                  |             |
+|    approx_kl            | 0.015533115 |
+|    clip_fraction        | 0.25        |
+|    clip_range           | 0.161       |
+|    entropy_loss         | -7.86       |
+|    explained_variance   | 0.652       |
+|    learning_rate        | 5.02e-05    |
+|    loss                 | 5.43        |
+|    n_updates            | 510         |
+|    policy_gradient_loss | -0.0107     |
+|    value_loss           | 10.6        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.33e+03    |
+|    ep_rew_mean          | -87.6       |
+| time/                   |             |
+|    fps                  | 333         |
+|    iterations           | 53          |
+|    time_elapsed         | 252         |
+|    total_timesteps      | 84058       |
+| train/                  |             |
+|    approx_kl            | 0.017240252 |
+|    clip_fraction        | 0.316       |
+|    clip_range           | 0.161       |
+|    entropy_loss         | -8          |
+|    explained_variance   | 0.623       |
+|    learning_rate        | 5.02e-05    |
+|    loss                 | 14.6        |
+|    n_updates            | 520         |
+|    policy_gradient_loss | -0.00709    |
+|    value_loss           | 33          |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.31e+03    |
+|    ep_rew_mean          | -82.3       |
+| time/                   |             |
+|    fps                  | 333         |
+|    iterations           | 54          |
+|    time_elapsed         | 256         |
+|    total_timesteps      | 85644       |
+| train/                  |             |
+|    approx_kl            | 0.015610819 |
+|    clip_fraction        | 0.297       |
+|    clip_range           | 0.161       |
+|    entropy_loss         | -7.9        |
+|    explained_variance   | 0.506       |
+|    learning_rate        | 5.02e-05    |
+|    loss                 | 79.8        |
+|    n_updates            | 530         |
+|    policy_gradient_loss | -0.0053     |
+|    value_loss           | 30.7        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.31e+03   |
+|    ep_rew_mean          | -82.3      |
+| time/                   |            |
+|    fps                  | 334        |
+|    iterations           | 55         |
+|    time_elapsed         | 261        |
+|    total_timesteps      | 87230      |
+| train/                  |            |
+|    approx_kl            | 0.01877381 |
+|    clip_fraction        | 0.33       |
+|    clip_range           | 0.161      |
+|    entropy_loss         | -7.88      |
+|    explained_variance   | 0.388      |
+|    learning_rate        | 5.02e-05   |
+|    loss                 | 2.4        |
+|    n_updates            | 540        |
+|    policy_gradient_loss | -0.00414   |
+|    value_loss           | 19.6       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.3e+03     |
+|    ep_rew_mean          | -89.3       |
+| time/                   |             |
+|    fps                  | 334         |
+|    iterations           | 56          |
+|    time_elapsed         | 265         |
+|    total_timesteps      | 88816       |
+| train/                  |             |
+|    approx_kl            | 0.018082947 |
+|    clip_fraction        | 0.339       |
+|    clip_range           | 0.161       |
+|    entropy_loss         | -7.79       |
+|    explained_variance   | 0.742       |
+|    learning_rate        | 5.02e-05    |
+|    loss                 | 1.75        |
+|    n_updates            | 550         |
+|    policy_gradient_loss | -0.00747    |
+|    value_loss           | 8.59        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.31e+03    |
+|    ep_rew_mean          | -89.3       |
+| time/                   |             |
+|    fps                  | 334         |
+|    iterations           | 57          |
+|    time_elapsed         | 269         |
+|    total_timesteps      | 90402       |
+| train/                  |             |
+|    approx_kl            | 0.033854794 |
+|    clip_fraction        | 0.4         |
+|    clip_range           | 0.161       |
+|    entropy_loss         | -7.85       |
+|    explained_variance   | 0.145       |
+|    learning_rate        | 5.02e-05    |
+|    loss                 | 2.04        |
+|    n_updates            | 560         |
+|    policy_gradient_loss | 0.00031     |
+|    value_loss           | 43.8        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.28e+03   |
+|    ep_rew_mean          | -81.3      |
+| time/                   |            |
+|    fps                  | 335        |
+|    iterations           | 58         |
+|    time_elapsed         | 274        |
+|    total_timesteps      | 91988      |
+| train/                  |            |
+|    approx_kl            | 0.02308767 |
+|    clip_fraction        | 0.369      |
+|    clip_range           | 0.161      |
+|    entropy_loss         | -7.65      |
+|    explained_variance   | 0.836      |
+|    learning_rate        | 5.02e-05   |
+|    loss                 | 1.59       |
+|    n_updates            | 570        |
+|    policy_gradient_loss | -0.00735   |
+|    value_loss           | 12.1       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.28e+03    |
+|    ep_rew_mean          | -81.3       |
+| time/                   |             |
+|    fps                  | 335         |
+|    iterations           | 59          |
+|    time_elapsed         | 279         |
+|    total_timesteps      | 93574       |
+| train/                  |             |
+|    approx_kl            | 0.020519579 |
+|    clip_fraction        | 0.345       |
+|    clip_range           | 0.161       |
+|    entropy_loss         | -7.8        |
+|    explained_variance   | 0.278       |
+|    learning_rate        | 5.02e-05    |
+|    loss                 | 9.11        |
+|    n_updates            | 580         |
+|    policy_gradient_loss | -0.00419    |
+|    value_loss           | 45          |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.28e+03    |
+|    ep_rew_mean          | -81.3       |
+| time/                   |             |
+|    fps                  | 335         |
+|    iterations           | 60          |
+|    time_elapsed         | 283         |
+|    total_timesteps      | 95160       |
+| train/                  |             |
+|    approx_kl            | 0.021029348 |
+|    clip_fraction        | 0.365       |
+|    clip_range           | 0.161       |
+|    entropy_loss         | -7.82       |
+|    explained_variance   | 0.839       |
+|    learning_rate        | 5.02e-05    |
+|    loss                 | 1.28        |
+|    n_updates            | 590         |
+|    policy_gradient_loss | -0.00539    |
+|    value_loss           | 4.64        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.28e+03    |
+|    ep_rew_mean          | -81.3       |
+| time/                   |             |
+|    fps                  | 335         |
+|    iterations           | 61          |
+|    time_elapsed         | 288         |
+|    total_timesteps      | 96746       |
+| train/                  |             |
+|    approx_kl            | 0.017239623 |
+|    clip_fraction        | 0.328       |
+|    clip_range           | 0.161       |
+|    entropy_loss         | -7.65       |
+|    explained_variance   | 0.762       |
+|    learning_rate        | 5.02e-05    |
+|    loss                 | 3.36        |
+|    n_updates            | 600         |
+|    policy_gradient_loss | -0.00336    |
+|    value_loss           | 13.7        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.39e+03    |
+|    ep_rew_mean          | -84.9       |
+| time/                   |             |
+|    fps                  | 335         |
+|    iterations           | 62          |
+|    time_elapsed         | 292         |
+|    total_timesteps      | 98332       |
+| train/                  |             |
+|    approx_kl            | 0.014254608 |
+|    clip_fraction        | 0.358       |
+|    clip_range           | 0.161       |
+|    entropy_loss         | -7.61       |
+|    explained_variance   | 0.69        |
+|    learning_rate        | 5.02e-05    |
+|    loss                 | 10.9        |
+|    n_updates            | 610         |
+|    policy_gradient_loss | -0.00208    |
+|    value_loss           | 5.69        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.39e+03    |
+|    ep_rew_mean          | -84.9       |
+| time/                   |             |
+|    fps                  | 336         |
+|    iterations           | 63          |
+|    time_elapsed         | 297         |
+|    total_timesteps      | 99918       |
+| train/                  |             |
+|    approx_kl            | 0.012963827 |
+|    clip_fraction        | 0.302       |
+|    clip_range           | 0.161       |
+|    entropy_loss         | -7.63       |
+|    explained_variance   | 0.322       |
+|    learning_rate        | 5.02e-05    |
+|    loss                 | 9.69        |
+|    n_updates            | 620         |
+|    policy_gradient_loss | -0.00529    |
+|    value_loss           | 29.8        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.41e+03    |
+|    ep_rew_mean          | -83.5       |
+| time/                   |             |
+|    fps                  | 336         |
+|    iterations           | 64          |
+|    time_elapsed         | 301         |
+|    total_timesteps      | 101504      |
+| train/                  |             |
+|    approx_kl            | 0.018386848 |
+|    clip_fraction        | 0.319       |
+|    clip_range           | 0.161       |
+|    entropy_loss         | -7.67       |
+|    explained_variance   | 0.638       |
+|    learning_rate        | 5.02e-05    |
+|    loss                 | 1.82        |
+|    n_updates            | 630         |
+|    policy_gradient_loss | -0.00643    |
+|    value_loss           | 6.2         |
+-----------------------------------------
+[I 2023-03-30 21:56:30,174] Trial 2 finished with value: -348.0 and parameters: {'n_steps': 1586, 'gamma': 0.9956348644941185, 'learning_rate': 5.0170841536324054e-05, 'clip_range': 0.16056638694970846, 'gae_lambda': 0.8608765423049661}. Best is trial 1 with value: -205.0.
+Using cuda device
+Wrapping the env in a DummyVecEnv.
+Wrapping the env in a VecTransposeImage.
+C:\ProgramData\Anaconda3\envs\StreetFighterAI\lib\site-packages\stable_baselines3\ppo\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 3813`, after every 59 untruncated mini-batches, there will be a truncated mini-batch of size 37
+We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
+Info: (n_steps=3813 and n_envs=1)
+  warnings.warn(
+Logging to logs/PPO_21
+---------------------------------
+| rollout/           |          |
+|    ep_len_mean     | 3.42e+03 |
+|    ep_rew_mean     | -8       |
+| time/              |          |
+|    fps             | 534      |
+|    iterations      | 1        |
+|    time_elapsed    | 7        |
+|    total_timesteps | 3813     |
+---------------------------------
+------------------------------------------
+| rollout/                |              |
+|    ep_len_mean          | 3.23e+03     |
+|    ep_rew_mean          | -32          |
+| time/                   |              |
+|    fps                  | 404          |
+|    iterations           | 2            |
+|    time_elapsed         | 18           |
+|    total_timesteps      | 7626         |
+| train/                  |              |
+|    approx_kl            | 0.0036819936 |
+|    clip_fraction        | 0.136        |
+|    clip_range           | 0.123        |
+|    entropy_loss         | -8.31        |
+|    explained_variance   | 0.000731     |
+|    learning_rate        | 5.81e-05     |
+|    loss                 | 0.479        |
+|    n_updates            | 10           |
+|    policy_gradient_loss | -0.00524     |
+|    value_loss           | 9.78         |
+------------------------------------------
+------------------------------------------
+| rollout/                |              |
+|    ep_len_mean          | 2.82e+03     |
+|    ep_rew_mean          | -104         |
+| time/                   |              |
+|    fps                  | 365          |
+|    iterations           | 3            |
+|    time_elapsed         | 31           |
+|    total_timesteps      | 11439        |
+| train/                  |              |
+|    approx_kl            | 0.0041210777 |
+|    clip_fraction        | 0.136        |
+|    clip_range           | 0.123        |
+|    entropy_loss         | -8.31        |
+|    explained_variance   | -0.119       |
+|    learning_rate        | 5.81e-05     |
+|    loss                 | 0.859        |
+|    n_updates            | 20           |
+|    policy_gradient_loss | -0.0063      |
+|    value_loss           | 8.05         |
+------------------------------------------
+------------------------------------------
+| rollout/                |              |
+|    ep_len_mean          | 2.65e+03     |
+|    ep_rew_mean          | -75.4        |
+| time/                   |              |
+|    fps                  | 348          |
+|    iterations           | 4            |
+|    time_elapsed         | 43           |
+|    total_timesteps      | 15252        |
+| train/                  |              |
+|    approx_kl            | 0.0068013067 |
+|    clip_fraction        | 0.186        |
+|    clip_range           | 0.123        |
+|    entropy_loss         | -8.3         |
+|    explained_variance   | -0.164       |
+|    learning_rate        | 5.81e-05     |
+|    loss                 | 125          |
+|    n_updates            | 30           |
+|    policy_gradient_loss | -0.00652     |
+|    value_loss           | 27.2         |
+------------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.58e+03   |
+|    ep_rew_mean          | -83        |
+| time/                   |            |
+|    fps                  | 337        |
+|    iterations           | 5          |
+|    time_elapsed         | 56         |
+|    total_timesteps      | 19065      |
+| train/                  |            |
+|    approx_kl            | 0.00511329 |
+|    clip_fraction        | 0.199      |
+|    clip_range           | 0.123      |
+|    entropy_loss         | -8.3       |
+|    explained_variance   | -0.527     |
+|    learning_rate        | 5.81e-05   |
+|    loss                 | 1.5        |
+|    n_updates            | 40         |
+|    policy_gradient_loss | -0.00346   |
+|    value_loss           | 14.6       |
+----------------------------------------
+------------------------------------------
+| rollout/                |              |
+|    ep_len_mean          | 2.54e+03     |
+|    ep_rew_mean          | -92.9        |
+| time/                   |              |
+|    fps                  | 336          |
+|    iterations           | 6            |
+|    time_elapsed         | 68           |
+|    total_timesteps      | 22878        |
+| train/                  |              |
+|    approx_kl            | 0.0047720987 |
+|    clip_fraction        | 0.186        |
+|    clip_range           | 0.123        |
+|    entropy_loss         | -8.3         |
+|    explained_variance   | -0.222       |
+|    learning_rate        | 5.81e-05     |
+|    loss                 | 3.05         |
+|    n_updates            | 50           |
+|    policy_gradient_loss | -0.00716     |
+|    value_loss           | 13.6         |
+------------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.56e+03    |
+|    ep_rew_mean          | -122        |
+| time/                   |             |
+|    fps                  | 336         |
+|    iterations           | 7           |
+|    time_elapsed         | 79          |
+|    total_timesteps      | 26691       |
+| train/                  |             |
+|    approx_kl            | 0.005124747 |
+|    clip_fraction        | 0.208       |
+|    clip_range           | 0.123       |
+|    entropy_loss         | -8.3        |
+|    explained_variance   | -0.166      |
+|    learning_rate        | 5.81e-05    |
+|    loss                 | 2.19        |
+|    n_updates            | 60          |
+|    policy_gradient_loss | -0.00516    |
+|    value_loss           | 15.4        |
+-----------------------------------------
+------------------------------------------
+| rollout/                |              |
+|    ep_len_mean          | 2.46e+03     |
+|    ep_rew_mean          | -90          |
+| time/                   |              |
+|    fps                  | 331          |
+|    iterations           | 8            |
+|    time_elapsed         | 92           |
+|    total_timesteps      | 30504        |
+| train/                  |              |
+|    approx_kl            | 0.0077587436 |
+|    clip_fraction        | 0.243        |
+|    clip_range           | 0.123        |
+|    entropy_loss         | -8.29        |
+|    explained_variance   | -0.0477      |
+|    learning_rate        | 5.81e-05     |
+|    loss                 | 1.42         |
+|    n_updates            | 70           |
+|    policy_gradient_loss | -0.00297     |
+|    value_loss           | 31.2         |
+------------------------------------------
+------------------------------------------
+| rollout/                |              |
+|    ep_len_mean          | 2.46e+03     |
+|    ep_rew_mean          | -78.2        |
+| time/                   |              |
+|    fps                  | 327          |
+|    iterations           | 9            |
+|    time_elapsed         | 104          |
+|    total_timesteps      | 34317        |
+| train/                  |              |
+|    approx_kl            | 0.0055853897 |
+|    clip_fraction        | 0.222        |
+|    clip_range           | 0.123        |
+|    entropy_loss         | -8.29        |
+|    explained_variance   | -0.242       |
+|    learning_rate        | 5.81e-05     |
+|    loss                 | 0.979        |
+|    n_updates            | 80           |
+|    policy_gradient_loss | -0.00466     |
+|    value_loss           | 18.7         |
+------------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.45e+03    |
+|    ep_rew_mean          | -81.7       |
+| time/                   |             |
+|    fps                  | 326         |
+|    iterations           | 10          |
+|    time_elapsed         | 116         |
+|    total_timesteps      | 38130       |
+| train/                  |             |
+|    approx_kl            | 0.005408008 |
+|    clip_fraction        | 0.217       |
+|    clip_range           | 0.123       |
+|    entropy_loss         | -8.29       |
+|    explained_variance   | -1.29       |
+|    learning_rate        | 5.81e-05    |
+|    loss                 | 4.61        |
+|    n_updates            | 90          |
+|    policy_gradient_loss | -0.00827    |
+|    value_loss           | 5.36        |
+-----------------------------------------
+------------------------------------------
+| rollout/                |              |
+|    ep_len_mean          | 2.45e+03     |
+|    ep_rew_mean          | -76.2        |
+| time/                   |              |
+|    fps                  | 326          |
+|    iterations           | 11           |
+|    time_elapsed         | 128          |
+|    total_timesteps      | 41943        |
+| train/                  |              |
+|    approx_kl            | 0.0057736286 |
+|    clip_fraction        | 0.19         |
+|    clip_range           | 0.123        |
+|    entropy_loss         | -8.28        |
+|    explained_variance   | -0.141       |
+|    learning_rate        | 5.81e-05     |
+|    loss                 | 3.36         |
+|    n_updates            | 100          |
+|    policy_gradient_loss | -0.00708     |
+|    value_loss           | 19.9         |
+------------------------------------------
+------------------------------------------
+| rollout/                |              |
+|    ep_len_mean          | 2.6e+03      |
+|    ep_rew_mean          | -88.4        |
+| time/                   |              |
+|    fps                  | 326          |
+|    iterations           | 12           |
+|    time_elapsed         | 140          |
+|    total_timesteps      | 45756        |
+| train/                  |              |
+|    approx_kl            | 0.0061197034 |
+|    clip_fraction        | 0.221        |
+|    clip_range           | 0.123        |
+|    entropy_loss         | -8.27        |
+|    explained_variance   | -0.275       |
+|    learning_rate        | 5.81e-05     |
+|    loss                 | 1.75         |
+|    n_updates            | 110          |
+|    policy_gradient_loss | -0.00862     |
+|    value_loss           | 9.48         |
+------------------------------------------
+------------------------------------------
+| rollout/                |              |
+|    ep_len_mean          | 2.58e+03     |
+|    ep_rew_mean          | -86.6        |
+| time/                   |              |
+|    fps                  | 329          |
+|    iterations           | 13           |
+|    time_elapsed         | 150          |
+|    total_timesteps      | 49569        |
+| train/                  |              |
+|    approx_kl            | 0.0073136846 |
+|    clip_fraction        | 0.233        |
+|    clip_range           | 0.123        |
+|    entropy_loss         | -8.27        |
+|    explained_variance   | -0.0933      |
+|    learning_rate        | 5.81e-05     |
+|    loss                 | 0.466        |
+|    n_updates            | 120          |
+|    policy_gradient_loss | -0.00841     |
+|    value_loss           | 18           |
+------------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.8e+03     |
+|    ep_rew_mean          | -18.5       |
+| time/                   |             |
+|    fps                  | 330         |
+|    iterations           | 14          |
+|    time_elapsed         | 161         |
+|    total_timesteps      | 53382       |
+| train/                  |             |
+|    approx_kl            | 0.012948585 |
+|    clip_fraction        | 0.348       |
+|    clip_range           | 0.123       |
+|    entropy_loss         | -8.26       |
+|    explained_variance   | 0.0057      |
+|    learning_rate        | 5.81e-05    |
+|    loss                 | 2.17        |
+|    n_updates            | 130         |
+|    policy_gradient_loss | 0.00365     |
+|    value_loss           | 1.38e+03    |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.79e+03    |
+|    ep_rew_mean          | -17.8       |
+| time/                   |             |
+|    fps                  | 332         |
+|    iterations           | 15          |
+|    time_elapsed         | 172         |
+|    total_timesteps      | 57195       |
+| train/                  |             |
+|    approx_kl            | 0.010128591 |
+|    clip_fraction        | 0.417       |
+|    clip_range           | 0.123       |
+|    entropy_loss         | -8.25       |
+|    explained_variance   | -4.29       |
+|    learning_rate        | 5.81e-05    |
+|    loss                 | 3.52        |
+|    n_updates            | 140         |
+|    policy_gradient_loss | 0.00333     |
+|    value_loss           | 7.59        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.69e+03    |
+|    ep_rew_mean          | -25         |
+| time/                   |             |
+|    fps                  | 333         |
+|    iterations           | 16          |
+|    time_elapsed         | 182         |
+|    total_timesteps      | 61008       |
+| train/                  |             |
+|    approx_kl            | 0.009500639 |
+|    clip_fraction        | 0.306       |
+|    clip_range           | 0.123       |
+|    entropy_loss         | -8.27       |
+|    explained_variance   | -0.37       |
+|    learning_rate        | 5.81e-05    |
+|    loss                 | 3.36        |
+|    n_updates            | 150         |
+|    policy_gradient_loss | -0.00701    |
+|    value_loss           | 9.43        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.69e+03    |
+|    ep_rew_mean          | -35.8       |
+| time/                   |             |
+|    fps                  | 334         |
+|    iterations           | 17          |
+|    time_elapsed         | 193         |
+|    total_timesteps      | 64821       |
+| train/                  |             |
+|    approx_kl            | 0.013091733 |
+|    clip_fraction        | 0.352       |
+|    clip_range           | 0.123       |
+|    entropy_loss         | -8.25       |
+|    explained_variance   | -0.138      |
+|    learning_rate        | 5.81e-05    |
+|    loss                 | 0.699       |
+|    n_updates            | 160         |
+|    policy_gradient_loss | -0.00203    |
+|    value_loss           | 25.9        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.67e+03    |
+|    ep_rew_mean          | -34         |
+| time/                   |             |
+|    fps                  | 335         |
+|    iterations           | 18          |
+|    time_elapsed         | 204         |
+|    total_timesteps      | 68634       |
+| train/                  |             |
+|    approx_kl            | 0.013554989 |
+|    clip_fraction        | 0.341       |
+|    clip_range           | 0.123       |
+|    entropy_loss         | -8.24       |
+|    explained_variance   | -0.24       |
+|    learning_rate        | 5.81e-05    |
+|    loss                 | 0.791       |
+|    n_updates            | 170         |
+|    policy_gradient_loss | -0.00409    |
+|    value_loss           | 18.6        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.64e+03    |
+|    ep_rew_mean          | -52.1       |
+| time/                   |             |
+|    fps                  | 335         |
+|    iterations           | 19          |
+|    time_elapsed         | 215         |
+|    total_timesteps      | 72447       |
+| train/                  |             |
+|    approx_kl            | 0.010819951 |
+|    clip_fraction        | 0.347       |
+|    clip_range           | 0.123       |
+|    entropy_loss         | -8.23       |
+|    explained_variance   | -0.575      |
+|    learning_rate        | 5.81e-05    |
+|    loss                 | 0.601       |
+|    n_updates            | 180         |
+|    policy_gradient_loss | -0.00691    |
+|    value_loss           | 9.12        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.63e+03    |
+|    ep_rew_mean          | -60.8       |
+| time/                   |             |
+|    fps                  | 333         |
+|    iterations           | 20          |
+|    time_elapsed         | 228         |
+|    total_timesteps      | 76260       |
+| train/                  |             |
+|    approx_kl            | 0.015728015 |
+|    clip_fraction        | 0.39        |
+|    clip_range           | 0.123       |
+|    entropy_loss         | -8.24       |
+|    explained_variance   | -0.0299     |
+|    learning_rate        | 5.81e-05    |
+|    loss                 | 1.85        |
+|    n_updates            | 190         |
+|    policy_gradient_loss | -0.00169    |
+|    value_loss           | 27.5        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.62e+03    |
+|    ep_rew_mean          | -65.9       |
+| time/                   |             |
+|    fps                  | 331         |
+|    iterations           | 21          |
+|    time_elapsed         | 241         |
+|    total_timesteps      | 80073       |
+| train/                  |             |
+|    approx_kl            | 0.013783906 |
+|    clip_fraction        | 0.427       |
+|    clip_range           | 0.123       |
+|    entropy_loss         | -8.22       |
+|    explained_variance   | -0.214      |
+|    learning_rate        | 5.81e-05    |
+|    loss                 | 1.94        |
+|    n_updates            | 200         |
+|    policy_gradient_loss | 0.000583    |
+|    value_loss           | 35.7        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.63e+03    |
+|    ep_rew_mean          | -65.4       |
+| time/                   |             |
+|    fps                  | 329         |
+|    iterations           | 22          |
+|    time_elapsed         | 254         |
+|    total_timesteps      | 83886       |
+| train/                  |             |
+|    approx_kl            | 0.014645203 |
+|    clip_fraction        | 0.43        |
+|    clip_range           | 0.123       |
+|    entropy_loss         | -8.21       |
+|    explained_variance   | -0.164      |
+|    learning_rate        | 5.81e-05    |
+|    loss                 | 2.59        |
+|    n_updates            | 210         |
+|    policy_gradient_loss | 0.00338     |
+|    value_loss           | 16.1        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.55e+03    |
+|    ep_rew_mean          | -75.3       |
+| time/                   |             |
+|    fps                  | 327         |
+|    iterations           | 23          |
+|    time_elapsed         | 267         |
+|    total_timesteps      | 87699       |
+| train/                  |             |
+|    approx_kl            | 0.015013908 |
+|    clip_fraction        | 0.426       |
+|    clip_range           | 0.123       |
+|    entropy_loss         | -8.19       |
+|    explained_variance   | -0.904      |
+|    learning_rate        | 5.81e-05    |
+|    loss                 | 1.16        |
+|    n_updates            | 220         |
+|    policy_gradient_loss | -0.000645   |
+|    value_loss           | 7.32        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.53e+03   |
+|    ep_rew_mean          | -73.4      |
+| time/                   |            |
+|    fps                  | 326        |
+|    iterations           | 24         |
+|    time_elapsed         | 280        |
+|    total_timesteps      | 91512      |
+| train/                  |            |
+|    approx_kl            | 0.02647818 |
+|    clip_fraction        | 0.481      |
+|    clip_range           | 0.123      |
+|    entropy_loss         | -8.11      |
+|    explained_variance   | -0.0693    |
+|    learning_rate        | 5.81e-05   |
+|    loss                 | 3.85       |
+|    n_updates            | 230        |
+|    policy_gradient_loss | 0.00305    |
+|    value_loss           | 32.6       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.5e+03     |
+|    ep_rew_mean          | -71.4       |
+| time/                   |             |
+|    fps                  | 325         |
+|    iterations           | 25          |
+|    time_elapsed         | 293         |
+|    total_timesteps      | 95325       |
+| train/                  |             |
+|    approx_kl            | 0.019708665 |
+|    clip_fraction        | 0.482       |
+|    clip_range           | 0.123       |
+|    entropy_loss         | -8.12       |
+|    explained_variance   | -0.496      |
+|    learning_rate        | 5.81e-05    |
+|    loss                 | 3.13        |
+|    n_updates            | 240         |
+|    policy_gradient_loss | 0.0037      |
+|    value_loss           | 13.6        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.47e+03    |
+|    ep_rew_mean          | -78.4       |
+| time/                   |             |
+|    fps                  | 323         |
+|    iterations           | 26          |
+|    time_elapsed         | 306         |
+|    total_timesteps      | 99138       |
+| train/                  |             |
+|    approx_kl            | 0.017824553 |
+|    clip_fraction        | 0.454       |
+|    clip_range           | 0.123       |
+|    entropy_loss         | -8.11       |
+|    explained_variance   | -0.288      |
+|    learning_rate        | 5.81e-05    |
+|    loss                 | 0.948       |
+|    n_updates            | 250         |
+|    policy_gradient_loss | 0.000339    |
+|    value_loss           | 15.5        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.41e+03   |
+|    ep_rew_mean          | -63.9      |
+| time/                   |            |
+|    fps                  | 322        |
+|    iterations           | 27         |
+|    time_elapsed         | 319        |
+|    total_timesteps      | 102951     |
+| train/                  |            |
+|    approx_kl            | 0.01960509 |
+|    clip_fraction        | 0.495      |
+|    clip_range           | 0.123      |
+|    entropy_loss         | -8.1       |
+|    explained_variance   | -0.318     |
+|    learning_rate        | 5.81e-05   |
+|    loss                 | 0.768      |
+|    n_updates            | 260        |
+|    policy_gradient_loss | 0.00504    |
+|    value_loss           | 22.9       |
+----------------------------------------
+[I 2023-03-30 22:02:13,996] Trial 3 finished with value: -296.0 and parameters: {'n_steps': 3813, 'gamma': 0.904141731391283, 'learning_rate': 5.8079041373677925e-05, 'clip_range': 0.12331848349559273, 'gae_lambda': 0.9741511540746485}. Best is trial 1 with value: -205.0.
+Using cuda device
+Wrapping the env in a DummyVecEnv.
+Wrapping the env in a VecTransposeImage.
+C:\ProgramData\Anaconda3\envs\StreetFighterAI\lib\site-packages\stable_baselines3\ppo\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 2255`, after every 35 untruncated mini-batches, there will be a truncated mini-batch of size 15
+We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
+Info: (n_steps=2255 and n_envs=1)
+  warnings.warn(
+Logging to logs/PPO_22
+---------------------------------
+| rollout/           |          |
+|    ep_len_mean     | 1.96e+03 |
+|    ep_rew_mean     | -277     |
+| time/              |          |
+|    fps             | 612      |
+|    iterations      | 1        |
+|    time_elapsed    | 3        |
+|    total_timesteps | 2255     |
+---------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 1.76e+03    |
+|    ep_rew_mean          | -312        |
+| time/                   |             |
+|    fps                  | 449         |
+|    iterations           | 2           |
+|    time_elapsed         | 10          |
+|    total_timesteps      | 4510        |
+| train/                  |             |
+|    approx_kl            | 0.020538189 |
+|    clip_fraction        | 0.184       |
+|    clip_range           | 0.246       |
+|    entropy_loss         | -8.3        |
+|    explained_variance   | 0.00102     |
+|    learning_rate        | 6.18e-05    |
+|    loss                 | 0.312       |
+|    n_updates            | 10          |
+|    policy_gradient_loss | -0.0141     |
+|    value_loss           | 28.1        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.02e+03    |
+|    ep_rew_mean          | -205        |
+| time/                   |             |
+|    fps                  | 397         |
+|    iterations           | 3           |
+|    time_elapsed         | 17          |
+|    total_timesteps      | 6765        |
+| train/                  |             |
+|    approx_kl            | 0.014282044 |
+|    clip_fraction        | 0.149       |
+|    clip_range           | 0.246       |
+|    entropy_loss         | -8.29       |
+|    explained_variance   | 0.0389      |
+|    learning_rate        | 6.18e-05    |
+|    loss                 | 3.57        |
+|    n_updates            | 20          |
+|    policy_gradient_loss | -0.00883    |
+|    value_loss           | 41.2        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.02e+03    |
+|    ep_rew_mean          | -205        |
+| time/                   |             |
+|    fps                  | 378         |
+|    iterations           | 4           |
+|    time_elapsed         | 23          |
+|    total_timesteps      | 9020        |
+| train/                  |             |
+|    approx_kl            | 0.010472495 |
+|    clip_fraction        | 0.124       |
+|    clip_range           | 0.246       |
+|    entropy_loss         | -8.27       |
+|    explained_variance   | -0.182      |
+|    learning_rate        | 6.18e-05    |
+|    loss                 | 0.526       |
+|    n_updates            | 30          |
+|    policy_gradient_loss | -0.0058     |
+|    value_loss           | 6.26        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.29e+03    |
+|    ep_rew_mean          | -158        |
+| time/                   |             |
+|    fps                  | 368         |
+|    iterations           | 5           |
+|    time_elapsed         | 30          |
+|    total_timesteps      | 11275       |
+| train/                  |             |
+|    approx_kl            | 0.014202305 |
+|    clip_fraction        | 0.116       |
+|    clip_range           | 0.246       |
+|    entropy_loss         | -8.26       |
+|    explained_variance   | -0.0118     |
+|    learning_rate        | 6.18e-05    |
+|    loss                 | 0.206       |
+|    n_updates            | 40          |
+|    policy_gradient_loss | -0.00424    |
+|    value_loss           | 8.23        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.4e+03     |
+|    ep_rew_mean          | -160        |
+| time/                   |             |
+|    fps                  | 361         |
+|    iterations           | 6           |
+|    time_elapsed         | 37          |
+|    total_timesteps      | 13530       |
+| train/                  |             |
+|    approx_kl            | 0.009082135 |
+|    clip_fraction        | 0.0682      |
+|    clip_range           | 0.246       |
+|    entropy_loss         | -8.25       |
+|    explained_variance   | 0.0728      |
+|    learning_rate        | 6.18e-05    |
+|    loss                 | 3.01        |
+|    n_updates            | 50          |
+|    policy_gradient_loss | -0.0105     |
+|    value_loss           | 4.79        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.38e+03    |
+|    ep_rew_mean          | -162        |
+| time/                   |             |
+|    fps                  | 360         |
+|    iterations           | 7           |
+|    time_elapsed         | 43          |
+|    total_timesteps      | 15785       |
+| train/                  |             |
+|    approx_kl            | 0.016703699 |
+|    clip_fraction        | 0.153       |
+|    clip_range           | 0.246       |
+|    entropy_loss         | -8.22       |
+|    explained_variance   | 0.0113      |
+|    learning_rate        | 6.18e-05    |
+|    loss                 | 5.27        |
+|    n_updates            | 60          |
+|    policy_gradient_loss | -0.0144     |
+|    value_loss           | 13          |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.25e+03    |
+|    ep_rew_mean          | -128        |
+| time/                   |             |
+|    fps                  | 360         |
+|    iterations           | 8           |
+|    time_elapsed         | 49          |
+|    total_timesteps      | 18040       |
+| train/                  |             |
+|    approx_kl            | 0.024009299 |
+|    clip_fraction        | 0.152       |
+|    clip_range           | 0.246       |
+|    entropy_loss         | -8.2        |
+|    explained_variance   | 0.0536      |
+|    learning_rate        | 6.18e-05    |
+|    loss                 | 0.72        |
+|    n_updates            | 70          |
+|    policy_gradient_loss | -0.0143     |
+|    value_loss           | 15.3        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.25e+03   |
+|    ep_rew_mean          | -128       |
+| time/                   |            |
+|    fps                  | 360        |
+|    iterations           | 9          |
+|    time_elapsed         | 56         |
+|    total_timesteps      | 20295      |
+| train/                  |            |
+|    approx_kl            | 0.02027614 |
+|    clip_fraction        | 0.184      |
+|    clip_range           | 0.246      |
+|    entropy_loss         | -8.14      |
+|    explained_variance   | 0.0651     |
+|    learning_rate        | 6.18e-05   |
+|    loss                 | 0.961      |
+|    n_updates            | 80         |
+|    policy_gradient_loss | -0.00954   |
+|    value_loss           | 24.2       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.33e+03    |
+|    ep_rew_mean          | -113        |
+| time/                   |             |
+|    fps                  | 360         |
+|    iterations           | 10          |
+|    time_elapsed         | 62          |
+|    total_timesteps      | 22550       |
+| train/                  |             |
+|    approx_kl            | 0.016172899 |
+|    clip_fraction        | 0.143       |
+|    clip_range           | 0.246       |
+|    entropy_loss         | -8.12       |
+|    explained_variance   | 0.00202     |
+|    learning_rate        | 6.18e-05    |
+|    loss                 | 19.2        |
+|    n_updates            | 90          |
+|    policy_gradient_loss | -0.00773    |
+|    value_loss           | 8.99        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.28e+03    |
+|    ep_rew_mean          | -104        |
+| time/                   |             |
+|    fps                  | 359         |
+|    iterations           | 11          |
+|    time_elapsed         | 68          |
+|    total_timesteps      | 24805       |
+| train/                  |             |
+|    approx_kl            | 0.012545445 |
+|    clip_fraction        | 0.0956      |
+|    clip_range           | 0.246       |
+|    entropy_loss         | -8.15       |
+|    explained_variance   | 0.114       |
+|    learning_rate        | 6.18e-05    |
+|    loss                 | 0.305       |
+|    n_updates            | 100         |
+|    policy_gradient_loss | -0.0125     |
+|    value_loss           | 8.93        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.26e+03   |
+|    ep_rew_mean          | -81.5      |
+| time/                   |            |
+|    fps                  | 358        |
+|    iterations           | 12         |
+|    time_elapsed         | 75         |
+|    total_timesteps      | 27060      |
+| train/                  |            |
+|    approx_kl            | 0.01598395 |
+|    clip_fraction        | 0.135      |
+|    clip_range           | 0.246      |
+|    entropy_loss         | -8.1       |
+|    explained_variance   | 0.102      |
+|    learning_rate        | 6.18e-05   |
+|    loss                 | 0.936      |
+|    n_updates            | 110        |
+|    policy_gradient_loss | -0.0195    |
+|    value_loss           | 7.65       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.26e+03    |
+|    ep_rew_mean          | -81.5       |
+| time/                   |             |
+|    fps                  | 358         |
+|    iterations           | 13          |
+|    time_elapsed         | 81          |
+|    total_timesteps      | 29315       |
+| train/                  |             |
+|    approx_kl            | 0.016748266 |
+|    clip_fraction        | 0.138       |
+|    clip_range           | 0.246       |
+|    entropy_loss         | -8.09       |
+|    explained_variance   | 0.0378      |
+|    learning_rate        | 6.18e-05    |
+|    loss                 | 1.89        |
+|    n_updates            | 120         |
+|    policy_gradient_loss | -0.016      |
+|    value_loss           | 16          |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.26e+03   |
+|    ep_rew_mean          | -81.5      |
+| time/                   |            |
+|    fps                  | 358        |
+|    iterations           | 14         |
+|    time_elapsed         | 88         |
+|    total_timesteps      | 31570      |
+| train/                  |            |
+|    approx_kl            | 0.02423302 |
+|    clip_fraction        | 0.145      |
+|    clip_range           | 0.246      |
+|    entropy_loss         | -8.1       |
+|    explained_variance   | -0.000177  |
+|    learning_rate        | 6.18e-05   |
+|    loss                 | 0.226      |
+|    n_updates            | 130        |
+|    policy_gradient_loss | -0.0147    |
+|    value_loss           | 47.5       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.58e+03    |
+|    ep_rew_mean          | -80.2       |
+| time/                   |             |
+|    fps                  | 358         |
+|    iterations           | 15          |
+|    time_elapsed         | 94          |
+|    total_timesteps      | 33825       |
+| train/                  |             |
+|    approx_kl            | 0.016911915 |
+|    clip_fraction        | 0.133       |
+|    clip_range           | 0.246       |
+|    entropy_loss         | -8.12       |
+|    explained_variance   | 0.0574      |
+|    learning_rate        | 6.18e-05    |
+|    loss                 | 0.128       |
+|    n_updates            | 140         |
+|    policy_gradient_loss | -0.0177     |
+|    value_loss           | 5.19        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.53e+03    |
+|    ep_rew_mean          | -95.7       |
+| time/                   |             |
+|    fps                  | 357         |
+|    iterations           | 16          |
+|    time_elapsed         | 100         |
+|    total_timesteps      | 36080       |
+| train/                  |             |
+|    approx_kl            | 0.012477045 |
+|    clip_fraction        | 0.0815      |
+|    clip_range           | 0.246       |
+|    entropy_loss         | -8.09       |
+|    explained_variance   | 0.0981      |
+|    learning_rate        | 6.18e-05    |
+|    loss                 | 1.54        |
+|    n_updates            | 150         |
+|    policy_gradient_loss | -0.0104     |
+|    value_loss           | 24.3        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.49e+03    |
+|    ep_rew_mean          | -104        |
+| time/                   |             |
+|    fps                  | 357         |
+|    iterations           | 17          |
+|    time_elapsed         | 107         |
+|    total_timesteps      | 38335       |
+| train/                  |             |
+|    approx_kl            | 0.023185179 |
+|    clip_fraction        | 0.145       |
+|    clip_range           | 0.246       |
+|    entropy_loss         | -8.09       |
+|    explained_variance   | 0.0336      |
+|    learning_rate        | 6.18e-05    |
+|    loss                 | 0.609       |
+|    n_updates            | 160         |
+|    policy_gradient_loss | -0.0129     |
+|    value_loss           | 22.4        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.47e+03    |
+|    ep_rew_mean          | -105        |
+| time/                   |             |
+|    fps                  | 357         |
+|    iterations           | 18          |
+|    time_elapsed         | 113         |
+|    total_timesteps      | 40590       |
+| train/                  |             |
+|    approx_kl            | 0.015480906 |
+|    clip_fraction        | 0.17        |
+|    clip_range           | 0.246       |
+|    entropy_loss         | -8.08       |
+|    explained_variance   | 0.0682      |
+|    learning_rate        | 6.18e-05    |
+|    loss                 | 0.449       |
+|    n_updates            | 170         |
+|    policy_gradient_loss | -0.0136     |
+|    value_loss           | 18          |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.44e+03   |
+|    ep_rew_mean          | -119       |
+| time/                   |            |
+|    fps                  | 357        |
+|    iterations           | 19         |
+|    time_elapsed         | 119        |
+|    total_timesteps      | 42845      |
+| train/                  |            |
+|    approx_kl            | 0.01642779 |
+|    clip_fraction        | 0.136      |
+|    clip_range           | 0.246      |
+|    entropy_loss         | -8.03      |
+|    explained_variance   | 0.0203     |
+|    learning_rate        | 6.18e-05   |
+|    loss                 | 0.749      |
+|    n_updates            | 180        |
+|    policy_gradient_loss | -0.00958   |
+|    value_loss           | 13.1       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.48e+03    |
+|    ep_rew_mean          | -113        |
+| time/                   |             |
+|    fps                  | 356         |
+|    iterations           | 20          |
+|    time_elapsed         | 126         |
+|    total_timesteps      | 45100       |
+| train/                  |             |
+|    approx_kl            | 0.013520324 |
+|    clip_fraction        | 0.13        |
+|    clip_range           | 0.246       |
+|    entropy_loss         | -8.04       |
+|    explained_variance   | 0.017       |
+|    learning_rate        | 6.18e-05    |
+|    loss                 | 0.554       |
+|    n_updates            | 190         |
+|    policy_gradient_loss | -0.0125     |
+|    value_loss           | 33          |
+-----------------------------------------
+---------------------------------------
+| rollout/                |           |
+|    ep_len_mean          | 2.43e+03  |
+|    ep_rew_mean          | -117      |
+| time/                   |           |
+|    fps                  | 355       |
+|    iterations           | 21        |
+|    time_elapsed         | 133       |
+|    total_timesteps      | 47355     |
+| train/                  |           |
+|    approx_kl            | 0.0185782 |
+|    clip_fraction        | 0.156     |
+|    clip_range           | 0.246     |
+|    entropy_loss         | -8.02     |
+|    explained_variance   | 0.255     |
+|    learning_rate        | 6.18e-05  |
+|    loss                 | 0.618     |
+|    n_updates            | 200       |
+|    policy_gradient_loss | -0.0175   |
+|    value_loss           | 6.95      |
+---------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.43e+03    |
+|    ep_rew_mean          | -117        |
+| time/                   |             |
+|    fps                  | 354         |
+|    iterations           | 22          |
+|    time_elapsed         | 139         |
+|    total_timesteps      | 49610       |
+| train/                  |             |
+|    approx_kl            | 0.016632264 |
+|    clip_fraction        | 0.131       |
+|    clip_range           | 0.246       |
+|    entropy_loss         | -8.02       |
+|    explained_variance   | 0.0436      |
+|    learning_rate        | 6.18e-05    |
+|    loss                 | 1.05        |
+|    n_updates            | 210         |
+|    policy_gradient_loss | -0.0174     |
+|    value_loss           | 11          |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.43e+03    |
+|    ep_rew_mean          | -117        |
+| time/                   |             |
+|    fps                  | 354         |
+|    iterations           | 23          |
+|    time_elapsed         | 146         |
+|    total_timesteps      | 51865       |
+| train/                  |             |
+|    approx_kl            | 0.019239776 |
+|    clip_fraction        | 0.165       |
+|    clip_range           | 0.246       |
+|    entropy_loss         | -7.95       |
+|    explained_variance   | 0.0435      |
+|    learning_rate        | 6.18e-05    |
+|    loss                 | 1.27        |
+|    n_updates            | 220         |
+|    policy_gradient_loss | -0.018      |
+|    value_loss           | 6.75        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.68e+03    |
+|    ep_rew_mean          | -116        |
+| time/                   |             |
+|    fps                  | 353         |
+|    iterations           | 24          |
+|    time_elapsed         | 153         |
+|    total_timesteps      | 54120       |
+| train/                  |             |
+|    approx_kl            | 0.022035323 |
+|    clip_fraction        | 0.184       |
+|    clip_range           | 0.246       |
+|    entropy_loss         | -7.93       |
+|    explained_variance   | 0.195       |
+|    learning_rate        | 6.18e-05    |
+|    loss                 | 0.162       |
+|    n_updates            | 230         |
+|    policy_gradient_loss | -0.0164     |
+|    value_loss           | 4.23        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.67e+03    |
+|    ep_rew_mean          | -113        |
+| time/                   |             |
+|    fps                  | 352         |
+|    iterations           | 25          |
+|    time_elapsed         | 160         |
+|    total_timesteps      | 56375       |
+| train/                  |             |
+|    approx_kl            | 0.016369374 |
+|    clip_fraction        | 0.141       |
+|    clip_range           | 0.246       |
+|    entropy_loss         | -7.98       |
+|    explained_variance   | 0.0345      |
+|    learning_rate        | 6.18e-05    |
+|    loss                 | 0.162       |
+|    n_updates            | 240         |
+|    policy_gradient_loss | -0.0173     |
+|    value_loss           | 5.37        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.62e+03   |
+|    ep_rew_mean          | -98.4      |
+| time/                   |            |
+|    fps                  | 352        |
+|    iterations           | 26         |
+|    time_elapsed         | 166        |
+|    total_timesteps      | 58630      |
+| train/                  |            |
+|    approx_kl            | 0.01639726 |
+|    clip_fraction        | 0.13       |
+|    clip_range           | 0.246      |
+|    entropy_loss         | -7.96      |
+|    explained_variance   | 0.136      |
+|    learning_rate        | 6.18e-05   |
+|    loss                 | 0.725      |
+|    n_updates            | 250        |
+|    policy_gradient_loss | -0.0164    |
+|    value_loss           | 8.18       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.59e+03    |
+|    ep_rew_mean          | -98.9       |
+| time/                   |             |
+|    fps                  | 352         |
+|    iterations           | 27          |
+|    time_elapsed         | 172         |
+|    total_timesteps      | 60885       |
+| train/                  |             |
+|    approx_kl            | 0.016931452 |
+|    clip_fraction        | 0.124       |
+|    clip_range           | 0.246       |
+|    entropy_loss         | -7.97       |
+|    explained_variance   | -0.0917     |
+|    learning_rate        | 6.18e-05    |
+|    loss                 | 3.31        |
+|    n_updates            | 260         |
+|    policy_gradient_loss | -0.00691    |
+|    value_loss           | 22.9        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.55e+03    |
+|    ep_rew_mean          | -108        |
+| time/                   |             |
+|    fps                  | 353         |
+|    iterations           | 28          |
+|    time_elapsed         | 178         |
+|    total_timesteps      | 63140       |
+| train/                  |             |
+|    approx_kl            | 0.020913824 |
+|    clip_fraction        | 0.157       |
+|    clip_range           | 0.246       |
+|    entropy_loss         | -7.88       |
+|    explained_variance   | 0.215       |
+|    learning_rate        | 6.18e-05    |
+|    loss                 | 0.995       |
+|    n_updates            | 270         |
+|    policy_gradient_loss | -0.0182     |
+|    value_loss           | 7.35        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.55e+03    |
+|    ep_rew_mean          | -103        |
+| time/                   |             |
+|    fps                  | 353         |
+|    iterations           | 29          |
+|    time_elapsed         | 184         |
+|    total_timesteps      | 65395       |
+| train/                  |             |
+|    approx_kl            | 0.021241019 |
+|    clip_fraction        | 0.149       |
+|    clip_range           | 0.246       |
+|    entropy_loss         | -7.81       |
+|    explained_variance   | -0.057      |
+|    learning_rate        | 6.18e-05    |
+|    loss                 | 0.312       |
+|    n_updates            | 280         |
+|    policy_gradient_loss | -0.0152     |
+|    value_loss           | 32.1        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.57e+03    |
+|    ep_rew_mean          | -101        |
+| time/                   |             |
+|    fps                  | 353         |
+|    iterations           | 30          |
+|    time_elapsed         | 191         |
+|    total_timesteps      | 67650       |
+| train/                  |             |
+|    approx_kl            | 0.022794545 |
+|    clip_fraction        | 0.174       |
+|    clip_range           | 0.246       |
+|    entropy_loss         | -7.92       |
+|    explained_variance   | 0.0735      |
+|    learning_rate        | 6.18e-05    |
+|    loss                 | 1.95        |
+|    n_updates            | 290         |
+|    policy_gradient_loss | -0.0239     |
+|    value_loss           | 4.61        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.57e+03    |
+|    ep_rew_mean          | -101        |
+| time/                   |             |
+|    fps                  | 353         |
+|    iterations           | 31          |
+|    time_elapsed         | 197         |
+|    total_timesteps      | 69905       |
+| train/                  |             |
+|    approx_kl            | 0.018762259 |
+|    clip_fraction        | 0.164       |
+|    clip_range           | 0.246       |
+|    entropy_loss         | -7.94       |
+|    explained_variance   | -0.0688     |
+|    learning_rate        | 6.18e-05    |
+|    loss                 | 0.732       |
+|    n_updates            | 300         |
+|    policy_gradient_loss | -0.0192     |
+|    value_loss           | 4.37        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.59e+03    |
+|    ep_rew_mean          | -108        |
+| time/                   |             |
+|    fps                  | 353         |
+|    iterations           | 32          |
+|    time_elapsed         | 204         |
+|    total_timesteps      | 72160       |
+| train/                  |             |
+|    approx_kl            | 0.018497027 |
+|    clip_fraction        | 0.166       |
+|    clip_range           | 0.246       |
+|    entropy_loss         | -7.91       |
+|    explained_variance   | 0.197       |
+|    learning_rate        | 6.18e-05    |
+|    loss                 | 0.135       |
+|    n_updates            | 310         |
+|    policy_gradient_loss | -0.0232     |
+|    value_loss           | 3.52        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.56e+03    |
+|    ep_rew_mean          | -113        |
+| time/                   |             |
+|    fps                  | 353         |
+|    iterations           | 33          |
+|    time_elapsed         | 210         |
+|    total_timesteps      | 74415       |
+| train/                  |             |
+|    approx_kl            | 0.023638394 |
+|    clip_fraction        | 0.166       |
+|    clip_range           | 0.246       |
+|    entropy_loss         | -7.91       |
+|    explained_variance   | -0.0457     |
+|    learning_rate        | 6.18e-05    |
+|    loss                 | 0.139       |
+|    n_updates            | 320         |
+|    policy_gradient_loss | -0.0173     |
+|    value_loss           | 19.5        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.54e+03    |
+|    ep_rew_mean          | -116        |
+| time/                   |             |
+|    fps                  | 354         |
+|    iterations           | 34          |
+|    time_elapsed         | 216         |
+|    total_timesteps      | 76670       |
+| train/                  |             |
+|    approx_kl            | 0.030456556 |
+|    clip_fraction        | 0.245       |
+|    clip_range           | 0.246       |
+|    entropy_loss         | -7.84       |
+|    explained_variance   | 0.0645      |
+|    learning_rate        | 6.18e-05    |
+|    loss                 | 1.4         |
+|    n_updates            | 330         |
+|    policy_gradient_loss | -0.0148     |
+|    value_loss           | 20.3        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.53e+03    |
+|    ep_rew_mean          | -120        |
+| time/                   |             |
+|    fps                  | 354         |
+|    iterations           | 35          |
+|    time_elapsed         | 222         |
+|    total_timesteps      | 78925       |
+| train/                  |             |
+|    approx_kl            | 0.023147207 |
+|    clip_fraction        | 0.216       |
+|    clip_range           | 0.246       |
+|    entropy_loss         | -7.9        |
+|    explained_variance   | 0.26        |
+|    learning_rate        | 6.18e-05    |
+|    loss                 | 0.602       |
+|    n_updates            | 340         |
+|    policy_gradient_loss | -0.011      |
+|    value_loss           | 18.6        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.52e+03    |
+|    ep_rew_mean          | -125        |
+| time/                   |             |
+|    fps                  | 354         |
+|    iterations           | 36          |
+|    time_elapsed         | 228         |
+|    total_timesteps      | 81180       |
+| train/                  |             |
+|    approx_kl            | 0.027237331 |
+|    clip_fraction        | 0.201       |
+|    clip_range           | 0.246       |
+|    entropy_loss         | -7.88       |
+|    explained_variance   | 0.313       |
+|    learning_rate        | 6.18e-05    |
+|    loss                 | 0.175       |
+|    n_updates            | 350         |
+|    policy_gradient_loss | -0.0156     |
+|    value_loss           | 16.3        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.52e+03    |
+|    ep_rew_mean          | -125        |
+| time/                   |             |
+|    fps                  | 354         |
+|    iterations           | 37          |
+|    time_elapsed         | 235         |
+|    total_timesteps      | 83435       |
+| train/                  |             |
+|    approx_kl            | 0.025215741 |
+|    clip_fraction        | 0.196       |
+|    clip_range           | 0.246       |
+|    entropy_loss         | -7.85       |
+|    explained_variance   | -0.0349     |
+|    learning_rate        | 6.18e-05    |
+|    loss                 | 328         |
+|    n_updates            | 360         |
+|    policy_gradient_loss | -0.0175     |
+|    value_loss           | 30.2        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.54e+03    |
+|    ep_rew_mean          | -122        |
+| time/                   |             |
+|    fps                  | 354         |
+|    iterations           | 38          |
+|    time_elapsed         | 241         |
+|    total_timesteps      | 85690       |
+| train/                  |             |
+|    approx_kl            | 0.023207983 |
+|    clip_fraction        | 0.192       |
+|    clip_range           | 0.246       |
+|    entropy_loss         | -7.88       |
+|    explained_variance   | 0.167       |
+|    learning_rate        | 6.18e-05    |
+|    loss                 | 0.145       |
+|    n_updates            | 370         |
+|    policy_gradient_loss | -0.0241     |
+|    value_loss           | 4.89        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.53e+03    |
+|    ep_rew_mean          | -124        |
+| time/                   |             |
+|    fps                  | 354         |
+|    iterations           | 39          |
+|    time_elapsed         | 247         |
+|    total_timesteps      | 87945       |
+| train/                  |             |
+|    approx_kl            | 0.023880122 |
+|    clip_fraction        | 0.22        |
+|    clip_range           | 0.246       |
+|    entropy_loss         | -7.9        |
+|    explained_variance   | 0.033       |
+|    learning_rate        | 6.18e-05    |
+|    loss                 | 0.34        |
+|    n_updates            | 380         |
+|    policy_gradient_loss | -0.0178     |
+|    value_loss           | 4.61        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.53e+03    |
+|    ep_rew_mean          | -127        |
+| time/                   |             |
+|    fps                  | 354         |
+|    iterations           | 40          |
+|    time_elapsed         | 254         |
+|    total_timesteps      | 90200       |
+| train/                  |             |
+|    approx_kl            | 0.033778906 |
+|    clip_fraction        | 0.203       |
+|    clip_range           | 0.246       |
+|    entropy_loss         | -7.82       |
+|    explained_variance   | 0.0941      |
+|    learning_rate        | 6.18e-05    |
+|    loss                 | 1.27        |
+|    n_updates            | 390         |
+|    policy_gradient_loss | -0.0201     |
+|    value_loss           | 10.1        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.53e+03    |
+|    ep_rew_mean          | -124        |
+| time/                   |             |
+|    fps                  | 354         |
+|    iterations           | 41          |
+|    time_elapsed         | 260         |
+|    total_timesteps      | 92455       |
+| train/                  |             |
+|    approx_kl            | 0.024718465 |
+|    clip_fraction        | 0.213       |
+|    clip_range           | 0.246       |
+|    entropy_loss         | -7.89       |
+|    explained_variance   | 0.163       |
+|    learning_rate        | 6.18e-05    |
+|    loss                 | 1.01        |
+|    n_updates            | 400         |
+|    policy_gradient_loss | -0.0143     |
+|    value_loss           | 19          |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.49e+03    |
+|    ep_rew_mean          | -119        |
+| time/                   |             |
+|    fps                  | 354         |
+|    iterations           | 42          |
+|    time_elapsed         | 267         |
+|    total_timesteps      | 94710       |
+| train/                  |             |
+|    approx_kl            | 0.022747982 |
+|    clip_fraction        | 0.191       |
+|    clip_range           | 0.246       |
+|    entropy_loss         | -7.92       |
+|    explained_variance   | 0.308       |
+|    learning_rate        | 6.18e-05    |
+|    loss                 | 10.6        |
+|    n_updates            | 410         |
+|    policy_gradient_loss | -0.0181     |
+|    value_loss           | 7.14        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.49e+03   |
+|    ep_rew_mean          | -119       |
+| time/                   |            |
+|    fps                  | 354        |
+|    iterations           | 43         |
+|    time_elapsed         | 273        |
+|    total_timesteps      | 96965      |
+| train/                  |            |
+|    approx_kl            | 0.02743027 |
+|    clip_fraction        | 0.228      |
+|    clip_range           | 0.246      |
+|    entropy_loss         | -7.86      |
+|    explained_variance   | 0.0156     |
+|    learning_rate        | 6.18e-05   |
+|    loss                 | 1.72       |
+|    n_updates            | 420        |
+|    policy_gradient_loss | -0.0158    |
+|    value_loss           | 20.6       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.5e+03     |
+|    ep_rew_mean          | -116        |
+| time/                   |             |
+|    fps                  | 353         |
+|    iterations           | 44          |
+|    time_elapsed         | 280         |
+|    total_timesteps      | 99220       |
+| train/                  |             |
+|    approx_kl            | 0.028826194 |
+|    clip_fraction        | 0.233       |
+|    clip_range           | 0.246       |
+|    entropy_loss         | -7.89       |
+|    explained_variance   | 0.253       |
+|    learning_rate        | 6.18e-05    |
+|    loss                 | 0.285       |
+|    n_updates            | 430         |
+|    policy_gradient_loss | -0.0227     |
+|    value_loss           | 5.22        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.47e+03    |
+|    ep_rew_mean          | -117        |
+| time/                   |             |
+|    fps                  | 353         |
+|    iterations           | 45          |
+|    time_elapsed         | 286         |
+|    total_timesteps      | 101475      |
+| train/                  |             |
+|    approx_kl            | 0.025009144 |
+|    clip_fraction        | 0.21        |
+|    clip_range           | 0.246       |
+|    entropy_loss         | -7.89       |
+|    explained_variance   | 0.248       |
+|    learning_rate        | 6.18e-05    |
+|    loss                 | 7.05        |
+|    n_updates            | 440         |
+|    policy_gradient_loss | -0.0173     |
+|    value_loss           | 10.1        |
+-----------------------------------------
+[I 2023-03-30 22:07:30,694] Trial 4 finished with value: -332.0 and parameters: {'n_steps': 2255, 'gamma': 0.9347334629907355, 'learning_rate': 6.175910217498569e-05, 'clip_range': 0.2461965528470431, 'gae_lambda': 0.8597964121436965}. Best is trial 1 with value: -205.0.
+Using cuda device
+Wrapping the env in a DummyVecEnv.
+Wrapping the env in a VecTransposeImage.
+C:\ProgramData\Anaconda3\envs\StreetFighterAI\lib\site-packages\stable_baselines3\ppo\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 7730`, after every 120 untruncated mini-batches, there will be a truncated mini-batch of size 50
+We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
+Info: (n_steps=7730 and n_envs=1)
+  warnings.warn(
+Logging to logs/PPO_23
+---------------------------------
+| rollout/           |          |
+|    ep_len_mean     | 2.49e+03 |
+|    ep_rew_mean     | -20      |
+| time/              |          |
+|    fps             | 537      |
+|    iterations      | 1        |
+|    time_elapsed    | 14       |
+|    total_timesteps | 7730     |
+---------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.54e+03   |
+|    ep_rew_mean          | -19.2      |
+| time/                   |            |
+|    fps                  | 423        |
+|    iterations           | 2          |
+|    time_elapsed         | 36         |
+|    total_timesteps      | 15460      |
+| train/                  |            |
+|    approx_kl            | 0.04281639 |
+|    clip_fraction        | 0.204      |
+|    clip_range           | 0.299      |
+|    entropy_loss         | -8.29      |
+|    explained_variance   | 0.0017     |
+|    learning_rate        | 9.55e-05   |
+|    loss                 | 1.29       |
+|    n_updates            | 10         |
+|    policy_gradient_loss | -0.00636   |
+|    value_loss           | 25.2       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.32e+03    |
+|    ep_rew_mean          | -55.8       |
+| time/                   |             |
+|    fps                  | 393         |
+|    iterations           | 3           |
+|    time_elapsed         | 58          |
+|    total_timesteps      | 23190       |
+| train/                  |             |
+|    approx_kl            | 0.029096674 |
+|    clip_fraction        | 0.136       |
+|    clip_range           | 0.299       |
+|    entropy_loss         | -8.28       |
+|    explained_variance   | -0.0552     |
+|    learning_rate        | 9.55e-05    |
+|    loss                 | 1.15        |
+|    n_updates            | 20          |
+|    policy_gradient_loss | -0.00801    |
+|    value_loss           | 29.6        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.41e+03    |
+|    ep_rew_mean          | -61         |
+| time/                   |             |
+|    fps                  | 370         |
+|    iterations           | 4           |
+|    time_elapsed         | 83          |
+|    total_timesteps      | 30920       |
+| train/                  |             |
+|    approx_kl            | 0.041381396 |
+|    clip_fraction        | 0.168       |
+|    clip_range           | 0.299       |
+|    entropy_loss         | -8.25       |
+|    explained_variance   | 0.206       |
+|    learning_rate        | 9.55e-05    |
+|    loss                 | 2.05        |
+|    n_updates            | 30          |
+|    policy_gradient_loss | -0.0118     |
+|    value_loss           | 28.6        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.38e+03   |
+|    ep_rew_mean          | -101       |
+| time/                   |            |
+|    fps                  | 365        |
+|    iterations           | 5          |
+|    time_elapsed         | 105        |
+|    total_timesteps      | 38650      |
+| train/                  |            |
+|    approx_kl            | 0.03648014 |
+|    clip_fraction        | 0.194      |
+|    clip_range           | 0.299      |
+|    entropy_loss         | -8.25      |
+|    explained_variance   | 0.214      |
+|    learning_rate        | 9.55e-05   |
+|    loss                 | 4.98       |
+|    n_updates            | 40         |
+|    policy_gradient_loss | -0.0153    |
+|    value_loss           | 11.3       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.36e+03    |
+|    ep_rew_mean          | -111        |
+| time/                   |             |
+|    fps                  | 360         |
+|    iterations           | 6           |
+|    time_elapsed         | 128         |
+|    total_timesteps      | 46380       |
+| train/                  |             |
+|    approx_kl            | 0.048289824 |
+|    clip_fraction        | 0.203       |
+|    clip_range           | 0.299       |
+|    entropy_loss         | -8.22       |
+|    explained_variance   | 0.161       |
+|    learning_rate        | 9.55e-05    |
+|    loss                 | 2.33        |
+|    n_updates            | 50          |
+|    policy_gradient_loss | -0.0181     |
+|    value_loss           | 27.4        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.37e+03    |
+|    ep_rew_mean          | -102        |
+| time/                   |             |
+|    fps                  | 357         |
+|    iterations           | 7           |
+|    time_elapsed         | 151         |
+|    total_timesteps      | 54110       |
+| train/                  |             |
+|    approx_kl            | 0.050513566 |
+|    clip_fraction        | 0.256       |
+|    clip_range           | 0.299       |
+|    entropy_loss         | -8.18       |
+|    explained_variance   | 0.0793      |
+|    learning_rate        | 9.55e-05    |
+|    loss                 | 36.3        |
+|    n_updates            | 60          |
+|    policy_gradient_loss | -0.0118     |
+|    value_loss           | 18.6        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.37e+03    |
+|    ep_rew_mean          | -95.8       |
+| time/                   |             |
+|    fps                  | 353         |
+|    iterations           | 8           |
+|    time_elapsed         | 174         |
+|    total_timesteps      | 61840       |
+| train/                  |             |
+|    approx_kl            | 0.052512296 |
+|    clip_fraction        | 0.277       |
+|    clip_range           | 0.299       |
+|    entropy_loss         | -8.11       |
+|    explained_variance   | 0.219       |
+|    learning_rate        | 9.55e-05    |
+|    loss                 | 2.76        |
+|    n_updates            | 70          |
+|    policy_gradient_loss | -0.0174     |
+|    value_loss           | 11.8        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.39e+03   |
+|    ep_rew_mean          | -112       |
+| time/                   |            |
+|    fps                  | 350        |
+|    iterations           | 9          |
+|    time_elapsed         | 198        |
+|    total_timesteps      | 69570      |
+| train/                  |            |
+|    approx_kl            | 0.04943707 |
+|    clip_fraction        | 0.256      |
+|    clip_range           | 0.299      |
+|    entropy_loss         | -8.08      |
+|    explained_variance   | 0.254      |
+|    learning_rate        | 9.55e-05   |
+|    loss                 | 8.24       |
+|    n_updates            | 80         |
+|    policy_gradient_loss | -0.0148    |
+|    value_loss           | 17         |
+----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.4e+03    |
+|    ep_rew_mean          | -115       |
+| time/                   |            |
+|    fps                  | 349        |
+|    iterations           | 10         |
+|    time_elapsed         | 220        |
+|    total_timesteps      | 77300      |
+| train/                  |            |
+|    approx_kl            | 0.07023027 |
+|    clip_fraction        | 0.297      |
+|    clip_range           | 0.299      |
+|    entropy_loss         | -7.99      |
+|    explained_variance   | 0.245      |
+|    learning_rate        | 9.55e-05   |
+|    loss                 | 1.51       |
+|    n_updates            | 90         |
+|    policy_gradient_loss | -0.0157    |
+|    value_loss           | 16.3       |
+----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.38e+03   |
+|    ep_rew_mean          | -116       |
+| time/                   |            |
+|    fps                  | 348        |
+|    iterations           | 11         |
+|    time_elapsed         | 243        |
+|    total_timesteps      | 85030      |
+| train/                  |            |
+|    approx_kl            | 0.06494863 |
+|    clip_fraction        | 0.324      |
+|    clip_range           | 0.299      |
+|    entropy_loss         | -7.97      |
+|    explained_variance   | 0.308      |
+|    learning_rate        | 9.55e-05   |
+|    loss                 | 9.16       |
+|    n_updates            | 100        |
+|    policy_gradient_loss | -0.0122    |
+|    value_loss           | 16.3       |
+----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.41e+03   |
+|    ep_rew_mean          | -126       |
+| time/                   |            |
+|    fps                  | 347        |
+|    iterations           | 12         |
+|    time_elapsed         | 266        |
+|    total_timesteps      | 92760      |
+| train/                  |            |
+|    approx_kl            | 0.07837609 |
+|    clip_fraction        | 0.344      |
+|    clip_range           | 0.299      |
+|    entropy_loss         | -7.92      |
+|    explained_variance   | 0.267      |
+|    learning_rate        | 9.55e-05   |
+|    loss                 | 2.41       |
+|    n_updates            | 110        |
+|    policy_gradient_loss | -0.0101    |
+|    value_loss           | 15.8       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.33e+03    |
+|    ep_rew_mean          | -118        |
+| time/                   |             |
+|    fps                  | 347         |
+|    iterations           | 13          |
+|    time_elapsed         | 289         |
+|    total_timesteps      | 100490      |
+| train/                  |             |
+|    approx_kl            | 0.078203924 |
+|    clip_fraction        | 0.348       |
+|    clip_range           | 0.299       |
+|    entropy_loss         | -7.78       |
+|    explained_variance   | 0.254       |
+|    learning_rate        | 9.55e-05    |
+|    loss                 | 3.22        |
+|    n_updates            | 120         |
+|    policy_gradient_loss | -0.00835    |
+|    value_loss           | 31          |
+-----------------------------------------
+[I 2023-03-30 22:13:04,197] Trial 5 finished with value: -314.0 and parameters: {'n_steps': 7730, 'gamma': 0.984052776116379, 'learning_rate': 9.545314010671991e-05, 'clip_range': 0.29860092606270394, 'gae_lambda': 0.9241326779349373}. Best is trial 1 with value: -205.0.
+Using cuda device
+Wrapping the env in a DummyVecEnv.
+Wrapping the env in a VecTransposeImage.
+C:\ProgramData\Anaconda3\envs\StreetFighterAI\lib\site-packages\stable_baselines3\ppo\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 4033`, after every 63 untruncated mini-batches, there will be a truncated mini-batch of size 1
+We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
+Info: (n_steps=4033 and n_envs=1)
+  warnings.warn(
+Logging to logs/PPO_24
+---------------------------------
+| rollout/           |          |
+|    ep_len_mean     | 1.84e+03 |
+|    ep_rew_mean     | -128     |
+| time/              |          |
+|    fps             | 596      |
+|    iterations      | 1        |
+|    time_elapsed    | 6        |
+|    total_timesteps | 4033     |
+---------------------------------
+C:\ProgramData\Anaconda3\envs\StreetFighterAI\lib\site-packages\stable_baselines3\ppo\ppo.py:261: FutureWarning: Non-finite norm encountered in torch.nn.utils.clip_grad_norm_; continuing anyway. Note that the default behavior will change in a future release to error out if a non-finite total norm is encountered. At that point, setting error_if_nonfinite=false will be required to retain the old behavior.
+  th.nn.utils.clip_grad_norm_(self.policy.parameters(), self.max_grad_norm)
+[I 2023-03-30 22:13:11,640] Trial 6 finished with value: -1.0 and parameters: {'n_steps': 4033, 'gamma': 0.9021075358644198, 'learning_rate': 9.226619222512819e-05, 'clip_range': 0.2947845259713344, 'gae_lambda': 0.9634981775867985}. Best is trial 6 with value: -1.0.
+Using cuda device
+Wrapping the env in a DummyVecEnv.
+Wrapping the env in a VecTransposeImage.
+C:\ProgramData\Anaconda3\envs\StreetFighterAI\lib\site-packages\stable_baselines3\ppo\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 1654`, after every 25 untruncated mini-batches, there will be a truncated mini-batch of size 54
+We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
+Info: (n_steps=1654 and n_envs=1)
+  warnings.warn(
+Logging to logs/PPO_25
+-----------------------------
+| time/              |      |
+|    fps             | 666  |
+|    iterations      | 1    |
+|    time_elapsed    | 2    |
+|    total_timesteps | 1654 |
+-----------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.24e+03    |
+|    ep_rew_mean          | -183        |
+| time/                   |             |
+|    fps                  | 465         |
+|    iterations           | 2           |
+|    time_elapsed         | 7           |
+|    total_timesteps      | 3308        |
+| train/                  |             |
+|    approx_kl            | 0.011272669 |
+|    clip_fraction        | 0.18        |
+|    clip_range           | 0.228       |
+|    entropy_loss         | -8.3        |
+|    explained_variance   | 0.00627     |
+|    learning_rate        | 9.79e-05    |
+|    loss                 | 0.815       |
+|    n_updates            | 10          |
+|    policy_gradient_loss | -0.0141     |
+|    value_loss           | 6.8         |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.21e+03    |
+|    ep_rew_mean          | -194        |
+| time/                   |             |
+|    fps                  | 425         |
+|    iterations           | 3           |
+|    time_elapsed         | 11          |
+|    total_timesteps      | 4962        |
+| train/                  |             |
+|    approx_kl            | 0.013628463 |
+|    clip_fraction        | 0.114       |
+|    clip_range           | 0.228       |
+|    entropy_loss         | -8.29       |
+|    explained_variance   | 0.0365      |
+|    learning_rate        | 9.79e-05    |
+|    loss                 | 40.8        |
+|    n_updates            | 20          |
+|    policy_gradient_loss | -0.0135     |
+|    value_loss           | 20.9        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.14e+03    |
+|    ep_rew_mean          | -218        |
+| time/                   |             |
+|    fps                  | 401         |
+|    iterations           | 4           |
+|    time_elapsed         | 16          |
+|    total_timesteps      | 6616        |
+| train/                  |             |
+|    approx_kl            | 0.023692455 |
+|    clip_fraction        | 0.238       |
+|    clip_range           | 0.228       |
+|    entropy_loss         | -8.26       |
+|    explained_variance   | 0.251       |
+|    learning_rate        | 9.79e-05    |
+|    loss                 | 0.977       |
+|    n_updates            | 30          |
+|    policy_gradient_loss | -0.0152     |
+|    value_loss           | 17.7        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.14e+03   |
+|    ep_rew_mean          | -218       |
+| time/                   |            |
+|    fps                  | 389        |
+|    iterations           | 5          |
+|    time_elapsed         | 21         |
+|    total_timesteps      | 8270       |
+| train/                  |            |
+|    approx_kl            | 0.02734942 |
+|    clip_fraction        | 0.272      |
+|    clip_range           | 0.228      |
+|    entropy_loss         | -8.25      |
+|    explained_variance   | -0.035     |
+|    learning_rate        | 9.79e-05   |
+|    loss                 | 0.673      |
+|    n_updates            | 40         |
+|    policy_gradient_loss | 0.00113    |
+|    value_loss           | 33.6       |
+----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.29e+03   |
+|    ep_rew_mean          | -176       |
+| time/                   |            |
+|    fps                  | 382        |
+|    iterations           | 6          |
+|    time_elapsed         | 25         |
+|    total_timesteps      | 9924       |
+| train/                  |            |
+|    approx_kl            | 0.01778004 |
+|    clip_fraction        | 0.247      |
+|    clip_range           | 0.228      |
+|    entropy_loss         | -8.26      |
+|    explained_variance   | 0.369      |
+|    learning_rate        | 9.79e-05   |
+|    loss                 | 1.41       |
+|    n_updates            | 50         |
+|    policy_gradient_loss | -0.0143    |
+|    value_loss           | 4.89       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.29e+03    |
+|    ep_rew_mean          | -176        |
+| time/                   |             |
+|    fps                  | 375         |
+|    iterations           | 7           |
+|    time_elapsed         | 30          |
+|    total_timesteps      | 11578       |
+| train/                  |             |
+|    approx_kl            | 0.016047975 |
+|    clip_fraction        | 0.174       |
+|    clip_range           | 0.228       |
+|    entropy_loss         | -8.25       |
+|    explained_variance   | 0.164       |
+|    learning_rate        | 9.79e-05    |
+|    loss                 | 1.49        |
+|    n_updates            | 60          |
+|    policy_gradient_loss | -0.0128     |
+|    value_loss           | 7.98        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.57e+03    |
+|    ep_rew_mean          | -148        |
+| time/                   |             |
+|    fps                  | 363         |
+|    iterations           | 8           |
+|    time_elapsed         | 36          |
+|    total_timesteps      | 13232       |
+| train/                  |             |
+|    approx_kl            | 0.016572453 |
+|    clip_fraction        | 0.174       |
+|    clip_range           | 0.228       |
+|    entropy_loss         | -8.25       |
+|    explained_variance   | -0.193      |
+|    learning_rate        | 9.79e-05    |
+|    loss                 | 0.18        |
+|    n_updates            | 70          |
+|    policy_gradient_loss | -0.0197     |
+|    value_loss           | 3.7         |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.42e+03    |
+|    ep_rew_mean          | -152        |
+| time/                   |             |
+|    fps                  | 354         |
+|    iterations           | 9           |
+|    time_elapsed         | 42          |
+|    total_timesteps      | 14886       |
+| train/                  |             |
+|    approx_kl            | 0.018261585 |
+|    clip_fraction        | 0.163       |
+|    clip_range           | 0.228       |
+|    entropy_loss         | -8.23       |
+|    explained_variance   | 0.0612      |
+|    learning_rate        | 9.79e-05    |
+|    loss                 | 1.23        |
+|    n_updates            | 80          |
+|    policy_gradient_loss | -0.017      |
+|    value_loss           | 8.05        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.42e+03    |
+|    ep_rew_mean          | -152        |
+| time/                   |             |
+|    fps                  | 347         |
+|    iterations           | 10          |
+|    time_elapsed         | 47          |
+|    total_timesteps      | 16540       |
+| train/                  |             |
+|    approx_kl            | 0.025278179 |
+|    clip_fraction        | 0.233       |
+|    clip_range           | 0.228       |
+|    entropy_loss         | -8.19       |
+|    explained_variance   | 0.00951     |
+|    learning_rate        | 9.79e-05    |
+|    loss                 | 1.36        |
+|    n_updates            | 90          |
+|    policy_gradient_loss | -0.0161     |
+|    value_loss           | 14.8        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.53e+03    |
+|    ep_rew_mean          | -141        |
+| time/                   |             |
+|    fps                  | 342         |
+|    iterations           | 11          |
+|    time_elapsed         | 53          |
+|    total_timesteps      | 18194       |
+| train/                  |             |
+|    approx_kl            | 0.019890858 |
+|    clip_fraction        | 0.243       |
+|    clip_range           | 0.228       |
+|    entropy_loss         | -8.16       |
+|    explained_variance   | 0.32        |
+|    learning_rate        | 9.79e-05    |
+|    loss                 | 1.63        |
+|    n_updates            | 100         |
+|    policy_gradient_loss | -0.0133     |
+|    value_loss           | 5.37        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.39e+03    |
+|    ep_rew_mean          | -163        |
+| time/                   |             |
+|    fps                  | 339         |
+|    iterations           | 12          |
+|    time_elapsed         | 58          |
+|    total_timesteps      | 19848       |
+| train/                  |             |
+|    approx_kl            | 0.027444609 |
+|    clip_fraction        | 0.273       |
+|    clip_range           | 0.228       |
+|    entropy_loss         | -8.1        |
+|    explained_variance   | 0.173       |
+|    learning_rate        | 9.79e-05    |
+|    loss                 | 1.03        |
+|    n_updates            | 110         |
+|    policy_gradient_loss | -0.0161     |
+|    value_loss           | 10.9        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.39e+03    |
+|    ep_rew_mean          | -163        |
+| time/                   |             |
+|    fps                  | 336         |
+|    iterations           | 13          |
+|    time_elapsed         | 63          |
+|    total_timesteps      | 21502       |
+| train/                  |             |
+|    approx_kl            | 0.029858373 |
+|    clip_fraction        | 0.226       |
+|    clip_range           | 0.228       |
+|    entropy_loss         | -8.15       |
+|    explained_variance   | 0.124       |
+|    learning_rate        | 9.79e-05    |
+|    loss                 | 87.4        |
+|    n_updates            | 120         |
+|    policy_gradient_loss | -0.0143     |
+|    value_loss           | 37.1        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.4e+03    |
+|    ep_rew_mean          | -168       |
+| time/                   |            |
+|    fps                  | 333        |
+|    iterations           | 14         |
+|    time_elapsed         | 69         |
+|    total_timesteps      | 23156      |
+| train/                  |            |
+|    approx_kl            | 0.02500601 |
+|    clip_fraction        | 0.272      |
+|    clip_range           | 0.228      |
+|    entropy_loss         | -8.14      |
+|    explained_variance   | 0.227      |
+|    learning_rate        | 9.79e-05   |
+|    loss                 | 1.26       |
+|    n_updates            | 130        |
+|    policy_gradient_loss | -0.0173    |
+|    value_loss           | 6.66       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.47e+03    |
+|    ep_rew_mean          | -161        |
+| time/                   |             |
+|    fps                  | 330         |
+|    iterations           | 15          |
+|    time_elapsed         | 75          |
+|    total_timesteps      | 24810       |
+| train/                  |             |
+|    approx_kl            | 0.025755124 |
+|    clip_fraction        | 0.234       |
+|    clip_range           | 0.228       |
+|    entropy_loss         | -8.14       |
+|    explained_variance   | 0.238       |
+|    learning_rate        | 9.79e-05    |
+|    loss                 | 1.24        |
+|    n_updates            | 140         |
+|    policy_gradient_loss | -0.016      |
+|    value_loss           | 13.3        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.47e+03    |
+|    ep_rew_mean          | -161        |
+| time/                   |             |
+|    fps                  | 328         |
+|    iterations           | 16          |
+|    time_elapsed         | 80          |
+|    total_timesteps      | 26464       |
+| train/                  |             |
+|    approx_kl            | 0.021200689 |
+|    clip_fraction        | 0.234       |
+|    clip_range           | 0.228       |
+|    entropy_loss         | -8.07       |
+|    explained_variance   | 0.176       |
+|    learning_rate        | 9.79e-05    |
+|    loss                 | 0.559       |
+|    n_updates            | 150         |
+|    policy_gradient_loss | -0.0132     |
+|    value_loss           | 9.71        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.47e+03   |
+|    ep_rew_mean          | -161       |
+| time/                   |            |
+|    fps                  | 326        |
+|    iterations           | 17         |
+|    time_elapsed         | 86         |
+|    total_timesteps      | 28118      |
+| train/                  |            |
+|    approx_kl            | 0.02932891 |
+|    clip_fraction        | 0.27       |
+|    clip_range           | 0.228      |
+|    entropy_loss         | -8.08      |
+|    explained_variance   | 0.302      |
+|    learning_rate        | 9.79e-05   |
+|    loss                 | 2.16       |
+|    n_updates            | 160        |
+|    policy_gradient_loss | -0.0225    |
+|    value_loss           | 4.34       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.47e+03    |
+|    ep_rew_mean          | -161        |
+| time/                   |             |
+|    fps                  | 324         |
+|    iterations           | 18          |
+|    time_elapsed         | 91          |
+|    total_timesteps      | 29772       |
+| train/                  |             |
+|    approx_kl            | 0.016725304 |
+|    clip_fraction        | 0.212       |
+|    clip_range           | 0.228       |
+|    entropy_loss         | -8.06       |
+|    explained_variance   | 0.0835      |
+|    learning_rate        | 9.79e-05    |
+|    loss                 | 0.398       |
+|    n_updates            | 170         |
+|    policy_gradient_loss | -0.0136     |
+|    value_loss           | 5.97        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.84e+03    |
+|    ep_rew_mean          | -164        |
+| time/                   |             |
+|    fps                  | 322         |
+|    iterations           | 19          |
+|    time_elapsed         | 97          |
+|    total_timesteps      | 31426       |
+| train/                  |             |
+|    approx_kl            | 0.020989887 |
+|    clip_fraction        | 0.216       |
+|    clip_range           | 0.228       |
+|    entropy_loss         | -8.08       |
+|    explained_variance   | 0.364       |
+|    learning_rate        | 9.79e-05    |
+|    loss                 | 0.103       |
+|    n_updates            | 180         |
+|    policy_gradient_loss | -0.0259     |
+|    value_loss           | 1.02        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.84e+03   |
+|    ep_rew_mean          | -164       |
+| time/                   |            |
+|    fps                  | 321        |
+|    iterations           | 20         |
+|    time_elapsed         | 102        |
+|    total_timesteps      | 33080      |
+| train/                  |            |
+|    approx_kl            | 0.02685814 |
+|    clip_fraction        | 0.201      |
+|    clip_range           | 0.228      |
+|    entropy_loss         | -8.03      |
+|    explained_variance   | 0.114      |
+|    learning_rate        | 9.79e-05   |
+|    loss                 | 1.95       |
+|    n_updates            | 190        |
+|    policy_gradient_loss | -0.0192    |
+|    value_loss           | 16.3       |
+----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.84e+03   |
+|    ep_rew_mean          | -164       |
+| time/                   |            |
+|    fps                  | 320        |
+|    iterations           | 21         |
+|    time_elapsed         | 108        |
+|    total_timesteps      | 34734      |
+| train/                  |            |
+|    approx_kl            | 0.02518316 |
+|    clip_fraction        | 0.22       |
+|    clip_range           | 0.228      |
+|    entropy_loss         | -8.03      |
+|    explained_variance   | 0.293      |
+|    learning_rate        | 9.79e-05   |
+|    loss                 | 1.66       |
+|    n_updates            | 200        |
+|    policy_gradient_loss | -0.0237    |
+|    value_loss           | 6.07       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.92e+03    |
+|    ep_rew_mean          | -148        |
+| time/                   |             |
+|    fps                  | 319         |
+|    iterations           | 22          |
+|    time_elapsed         | 114         |
+|    total_timesteps      | 36388       |
+| train/                  |             |
+|    approx_kl            | 0.020095803 |
+|    clip_fraction        | 0.191       |
+|    clip_range           | 0.228       |
+|    entropy_loss         | -8.04       |
+|    explained_variance   | 0.242       |
+|    learning_rate        | 9.79e-05    |
+|    loss                 | 0.859       |
+|    n_updates            | 210         |
+|    policy_gradient_loss | -0.0166     |
+|    value_loss           | 5.24        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.87e+03    |
+|    ep_rew_mean          | -164        |
+| time/                   |             |
+|    fps                  | 317         |
+|    iterations           | 23          |
+|    time_elapsed         | 119         |
+|    total_timesteps      | 38042       |
+| train/                  |             |
+|    approx_kl            | 0.023355601 |
+|    clip_fraction        | 0.246       |
+|    clip_range           | 0.228       |
+|    entropy_loss         | -7.97       |
+|    explained_variance   | 0.0157      |
+|    learning_rate        | 9.79e-05    |
+|    loss                 | 0.642       |
+|    n_updates            | 220         |
+|    policy_gradient_loss | -0.0248     |
+|    value_loss           | 4.15        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.87e+03    |
+|    ep_rew_mean          | -164        |
+| time/                   |             |
+|    fps                  | 317         |
+|    iterations           | 24          |
+|    time_elapsed         | 125         |
+|    total_timesteps      | 39696       |
+| train/                  |             |
+|    approx_kl            | 0.034935288 |
+|    clip_fraction        | 0.267       |
+|    clip_range           | 0.228       |
+|    entropy_loss         | -7.95       |
+|    explained_variance   | 0.0452      |
+|    learning_rate        | 9.79e-05    |
+|    loss                 | 201         |
+|    n_updates            | 230         |
+|    policy_gradient_loss | -0.0162     |
+|    value_loss           | 46.4        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.92e+03    |
+|    ep_rew_mean          | -152        |
+| time/                   |             |
+|    fps                  | 316         |
+|    iterations           | 25          |
+|    time_elapsed         | 130         |
+|    total_timesteps      | 41350       |
+| train/                  |             |
+|    approx_kl            | 0.025743902 |
+|    clip_fraction        | 0.265       |
+|    clip_range           | 0.228       |
+|    entropy_loss         | -7.94       |
+|    explained_variance   | 0.214       |
+|    learning_rate        | 9.79e-05    |
+|    loss                 | 1.29        |
+|    n_updates            | 240         |
+|    policy_gradient_loss | -0.0199     |
+|    value_loss           | 7.38        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.86e+03   |
+|    ep_rew_mean          | -162       |
+| time/                   |            |
+|    fps                  | 315        |
+|    iterations           | 26         |
+|    time_elapsed         | 136        |
+|    total_timesteps      | 43004      |
+| train/                  |            |
+|    approx_kl            | 0.03637928 |
+|    clip_fraction        | 0.312      |
+|    clip_range           | 0.228      |
+|    entropy_loss         | -7.93      |
+|    explained_variance   | 0.491      |
+|    learning_rate        | 9.79e-05   |
+|    loss                 | 0.508      |
+|    n_updates            | 250        |
+|    policy_gradient_loss | -0.0222    |
+|    value_loss           | 2.89       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.86e+03    |
+|    ep_rew_mean          | -162        |
+| time/                   |             |
+|    fps                  | 315         |
+|    iterations           | 27          |
+|    time_elapsed         | 141         |
+|    total_timesteps      | 44658       |
+| train/                  |             |
+|    approx_kl            | 0.056980833 |
+|    clip_fraction        | 0.394       |
+|    clip_range           | 0.228       |
+|    entropy_loss         | -7.87       |
+|    explained_variance   | 0.109       |
+|    learning_rate        | 9.79e-05    |
+|    loss                 | 1.26        |
+|    n_updates            | 260         |
+|    policy_gradient_loss | -0.00769    |
+|    value_loss           | 19.5        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.86e+03   |
+|    ep_rew_mean          | -156       |
+| time/                   |            |
+|    fps                  | 316        |
+|    iterations           | 28         |
+|    time_elapsed         | 146        |
+|    total_timesteps      | 46312      |
+| train/                  |            |
+|    approx_kl            | 0.03702618 |
+|    clip_fraction        | 0.365      |
+|    clip_range           | 0.228      |
+|    entropy_loss         | -7.85      |
+|    explained_variance   | 0.291      |
+|    learning_rate        | 9.79e-05   |
+|    loss                 | 0.537      |
+|    n_updates            | 270        |
+|    policy_gradient_loss | -0.0122    |
+|    value_loss           | 4.75       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.86e+03    |
+|    ep_rew_mean          | -156        |
+| time/                   |             |
+|    fps                  | 318         |
+|    iterations           | 29          |
+|    time_elapsed         | 150         |
+|    total_timesteps      | 47966       |
+| train/                  |             |
+|    approx_kl            | 0.033755746 |
+|    clip_fraction        | 0.299       |
+|    clip_range           | 0.228       |
+|    entropy_loss         | -7.77       |
+|    explained_variance   | 0.31        |
+|    learning_rate        | 9.79e-05    |
+|    loss                 | 7.27        |
+|    n_updates            | 280         |
+|    policy_gradient_loss | -0.016      |
+|    value_loss           | 7.63        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.83e+03   |
+|    ep_rew_mean          | -159       |
+| time/                   |            |
+|    fps                  | 319        |
+|    iterations           | 30         |
+|    time_elapsed         | 155        |
+|    total_timesteps      | 49620      |
+| train/                  |            |
+|    approx_kl            | 0.03693611 |
+|    clip_fraction        | 0.286      |
+|    clip_range           | 0.228      |
+|    entropy_loss         | -7.81      |
+|    explained_variance   | 0.343      |
+|    learning_rate        | 9.79e-05   |
+|    loss                 | 0.828      |
+|    n_updates            | 290        |
+|    policy_gradient_loss | -0.0217    |
+|    value_loss           | 4.62       |
+----------------------------------------
+---------------------------------------
+| rollout/                |           |
+|    ep_len_mean          | 2.81e+03  |
+|    ep_rew_mean          | -150      |
+| time/                   |           |
+|    fps                  | 319       |
+|    iterations           | 31        |
+|    time_elapsed         | 160       |
+|    total_timesteps      | 51274     |
+| train/                  |           |
+|    approx_kl            | 0.0407202 |
+|    clip_fraction        | 0.331     |
+|    clip_range           | 0.228     |
+|    entropy_loss         | -7.73     |
+|    explained_variance   | 0.164     |
+|    learning_rate        | 9.79e-05  |
+|    loss                 | 19.9      |
+|    n_updates            | 300       |
+|    policy_gradient_loss | -0.00744  |
+|    value_loss           | 25.4      |
+---------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.81e+03    |
+|    ep_rew_mean          | -150        |
+| time/                   |             |
+|    fps                  | 321         |
+|    iterations           | 32          |
+|    time_elapsed         | 164         |
+|    total_timesteps      | 52928       |
+| train/                  |             |
+|    approx_kl            | 0.049017448 |
+|    clip_fraction        | 0.356       |
+|    clip_range           | 0.228       |
+|    entropy_loss         | -7.8        |
+|    explained_variance   | -0.0167     |
+|    learning_rate        | 9.79e-05    |
+|    loss                 | 0.897       |
+|    n_updates            | 310         |
+|    policy_gradient_loss | -0.0214     |
+|    value_loss           | 4.18        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.8e+03     |
+|    ep_rew_mean          | -156        |
+| time/                   |             |
+|    fps                  | 322         |
+|    iterations           | 33          |
+|    time_elapsed         | 169         |
+|    total_timesteps      | 54582       |
+| train/                  |             |
+|    approx_kl            | 0.035868283 |
+|    clip_fraction        | 0.323       |
+|    clip_range           | 0.228       |
+|    entropy_loss         | -7.79       |
+|    explained_variance   | 0.177       |
+|    learning_rate        | 9.79e-05    |
+|    loss                 | 0.298       |
+|    n_updates            | 320         |
+|    policy_gradient_loss | -0.0128     |
+|    value_loss           | 2.9         |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.76e+03    |
+|    ep_rew_mean          | -161        |
+| time/                   |             |
+|    fps                  | 322         |
+|    iterations           | 34          |
+|    time_elapsed         | 174         |
+|    total_timesteps      | 56236       |
+| train/                  |             |
+|    approx_kl            | 0.040655132 |
+|    clip_fraction        | 0.321       |
+|    clip_range           | 0.228       |
+|    entropy_loss         | -7.6        |
+|    explained_variance   | 0.0977      |
+|    learning_rate        | 9.79e-05    |
+|    loss                 | 3.28        |
+|    n_updates            | 330         |
+|    policy_gradient_loss | -0.0114     |
+|    value_loss           | 29.4        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.73e+03    |
+|    ep_rew_mean          | -162        |
+| time/                   |             |
+|    fps                  | 323         |
+|    iterations           | 35          |
+|    time_elapsed         | 178         |
+|    total_timesteps      | 57890       |
+| train/                  |             |
+|    approx_kl            | 0.049799267 |
+|    clip_fraction        | 0.364       |
+|    clip_range           | 0.228       |
+|    entropy_loss         | -7.61       |
+|    explained_variance   | 0.0564      |
+|    learning_rate        | 9.79e-05    |
+|    loss                 | 0.473       |
+|    n_updates            | 340         |
+|    policy_gradient_loss | -0.0171     |
+|    value_loss           | 12.4        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.65e+03    |
+|    ep_rew_mean          | -141        |
+| time/                   |             |
+|    fps                  | 323         |
+|    iterations           | 36          |
+|    time_elapsed         | 183         |
+|    total_timesteps      | 59544       |
+| train/                  |             |
+|    approx_kl            | 0.051947072 |
+|    clip_fraction        | 0.352       |
+|    clip_range           | 0.228       |
+|    entropy_loss         | -7.51       |
+|    explained_variance   | 0.194       |
+|    learning_rate        | 9.79e-05    |
+|    loss                 | 2.48        |
+|    n_updates            | 350         |
+|    policy_gradient_loss | -0.0138     |
+|    value_loss           | 17.2        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.65e+03   |
+|    ep_rew_mean          | -141       |
+| time/                   |            |
+|    fps                  | 324        |
+|    iterations           | 37         |
+|    time_elapsed         | 188        |
+|    total_timesteps      | 61198      |
+| train/                  |            |
+|    approx_kl            | 0.04901576 |
+|    clip_fraction        | 0.4        |
+|    clip_range           | 0.228      |
+|    entropy_loss         | -7.55      |
+|    explained_variance   | -0.0663    |
+|    learning_rate        | 9.79e-05   |
+|    loss                 | 7.66       |
+|    n_updates            | 360        |
+|    policy_gradient_loss | -0.00211   |
+|    value_loss           | 28.4       |
+----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.67e+03   |
+|    ep_rew_mean          | -132       |
+| time/                   |            |
+|    fps                  | 326        |
+|    iterations           | 38         |
+|    time_elapsed         | 192        |
+|    total_timesteps      | 62852      |
+| train/                  |            |
+|    approx_kl            | 0.04695523 |
+|    clip_fraction        | 0.388      |
+|    clip_range           | 0.228      |
+|    entropy_loss         | -7.6       |
+|    explained_variance   | 0.29       |
+|    learning_rate        | 9.79e-05   |
+|    loss                 | 0.643      |
+|    n_updates            | 370        |
+|    policy_gradient_loss | -0.0192    |
+|    value_loss           | 4.24       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.63e+03    |
+|    ep_rew_mean          | -141        |
+| time/                   |             |
+|    fps                  | 326         |
+|    iterations           | 39          |
+|    time_elapsed         | 197         |
+|    total_timesteps      | 64506       |
+| train/                  |             |
+|    approx_kl            | 0.046350323 |
+|    clip_fraction        | 0.355       |
+|    clip_range           | 0.228       |
+|    entropy_loss         | -7.45       |
+|    explained_variance   | 0.121       |
+|    learning_rate        | 9.79e-05    |
+|    loss                 | 2.98        |
+|    n_updates            | 380         |
+|    policy_gradient_loss | -0.0267     |
+|    value_loss           | 5.78        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.63e+03    |
+|    ep_rew_mean          | -145        |
+| time/                   |             |
+|    fps                  | 326         |
+|    iterations           | 40          |
+|    time_elapsed         | 202         |
+|    total_timesteps      | 66160       |
+| train/                  |             |
+|    approx_kl            | 0.042303674 |
+|    clip_fraction        | 0.365       |
+|    clip_range           | 0.228       |
+|    entropy_loss         | -7.55       |
+|    explained_variance   | 0.0295      |
+|    learning_rate        | 9.79e-05    |
+|    loss                 | 2.42        |
+|    n_updates            | 390         |
+|    policy_gradient_loss | -0.0111     |
+|    value_loss           | 49.1        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.63e+03   |
+|    ep_rew_mean          | -145       |
+| time/                   |            |
+|    fps                  | 327        |
+|    iterations           | 41         |
+|    time_elapsed         | 207        |
+|    total_timesteps      | 67814      |
+| train/                  |            |
+|    approx_kl            | 0.06833778 |
+|    clip_fraction        | 0.442      |
+|    clip_range           | 0.228      |
+|    entropy_loss         | -7.26      |
+|    explained_variance   | 0.0526     |
+|    learning_rate        | 9.79e-05   |
+|    loss                 | 1.06       |
+|    n_updates            | 400        |
+|    policy_gradient_loss | -0.00992   |
+|    value_loss           | 13.1       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.63e+03    |
+|    ep_rew_mean          | -148        |
+| time/                   |             |
+|    fps                  | 327         |
+|    iterations           | 42          |
+|    time_elapsed         | 212         |
+|    total_timesteps      | 69468       |
+| train/                  |             |
+|    approx_kl            | 0.051798023 |
+|    clip_fraction        | 0.393       |
+|    clip_range           | 0.228       |
+|    entropy_loss         | -7.56       |
+|    explained_variance   | 0.304       |
+|    learning_rate        | 9.79e-05    |
+|    loss                 | 0.899       |
+|    n_updates            | 410         |
+|    policy_gradient_loss | -0.0254     |
+|    value_loss           | 6.58        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.62e+03   |
+|    ep_rew_mean          | -136       |
+| time/                   |            |
+|    fps                  | 326        |
+|    iterations           | 43         |
+|    time_elapsed         | 217        |
+|    total_timesteps      | 71122      |
+| train/                  |            |
+|    approx_kl            | 0.04322006 |
+|    clip_fraction        | 0.383      |
+|    clip_range           | 0.228      |
+|    entropy_loss         | -7.52      |
+|    explained_variance   | 0.288      |
+|    learning_rate        | 9.79e-05   |
+|    loss                 | 0.502      |
+|    n_updates            | 420        |
+|    policy_gradient_loss | -0.0132    |
+|    value_loss           | 15.5       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.58e+03    |
+|    ep_rew_mean          | -141        |
+| time/                   |             |
+|    fps                  | 326         |
+|    iterations           | 44          |
+|    time_elapsed         | 223         |
+|    total_timesteps      | 72776       |
+| train/                  |             |
+|    approx_kl            | 0.051466085 |
+|    clip_fraction        | 0.397       |
+|    clip_range           | 0.228       |
+|    entropy_loss         | -7.23       |
+|    explained_variance   | 0.217       |
+|    learning_rate        | 9.79e-05    |
+|    loss                 | 1.78        |
+|    n_updates            | 430         |
+|    policy_gradient_loss | -0.0112     |
+|    value_loss           | 15.6        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.58e+03   |
+|    ep_rew_mean          | -141       |
+| time/                   |            |
+|    fps                  | 325        |
+|    iterations           | 45         |
+|    time_elapsed         | 228        |
+|    total_timesteps      | 74430      |
+| train/                  |            |
+|    approx_kl            | 0.06645863 |
+|    clip_fraction        | 0.421      |
+|    clip_range           | 0.228      |
+|    entropy_loss         | -7.28      |
+|    explained_variance   | 0.279      |
+|    learning_rate        | 9.79e-05   |
+|    loss                 | 1.55       |
+|    n_updates            | 440        |
+|    policy_gradient_loss | -0.0173    |
+|    value_loss           | 15.6       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.58e+03    |
+|    ep_rew_mean          | -139        |
+| time/                   |             |
+|    fps                  | 324         |
+|    iterations           | 46          |
+|    time_elapsed         | 234         |
+|    total_timesteps      | 76084       |
+| train/                  |             |
+|    approx_kl            | 0.042081438 |
+|    clip_fraction        | 0.38        |
+|    clip_range           | 0.228       |
+|    entropy_loss         | -7.37       |
+|    explained_variance   | 0.2         |
+|    learning_rate        | 9.79e-05    |
+|    loss                 | 5.59        |
+|    n_updates            | 450         |
+|    policy_gradient_loss | -0.016      |
+|    value_loss           | 5.48        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.56e+03    |
+|    ep_rew_mean          | -137        |
+| time/                   |             |
+|    fps                  | 324         |
+|    iterations           | 47          |
+|    time_elapsed         | 239         |
+|    total_timesteps      | 77738       |
+| train/                  |             |
+|    approx_kl            | 0.040408526 |
+|    clip_fraction        | 0.36        |
+|    clip_range           | 0.228       |
+|    entropy_loss         | -7.2        |
+|    explained_variance   | 0.439       |
+|    learning_rate        | 9.79e-05    |
+|    loss                 | 1.19        |
+|    n_updates            | 460         |
+|    policy_gradient_loss | -0.019      |
+|    value_loss           | 8.17        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.56e+03    |
+|    ep_rew_mean          | -132        |
+| time/                   |             |
+|    fps                  | 323         |
+|    iterations           | 48          |
+|    time_elapsed         | 245         |
+|    total_timesteps      | 79392       |
+| train/                  |             |
+|    approx_kl            | 0.047893133 |
+|    clip_fraction        | 0.399       |
+|    clip_range           | 0.228       |
+|    entropy_loss         | -7.24       |
+|    explained_variance   | 0.29        |
+|    learning_rate        | 9.79e-05    |
+|    loss                 | 9.79        |
+|    n_updates            | 470         |
+|    policy_gradient_loss | -0.023      |
+|    value_loss           | 9.88        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.56e+03    |
+|    ep_rew_mean          | -132        |
+| time/                   |             |
+|    fps                  | 323         |
+|    iterations           | 49          |
+|    time_elapsed         | 250         |
+|    total_timesteps      | 81046       |
+| train/                  |             |
+|    approx_kl            | 0.050825655 |
+|    clip_fraction        | 0.409       |
+|    clip_range           | 0.228       |
+|    entropy_loss         | -7.27       |
+|    explained_variance   | 0.125       |
+|    learning_rate        | 9.79e-05    |
+|    loss                 | 0.481       |
+|    n_updates            | 480         |
+|    policy_gradient_loss | -0.0222     |
+|    value_loss           | 5.85        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.53e+03    |
+|    ep_rew_mean          | -137        |
+| time/                   |             |
+|    fps                  | 322         |
+|    iterations           | 50          |
+|    time_elapsed         | 256         |
+|    total_timesteps      | 82700       |
+| train/                  |             |
+|    approx_kl            | 0.045231882 |
+|    clip_fraction        | 0.368       |
+|    clip_range           | 0.228       |
+|    entropy_loss         | -7.47       |
+|    explained_variance   | 0.378       |
+|    learning_rate        | 9.79e-05    |
+|    loss                 | 0.404       |
+|    n_updates            | 490         |
+|    policy_gradient_loss | -0.0237     |
+|    value_loss           | 5.36        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.48e+03   |
+|    ep_rew_mean          | -121       |
+| time/                   |            |
+|    fps                  | 322        |
+|    iterations           | 51         |
+|    time_elapsed         | 261        |
+|    total_timesteps      | 84354      |
+| train/                  |            |
+|    approx_kl            | 0.08339866 |
+|    clip_fraction        | 0.483      |
+|    clip_range           | 0.228      |
+|    entropy_loss         | -7.08      |
+|    explained_variance   | 0.0398     |
+|    learning_rate        | 9.79e-05   |
+|    loss                 | 1.24       |
+|    n_updates            | 500        |
+|    policy_gradient_loss | -0.0024    |
+|    value_loss           | 26.8       |
+----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.48e+03   |
+|    ep_rew_mean          | -121       |
+| time/                   |            |
+|    fps                  | 321        |
+|    iterations           | 52         |
+|    time_elapsed         | 267        |
+|    total_timesteps      | 86008      |
+| train/                  |            |
+|    approx_kl            | 0.07751507 |
+|    clip_fraction        | 0.455      |
+|    clip_range           | 0.228      |
+|    entropy_loss         | -6.97      |
+|    explained_variance   | 0.102      |
+|    learning_rate        | 9.79e-05   |
+|    loss                 | 13.2       |
+|    n_updates            | 510        |
+|    policy_gradient_loss | 9.07e-05   |
+|    value_loss           | 26.6       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.49e+03    |
+|    ep_rew_mean          | -116        |
+| time/                   |             |
+|    fps                  | 321         |
+|    iterations           | 53          |
+|    time_elapsed         | 272         |
+|    total_timesteps      | 87662       |
+| train/                  |             |
+|    approx_kl            | 0.050660215 |
+|    clip_fraction        | 0.407       |
+|    clip_range           | 0.228       |
+|    entropy_loss         | -7.09       |
+|    explained_variance   | 0.558       |
+|    learning_rate        | 9.79e-05    |
+|    loss                 | 1.27        |
+|    n_updates            | 520         |
+|    policy_gradient_loss | -0.028      |
+|    value_loss           | 5.08        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.45e+03    |
+|    ep_rew_mean          | -118        |
+| time/                   |             |
+|    fps                  | 321         |
+|    iterations           | 54          |
+|    time_elapsed         | 278         |
+|    total_timesteps      | 89316       |
+| train/                  |             |
+|    approx_kl            | 0.059903584 |
+|    clip_fraction        | 0.435       |
+|    clip_range           | 0.228       |
+|    entropy_loss         | -7          |
+|    explained_variance   | 0.34        |
+|    learning_rate        | 9.79e-05    |
+|    loss                 | 2.36        |
+|    n_updates            | 530         |
+|    policy_gradient_loss | -0.0178     |
+|    value_loss           | 7.52        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.43e+03   |
+|    ep_rew_mean          | -124       |
+| time/                   |            |
+|    fps                  | 320        |
+|    iterations           | 55         |
+|    time_elapsed         | 283        |
+|    total_timesteps      | 90970      |
+| train/                  |            |
+|    approx_kl            | 0.05906586 |
+|    clip_fraction        | 0.397      |
+|    clip_range           | 0.228      |
+|    entropy_loss         | -6.97      |
+|    explained_variance   | 0.411      |
+|    learning_rate        | 9.79e-05   |
+|    loss                 | 3.49       |
+|    n_updates            | 540        |
+|    policy_gradient_loss | -0.0154    |
+|    value_loss           | 15.3       |
+----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.42e+03   |
+|    ep_rew_mean          | -129       |
+| time/                   |            |
+|    fps                  | 320        |
+|    iterations           | 56         |
+|    time_elapsed         | 289        |
+|    total_timesteps      | 92624      |
+| train/                  |            |
+|    approx_kl            | 0.06667252 |
+|    clip_fraction        | 0.446      |
+|    clip_range           | 0.228      |
+|    entropy_loss         | -7.01      |
+|    explained_variance   | 0.12       |
+|    learning_rate        | 9.79e-05   |
+|    loss                 | 2.8        |
+|    n_updates            | 550        |
+|    policy_gradient_loss | -0.00396   |
+|    value_loss           | 49.1       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.41e+03    |
+|    ep_rew_mean          | -128        |
+| time/                   |             |
+|    fps                  | 319         |
+|    iterations           | 57          |
+|    time_elapsed         | 295         |
+|    total_timesteps      | 94278       |
+| train/                  |             |
+|    approx_kl            | 0.086899824 |
+|    clip_fraction        | 0.46        |
+|    clip_range           | 0.228       |
+|    entropy_loss         | -6.97       |
+|    explained_variance   | 0.37        |
+|    learning_rate        | 9.79e-05    |
+|    loss                 | 0.64        |
+|    n_updates            | 560         |
+|    policy_gradient_loss | -0.0243     |
+|    value_loss           | 20.1        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.41e+03   |
+|    ep_rew_mean          | -128       |
+| time/                   |            |
+|    fps                  | 318        |
+|    iterations           | 58         |
+|    time_elapsed         | 300        |
+|    total_timesteps      | 95932      |
+| train/                  |            |
+|    approx_kl            | 0.07774362 |
+|    clip_fraction        | 0.49       |
+|    clip_range           | 0.228      |
+|    entropy_loss         | -6.63      |
+|    explained_variance   | 0.221      |
+|    learning_rate        | 9.79e-05   |
+|    loss                 | 3.45       |
+|    n_updates            | 570        |
+|    policy_gradient_loss | -0.00944   |
+|    value_loss           | 8.12       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.42e+03    |
+|    ep_rew_mean          | -130        |
+| time/                   |             |
+|    fps                  | 318         |
+|    iterations           | 59          |
+|    time_elapsed         | 306         |
+|    total_timesteps      | 97586       |
+| train/                  |             |
+|    approx_kl            | 0.055749163 |
+|    clip_fraction        | 0.422       |
+|    clip_range           | 0.228       |
+|    entropy_loss         | -6.97       |
+|    explained_variance   | 0.27        |
+|    learning_rate        | 9.79e-05    |
+|    loss                 | 0.485       |
+|    n_updates            | 580         |
+|    policy_gradient_loss | -0.0239     |
+|    value_loss           | 3.6         |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.4e+03    |
+|    ep_rew_mean          | -134       |
+| time/                   |            |
+|    fps                  | 317        |
+|    iterations           | 60         |
+|    time_elapsed         | 312        |
+|    total_timesteps      | 99240      |
+| train/                  |            |
+|    approx_kl            | 0.05942291 |
+|    clip_fraction        | 0.465      |
+|    clip_range           | 0.228      |
+|    entropy_loss         | -7.15      |
+|    explained_variance   | 0.522      |
+|    learning_rate        | 9.79e-05   |
+|    loss                 | 1.21       |
+|    n_updates            | 590        |
+|    policy_gradient_loss | -0.0177    |
+|    value_loss           | 12.3       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.4e+03     |
+|    ep_rew_mean          | -134        |
+| time/                   |             |
+|    fps                  | 317         |
+|    iterations           | 61          |
+|    time_elapsed         | 317         |
+|    total_timesteps      | 100894      |
+| train/                  |             |
+|    approx_kl            | 0.090608686 |
+|    clip_fraction        | 0.468       |
+|    clip_range           | 0.228       |
+|    entropy_loss         | -7.05       |
+|    explained_variance   | 0.342       |
+|    learning_rate        | 9.79e-05    |
+|    loss                 | 4.04        |
+|    n_updates            | 600         |
+|    policy_gradient_loss | 0.000938    |
+|    value_loss           | 25.8        |
+-----------------------------------------
+[I 2023-03-30 22:18:58,250] Trial 7 finished with value: -352.0 and parameters: {'n_steps': 1654, 'gamma': 0.9631671321909901, 'learning_rate': 9.790024836371174e-05, 'clip_range': 0.22794548657535632, 'gae_lambda': 0.8643034328071537}. Best is trial 6 with value: -1.0.
+Using cuda device
+Wrapping the env in a DummyVecEnv.
+Wrapping the env in a VecTransposeImage.
+C:\ProgramData\Anaconda3\envs\StreetFighterAI\lib\site-packages\stable_baselines3\ppo\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 1146`, after every 17 untruncated mini-batches, there will be a truncated mini-batch of size 58
+We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
+Info: (n_steps=1146 and n_envs=1)
+  warnings.warn(
+Logging to logs/PPO_26
+-----------------------------
+| time/              |      |
+|    fps             | 624  |
+|    iterations      | 1    |
+|    time_elapsed    | 1    |
+|    total_timesteps | 1146 |
+-----------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 1.96e+03   |
+|    ep_rew_mean          | 1          |
+| time/                   |            |
+|    fps                  | 453        |
+|    iterations           | 2          |
+|    time_elapsed         | 5          |
+|    total_timesteps      | 2292       |
+| train/                  |            |
+|    approx_kl            | 0.04023962 |
+|    clip_fraction        | 0.18       |
+|    clip_range           | 0.332      |
+|    entropy_loss         | -8.29      |
+|    explained_variance   | 0.00577    |
+|    learning_rate        | 8.88e-05   |
+|    loss                 | 2.31       |
+|    n_updates            | 10         |
+|    policy_gradient_loss | -0.0237    |
+|    value_loss           | 11.7       |
+----------------------------------------
+---------------------------------------
+| rollout/                |           |
+|    ep_len_mean          | 1.96e+03  |
+|    ep_rew_mean          | 1         |
+| time/                   |           |
+|    fps                  | 419       |
+|    iterations           | 3         |
+|    time_elapsed         | 8         |
+|    total_timesteps      | 3438      |
+| train/                  |           |
+|    approx_kl            | 0.0303674 |
+|    clip_fraction        | 0.155     |
+|    clip_range           | 0.332     |
+|    entropy_loss         | -8.25     |
+|    explained_variance   | -0.03     |
+|    learning_rate        | 8.88e-05  |
+|    loss                 | 1.87      |
+|    n_updates            | 20        |
+|    policy_gradient_loss | -0.0214   |
+|    value_loss           | 5.96      |
+---------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 1.9e+03     |
+|    ep_rew_mean          | 138         |
+| time/                   |             |
+|    fps                  | 406         |
+|    iterations           | 4           |
+|    time_elapsed         | 11          |
+|    total_timesteps      | 4584        |
+| train/                  |             |
+|    approx_kl            | 0.029227091 |
+|    clip_fraction        | 0.13        |
+|    clip_range           | 0.332       |
+|    entropy_loss         | -8.25       |
+|    explained_variance   | 0.0631      |
+|    learning_rate        | 8.88e-05    |
+|    loss                 | 0.793       |
+|    n_updates            | 30          |
+|    policy_gradient_loss | -0.0214     |
+|    value_loss           | 9.91        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 1.9e+03    |
+|    ep_rew_mean          | 138        |
+| time/                   |            |
+|    fps                  | 399        |
+|    iterations           | 5          |
+|    time_elapsed         | 14         |
+|    total_timesteps      | 5730       |
+| train/                  |            |
+|    approx_kl            | 0.04213173 |
+|    clip_fraction        | 0.147      |
+|    clip_range           | 0.332      |
+|    entropy_loss         | -8.21      |
+|    explained_variance   | 0.0443     |
+|    learning_rate        | 8.88e-05   |
+|    loss                 | 2.83       |
+|    n_updates            | 40         |
+|    policy_gradient_loss | -0.0103    |
+|    value_loss           | 34.6       |
+----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.1e+03    |
+|    ep_rew_mean          | 68         |
+| time/                   |            |
+|    fps                  | 393        |
+|    iterations           | 6          |
+|    time_elapsed         | 17         |
+|    total_timesteps      | 6876       |
+| train/                  |            |
+|    approx_kl            | 0.02153559 |
+|    clip_fraction        | 0.143      |
+|    clip_range           | 0.332      |
+|    entropy_loss         | -8.14      |
+|    explained_variance   | -0.0437    |
+|    learning_rate        | 8.88e-05   |
+|    loss                 | 0.602      |
+|    n_updates            | 50         |
+|    policy_gradient_loss | -0.0157    |
+|    value_loss           | 11.8       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 1.92e+03    |
+|    ep_rew_mean          | 22.2        |
+| time/                   |             |
+|    fps                  | 391         |
+|    iterations           | 7           |
+|    time_elapsed         | 20          |
+|    total_timesteps      | 8022        |
+| train/                  |             |
+|    approx_kl            | 0.040259663 |
+|    clip_fraction        | 0.2         |
+|    clip_range           | 0.332       |
+|    entropy_loss         | -8.05       |
+|    explained_variance   | -0.161      |
+|    learning_rate        | 8.88e-05    |
+|    loss                 | 13.2        |
+|    n_updates            | 60          |
+|    policy_gradient_loss | -0.025      |
+|    value_loss           | 15.7        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 1.92e+03    |
+|    ep_rew_mean          | 22.2        |
+| time/                   |             |
+|    fps                  | 388         |
+|    iterations           | 8           |
+|    time_elapsed         | 23          |
+|    total_timesteps      | 9168        |
+| train/                  |             |
+|    approx_kl            | 0.029950712 |
+|    clip_fraction        | 0.155       |
+|    clip_range           | 0.332       |
+|    entropy_loss         | -8.02       |
+|    explained_variance   | 0.0118      |
+|    learning_rate        | 8.88e-05    |
+|    loss                 | 6.02        |
+|    n_updates            | 70          |
+|    policy_gradient_loss | -0.028      |
+|    value_loss           | 16.9        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 1.89e+03    |
+|    ep_rew_mean          | -28.4       |
+| time/                   |             |
+|    fps                  | 386         |
+|    iterations           | 9           |
+|    time_elapsed         | 26          |
+|    total_timesteps      | 10314       |
+| train/                  |             |
+|    approx_kl            | 0.034316827 |
+|    clip_fraction        | 0.127       |
+|    clip_range           | 0.332       |
+|    entropy_loss         | -8.03       |
+|    explained_variance   | -0.00279    |
+|    learning_rate        | 8.88e-05    |
+|    loss                 | 0.944       |
+|    n_updates            | 80          |
+|    policy_gradient_loss | -0.0281     |
+|    value_loss           | 5.72        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 1.89e+03    |
+|    ep_rew_mean          | -28.4       |
+| time/                   |             |
+|    fps                  | 381         |
+|    iterations           | 10          |
+|    time_elapsed         | 30          |
+|    total_timesteps      | 11460       |
+| train/                  |             |
+|    approx_kl            | 0.047511037 |
+|    clip_fraction        | 0.184       |
+|    clip_range           | 0.332       |
+|    entropy_loss         | -7.96       |
+|    explained_variance   | 0.0175      |
+|    learning_rate        | 8.88e-05    |
+|    loss                 | 1.72        |
+|    n_updates            | 90          |
+|    policy_gradient_loss | -0.0188     |
+|    value_loss           | 24.5        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 1.89e+03   |
+|    ep_rew_mean          | -28.4      |
+| time/                   |            |
+|    fps                  | 380        |
+|    iterations           | 11         |
+|    time_elapsed         | 33         |
+|    total_timesteps      | 12606      |
+| train/                  |            |
+|    approx_kl            | 0.03660329 |
+|    clip_fraction        | 0.183      |
+|    clip_range           | 0.332      |
+|    entropy_loss         | -7.83      |
+|    explained_variance   | -0.00303   |
+|    learning_rate        | 8.88e-05   |
+|    loss                 | 2.56       |
+|    n_updates            | 100        |
+|    policy_gradient_loss | -0.017     |
+|    value_loss           | 6.9        |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.12e+03    |
+|    ep_rew_mean          | -27.5       |
+| time/                   |             |
+|    fps                  | 378         |
+|    iterations           | 12          |
+|    time_elapsed         | 36          |
+|    total_timesteps      | 13752       |
+| train/                  |             |
+|    approx_kl            | 0.030439496 |
+|    clip_fraction        | 0.146       |
+|    clip_range           | 0.332       |
+|    entropy_loss         | -7.91       |
+|    explained_variance   | 0.0077      |
+|    learning_rate        | 8.88e-05    |
+|    loss                 | 9.06        |
+|    n_updates            | 110         |
+|    policy_gradient_loss | -0.0198     |
+|    value_loss           | 6.01        |
+-----------------------------------------
+---------------------------------------
+| rollout/                |           |
+|    ep_len_mean          | 2.05e+03  |
+|    ep_rew_mean          | 3         |
+| time/                   |           |
+|    fps                  | 376       |
+|    iterations           | 13        |
+|    time_elapsed         | 39        |
+|    total_timesteps      | 14898     |
+| train/                  |           |
+|    approx_kl            | 0.0319912 |
+|    clip_fraction        | 0.138     |
+|    clip_range           | 0.332     |
+|    entropy_loss         | -7.87     |
+|    explained_variance   | -0.236    |
+|    learning_rate        | 8.88e-05  |
+|    loss                 | 0.376     |
+|    n_updates            | 120       |
+|    policy_gradient_loss | -0.0228   |
+|    value_loss           | 5.92      |
+---------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.05e+03    |
+|    ep_rew_mean          | 3           |
+| time/                   |             |
+|    fps                  | 374         |
+|    iterations           | 14          |
+|    time_elapsed         | 42          |
+|    total_timesteps      | 16044       |
+| train/                  |             |
+|    approx_kl            | 0.029652404 |
+|    clip_fraction        | 0.153       |
+|    clip_range           | 0.332       |
+|    entropy_loss         | -7.91       |
+|    explained_variance   | -0.0174     |
+|    learning_rate        | 8.88e-05    |
+|    loss                 | 0.688       |
+|    n_updates            | 130         |
+|    policy_gradient_loss | -0.0226     |
+|    value_loss           | 23.6        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.09e+03    |
+|    ep_rew_mean          | -8.75       |
+| time/                   |             |
+|    fps                  | 372         |
+|    iterations           | 15          |
+|    time_elapsed         | 46          |
+|    total_timesteps      | 17190       |
+| train/                  |             |
+|    approx_kl            | 0.034059085 |
+|    clip_fraction        | 0.172       |
+|    clip_range           | 0.332       |
+|    entropy_loss         | -7.86       |
+|    explained_variance   | -0.0714     |
+|    learning_rate        | 8.88e-05    |
+|    loss                 | 0.296       |
+|    n_updates            | 140         |
+|    policy_gradient_loss | -0.0245     |
+|    value_loss           | 7.12        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.02e+03    |
+|    ep_rew_mean          | -40.6       |
+| time/                   |             |
+|    fps                  | 372         |
+|    iterations           | 16          |
+|    time_elapsed         | 49          |
+|    total_timesteps      | 18336       |
+| train/                  |             |
+|    approx_kl            | 0.032943897 |
+|    clip_fraction        | 0.121       |
+|    clip_range           | 0.332       |
+|    entropy_loss         | -7.86       |
+|    explained_variance   | -0.16       |
+|    learning_rate        | 8.88e-05    |
+|    loss                 | 8.37        |
+|    n_updates            | 150         |
+|    policy_gradient_loss | -0.0113     |
+|    value_loss           | 8.67        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.02e+03   |
+|    ep_rew_mean          | -40.6      |
+| time/                   |            |
+|    fps                  | 371        |
+|    iterations           | 17         |
+|    time_elapsed         | 52         |
+|    total_timesteps      | 19482      |
+| train/                  |            |
+|    approx_kl            | 0.03772574 |
+|    clip_fraction        | 0.124      |
+|    clip_range           | 0.332      |
+|    entropy_loss         | -7.87      |
+|    explained_variance   | 0.0207     |
+|    learning_rate        | 8.88e-05   |
+|    loss                 | 2.78       |
+|    n_updates            | 160        |
+|    policy_gradient_loss | -0.0135    |
+|    value_loss           | 53.4       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.02e+03    |
+|    ep_rew_mean          | -40.6       |
+| time/                   |             |
+|    fps                  | 370         |
+|    iterations           | 18          |
+|    time_elapsed         | 55          |
+|    total_timesteps      | 20628       |
+| train/                  |             |
+|    approx_kl            | 0.024023427 |
+|    clip_fraction        | 0.16        |
+|    clip_range           | 0.332       |
+|    entropy_loss         | -7.73       |
+|    explained_variance   | 0.0897      |
+|    learning_rate        | 8.88e-05    |
+|    loss                 | 0.491       |
+|    n_updates            | 170         |
+|    policy_gradient_loss | -0.0294     |
+|    value_loss           | 4.74        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.11e+03    |
+|    ep_rew_mean          | -28.9       |
+| time/                   |             |
+|    fps                  | 370         |
+|    iterations           | 19          |
+|    time_elapsed         | 58          |
+|    total_timesteps      | 21774       |
+| train/                  |             |
+|    approx_kl            | 0.030223705 |
+|    clip_fraction        | 0.123       |
+|    clip_range           | 0.332       |
+|    entropy_loss         | -7.82       |
+|    explained_variance   | -0.0303     |
+|    learning_rate        | 8.88e-05    |
+|    loss                 | 1.69        |
+|    n_updates            | 180         |
+|    policy_gradient_loss | -0.0151     |
+|    value_loss           | 5.96        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.05e+03   |
+|    ep_rew_mean          | -46        |
+| time/                   |            |
+|    fps                  | 371        |
+|    iterations           | 20         |
+|    time_elapsed         | 61         |
+|    total_timesteps      | 22920      |
+| train/                  |            |
+|    approx_kl            | 0.03593646 |
+|    clip_fraction        | 0.194      |
+|    clip_range           | 0.332      |
+|    entropy_loss         | -7.64      |
+|    explained_variance   | 0.161      |
+|    learning_rate        | 8.88e-05   |
+|    loss                 | 4.26       |
+|    n_updates            | 190        |
+|    policy_gradient_loss | -0.0292    |
+|    value_loss           | 8.23       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 1.96e+03    |
+|    ep_rew_mean          | -21.8       |
+| time/                   |             |
+|    fps                  | 371         |
+|    iterations           | 21          |
+|    time_elapsed         | 64          |
+|    total_timesteps      | 24066       |
+| train/                  |             |
+|    approx_kl            | 0.023679743 |
+|    clip_fraction        | 0.111       |
+|    clip_range           | 0.332       |
+|    entropy_loss         | -7.71       |
+|    explained_variance   | 0.126       |
+|    learning_rate        | 8.88e-05    |
+|    loss                 | 4.02        |
+|    n_updates            | 200         |
+|    policy_gradient_loss | -0.0215     |
+|    value_loss           | 24.6        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 1.96e+03    |
+|    ep_rew_mean          | -21.8       |
+| time/                   |             |
+|    fps                  | 371         |
+|    iterations           | 22          |
+|    time_elapsed         | 67          |
+|    total_timesteps      | 25212       |
+| train/                  |             |
+|    approx_kl            | 0.026022209 |
+|    clip_fraction        | 0.178       |
+|    clip_range           | 0.332       |
+|    entropy_loss         | -7.65       |
+|    explained_variance   | -0.2        |
+|    learning_rate        | 8.88e-05    |
+|    loss                 | 2.98        |
+|    n_updates            | 210         |
+|    policy_gradient_loss | -0.0145     |
+|    value_loss           | 44.2        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 1.97e+03   |
+|    ep_rew_mean          | -21.3      |
+| time/                   |            |
+|    fps                  | 372        |
+|    iterations           | 23         |
+|    time_elapsed         | 70         |
+|    total_timesteps      | 26358      |
+| train/                  |            |
+|    approx_kl            | 0.02791216 |
+|    clip_fraction        | 0.104      |
+|    clip_range           | 0.332      |
+|    entropy_loss         | -7.58      |
+|    explained_variance   | 0.0539     |
+|    learning_rate        | 8.88e-05   |
+|    loss                 | 1.41       |
+|    n_updates            | 220        |
+|    policy_gradient_loss | -0.0185    |
+|    value_loss           | 6.16       |
+----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 1.95e+03   |
+|    ep_rew_mean          | -21.6      |
+| time/                   |            |
+|    fps                  | 372        |
+|    iterations           | 24         |
+|    time_elapsed         | 73         |
+|    total_timesteps      | 27504      |
+| train/                  |            |
+|    approx_kl            | 0.03433499 |
+|    clip_fraction        | 0.138      |
+|    clip_range           | 0.332      |
+|    entropy_loss         | -7.69      |
+|    explained_variance   | -0.0641    |
+|    learning_rate        | 8.88e-05   |
+|    loss                 | 4.67       |
+|    n_updates            | 230        |
+|    policy_gradient_loss | -0.0315    |
+|    value_loss           | 4.38       |
+----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 1.88e+03   |
+|    ep_rew_mean          | -42.7      |
+| time/                   |            |
+|    fps                  | 372        |
+|    iterations           | 25         |
+|    time_elapsed         | 76         |
+|    total_timesteps      | 28650      |
+| train/                  |            |
+|    approx_kl            | 0.03307491 |
+|    clip_fraction        | 0.157      |
+|    clip_range           | 0.332      |
+|    entropy_loss         | -7.54      |
+|    explained_variance   | 0.074      |
+|    learning_rate        | 8.88e-05   |
+|    loss                 | 2.61       |
+|    n_updates            | 240        |
+|    policy_gradient_loss | -0.0279    |
+|    value_loss           | 9.81       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 1.84e+03    |
+|    ep_rew_mean          | -59.4       |
+| time/                   |             |
+|    fps                  | 372         |
+|    iterations           | 26          |
+|    time_elapsed         | 79          |
+|    total_timesteps      | 29796       |
+| train/                  |             |
+|    approx_kl            | 0.041843403 |
+|    clip_fraction        | 0.136       |
+|    clip_range           | 0.332       |
+|    entropy_loss         | -7.47       |
+|    explained_variance   | -0.00451    |
+|    learning_rate        | 8.88e-05    |
+|    loss                 | 4.38        |
+|    n_updates            | 250         |
+|    policy_gradient_loss | -0.0115     |
+|    value_loss           | 71.8        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 1.84e+03    |
+|    ep_rew_mean          | -59.4       |
+| time/                   |             |
+|    fps                  | 372         |
+|    iterations           | 27          |
+|    time_elapsed         | 83          |
+|    total_timesteps      | 30942       |
+| train/                  |             |
+|    approx_kl            | 0.036084294 |
+|    clip_fraction        | 0.154       |
+|    clip_range           | 0.332       |
+|    entropy_loss         | -7.33       |
+|    explained_variance   | 0.0532      |
+|    learning_rate        | 8.88e-05    |
+|    loss                 | 3.64        |
+|    n_updates            | 260         |
+|    policy_gradient_loss | -0.0072     |
+|    value_loss           | 58.6        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 1.86e+03    |
+|    ep_rew_mean          | -42.5       |
+| time/                   |             |
+|    fps                  | 372         |
+|    iterations           | 28          |
+|    time_elapsed         | 86          |
+|    total_timesteps      | 32088       |
+| train/                  |             |
+|    approx_kl            | 0.036504086 |
+|    clip_fraction        | 0.141       |
+|    clip_range           | 0.332       |
+|    entropy_loss         | -7.55       |
+|    explained_variance   | 0.148       |
+|    learning_rate        | 8.88e-05    |
+|    loss                 | 1.76        |
+|    n_updates            | 270         |
+|    policy_gradient_loss | -0.0167     |
+|    value_loss           | 12.2        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 1.86e+03    |
+|    ep_rew_mean          | -42.5       |
+| time/                   |             |
+|    fps                  | 370         |
+|    iterations           | 29          |
+|    time_elapsed         | 89          |
+|    total_timesteps      | 33234       |
+| train/                  |             |
+|    approx_kl            | 0.034296088 |
+|    clip_fraction        | 0.187       |
+|    clip_range           | 0.332       |
+|    entropy_loss         | -7.61       |
+|    explained_variance   | 0.0593      |
+|    learning_rate        | 8.88e-05    |
+|    loss                 | 12.4        |
+|    n_updates            | 280         |
+|    policy_gradient_loss | -0.0195     |
+|    value_loss           | 31.1        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 1.88e+03    |
+|    ep_rew_mean          | -54.7       |
+| time/                   |             |
+|    fps                  | 368         |
+|    iterations           | 30          |
+|    time_elapsed         | 93          |
+|    total_timesteps      | 34380       |
+| train/                  |             |
+|    approx_kl            | 0.029918602 |
+|    clip_fraction        | 0.173       |
+|    clip_range           | 0.332       |
+|    entropy_loss         | -7.44       |
+|    explained_variance   | 0.178       |
+|    learning_rate        | 8.88e-05    |
+|    loss                 | 2.16        |
+|    n_updates            | 290         |
+|    policy_gradient_loss | -0.0263     |
+|    value_loss           | 5.47        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 1.88e+03   |
+|    ep_rew_mean          | -54.7      |
+| time/                   |            |
+|    fps                  | 367        |
+|    iterations           | 31         |
+|    time_elapsed         | 96         |
+|    total_timesteps      | 35526      |
+| train/                  |            |
+|    approx_kl            | 0.04137721 |
+|    clip_fraction        | 0.132      |
+|    clip_range           | 0.332      |
+|    entropy_loss         | -7.45      |
+|    explained_variance   | 0.00431    |
+|    learning_rate        | 8.88e-05   |
+|    loss                 | 1.78       |
+|    n_updates            | 300        |
+|    policy_gradient_loss | -0.0145    |
+|    value_loss           | 39.8       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 1.92e+03    |
+|    ep_rew_mean          | -53.7       |
+| time/                   |             |
+|    fps                  | 367         |
+|    iterations           | 32          |
+|    time_elapsed         | 99          |
+|    total_timesteps      | 36672       |
+| train/                  |             |
+|    approx_kl            | 0.030837413 |
+|    clip_fraction        | 0.134       |
+|    clip_range           | 0.332       |
+|    entropy_loss         | -7.38       |
+|    explained_variance   | 0.0959      |
+|    learning_rate        | 8.88e-05    |
+|    loss                 | 0.065       |
+|    n_updates            | 310         |
+|    policy_gradient_loss | -0.031      |
+|    value_loss           | 3.25        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 1.92e+03    |
+|    ep_rew_mean          | -53.7       |
+| time/                   |             |
+|    fps                  | 365         |
+|    iterations           | 33          |
+|    time_elapsed         | 103         |
+|    total_timesteps      | 37818       |
+| train/                  |             |
+|    approx_kl            | 0.029948711 |
+|    clip_fraction        | 0.139       |
+|    clip_range           | 0.332       |
+|    entropy_loss         | -7.3        |
+|    explained_variance   | 0.16        |
+|    learning_rate        | 8.88e-05    |
+|    loss                 | 2.53        |
+|    n_updates            | 320         |
+|    policy_gradient_loss | -0.021      |
+|    value_loss           | 7.3         |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 1.92e+03   |
+|    ep_rew_mean          | -62.9      |
+| time/                   |            |
+|    fps                  | 364        |
+|    iterations           | 34         |
+|    time_elapsed         | 106        |
+|    total_timesteps      | 38964      |
+| train/                  |            |
+|    approx_kl            | 0.03208603 |
+|    clip_fraction        | 0.163      |
+|    clip_range           | 0.332      |
+|    entropy_loss         | -7.35      |
+|    explained_variance   | 0.152      |
+|    learning_rate        | 8.88e-05   |
+|    loss                 | 0.512      |
+|    n_updates            | 330        |
+|    policy_gradient_loss | -0.0206    |
+|    value_loss           | 6.39       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 1.91e+03    |
+|    ep_rew_mean          | -62.5       |
+| time/                   |             |
+|    fps                  | 363         |
+|    iterations           | 35          |
+|    time_elapsed         | 110         |
+|    total_timesteps      | 40110       |
+| train/                  |             |
+|    approx_kl            | 0.028910978 |
+|    clip_fraction        | 0.148       |
+|    clip_range           | 0.332       |
+|    entropy_loss         | -7.23       |
+|    explained_variance   | -0.049      |
+|    learning_rate        | 8.88e-05    |
+|    loss                 | 1.4         |
+|    n_updates            | 340         |
+|    policy_gradient_loss | -0.0218     |
+|    value_loss           | 28.6        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 1.91e+03    |
+|    ep_rew_mean          | -62.5       |
+| time/                   |             |
+|    fps                  | 362         |
+|    iterations           | 36          |
+|    time_elapsed         | 113         |
+|    total_timesteps      | 41256       |
+| train/                  |             |
+|    approx_kl            | 0.034623235 |
+|    clip_fraction        | 0.122       |
+|    clip_range           | 0.332       |
+|    entropy_loss         | -7.21       |
+|    explained_variance   | 0.00912     |
+|    learning_rate        | 8.88e-05    |
+|    loss                 | 4.85        |
+|    n_updates            | 350         |
+|    policy_gradient_loss | -0.0166     |
+|    value_loss           | 17.1        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 1.88e+03    |
+|    ep_rew_mean          | -53.5       |
+| time/                   |             |
+|    fps                  | 362         |
+|    iterations           | 37          |
+|    time_elapsed         | 117         |
+|    total_timesteps      | 42402       |
+| train/                  |             |
+|    approx_kl            | 0.025375202 |
+|    clip_fraction        | 0.146       |
+|    clip_range           | 0.332       |
+|    entropy_loss         | -7.14       |
+|    explained_variance   | 0.27        |
+|    learning_rate        | 8.88e-05    |
+|    loss                 | 4.6         |
+|    n_updates            | 360         |
+|    policy_gradient_loss | -0.0222     |
+|    value_loss           | 7.8         |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 1.87e+03    |
+|    ep_rew_mean          | -66.1       |
+| time/                   |             |
+|    fps                  | 361         |
+|    iterations           | 38          |
+|    time_elapsed         | 120         |
+|    total_timesteps      | 43548       |
+| train/                  |             |
+|    approx_kl            | 0.042737268 |
+|    clip_fraction        | 0.15        |
+|    clip_range           | 0.332       |
+|    entropy_loss         | -7.07       |
+|    explained_variance   | 0.0585      |
+|    learning_rate        | 8.88e-05    |
+|    loss                 | 3.26        |
+|    n_updates            | 370         |
+|    policy_gradient_loss | -0.0223     |
+|    value_loss           | 15.6        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 1.85e+03    |
+|    ep_rew_mean          | -74.1       |
+| time/                   |             |
+|    fps                  | 361         |
+|    iterations           | 39          |
+|    time_elapsed         | 123         |
+|    total_timesteps      | 44694       |
+| train/                  |             |
+|    approx_kl            | 0.027932568 |
+|    clip_fraction        | 0.129       |
+|    clip_range           | 0.332       |
+|    entropy_loss         | -6.9        |
+|    explained_variance   | 0.0755      |
+|    learning_rate        | 8.88e-05    |
+|    loss                 | 2.24        |
+|    n_updates            | 380         |
+|    policy_gradient_loss | -0.0163     |
+|    value_loss           | 49.9        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 1.85e+03    |
+|    ep_rew_mean          | -74.1       |
+| time/                   |             |
+|    fps                  | 362         |
+|    iterations           | 40          |
+|    time_elapsed         | 126         |
+|    total_timesteps      | 45840       |
+| train/                  |             |
+|    approx_kl            | 0.030873783 |
+|    clip_fraction        | 0.0904      |
+|    clip_range           | 0.332       |
+|    entropy_loss         | -6.99       |
+|    explained_variance   | 0.152       |
+|    learning_rate        | 8.88e-05    |
+|    loss                 | 2.15        |
+|    n_updates            | 390         |
+|    policy_gradient_loss | -0.0199     |
+|    value_loss           | 42.7        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 1.85e+03    |
+|    ep_rew_mean          | -82.7       |
+| time/                   |             |
+|    fps                  | 362         |
+|    iterations           | 41          |
+|    time_elapsed         | 129         |
+|    total_timesteps      | 46986       |
+| train/                  |             |
+|    approx_kl            | 0.026348379 |
+|    clip_fraction        | 0.126       |
+|    clip_range           | 0.332       |
+|    entropy_loss         | -7.23       |
+|    explained_variance   | 0.0323      |
+|    learning_rate        | 8.88e-05    |
+|    loss                 | 0.438       |
+|    n_updates            | 400         |
+|    policy_gradient_loss | -0.0228     |
+|    value_loss           | 6.85        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 1.85e+03   |
+|    ep_rew_mean          | -82.7      |
+| time/                   |            |
+|    fps                  | 362        |
+|    iterations           | 42         |
+|    time_elapsed         | 132        |
+|    total_timesteps      | 48132      |
+| train/                  |            |
+|    approx_kl            | 0.04224583 |
+|    clip_fraction        | 0.206      |
+|    clip_range           | 0.332      |
+|    entropy_loss         | -7.16      |
+|    explained_variance   | 0.173      |
+|    learning_rate        | 8.88e-05   |
+|    loss                 | 1.07       |
+|    n_updates            | 410        |
+|    policy_gradient_loss | -0.0242    |
+|    value_loss           | 38         |
+----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 1.86e+03   |
+|    ep_rew_mean          | -75.8      |
+| time/                   |            |
+|    fps                  | 362        |
+|    iterations           | 43         |
+|    time_elapsed         | 136        |
+|    total_timesteps      | 49278      |
+| train/                  |            |
+|    approx_kl            | 0.03634002 |
+|    clip_fraction        | 0.164      |
+|    clip_range           | 0.332      |
+|    entropy_loss         | -7.21      |
+|    explained_variance   | 0.28       |
+|    learning_rate        | 8.88e-05   |
+|    loss                 | 3.7        |
+|    n_updates            | 420        |
+|    policy_gradient_loss | -0.032     |
+|    value_loss           | 9.29       |
+----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 1.86e+03   |
+|    ep_rew_mean          | -73.5      |
+| time/                   |            |
+|    fps                  | 362        |
+|    iterations           | 44         |
+|    time_elapsed         | 139        |
+|    total_timesteps      | 50424      |
+| train/                  |            |
+|    approx_kl            | 0.03677476 |
+|    clip_fraction        | 0.153      |
+|    clip_range           | 0.332      |
+|    entropy_loss         | -7.17      |
+|    explained_variance   | 0.11       |
+|    learning_rate        | 8.88e-05   |
+|    loss                 | 0.533      |
+|    n_updates            | 430        |
+|    policy_gradient_loss | -0.0277    |
+|    value_loss           | 9.42       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 1.86e+03    |
+|    ep_rew_mean          | -73.5       |
+| time/                   |             |
+|    fps                  | 362         |
+|    iterations           | 45          |
+|    time_elapsed         | 142         |
+|    total_timesteps      | 51570       |
+| train/                  |             |
+|    approx_kl            | 0.034621768 |
+|    clip_fraction        | 0.154       |
+|    clip_range           | 0.332       |
+|    entropy_loss         | -7.17       |
+|    explained_variance   | 0.368       |
+|    learning_rate        | 8.88e-05    |
+|    loss                 | 5.39        |
+|    n_updates            | 440         |
+|    policy_gradient_loss | -0.0276     |
+|    value_loss           | 8.99        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 1.85e+03    |
+|    ep_rew_mean          | -64.9       |
+| time/                   |             |
+|    fps                  | 362         |
+|    iterations           | 46          |
+|    time_elapsed         | 145         |
+|    total_timesteps      | 52716       |
+| train/                  |             |
+|    approx_kl            | 0.036459163 |
+|    clip_fraction        | 0.165       |
+|    clip_range           | 0.332       |
+|    entropy_loss         | -7.21       |
+|    explained_variance   | 0.374       |
+|    learning_rate        | 8.88e-05    |
+|    loss                 | 12.4        |
+|    n_updates            | 450         |
+|    policy_gradient_loss | -0.037      |
+|    value_loss           | 9.29        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 1.85e+03   |
+|    ep_rew_mean          | -60.5      |
+| time/                   |            |
+|    fps                  | 361        |
+|    iterations           | 47         |
+|    time_elapsed         | 148        |
+|    total_timesteps      | 53862      |
+| train/                  |            |
+|    approx_kl            | 0.03863145 |
+|    clip_fraction        | 0.144      |
+|    clip_range           | 0.332      |
+|    entropy_loss         | -7.03      |
+|    explained_variance   | 0.36       |
+|    learning_rate        | 8.88e-05   |
+|    loss                 | 0.534      |
+|    n_updates            | 460        |
+|    policy_gradient_loss | -0.0239    |
+|    value_loss           | 10.8       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 1.85e+03    |
+|    ep_rew_mean          | -60.5       |
+| time/                   |             |
+|    fps                  | 361         |
+|    iterations           | 48          |
+|    time_elapsed         | 152         |
+|    total_timesteps      | 55008       |
+| train/                  |             |
+|    approx_kl            | 0.049923413 |
+|    clip_fraction        | 0.157       |
+|    clip_range           | 0.332       |
+|    entropy_loss         | -7.04       |
+|    explained_variance   | 0.0693      |
+|    learning_rate        | 8.88e-05    |
+|    loss                 | 5.03        |
+|    n_updates            | 470         |
+|    policy_gradient_loss | -0.0285     |
+|    value_loss           | 11.7        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 1.86e+03   |
+|    ep_rew_mean          | -64.5      |
+| time/                   |            |
+|    fps                  | 361        |
+|    iterations           | 49         |
+|    time_elapsed         | 155        |
+|    total_timesteps      | 56154      |
+| train/                  |            |
+|    approx_kl            | 0.03346165 |
+|    clip_fraction        | 0.145      |
+|    clip_range           | 0.332      |
+|    entropy_loss         | -7         |
+|    explained_variance   | -0.0154    |
+|    learning_rate        | 8.88e-05   |
+|    loss                 | 2.6        |
+|    n_updates            | 480        |
+|    policy_gradient_loss | -0.0256    |
+|    value_loss           | 2.62       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 1.86e+03    |
+|    ep_rew_mean          | -64.5       |
+| time/                   |             |
+|    fps                  | 361         |
+|    iterations           | 50          |
+|    time_elapsed         | 158         |
+|    total_timesteps      | 57300       |
+| train/                  |             |
+|    approx_kl            | 0.041344777 |
+|    clip_fraction        | 0.162       |
+|    clip_range           | 0.332       |
+|    entropy_loss         | -7.07       |
+|    explained_variance   | 0.133       |
+|    learning_rate        | 8.88e-05    |
+|    loss                 | 5.4         |
+|    n_updates            | 490         |
+|    policy_gradient_loss | -0.0249     |
+|    value_loss           | 20.1        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 1.88e+03   |
+|    ep_rew_mean          | -64.6      |
+| time/                   |            |
+|    fps                  | 360        |
+|    iterations           | 51         |
+|    time_elapsed         | 161        |
+|    total_timesteps      | 58446      |
+| train/                  |            |
+|    approx_kl            | 0.03568868 |
+|    clip_fraction        | 0.159      |
+|    clip_range           | 0.332      |
+|    entropy_loss         | -6.96      |
+|    explained_variance   | 0.364      |
+|    learning_rate        | 8.88e-05   |
+|    loss                 | 1.36       |
+|    n_updates            | 500        |
+|    policy_gradient_loss | -0.0308    |
+|    value_loss           | 4.16       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 1.88e+03    |
+|    ep_rew_mean          | -64.6       |
+| time/                   |             |
+|    fps                  | 360         |
+|    iterations           | 52          |
+|    time_elapsed         | 165         |
+|    total_timesteps      | 59592       |
+| train/                  |             |
+|    approx_kl            | 0.032623842 |
+|    clip_fraction        | 0.145       |
+|    clip_range           | 0.332       |
+|    entropy_loss         | -6.74       |
+|    explained_variance   | 0.407       |
+|    learning_rate        | 8.88e-05    |
+|    loss                 | 13.7        |
+|    n_updates            | 510         |
+|    policy_gradient_loss | -0.0268     |
+|    value_loss           | 7.94        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 1.87e+03    |
+|    ep_rew_mean          | -68.9       |
+| time/                   |             |
+|    fps                  | 359         |
+|    iterations           | 53          |
+|    time_elapsed         | 168         |
+|    total_timesteps      | 60738       |
+| train/                  |             |
+|    approx_kl            | 0.027725061 |
+|    clip_fraction        | 0.126       |
+|    clip_range           | 0.332       |
+|    entropy_loss         | -6.87       |
+|    explained_variance   | 0.437       |
+|    learning_rate        | 8.88e-05    |
+|    loss                 | 0.632       |
+|    n_updates            | 520         |
+|    policy_gradient_loss | -0.0337     |
+|    value_loss           | 4.87        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 1.87e+03   |
+|    ep_rew_mean          | -68.9      |
+| time/                   |            |
+|    fps                  | 359        |
+|    iterations           | 54         |
+|    time_elapsed         | 172        |
+|    total_timesteps      | 61884      |
+| train/                  |            |
+|    approx_kl            | 0.06401909 |
+|    clip_fraction        | 0.228      |
+|    clip_range           | 0.332      |
+|    entropy_loss         | -6.97      |
+|    explained_variance   | 0.0456     |
+|    learning_rate        | 8.88e-05   |
+|    loss                 | 1.16       |
+|    n_updates            | 530        |
+|    policy_gradient_loss | -0.0282    |
+|    value_loss           | 24.5       |
+----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 1.88e+03   |
+|    ep_rew_mean          | -75.2      |
+| time/                   |            |
+|    fps                  | 359        |
+|    iterations           | 55         |
+|    time_elapsed         | 175        |
+|    total_timesteps      | 63030      |
+| train/                  |            |
+|    approx_kl            | 0.06221285 |
+|    clip_fraction        | 0.215      |
+|    clip_range           | 0.332      |
+|    entropy_loss         | -7.02      |
+|    explained_variance   | 0.025      |
+|    learning_rate        | 8.88e-05   |
+|    loss                 | 4.39       |
+|    n_updates            | 540        |
+|    policy_gradient_loss | -0.0343    |
+|    value_loss           | 5.32       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 1.89e+03    |
+|    ep_rew_mean          | -80.6       |
+| time/                   |             |
+|    fps                  | 359         |
+|    iterations           | 56          |
+|    time_elapsed         | 178         |
+|    total_timesteps      | 64176       |
+| train/                  |             |
+|    approx_kl            | 0.042404637 |
+|    clip_fraction        | 0.201       |
+|    clip_range           | 0.332       |
+|    entropy_loss         | -6.95       |
+|    explained_variance   | -0.0445     |
+|    learning_rate        | 8.88e-05    |
+|    loss                 | 9.84        |
+|    n_updates            | 550         |
+|    policy_gradient_loss | -0.0255     |
+|    value_loss           | 32.7        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 1.89e+03   |
+|    ep_rew_mean          | -80.6      |
+| time/                   |            |
+|    fps                  | 359        |
+|    iterations           | 57         |
+|    time_elapsed         | 181        |
+|    total_timesteps      | 65322      |
+| train/                  |            |
+|    approx_kl            | 0.04397238 |
+|    clip_fraction        | 0.151      |
+|    clip_range           | 0.332      |
+|    entropy_loss         | -6.98      |
+|    explained_variance   | 0.0972     |
+|    learning_rate        | 8.88e-05   |
+|    loss                 | 26.8       |
+|    n_updates            | 560        |
+|    policy_gradient_loss | -0.0251    |
+|    value_loss           | 38.3       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 1.88e+03    |
+|    ep_rew_mean          | -83         |
+| time/                   |             |
+|    fps                  | 360         |
+|    iterations           | 58          |
+|    time_elapsed         | 184         |
+|    total_timesteps      | 66468       |
+| train/                  |             |
+|    approx_kl            | 0.044086635 |
+|    clip_fraction        | 0.197       |
+|    clip_range           | 0.332       |
+|    entropy_loss         | -7.2        |
+|    explained_variance   | 0.118       |
+|    learning_rate        | 8.88e-05    |
+|    loss                 | 1.07        |
+|    n_updates            | 570         |
+|    policy_gradient_loss | -0.0239     |
+|    value_loss           | 5.21        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 1.88e+03    |
+|    ep_rew_mean          | -83         |
+| time/                   |             |
+|    fps                  | 360         |
+|    iterations           | 59          |
+|    time_elapsed         | 187         |
+|    total_timesteps      | 67614       |
+| train/                  |             |
+|    approx_kl            | 0.052704122 |
+|    clip_fraction        | 0.203       |
+|    clip_range           | 0.332       |
+|    entropy_loss         | -6.97       |
+|    explained_variance   | 0.0489      |
+|    learning_rate        | 8.88e-05    |
+|    loss                 | 1.06        |
+|    n_updates            | 580         |
+|    policy_gradient_loss | -0.0281     |
+|    value_loss           | 17.1        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 1.9e+03     |
+|    ep_rew_mean          | -89.1       |
+| time/                   |             |
+|    fps                  | 360         |
+|    iterations           | 60          |
+|    time_elapsed         | 190         |
+|    total_timesteps      | 68760       |
+| train/                  |             |
+|    approx_kl            | 0.046419837 |
+|    clip_fraction        | 0.196       |
+|    clip_range           | 0.332       |
+|    entropy_loss         | -7.05       |
+|    explained_variance   | -0.0384     |
+|    learning_rate        | 8.88e-05    |
+|    loss                 | 2.11        |
+|    n_updates            | 590         |
+|    policy_gradient_loss | -0.027      |
+|    value_loss           | 2.89        |
+-----------------------------------------
+---------------------------------------
+| rollout/                |           |
+|    ep_len_mean          | 1.86e+03  |
+|    ep_rew_mean          | -79.1     |
+| time/                   |           |
+|    fps                  | 360       |
+|    iterations           | 61        |
+|    time_elapsed         | 193       |
+|    total_timesteps      | 69906     |
+| train/                  |           |
+|    approx_kl            | 0.0403891 |
+|    clip_fraction        | 0.169     |
+|    clip_range           | 0.332     |
+|    entropy_loss         | -7.04     |
+|    explained_variance   | 0.142     |
+|    learning_rate        | 8.88e-05  |
+|    loss                 | 18.5      |
+|    n_updates            | 600       |
+|    policy_gradient_loss | -0.026    |
+|    value_loss           | 43.4      |
+---------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 1.85e+03   |
+|    ep_rew_mean          | -85.6      |
+| time/                   |            |
+|    fps                  | 360        |
+|    iterations           | 62         |
+|    time_elapsed         | 196        |
+|    total_timesteps      | 71052      |
+| train/                  |            |
+|    approx_kl            | 0.04120052 |
+|    clip_fraction        | 0.175      |
+|    clip_range           | 0.332      |
+|    entropy_loss         | -7.16      |
+|    explained_variance   | 0.0448     |
+|    learning_rate        | 8.88e-05   |
+|    loss                 | 1.63       |
+|    n_updates            | 610        |
+|    policy_gradient_loss | -0.0246    |
+|    value_loss           | 42         |
+----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 1.85e+03   |
+|    ep_rew_mean          | -85.6      |
+| time/                   |            |
+|    fps                  | 361        |
+|    iterations           | 63         |
+|    time_elapsed         | 199        |
+|    total_timesteps      | 72198      |
+| train/                  |            |
+|    approx_kl            | 0.04098662 |
+|    clip_fraction        | 0.184      |
+|    clip_range           | 0.332      |
+|    entropy_loss         | -6.94      |
+|    explained_variance   | 0.137      |
+|    learning_rate        | 8.88e-05   |
+|    loss                 | 5.39       |
+|    n_updates            | 620        |
+|    policy_gradient_loss | -0.0287    |
+|    value_loss           | 44         |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 1.87e+03    |
+|    ep_rew_mean          | -88         |
+| time/                   |             |
+|    fps                  | 361         |
+|    iterations           | 64          |
+|    time_elapsed         | 202         |
+|    total_timesteps      | 73344       |
+| train/                  |             |
+|    approx_kl            | 0.056530125 |
+|    clip_fraction        | 0.226       |
+|    clip_range           | 0.332       |
+|    entropy_loss         | -7.3        |
+|    explained_variance   | 0.307       |
+|    learning_rate        | 8.88e-05    |
+|    loss                 | 1.07        |
+|    n_updates            | 630         |
+|    policy_gradient_loss | -0.0322     |
+|    value_loss           | 8.53        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 1.86e+03   |
+|    ep_rew_mean          | -93.7      |
+| time/                   |            |
+|    fps                  | 361        |
+|    iterations           | 65         |
+|    time_elapsed         | 205        |
+|    total_timesteps      | 74490      |
+| train/                  |            |
+|    approx_kl            | 0.05633619 |
+|    clip_fraction        | 0.226      |
+|    clip_range           | 0.332      |
+|    entropy_loss         | -7.17      |
+|    explained_variance   | 0.406      |
+|    learning_rate        | 8.88e-05   |
+|    loss                 | 10.5       |
+|    n_updates            | 640        |
+|    policy_gradient_loss | -0.0258    |
+|    value_loss           | 15         |
+----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 1.86e+03   |
+|    ep_rew_mean          | -93.7      |
+| time/                   |            |
+|    fps                  | 361        |
+|    iterations           | 66         |
+|    time_elapsed         | 209        |
+|    total_timesteps      | 75636      |
+| train/                  |            |
+|    approx_kl            | 0.08828102 |
+|    clip_fraction        | 0.324      |
+|    clip_range           | 0.332      |
+|    entropy_loss         | -6.98      |
+|    explained_variance   | 0.132      |
+|    learning_rate        | 8.88e-05   |
+|    loss                 | 3          |
+|    n_updates            | 650        |
+|    policy_gradient_loss | -0.0222    |
+|    value_loss           | 37.7       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 1.85e+03    |
+|    ep_rew_mean          | -95.9       |
+| time/                   |             |
+|    fps                  | 360         |
+|    iterations           | 67          |
+|    time_elapsed         | 213         |
+|    total_timesteps      | 76782       |
+| train/                  |             |
+|    approx_kl            | 0.042576507 |
+|    clip_fraction        | 0.216       |
+|    clip_range           | 0.332       |
+|    entropy_loss         | -6.86       |
+|    explained_variance   | 0.0496      |
+|    learning_rate        | 8.88e-05    |
+|    loss                 | 6.04        |
+|    n_updates            | 660         |
+|    policy_gradient_loss | -0.0288     |
+|    value_loss           | 8.43        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 1.85e+03   |
+|    ep_rew_mean          | -91.6      |
+| time/                   |            |
+|    fps                  | 359        |
+|    iterations           | 68         |
+|    time_elapsed         | 216        |
+|    total_timesteps      | 77928      |
+| train/                  |            |
+|    approx_kl            | 0.04540308 |
+|    clip_fraction        | 0.229      |
+|    clip_range           | 0.332      |
+|    entropy_loss         | -7.07      |
+|    explained_variance   | 0.249      |
+|    learning_rate        | 8.88e-05   |
+|    loss                 | 5.47       |
+|    n_updates            | 670        |
+|    policy_gradient_loss | -0.0288    |
+|    value_loss           | 16         |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 1.85e+03    |
+|    ep_rew_mean          | -91.6       |
+| time/                   |             |
+|    fps                  | 359         |
+|    iterations           | 69          |
+|    time_elapsed         | 219         |
+|    total_timesteps      | 79074       |
+| train/                  |             |
+|    approx_kl            | 0.040512584 |
+|    clip_fraction        | 0.211       |
+|    clip_range           | 0.332       |
+|    entropy_loss         | -7.02       |
+|    explained_variance   | -0.00908    |
+|    learning_rate        | 8.88e-05    |
+|    loss                 | 1.86        |
+|    n_updates            | 680         |
+|    policy_gradient_loss | -0.0131     |
+|    value_loss           | 13.4        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 1.85e+03   |
+|    ep_rew_mean          | -91.6      |
+| time/                   |            |
+|    fps                  | 359        |
+|    iterations           | 70         |
+|    time_elapsed         | 223        |
+|    total_timesteps      | 80220      |
+| train/                  |            |
+|    approx_kl            | 0.05254833 |
+|    clip_fraction        | 0.188      |
+|    clip_range           | 0.332      |
+|    entropy_loss         | -7.14      |
+|    explained_variance   | 0.295      |
+|    learning_rate        | 8.88e-05   |
+|    loss                 | 0.321      |
+|    n_updates            | 690        |
+|    policy_gradient_loss | -0.0257    |
+|    value_loss           | 8.67       |
+----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 1.88e+03   |
+|    ep_rew_mean          | -94        |
+| time/                   |            |
+|    fps                  | 358        |
+|    iterations           | 71         |
+|    time_elapsed         | 226        |
+|    total_timesteps      | 81366      |
+| train/                  |            |
+|    approx_kl            | 0.04479891 |
+|    clip_fraction        | 0.224      |
+|    clip_range           | 0.332      |
+|    entropy_loss         | -7.18      |
+|    explained_variance   | -0.0191    |
+|    learning_rate        | 8.88e-05   |
+|    loss                 | 0.529      |
+|    n_updates            | 700        |
+|    policy_gradient_loss | -0.0383    |
+|    value_loss           | 1.7        |
+----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 1.88e+03   |
+|    ep_rew_mean          | -94        |
+| time/                   |            |
+|    fps                  | 358        |
+|    iterations           | 72         |
+|    time_elapsed         | 230        |
+|    total_timesteps      | 82512      |
+| train/                  |            |
+|    approx_kl            | 0.05337418 |
+|    clip_fraction        | 0.207      |
+|    clip_range           | 0.332      |
+|    entropy_loss         | -7.21      |
+|    explained_variance   | 0.168      |
+|    learning_rate        | 8.88e-05   |
+|    loss                 | 0.286      |
+|    n_updates            | 710        |
+|    policy_gradient_loss | -0.034     |
+|    value_loss           | 10.8       |
+----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 1.9e+03    |
+|    ep_rew_mean          | -95.4      |
+| time/                   |            |
+|    fps                  | 358        |
+|    iterations           | 73         |
+|    time_elapsed         | 233        |
+|    total_timesteps      | 83658      |
+| train/                  |            |
+|    approx_kl            | 0.03758472 |
+|    clip_fraction        | 0.184      |
+|    clip_range           | 0.332      |
+|    entropy_loss         | -7.42      |
+|    explained_variance   | -0.0312    |
+|    learning_rate        | 8.88e-05   |
+|    loss                 | 0.699      |
+|    n_updates            | 720        |
+|    policy_gradient_loss | -0.0247    |
+|    value_loss           | 6.12       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 1.9e+03     |
+|    ep_rew_mean          | -95.4       |
+| time/                   |             |
+|    fps                  | 358         |
+|    iterations           | 74          |
+|    time_elapsed         | 236         |
+|    total_timesteps      | 84804       |
+| train/                  |             |
+|    approx_kl            | 0.058067992 |
+|    clip_fraction        | 0.221       |
+|    clip_range           | 0.332       |
+|    entropy_loss         | -7.14       |
+|    explained_variance   | 0.0152      |
+|    learning_rate        | 8.88e-05    |
+|    loss                 | 0.904       |
+|    n_updates            | 730         |
+|    policy_gradient_loss | -0.0133     |
+|    value_loss           | 13.2        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 1.91e+03    |
+|    ep_rew_mean          | -97.7       |
+| time/                   |             |
+|    fps                  | 358         |
+|    iterations           | 75          |
+|    time_elapsed         | 239         |
+|    total_timesteps      | 85950       |
+| train/                  |             |
+|    approx_kl            | 0.035781853 |
+|    clip_fraction        | 0.175       |
+|    clip_range           | 0.332       |
+|    entropy_loss         | -7.28       |
+|    explained_variance   | 0.0129      |
+|    learning_rate        | 8.88e-05    |
+|    loss                 | 0.581       |
+|    n_updates            | 740         |
+|    policy_gradient_loss | -0.019      |
+|    value_loss           | 4.61        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 1.91e+03    |
+|    ep_rew_mean          | -97.7       |
+| time/                   |             |
+|    fps                  | 358         |
+|    iterations           | 76          |
+|    time_elapsed         | 242         |
+|    total_timesteps      | 87096       |
+| train/                  |             |
+|    approx_kl            | 0.062611975 |
+|    clip_fraction        | 0.322       |
+|    clip_range           | 0.332       |
+|    entropy_loss         | -7.08       |
+|    explained_variance   | 0.06        |
+|    learning_rate        | 8.88e-05    |
+|    loss                 | 1.4         |
+|    n_updates            | 750         |
+|    policy_gradient_loss | -0.0256     |
+|    value_loss           | 22.8        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 1.91e+03   |
+|    ep_rew_mean          | -97.7      |
+| time/                   |            |
+|    fps                  | 358        |
+|    iterations           | 77         |
+|    time_elapsed         | 246        |
+|    total_timesteps      | 88242      |
+| train/                  |            |
+|    approx_kl            | 0.05377618 |
+|    clip_fraction        | 0.227      |
+|    clip_range           | 0.332      |
+|    entropy_loss         | -7.17      |
+|    explained_variance   | 0.19       |
+|    learning_rate        | 8.88e-05   |
+|    loss                 | 0.718      |
+|    n_updates            | 760        |
+|    policy_gradient_loss | -0.0249    |
+|    value_loss           | 8.35       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 1.92e+03    |
+|    ep_rew_mean          | -95.3       |
+| time/                   |             |
+|    fps                  | 358         |
+|    iterations           | 78          |
+|    time_elapsed         | 249         |
+|    total_timesteps      | 89388       |
+| train/                  |             |
+|    approx_kl            | 0.050721783 |
+|    clip_fraction        | 0.225       |
+|    clip_range           | 0.332       |
+|    entropy_loss         | -6.94       |
+|    explained_variance   | -0.0205     |
+|    learning_rate        | 8.88e-05    |
+|    loss                 | 0.33        |
+|    n_updates            | 770         |
+|    policy_gradient_loss | -0.0391     |
+|    value_loss           | 2.79        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 1.92e+03    |
+|    ep_rew_mean          | -97.8       |
+| time/                   |             |
+|    fps                  | 358         |
+|    iterations           | 79          |
+|    time_elapsed         | 252         |
+|    total_timesteps      | 90534       |
+| train/                  |             |
+|    approx_kl            | 0.046041932 |
+|    clip_fraction        | 0.215       |
+|    clip_range           | 0.332       |
+|    entropy_loss         | -7.21       |
+|    explained_variance   | -0.076      |
+|    learning_rate        | 8.88e-05    |
+|    loss                 | 2.6         |
+|    n_updates            | 780         |
+|    policy_gradient_loss | -0.0325     |
+|    value_loss           | 5.97        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 1.9e+03    |
+|    ep_rew_mean          | -90.9      |
+| time/                   |            |
+|    fps                  | 359        |
+|    iterations           | 80         |
+|    time_elapsed         | 255        |
+|    total_timesteps      | 91680      |
+| train/                  |            |
+|    approx_kl            | 0.07306535 |
+|    clip_fraction        | 0.309      |
+|    clip_range           | 0.332      |
+|    entropy_loss         | -7.15      |
+|    explained_variance   | 0.0302     |
+|    learning_rate        | 8.88e-05   |
+|    loss                 | 1.4        |
+|    n_updates            | 790        |
+|    policy_gradient_loss | -0.0324    |
+|    value_loss           | 18.2       |
+----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 1.9e+03    |
+|    ep_rew_mean          | -90.9      |
+| time/                   |            |
+|    fps                  | 359        |
+|    iterations           | 81         |
+|    time_elapsed         | 258        |
+|    total_timesteps      | 92826      |
+| train/                  |            |
+|    approx_kl            | 0.06419113 |
+|    clip_fraction        | 0.28       |
+|    clip_range           | 0.332      |
+|    entropy_loss         | -6.92      |
+|    explained_variance   | -0.326     |
+|    learning_rate        | 8.88e-05   |
+|    loss                 | 1.45       |
+|    n_updates            | 800        |
+|    policy_gradient_loss | 0.00325    |
+|    value_loss           | 24.5       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 1.9e+03     |
+|    ep_rew_mean          | -90.9       |
+| time/                   |             |
+|    fps                  | 359         |
+|    iterations           | 82          |
+|    time_elapsed         | 261         |
+|    total_timesteps      | 93972       |
+| train/                  |             |
+|    approx_kl            | 0.047866795 |
+|    clip_fraction        | 0.237       |
+|    clip_range           | 0.332       |
+|    entropy_loss         | -7.02       |
+|    explained_variance   | 0.135       |
+|    learning_rate        | 8.88e-05    |
+|    loss                 | 0.433       |
+|    n_updates            | 810         |
+|    policy_gradient_loss | -0.0285     |
+|    value_loss           | 5.26        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 1.92e+03    |
+|    ep_rew_mean          | -88.8       |
+| time/                   |             |
+|    fps                  | 359         |
+|    iterations           | 83          |
+|    time_elapsed         | 264         |
+|    total_timesteps      | 95118       |
+| train/                  |             |
+|    approx_kl            | 0.066993006 |
+|    clip_fraction        | 0.28        |
+|    clip_range           | 0.332       |
+|    entropy_loss         | -7.12       |
+|    explained_variance   | -0.127      |
+|    learning_rate        | 8.88e-05    |
+|    loss                 | 4.38        |
+|    n_updates            | 820         |
+|    policy_gradient_loss | -0.0312     |
+|    value_loss           | 9.15        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 1.91e+03   |
+|    ep_rew_mean          | -91.8      |
+| time/                   |            |
+|    fps                  | 359        |
+|    iterations           | 84         |
+|    time_elapsed         | 267        |
+|    total_timesteps      | 96264      |
+| train/                  |            |
+|    approx_kl            | 0.05563952 |
+|    clip_fraction        | 0.247      |
+|    clip_range           | 0.332      |
+|    entropy_loss         | -7.08      |
+|    explained_variance   | 0.154      |
+|    learning_rate        | 8.88e-05   |
+|    loss                 | 0.568      |
+|    n_updates            | 830        |
+|    policy_gradient_loss | -0.0269    |
+|    value_loss           | 8.24       |
+----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 1.91e+03   |
+|    ep_rew_mean          | -91.8      |
+| time/                   |            |
+|    fps                  | 359        |
+|    iterations           | 85         |
+|    time_elapsed         | 271        |
+|    total_timesteps      | 97410      |
+| train/                  |            |
+|    approx_kl            | 0.05921689 |
+|    clip_fraction        | 0.293      |
+|    clip_range           | 0.332      |
+|    entropy_loss         | -6.99      |
+|    explained_variance   | 0.0197     |
+|    learning_rate        | 8.88e-05   |
+|    loss                 | 1.75       |
+|    n_updates            | 840        |
+|    policy_gradient_loss | -0.0203    |
+|    value_loss           | 33         |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 1.92e+03    |
+|    ep_rew_mean          | -93.6       |
+| time/                   |             |
+|    fps                  | 359         |
+|    iterations           | 86          |
+|    time_elapsed         | 274         |
+|    total_timesteps      | 98556       |
+| train/                  |             |
+|    approx_kl            | 0.048403326 |
+|    clip_fraction        | 0.219       |
+|    clip_range           | 0.332       |
+|    entropy_loss         | -6.95       |
+|    explained_variance   | 0.00879     |
+|    learning_rate        | 8.88e-05    |
+|    loss                 | 0.85        |
+|    n_updates            | 850         |
+|    policy_gradient_loss | -0.0288     |
+|    value_loss           | 6.54        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 1.92e+03   |
+|    ep_rew_mean          | -93.6      |
+| time/                   |            |
+|    fps                  | 358        |
+|    iterations           | 87         |
+|    time_elapsed         | 277        |
+|    total_timesteps      | 99702      |
+| train/                  |            |
+|    approx_kl            | 0.04475287 |
+|    clip_fraction        | 0.178      |
+|    clip_range           | 0.332      |
+|    entropy_loss         | -7.01      |
+|    explained_variance   | 0.204      |
+|    learning_rate        | 8.88e-05   |
+|    loss                 | 0.859      |
+|    n_updates            | 860        |
+|    policy_gradient_loss | -0.0151    |
+|    value_loss           | 17.5       |
+----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 1.93e+03   |
+|    ep_rew_mean          | -93.8      |
+| time/                   |            |
+|    fps                  | 358        |
+|    iterations           | 88         |
+|    time_elapsed         | 280        |
+|    total_timesteps      | 100848     |
+| train/                  |            |
+|    approx_kl            | 0.06419406 |
+|    clip_fraction        | 0.244      |
+|    clip_range           | 0.332      |
+|    entropy_loss         | -7         |
+|    explained_variance   | -0.22      |
+|    learning_rate        | 8.88e-05   |
+|    loss                 | 0.243      |
+|    n_updates            | 870        |
+|    policy_gradient_loss | -0.025     |
+|    value_loss           | 1.64       |
+----------------------------------------
+[I 2023-03-30 22:24:11,258] Trial 8 finished with value: -343.0 and parameters: {'n_steps': 1146, 'gamma': 0.9192032939378013, 'learning_rate': 8.879233904874816e-05, 'clip_range': 0.3323467236435492, 'gae_lambda': 0.8683502580240515}. Best is trial 6 with value: -1.0.
+Using cuda device
+Wrapping the env in a DummyVecEnv.
+Wrapping the env in a VecTransposeImage.
+C:\ProgramData\Anaconda3\envs\StreetFighterAI\lib\site-packages\stable_baselines3\ppo\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 3539`, after every 55 untruncated mini-batches, there will be a truncated mini-batch of size 19
+We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
+Info: (n_steps=3539 and n_envs=1)
+  warnings.warn(
+Logging to logs/PPO_27
+---------------------------------
+| rollout/           |          |
+|    ep_len_mean     | 2.15e+03 |
+|    ep_rew_mean     | -50      |
+| time/              |          |
+|    fps             | 587      |
+|    iterations      | 1        |
+|    time_elapsed    | 6        |
+|    total_timesteps | 3539     |
+---------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.52e+03    |
+|    ep_rew_mean          | -45         |
+| time/                   |             |
+|    fps                  | 444         |
+|    iterations           | 2           |
+|    time_elapsed         | 15          |
+|    total_timesteps      | 7078        |
+| train/                  |             |
+|    approx_kl            | 0.006164868 |
+|    clip_fraction        | 0.12        |
+|    clip_range           | 0.155       |
+|    entropy_loss         | -8.31       |
+|    explained_variance   | -0.00174    |
+|    learning_rate        | 5.95e-05    |
+|    loss                 | 0.546       |
+|    n_updates            | 10          |
+|    policy_gradient_loss | -0.00714    |
+|    value_loss           | 8.14        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.56e+03   |
+|    ep_rew_mean          | -109       |
+| time/                   |            |
+|    fps                  | 404        |
+|    iterations           | 3          |
+|    time_elapsed         | 26         |
+|    total_timesteps      | 10617      |
+| train/                  |            |
+|    approx_kl            | 0.00478289 |
+|    clip_fraction        | 0.0976     |
+|    clip_range           | 0.155      |
+|    entropy_loss         | -8.31      |
+|    explained_variance   | 0.014      |
+|    learning_rate        | 5.95e-05   |
+|    loss                 | 0.361      |
+|    n_updates            | 20         |
+|    policy_gradient_loss | -0.00716   |
+|    value_loss           | 6.41       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.6e+03     |
+|    ep_rew_mean          | -129        |
+| time/                   |             |
+|    fps                  | 385         |
+|    iterations           | 4           |
+|    time_elapsed         | 36          |
+|    total_timesteps      | 14156       |
+| train/                  |             |
+|    approx_kl            | 0.007813611 |
+|    clip_fraction        | 0.146       |
+|    clip_range           | 0.155       |
+|    entropy_loss         | -8.3        |
+|    explained_variance   | 0.0131      |
+|    learning_rate        | 5.95e-05    |
+|    loss                 | 2.84        |
+|    n_updates            | 30          |
+|    policy_gradient_loss | -0.00689    |
+|    value_loss           | 14          |
+-----------------------------------------
+------------------------------------------
+| rollout/                |              |
+|    ep_len_mean          | 2.52e+03     |
+|    ep_rew_mean          | -111         |
+| time/                   |              |
+|    fps                  | 377          |
+|    iterations           | 5            |
+|    time_elapsed         | 46           |
+|    total_timesteps      | 17695        |
+| train/                  |              |
+|    approx_kl            | 0.0056522703 |
+|    clip_fraction        | 0.0913       |
+|    clip_range           | 0.155        |
+|    entropy_loss         | -8.3         |
+|    explained_variance   | 0.0146       |
+|    learning_rate        | 5.95e-05     |
+|    loss                 | 0.276        |
+|    n_updates            | 40           |
+|    policy_gradient_loss | -0.00467     |
+|    value_loss           | 13.4         |
+------------------------------------------
+------------------------------------------
+| rollout/                |              |
+|    ep_len_mean          | 2.47e+03     |
+|    ep_rew_mean          | -126         |
+| time/                   |              |
+|    fps                  | 373          |
+|    iterations           | 6            |
+|    time_elapsed         | 56           |
+|    total_timesteps      | 21234        |
+| train/                  |              |
+|    approx_kl            | 0.0062621506 |
+|    clip_fraction        | 0.123        |
+|    clip_range           | 0.155        |
+|    entropy_loss         | -8.29        |
+|    explained_variance   | 0.0869       |
+|    learning_rate        | 5.95e-05     |
+|    loss                 | 18.6         |
+|    n_updates            | 50           |
+|    policy_gradient_loss | -0.00766     |
+|    value_loss           | 7.72         |
+------------------------------------------
+------------------------------------------
+| rollout/                |              |
+|    ep_len_mean          | 2.34e+03     |
+|    ep_rew_mean          | -129         |
+| time/                   |              |
+|    fps                  | 371          |
+|    iterations           | 7            |
+|    time_elapsed         | 66           |
+|    total_timesteps      | 24773        |
+| train/                  |              |
+|    approx_kl            | 0.0071724947 |
+|    clip_fraction        | 0.159        |
+|    clip_range           | 0.155        |
+|    entropy_loss         | -8.28        |
+|    explained_variance   | 0.176        |
+|    learning_rate        | 5.95e-05     |
+|    loss                 | 0.872        |
+|    n_updates            | 60           |
+|    policy_gradient_loss | -0.00702     |
+|    value_loss           | 11.1         |
+------------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.35e+03    |
+|    ep_rew_mean          | -106        |
+| time/                   |             |
+|    fps                  | 369         |
+|    iterations           | 8           |
+|    time_elapsed         | 76          |
+|    total_timesteps      | 28312       |
+| train/                  |             |
+|    approx_kl            | 0.007235888 |
+|    clip_fraction        | 0.147       |
+|    clip_range           | 0.155       |
+|    entropy_loss         | -8.28       |
+|    explained_variance   | 0.0603      |
+|    learning_rate        | 5.95e-05    |
+|    loss                 | 0.535       |
+|    n_updates            | 70          |
+|    policy_gradient_loss | -0.00766    |
+|    value_loss           | 20.2        |
+-----------------------------------------
+------------------------------------------
+| rollout/                |              |
+|    ep_len_mean          | 2.41e+03     |
+|    ep_rew_mean          | -113         |
+| time/                   |              |
+|    fps                  | 365          |
+|    iterations           | 9            |
+|    time_elapsed         | 87           |
+|    total_timesteps      | 31851        |
+| train/                  |              |
+|    approx_kl            | 0.0057272953 |
+|    clip_fraction        | 0.133        |
+|    clip_range           | 0.155        |
+|    entropy_loss         | -8.27        |
+|    explained_variance   | 0.0634       |
+|    learning_rate        | 5.95e-05     |
+|    loss                 | 0.31         |
+|    n_updates            | 80           |
+|    policy_gradient_loss | -0.00732     |
+|    value_loss           | 7.16         |
+------------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.72e+03    |
+|    ep_rew_mean          | 198         |
+| time/                   |             |
+|    fps                  | 362         |
+|    iterations           | 10          |
+|    time_elapsed         | 97          |
+|    total_timesteps      | 35390       |
+| train/                  |             |
+|    approx_kl            | 0.006537366 |
+|    clip_fraction        | 0.154       |
+|    clip_range           | 0.155       |
+|    entropy_loss         | -8.26       |
+|    explained_variance   | -0.00102    |
+|    learning_rate        | 5.95e-05    |
+|    loss                 | 9.6         |
+|    n_updates            | 90          |
+|    policy_gradient_loss | -0.0035     |
+|    value_loss           | 5.98e+03    |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.66e+03    |
+|    ep_rew_mean          | 176         |
+| time/                   |             |
+|    fps                  | 361         |
+|    iterations           | 11          |
+|    time_elapsed         | 107         |
+|    total_timesteps      | 38929       |
+| train/                  |             |
+|    approx_kl            | 0.007428738 |
+|    clip_fraction        | 0.147       |
+|    clip_range           | 0.155       |
+|    entropy_loss         | -8.25       |
+|    explained_variance   | 0.275       |
+|    learning_rate        | 5.95e-05    |
+|    loss                 | 0.198       |
+|    n_updates            | 100         |
+|    policy_gradient_loss | -0.0124     |
+|    value_loss           | 4.22        |
+-----------------------------------------
+------------------------------------------
+| rollout/                |              |
+|    ep_len_mean          | 2.59e+03     |
+|    ep_rew_mean          | 148          |
+| time/                   |              |
+|    fps                  | 361          |
+|    iterations           | 12           |
+|    time_elapsed         | 117          |
+|    total_timesteps      | 42468        |
+| train/                  |              |
+|    approx_kl            | 0.0071307733 |
+|    clip_fraction        | 0.139        |
+|    clip_range           | 0.155        |
+|    entropy_loss         | -8.25        |
+|    explained_variance   | 0.0965       |
+|    learning_rate        | 5.95e-05     |
+|    loss                 | 0.68         |
+|    n_updates            | 110          |
+|    policy_gradient_loss | -0.0106      |
+|    value_loss           | 8.22         |
+------------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.55e+03    |
+|    ep_rew_mean          | 114         |
+| time/                   |             |
+|    fps                  | 360         |
+|    iterations           | 13          |
+|    time_elapsed         | 127         |
+|    total_timesteps      | 46007       |
+| train/                  |             |
+|    approx_kl            | 0.007813596 |
+|    clip_fraction        | 0.141       |
+|    clip_range           | 0.155       |
+|    entropy_loss         | -8.24       |
+|    explained_variance   | 0.0396      |
+|    learning_rate        | 5.95e-05    |
+|    loss                 | 0.861       |
+|    n_updates            | 120         |
+|    policy_gradient_loss | -0.00828    |
+|    value_loss           | 26.4        |
+-----------------------------------------
+------------------------------------------
+| rollout/                |              |
+|    ep_len_mean          | 2.49e+03     |
+|    ep_rew_mean          | 89.8         |
+| time/                   |              |
+|    fps                  | 358          |
+|    iterations           | 14           |
+|    time_elapsed         | 138          |
+|    total_timesteps      | 49546        |
+| train/                  |              |
+|    approx_kl            | 0.0077910186 |
+|    clip_fraction        | 0.148        |
+|    clip_range           | 0.155        |
+|    entropy_loss         | -8.23        |
+|    explained_variance   | 0.0748       |
+|    learning_rate        | 5.95e-05     |
+|    loss                 | 1.18         |
+|    n_updates            | 130          |
+|    policy_gradient_loss | -0.00939     |
+|    value_loss           | 16.7         |
+------------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.5e+03     |
+|    ep_rew_mean          | 74          |
+| time/                   |             |
+|    fps                  | 356         |
+|    iterations           | 15          |
+|    time_elapsed         | 149         |
+|    total_timesteps      | 53085       |
+| train/                  |             |
+|    approx_kl            | 0.009532078 |
+|    clip_fraction        | 0.184       |
+|    clip_range           | 0.155       |
+|    entropy_loss         | -8.23       |
+|    explained_variance   | 0.191       |
+|    learning_rate        | 5.95e-05    |
+|    loss                 | 1.42        |
+|    n_updates            | 140         |
+|    policy_gradient_loss | -0.011      |
+|    value_loss           | 20.1        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.52e+03    |
+|    ep_rew_mean          | 61.8        |
+| time/                   |             |
+|    fps                  | 353         |
+|    iterations           | 16          |
+|    time_elapsed         | 160         |
+|    total_timesteps      | 56624       |
+| train/                  |             |
+|    approx_kl            | 0.008085081 |
+|    clip_fraction        | 0.178       |
+|    clip_range           | 0.155       |
+|    entropy_loss         | -8.23       |
+|    explained_variance   | 0.185       |
+|    learning_rate        | 5.95e-05    |
+|    loss                 | 0.266       |
+|    n_updates            | 150         |
+|    policy_gradient_loss | -0.0106     |
+|    value_loss           | 10.2        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.54e+03    |
+|    ep_rew_mean          | 61.8        |
+| time/                   |             |
+|    fps                  | 354         |
+|    iterations           | 17          |
+|    time_elapsed         | 169         |
+|    total_timesteps      | 60163       |
+| train/                  |             |
+|    approx_kl            | 0.008233994 |
+|    clip_fraction        | 0.181       |
+|    clip_range           | 0.155       |
+|    entropy_loss         | -8.22       |
+|    explained_variance   | 0.143       |
+|    learning_rate        | 5.95e-05    |
+|    loss                 | 0.381       |
+|    n_updates            | 160         |
+|    policy_gradient_loss | -0.0085     |
+|    value_loss           | 6.92        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.53e+03   |
+|    ep_rew_mean          | 58.3       |
+| time/                   |            |
+|    fps                  | 354        |
+|    iterations           | 18         |
+|    time_elapsed         | 179        |
+|    total_timesteps      | 63702      |
+| train/                  |            |
+|    approx_kl            | 0.00866387 |
+|    clip_fraction        | 0.173      |
+|    clip_range           | 0.155      |
+|    entropy_loss         | -8.21      |
+|    explained_variance   | 0.21       |
+|    learning_rate        | 5.95e-05   |
+|    loss                 | 0.428      |
+|    n_updates            | 170        |
+|    policy_gradient_loss | -0.0136    |
+|    value_loss           | 4.9        |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.58e+03    |
+|    ep_rew_mean          | 54.2        |
+| time/                   |             |
+|    fps                  | 354         |
+|    iterations           | 19          |
+|    time_elapsed         | 189         |
+|    total_timesteps      | 67241       |
+| train/                  |             |
+|    approx_kl            | 0.008494033 |
+|    clip_fraction        | 0.195       |
+|    clip_range           | 0.155       |
+|    entropy_loss         | -8.2        |
+|    explained_variance   | 0.0444      |
+|    learning_rate        | 5.95e-05    |
+|    loss                 | 24.3        |
+|    n_updates            | 180         |
+|    policy_gradient_loss | -0.00716    |
+|    value_loss           | 16.3        |
+-----------------------------------------
+----------------------------------------
+| rollout/                |            |
+|    ep_len_mean          | 2.6e+03    |
+|    ep_rew_mean          | 44.4       |
+| time/                   |            |
+|    fps                  | 353        |
+|    iterations           | 20         |
+|    time_elapsed         | 200        |
+|    total_timesteps      | 70780      |
+| train/                  |            |
+|    approx_kl            | 0.00970717 |
+|    clip_fraction        | 0.195      |
+|    clip_range           | 0.155      |
+|    entropy_loss         | -8.17      |
+|    explained_variance   | 0.282      |
+|    learning_rate        | 5.95e-05   |
+|    loss                 | 0.367      |
+|    n_updates            | 190        |
+|    policy_gradient_loss | -0.0132    |
+|    value_loss           | 4.43       |
+----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.58e+03    |
+|    ep_rew_mean          | 39.4        |
+| time/                   |             |
+|    fps                  | 352         |
+|    iterations           | 21          |
+|    time_elapsed         | 211         |
+|    total_timesteps      | 74319       |
+| train/                  |             |
+|    approx_kl            | 0.008659723 |
+|    clip_fraction        | 0.206       |
+|    clip_range           | 0.155       |
+|    entropy_loss         | -8.18       |
+|    explained_variance   | 0.214       |
+|    learning_rate        | 5.95e-05    |
+|    loss                 | 0.268       |
+|    n_updates            | 200         |
+|    policy_gradient_loss | -0.014      |
+|    value_loss           | 6.89        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.59e+03    |
+|    ep_rew_mean          | 39.8        |
+| time/                   |             |
+|    fps                  | 352         |
+|    iterations           | 22          |
+|    time_elapsed         | 220         |
+|    total_timesteps      | 77858       |
+| train/                  |             |
+|    approx_kl            | 0.011562935 |
+|    clip_fraction        | 0.223       |
+|    clip_range           | 0.155       |
+|    entropy_loss         | -8.21       |
+|    explained_variance   | 0.16        |
+|    learning_rate        | 5.95e-05    |
+|    loss                 | 5.91        |
+|    n_updates            | 210         |
+|    policy_gradient_loss | -0.00776    |
+|    value_loss           | 8.23        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.58e+03    |
+|    ep_rew_mean          | 25.9        |
+| time/                   |             |
+|    fps                  | 352         |
+|    iterations           | 23          |
+|    time_elapsed         | 230         |
+|    total_timesteps      | 81397       |
+| train/                  |             |
+|    approx_kl            | 0.011082681 |
+|    clip_fraction        | 0.22        |
+|    clip_range           | 0.155       |
+|    entropy_loss         | -8.15       |
+|    explained_variance   | 0.438       |
+|    learning_rate        | 5.95e-05    |
+|    loss                 | 0.259       |
+|    n_updates            | 220         |
+|    policy_gradient_loss | -0.017      |
+|    value_loss           | 3.91        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.58e+03    |
+|    ep_rew_mean          | 23.9        |
+| time/                   |             |
+|    fps                  | 353         |
+|    iterations           | 24          |
+|    time_elapsed         | 240         |
+|    total_timesteps      | 84936       |
+| train/                  |             |
+|    approx_kl            | 0.010984284 |
+|    clip_fraction        | 0.237       |
+|    clip_range           | 0.155       |
+|    entropy_loss         | -8.15       |
+|    explained_variance   | 0.0983      |
+|    learning_rate        | 5.95e-05    |
+|    loss                 | 4.34        |
+|    n_updates            | 230         |
+|    policy_gradient_loss | -0.0112     |
+|    value_loss           | 14.1        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.59e+03    |
+|    ep_rew_mean          | 18.8        |
+| time/                   |             |
+|    fps                  | 353         |
+|    iterations           | 25          |
+|    time_elapsed         | 250         |
+|    total_timesteps      | 88475       |
+| train/                  |             |
+|    approx_kl            | 0.012365894 |
+|    clip_fraction        | 0.254       |
+|    clip_range           | 0.155       |
+|    entropy_loss         | -8.14       |
+|    explained_variance   | 0.179       |
+|    learning_rate        | 5.95e-05    |
+|    loss                 | 3.71        |
+|    n_updates            | 240         |
+|    policy_gradient_loss | -0.0117     |
+|    value_loss           | 7.45        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.56e+03    |
+|    ep_rew_mean          | 9.77        |
+| time/                   |             |
+|    fps                  | 352         |
+|    iterations           | 26          |
+|    time_elapsed         | 260         |
+|    total_timesteps      | 92014       |
+| train/                  |             |
+|    approx_kl            | 0.010001008 |
+|    clip_fraction        | 0.231       |
+|    clip_range           | 0.155       |
+|    entropy_loss         | -8.2        |
+|    explained_variance   | 0.146       |
+|    learning_rate        | 5.95e-05    |
+|    loss                 | 19          |
+|    n_updates            | 250         |
+|    policy_gradient_loss | -0.011      |
+|    value_loss           | 5.73        |
+-----------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.55e+03    |
+|    ep_rew_mean          | -3.51       |
+| time/                   |             |
+|    fps                  | 351         |
+|    iterations           | 27          |
+|    time_elapsed         | 271         |
+|    total_timesteps      | 95553       |
+| train/                  |             |
+|    approx_kl            | 0.011729387 |
+|    clip_fraction        | 0.264       |
+|    clip_range           | 0.155       |
+|    entropy_loss         | -8.15       |
+|    explained_variance   | 0.0695      |
+|    learning_rate        | 5.95e-05    |
+|    loss                 | 0.242       |
+|    n_updates            | 260         |
+|    policy_gradient_loss | -0.0102     |
+|    value_loss           | 16.5        |
+-----------------------------------------
+------------------------------------------
+| rollout/                |              |
+|    ep_len_mean          | 2.5e+03      |
+|    ep_rew_mean          | -3.13        |
+| time/                   |              |
+|    fps                  | 350          |
+|    iterations           | 28           |
+|    time_elapsed         | 282          |
+|    total_timesteps      | 99092        |
+| train/                  |              |
+|    approx_kl            | 0.0153510645 |
+|    clip_fraction        | 0.298        |
+|    clip_range           | 0.155        |
+|    entropy_loss         | -8.12        |
+|    explained_variance   | 0.0587       |
+|    learning_rate        | 5.95e-05     |
+|    loss                 | 3.32         |
+|    n_updates            | 270          |
+|    policy_gradient_loss | -0.00956     |
+|    value_loss           | 15.8         |
+------------------------------------------
+-----------------------------------------
+| rollout/                |             |
+|    ep_len_mean          | 2.49e+03    |
+|    ep_rew_mean          | -3.24       |
+| time/                   |             |
+|    fps                  | 350         |
+|    iterations           | 29          |
+|    time_elapsed         | 292         |
+|    total_timesteps      | 102631      |
+| train/                  |             |
+|    approx_kl            | 0.018420441 |
+|    clip_fraction        | 0.324       |
+|    clip_range           | 0.155       |
+|    entropy_loss         | -8.1        |
+|    explained_variance   | -0.00146    |
+|    learning_rate        | 5.95e-05    |
+|    loss                 | 0.384       |
+|    n_updates            | 280         |
+|    policy_gradient_loss | -0.00777    |
+|    value_loss           | 13.8        |
+-----------------------------------------
+[I 2023-03-30 22:29:37,908] Trial 9 finished with value: -345.0 and parameters: {'n_steps': 3539, 'gamma': 0.9483166689072441, 'learning_rate': 5.947863028406936e-05, 'clip_range': 0.15487331840468324, 'gae_lambda': 0.8132195074364921}. Best is trial 6 with value: -1.0.
+{'n_steps': 4033, 'gamma': 0.9021075358644198, 'learning_rate': 9.226619222512819e-05, 'clip_range': 0.2947845259713344, 'gae_lambda': 0.9634981775867985}
+FrozenTrial(number=6, state=TrialState.COMPLETE, values=[-1.0], datetime_start=datetime.datetime(2023, 3, 30, 22, 13, 4, 197753), datetime_complete=datetime.datetime(2023, 3, 30, 22, 13, 11, 640374), params={'n_steps': 4033, 'gamma': 0.9021075358644198, 'learning_rate': 9.226619222512819e-05, 'clip_range': 0.2947845259713344, 'gae_lambda': 0.9634981775867985}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'n_steps': IntDistribution(high=8192, log=True, low=1024, step=1), 'gamma': FloatDistribution(high=0.9999, log=False, low=0.9, step=None), 'learning_rate': FloatDistribution(high=0.0001, log=True, low=5e-05, step=None), 'clip_range': FloatDistribution(high=0.4, log=False, low=0.1, step=None), 'gae_lambda': FloatDistribution(high=0.99, log=False, low=0.8, step=None)}, trial_id=6, value=None)
\ No newline at end of file
diff --git a/000_image_stack_ram_based_reward/rmsprop_optim.py b/000_image_stack_ram_based_reward/rmsprop_optim.py
new file mode 100644
index 0000000..788f675
--- /dev/null
+++ b/000_image_stack_ram_based_reward/rmsprop_optim.py
@@ -0,0 +1,93 @@
+import torch
+from torch.optim import Optimizer
+
+class RMSpropTF(Optimizer):
+    """RMSprop variant that follows TensorFlow's order of operations.
+
+    The running squared average starts at ones and ``eps`` is added inside the sqrt.
+    """
+
+    def __init__(self, params, lr=1e-2, alpha=0.9, eps=1e-10,
+                 weight_decay=0, momentum=0., centered=False,
+                 decoupled_decay=False, lr_in_momentum=True):
+        """Validate hyper-parameters (ValueError if negative) and store the defaults."""
+        checks = (("learning rate", lr), ("epsilon value", eps),
+                  ("momentum value", momentum),
+                  ("weight_decay value", weight_decay), ("alpha value", alpha))
+        for label, value in checks:
+            if not 0.0 <= value:
+                raise ValueError("Invalid {}: {}".format(label, value))
+        defaults = dict(lr=lr, momentum=momentum, alpha=alpha, eps=eps,
+                        centered=centered, weight_decay=weight_decay,
+                        decoupled_decay=decoupled_decay, lr_in_momentum=lr_in_momentum)
+        super(RMSpropTF, self).__init__(params, defaults)
+
+    def __setstate__(self, state):
+        """Restore state, filling in options that an older or foreign state lacks."""
+        super(RMSpropTF, self).__setstate__(state)
+        for group in self.param_groups:
+            group.setdefault('momentum', 0)
+            group.setdefault('centered', False)
+            # step() indexes these keys directly, so they must always exist.
+            group.setdefault('decoupled_decay', False)
+            group.setdefault('lr_in_momentum', True)
+
+    @torch.no_grad()
+    def step(self, closure=None):
+        """Perform a single optimization step.
+
+        Arguments:
+            closure (callable, optional): re-evaluates the model and returns
+                the loss; it is run with gradients enabled.
+        """
+        loss = None
+        if closure is not None:
+            with torch.enable_grad():
+                loss = closure()
+        for group in self.param_groups:
+            lr, momentum, eps = group['lr'], group['momentum'], group['eps']
+            one_minus_alpha = 1. - group['alpha']
+            for p in group['params']:
+                if p.grad is None:
+                    continue
+                grad = p.grad
+                if grad.is_sparse:
+                    raise RuntimeError('RMSprop does not support sparse gradients')
+                state = self.state[p]
+                if len(state) == 0:  # lazy per-parameter state initialization
+                    state['step'] = 0
+                    state['square_avg'] = torch.ones_like(p)  # PyTorch inits to zero
+                    if momentum > 0:
+                        state['momentum_buffer'] = torch.zeros_like(p)
+                    if group['centered']:
+                        state['grad_avg'] = torch.zeros_like(p)
+                square_avg = state['square_avg']
+                state['step'] += 1
+                if group['weight_decay'] != 0:
+                    if group['decoupled_decay']:
+                        p.mul_(1. - lr * group['weight_decay'])
+                    else:
+                        grad = grad.add(p, alpha=group['weight_decay'])
+                # Tensorflow order of ops for updating squared avg
+                square_avg.add_(grad.pow(2) - square_avg, alpha=one_minus_alpha)
+                if group['centered']:
+                    grad_avg = state['grad_avg']
+                    grad_avg.add_(grad - grad_avg, alpha=one_minus_alpha)
+                    # Centered variant: subtract the squared mean gradient; eps in sqrt
+                    avg = square_avg.addcmul(grad_avg, grad_avg, value=-1).add(eps).sqrt_()
+                else:
+                    avg = square_avg.add(eps).sqrt_()  # eps moved in sqrt
+                if momentum > 0:
+                    buf = state['momentum_buffer']
+                    if group['lr_in_momentum']:
+                        # Tensorflow accumulates the LR scaling in the momentum buffer
+                        buf.mul_(momentum).addcdiv_(grad, avg, value=lr)
+                        p.add_(-buf)
+                    else:
+                        # PyTorch scales the param update by LR
+                        buf.mul_(momentum).addcdiv_(grad, avg)
+                        p.add_(buf, alpha=-lr)
+                else:
+                    p.addcdiv_(grad, avg, value=-lr)
+        return loss
+
diff --git a/000_image_stack_ram_based_reward/street_fighter_custom_wrapper.py b/000_image_stack_ram_based_reward/street_fighter_custom_wrapper.py
new file mode 100644
index 0000000..eafa231
--- /dev/null
+++ b/000_image_stack_ram_based_reward/street_fighter_custom_wrapper.py
@@ -0,0 +1,97 @@
+import collections
+
+import gym
+import cv2
+import numpy as np
+
+# Custom environment wrapper
+class StreetFighterCustomWrapper(gym.Wrapper):
+    """Stack grayscale 84x84 frames and compute the reward from health values.
+
+    Observations hold the last ``num_frames`` preprocessed frames on the final
+    axis. ``step`` reads ``info['health']`` and ``info['enemy_health']``; with
+    ``testing=True`` it always returns ``done=False``.
+    """
+    def __init__(self, env, testing=False):
+        super(StreetFighterCustomWrapper, self).__init__(env)
+        self.env = env
+
+        # Use a deque to store the last num_frames frames
+        self.num_frames = 3
+        self.frame_stack = collections.deque(maxlen=self.num_frames)
+
+        self.full_hp = 176
+        self.prev_player_health = self.full_hp
+        self.prev_oppont_health = self.full_hp
+
+        # Observation space: one channel per stacked grayscale frame
+        self.observation_space = gym.spaces.Box(
+            low=0, high=255, shape=(84, 84, self.num_frames), dtype=np.uint8)
+
+        self.testing = testing
+
+    def _to_gray_84(self, observation):
+        """Convert a colour frame to a single-channel 84x84 image."""
+        obs_gray = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY)
+        return cv2.resize(obs_gray, (84, 84), interpolation=cv2.INTER_AREA)
+
+    def _preprocess_observation(self, observation):
+        """Push the preprocessed frame onto the stack; return the stacked image."""
+        self.frame_stack.append(self._to_gray_84(observation))
+        return np.stack(self.frame_stack, axis=-1)
+
+    def reset(self):
+        """Reset the env and health trackers; fill the stack with the first frame."""
+        observation = self.env.reset()
+        self.prev_player_health = self.full_hp
+        self.prev_oppont_health = self.full_hp
+
+        # Clear the frame stack and add the first observation [num_frames] times
+        first_frame = self._to_gray_84(observation)
+        self.frame_stack.clear()
+        self.frame_stack.extend([first_frame] * self.num_frames)
+        return np.stack(self.frame_stack, axis=-1)
+
+    def step(self, action):
+        """Step the env; override reward/done from the health values in info."""
+        obs, reward, done, info = self.env.step(action)
+
+        # During fighting, either player or opponent has positive health points.
+        if info['health'] > 0 or info['enemy_health'] > 0:
+
+            # Player Loses
+            if info['health'] < 0 and info['enemy_health'] > 0:
+                reward = -info['enemy_health'] # Use the left over health points as penalty
+
+                # Prevent data overflow
+                if reward < -self.full_hp:
+                    reward = 0
+
+                done = True
+
+            # Player Wins
+            elif info['enemy_health'] < 0 and info['health'] > 0:
+                reward = info['health']
+
+                # Prevent data overflow
+                if reward > self.full_hp:
+                    reward = 0
+
+                done = True
+
+            # During Fighting
+            else:
+                reward = (self.prev_oppont_health - info['enemy_health']) - (self.prev_player_health - info['health'])
+
+                # Prevent data overflow
+                if reward > 99:
+                    reward = 0
+
+        self.prev_player_health = info['health']
+        self.prev_oppont_health = info['enemy_health']
+
+        if self.testing:
+            done = False
+
+        return self._preprocess_observation(obs), reward, done, info
+    
\ No newline at end of file
diff --git a/000_image_stack_ram_based_reward/street_fighter_notebook.ipynb b/000_image_stack_ram_based_reward/street_fighter_notebook.ipynb
new file mode 100644
index 0000000..ff092ed
--- /dev/null
+++ b/000_image_stack_ram_based_reward/street_fighter_notebook.ipynb
@@ -0,0 +1,314 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "bfc79b8c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import retro"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "c24fbcab",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "game = \"StreetFighterIISpecialChampionEdition-Genesis\"\n",
+    "state = \"Champion.Level1.ChunLiVsGuile\"\n",
+    "env = retro.make(game=game, state=state)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "59839d9c",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1], dtype=int8)"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "env.action_space.sample()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "e068cb0a",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(200, 256, 3)"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "env.observation_space.sample().shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "1cb0297f",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(200, 256, 3)\n",
+      "{'enemy_matches_won': 0, 'score': 0, 'matches_won': 0, 'continuetimer': 0, 'enemy_health': 176, 'health': 176}\n"
+     ]
+    }
+   ],
+   "source": [
+    "observation = env.reset()\n",
+    "print(observation.shape)\n",
+    "\n",
+    "action = env.action_space.sample()\n",
+    "obs, rewards, done, info = env.step(action)\n",
+    "print(info)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "0eaa5cc8",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "MultiBinary(12)\n"
+     ]
+    }
+   ],
+   "source": [
+    "from gym.spaces import Box, MultiBinary\n",
+    "\n",
+    "print(MultiBinary(12))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "id": "49f6cf5c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import cv2\n",
+    "\n",
+    "import gym\n",
+    "import numpy as np\n",
+    "from gym.spaces import Box, MultiBinary\n",
+    "\n",
+    "class StreetFighter(gym.Env):\n",
+    "    def __init__(self):\n",
+    "        super().__init__()\n",
+    "        self.observation_space = Box(low=0, high=255, shape=(84, 84), dtype=np.uint8)\n",
+    "        self.action_space = MultiBinary(12)\n",
+    "        self.game = retro.make(game=\"StreetFighterIISpecialChampionEdition-Genesis\", use_restricted_actions=retro.Actions.FILTERED)\n",
+    "        \n",
+    "        self.full_hp = 176\n",
+    "        self.player_health = self.full_hp\n",
+    "        self.oppont_health = self.full_hp\n",
+    "        \n",
+    "        self.score = 0\n",
+    "        \n",
+    "    def __preprocess(self, observation):\n",
+    "        gray = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY)\n",
+    "        resize = cv2.resize(gray, (84,84), interpolation=cv2.INTER_CUBIC)\n",
+    "        return resize\n",
+    "\n",
+    "    def step(self, action):\n",
+    "\n",
+    "        obs, reward, done, info = self.game.step(action)\n",
+    "        custom_obs = self.__preprocess(obs) # It's just frame, not frame_delta\n",
+    "\n",
+    "        # During fighting, either player or opponent has positive health points.\n",
+    "        if info['health'] > 0 or info['enemy_health'] > 0:\n",
+    "\n",
+    "            # Player Loses\n",
+    "            if info['health'] < 0 and info['health'] != self.player_health and info['enemy_health'] != 0:\n",
+    "                reward = (-self.full_hp) * info['enemy_health']\n",
+    "\n",
+    "            # Player Wins\n",
+    "            elif info['enemy_health'] < 0 and info['enemy_health'] != self.oppont_health and info['health'] != 0:\n",
+    "                reward = self.full_hp * info['health']\n",
+    "\n",
+    "            # During Fighting\n",
+    "            else:\n",
+    "                reward = (self.oppont_health - info['enemy_health']) - (self.player_health - info['health'])\n",
+    "        \n",
+    "        self.player_health = info['health']\n",
+    "        self.oppont_health = info['enemy_health']\n",
+    "        \n",
+    "        return custom_obs, reward, done, info\n",
+    "    \n",
+    "    def render(self, *args, **kwargs):\n",
+    "        self.game.render()\n",
+    "        \n",
+    "    def reset(self):\n",
+    "        obs = self.game.reset()\n",
+    "        custom_obs = self.__preprocess(obs)\n",
+    "        self.previous_frame = obs\n",
+    "    \n",
+    "        self.player_health = self.full_hp\n",
+    "        self.oppont_health = self.full_hp\n",
+    "        return custom_obs\n",
+    "\n",
+    "    def close(self):\n",
+    "        self.game.close()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 38,
+   "id": "6ec30177",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(84, 84)\n"
+     ]
+    }
+   ],
+   "source": [
+    "env.close()\n",
+    "env = StreetFighter()\n",
+    "print(env.observation_space.shape)\n",
+    "env.close()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "id": "7d9eab3a",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "C:\\ProgramData\\Anaconda3\\envs\\StreetFighterAI\\lib\\site-packages\\pyglet\\image\\codecs\\wic.py:289: UserWarning: [WinError -2147417850] Cannot change thread mode after it is set\n",
+      "  warnings.warn(str(err))\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-22 154 176\n",
+      "-32 122 176\n",
+      "29 122 147\n",
+      "7 122 140\n",
+      "-31 91 140\n",
+      "29 91 111\n",
+      "-23 68 111\n",
+      "-24 44 111\n",
+      "-24 20 111\n",
+      "31 20 80\n",
+      "10 20 70\n",
+      "45 20 25\n",
+      "5 20 20\n",
+      "-15 5 20\n",
+      "19 5 1\n",
+      "-176 -1 1\n",
+      "46 176 130\n",
+      "7 176 123\n",
+      "-24 152 123\n",
+      "29 152 94\n",
+      "-24 128 94\n",
+      "7 128 87\n",
+      "39 128 48\n",
+      "-31 97 48\n",
+      "36 97 12\n",
+      "-24 73 12\n",
+      "-24 49 12\n",
+      "8624 49 -1\n",
+      "39 176 137\n",
+      "-24 152 137\n",
+      "-23 129 137\n",
+      "-23 106 137\n",
+      "-26 80 137\n",
+      "-24 56 137\n",
+      "-23 33 137\n",
+      "-21 12 137\n",
+      "-12 0 137\n",
+      "-24112 -1 137\n"
+     ]
+    }
+   ],
+   "source": [
+    "## Checking Rewards functionality\n",
+    "import time\n",
+    "\n",
+    "env = StreetFighter()\n",
+    "obs = env.reset()\n",
+    "done = False\n",
+    "\n",
+    "for game in range(5):\n",
+    "    while not done:\n",
+    "        if done:\n",
+    "            obs = env.reset()\n",
+    "        env.render()\n",
+    "        obs, reward, done, info = env.step(env.action_space.sample())\n",
+    "        if reward != 0:\n",
+    "            print(reward, info['health'], info['enemy_health'])\n",
+    "        time.sleep(0.01)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b1ae8310",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/000_image_stack_ram_based_reward/test.py b/000_image_stack_ram_based_reward/test.py
new file mode 100644
index 0000000..d0611e7
--- /dev/null
+++ b/000_image_stack_ram_based_reward/test.py
@@ -0,0 +1,69 @@
+import time 
+
+import retro
+from stable_baselines3 import PPO
+
+from street_fighter_custom_wrapper import StreetFighterCustomWrapper
+    
+def make_env(game, state):
+    """Return a zero-argument factory that builds the wrapped retro env."""
+    def _build():
+        emulator = retro.make(
+            game=game,
+            state=state,
+            use_restricted_actions=retro.Actions.FILTERED,
+            obs_type=retro.Observations.IMAGE,
+        )
+        return StreetFighterCustomWrapper(emulator)
+    return _build
+
+game = "StreetFighterIISpecialChampionEdition-Genesis"
+state_stages = [
+    "Champion.Level1.ChunLiVsGuile", # Average reward for random strategy: -102.3
+    "ChampionX.Level1.ChunLiVsKen", # Average reward for random strategy: -247.6
+    "Champion.Level2.ChunLiVsKen",
+    "Champion.Level3.ChunLiVsChunLi",
+    "Champion.Level4.ChunLiVsZangief",
+    "Champion.Level5.ChunLiVsDhalsim",
+    "Champion.Level6.ChunLiVsRyu",
+    "Champion.Level7.ChunLiVsEHonda",
+    "Champion.Level8.ChunLiVsBlanka",
+    "Champion.Level9.ChunLiVsBalrog",
+    "Champion.Level10.ChunLiVsVega",
+    "Champion.Level11.ChunLiVsSagat",
+    "Champion.Level12.ChunLiVsBison"
+    # Add other stages as necessary
+]
+
+# Only the first stage in the list is evaluated.
+env = make_env(game, state_stages[0])()
+
+# PPO.load is a classmethod that returns a new model. Calling it on an
+# existing instance and discarding the result (as model.load(path) does)
+# leaves that instance with its freshly initialised weights.
+model_path = r"optuna/trial_1_best_model" # Average reward for optuna/trial_1_best_model: -82.3
+model = PPO.load(model_path, env=env)
+
+# Play num_episodes full episodes and average their total rewards.
+num_episodes = 30
+episode_reward_sum = 0
+for _ in range(num_episodes):
+    done = False
+    obs = env.reset()
+    total_reward = 0
+    while not done:
+        # Act with the loaded policy so the reported average belongs to it.
+        # For a random baseline, use env.action_space.sample() instead.
+        action, _states = model.predict(obs)
+        obs, reward, done, info = env.step(action)
+
+        # Only non-zero rewards are logged.
+        if reward != 0:
+            total_reward += reward
+            print("Reward: {}, playerHP: {}, enemyHP:{}".format(reward, info['health'], info['enemy_health']))
+        env.render()
+    print("Total reward: {}".format(total_reward))
+    episode_reward_sum += total_reward
+
+env.close()
+print("Average reward for {}: {}".format(model_path, episode_reward_sum/num_episodes))
\ No newline at end of file
diff --git a/000_image_stack_ram_based_reward/train.py b/000_image_stack_ram_based_reward/train.py
new file mode 100644
index 0000000..0e767d3
--- /dev/null
+++ b/000_image_stack_ram_based_reward/train.py
@@ -0,0 +1,125 @@
+import os
+import random
+
+import retro
+from stable_baselines3 import PPO
+from stable_baselines3.common.vec_env import SubprocVecEnv
+from stable_baselines3.common.callbacks import BaseCallback, CheckpointCallback
+
+from rmsprop_optim import RMSpropTF
+from custom_cnn import CustomCNN
+from street_fighter_custom_wrapper import StreetFighterCustomWrapper
+
+class RandomOpponentChangeCallback(BaseCallback):
+    """Load a randomly chosen stage into every env each ``opponent_interval`` calls."""
+    def __init__(self, stages, opponent_interval, verbose=0):
+        super().__init__(verbose)
+        self.stages, self.opponent_interval = stages, opponent_interval
+
+    def _on_step(self) -> bool:
+        if self.n_calls % self.opponent_interval == 0:
+            picked_state = random.choice(self.stages)
+            print("\nCurrent state:", picked_state)
+            self.training_env.env_method("load_state", picked_state, indices=None)
+        return True
+    
+def make_env(game, state, seed=0):
+    """Return a factory that builds a seeded, wrapped retro environment."""
+    def _build():
+        wrapped = StreetFighterCustomWrapper(retro.make(
+            game=game,
+            state=state,
+            use_restricted_actions=retro.Actions.FILTERED,
+            obs_type=retro.Observations.IMAGE,
+        ))
+        wrapped.seed(seed)
+        return wrapped
+    return _build
+
+def main():
+    """Train PPO on 8 parallel envs, swapping in a random opponent stage periodically."""
+    game = "StreetFighterIISpecialChampionEdition-Genesis"
+    state_stages = [
+        "ChampionX.Level1.ChunLiVsKen",
+        "ChampionX.Level2.ChunLiVsChunLi",
+        "ChampionX.Level3.ChunLiVsZangief",
+        "ChampionX.Level4.ChunLiVsDhalsim",
+        "ChampionX.Level5.ChunLiVsRyu",
+        "ChampionX.Level6.ChunLiVsEHonda",
+        "ChampionX.Level7.ChunLiVsBlanka",
+        "ChampionX.Level8.ChunLiVsGuile",
+        "ChampionX.Level9.ChunLiVsBalrog",
+        "ChampionX.Level10.ChunLiVsVega",
+        "ChampionX.Level11.ChunLiVsSagat",
+        "ChampionX.Level12.ChunLiVsBison"
+        # Add other stages as necessary
+    ]
+    # Champion is at difficulty level 4, ChampionX is at difficulty level 8.
+
+    num_envs = 8
+
+    env = SubprocVecEnv([make_env(game, state_stages[0], seed=i) for i in range(num_envs)])
+
+    # Using CustomCNN as the feature extractor
+    policy_kwargs = {
+        'features_extractor_class': CustomCNN
+    }
+
+    model = PPO(
+        "CnnPolicy",
+        env,
+        device="cuda",
+        policy_kwargs=policy_kwargs,
+        verbose=1,
+        n_steps=5400,
+        batch_size=64,
+        learning_rate=0.0001,
+        ent_coef=0.01,
+        clip_range=0.2,
+        gamma=0.99,
+        gae_lambda=0.95,
+        tensorboard_log="logs/"
+    )
+
+    # Set the save directory
+    save_dir = "trained_models"
+    os.makedirs(save_dir, exist_ok=True)
+
+    # Load the model from file
+    # model_path = "trained_models/ppo_chunli_1296000_steps.zip"
+
+    # Load model and modify the learning rate and entropy coefficient
+    # custom_objects = {
+    #     "learning_rate": 0.0002
+    # }
+    # model = PPO.load(model_path, env=env, device="cuda")#, custom_objects=custom_objects)
+
+    # Set up callbacks
+    opponent_interval = 5400 # stage_interval * num_envs = total_steps_per_stage
+    checkpoint_interval = 54000 # checkpoint_interval * num_envs = total_steps_per_checkpoint (Every 80 rounds)
+    checkpoint_callback = CheckpointCallback(save_freq=checkpoint_interval, save_path=save_dir, name_prefix="ppo_chunli")
+    stage_increase_callback = RandomOpponentChangeCallback(state_stages, opponent_interval)  # 3rd positional arg is verbose, not a path
+
+    # model_params = {
+    #     'n_steps': 5, 
+    #     'gamma': 0.99, 
+    #     'gae_lambda':1, 
+    #     'learning_rate': 7e-4, 
+    #     'vf_coef': 0.5,
+    #     'ent_coef': 0.0,
+    #     'max_grad_norm':0.5,
+    #     'rms_prop_eps':1e-05 
+    # }
+    # model = A2C('CnnPolicy', env, tensorboard_log='logs/', verbose=1, **model_params, policy_kwargs=dict(optimizer_class=RMSpropTF))
+
+    model.learn(
+        total_timesteps=int(6048000), # total_timesteps = stage_interval * num_envs * num_stages (1120 rounds)
+        callback=[checkpoint_callback, stage_increase_callback]
+    )
+    env.close()
+
+    # Save the final model
+    model.save(os.path.join(save_dir, "ppo_sf2_chunli_final.zip"))
+
+if __name__ == "__main__":
+    main()
diff --git a/000_image_stack_ram_based_reward/tune.py b/000_image_stack_ram_based_reward/tune.py
new file mode 100644
index 0000000..2c60de1
--- /dev/null
+++ b/000_image_stack_ram_based_reward/tune.py
@@ -0,0 +1,81 @@
+import gym
+import retro
+import optuna
+from stable_baselines3 import PPO
+from stable_baselines3.common.monitor import Monitor
+from stable_baselines3.common.evaluation import evaluate_policy
+
+from custom_cnn import CustomCNN
+from street_fighter_custom_wrapper import StreetFighterCustomWrapper
+
+def make_env(game, state, seed=0):
+    """Return a factory for a seeded, monitored, wrapped retro environment."""
+    def _build():
+        emulator = retro.RetroEnv(
+            game=game,
+            state=state,
+            use_restricted_actions=retro.Actions.FILTERED,
+            obs_type=retro.Observations.IMAGE,
+        )
+        monitored = Monitor(StreetFighterCustomWrapper(emulator))
+        monitored.seed(seed)
+        return monitored
+    return _build
+
+def objective(trial):
+    """Train PPO with sampled hyperparameters; return the last mean eval reward."""
+    game = "StreetFighterIISpecialChampionEdition-Genesis"
+    env = make_env(game, state="ChampionX.Level1.ChunLiVsKen")()
+
+    # Suggest hyperparameters
+    learning_rate = trial.suggest_float("learning_rate", 5e-5, 1e-3, log=True)
+    n_steps = trial.suggest_int("n_steps", 256, 8192, log=True)
+    batch_size = trial.suggest_int("batch_size", 16, 128, log=True)
+    gamma = trial.suggest_float("gamma", 0.9, 0.9999)
+    gae_lambda = trial.suggest_float("gae_lambda", 0.9, 1.0)
+    clip_range = trial.suggest_float("clip_range", 0.1, 0.4)
+    ent_coef = trial.suggest_float("ent_coef", 1e-4, 1e-2, log=True)
+    vf_coef = trial.suggest_float("vf_coef", 0.1, 1.0)
+
+    # Using CustomCNN as the feature extractor
+    policy_kwargs = {'features_extractor_class': CustomCNN}
+
+    # gym-retro allows one emulator per process: close env on every exit path
+    try:
+        model = PPO(
+            "CnnPolicy",
+            env,
+            device="cuda",
+            policy_kwargs=policy_kwargs,
+            verbose=1,
+            n_steps=n_steps,
+            batch_size=batch_size,
+            learning_rate=learning_rate,
+            ent_coef=ent_coef,
+            clip_range=clip_range,
+            vf_coef=vf_coef,
+            gamma=gamma,
+            gae_lambda=gae_lambda
+        )
+
+        for iteration in range(10):
+            model.learn(total_timesteps=100000)
+            mean_reward, _std_reward = evaluate_policy(model, env, n_eval_episodes=10)
+            trial.report(mean_reward, iteration)
+            if trial.should_prune():
+                raise optuna.TrialPruned()
+    finally:
+        env.close()
+
+    return mean_reward
+
+study = optuna.create_study(direction="maximize")
+# Run optimization for 100 trials or 2 hours, whichever comes first
+study.optimize(objective, n_trials=100, timeout=7200)
+
+print("Best trial:")
+best_trial = study.best_trial
+print(" Value: ", best_trial.value)
+print(" Params: ")
+for name, chosen in best_trial.params.items():
+    print(f"{name}: {chosen}")
diff --git a/000_image_stack_ram_based_reward/tune_ppo.py b/000_image_stack_ram_based_reward/tune_ppo.py
new file mode 100644
index 0000000..818da65
--- /dev/null
+++ b/000_image_stack_ram_based_reward/tune_ppo.py
@@ -0,0 +1,69 @@
+import os
+
+import retro
+import optuna
+from stable_baselines3 import PPO
+from stable_baselines3.common.monitor import Monitor
+from stable_baselines3.common.evaluation import evaluate_policy
+
+from street_fighter_custom_wrapper import StreetFighterCustomWrapper
+
+LOG_DIR = 'logs/'
+OPT_DIR = 'optuna/'
+os.makedirs(LOG_DIR, exist_ok=True)
+os.makedirs(OPT_DIR, exist_ok=True)
+
+def optimize_ppo(trial):
+    """Sample one set of PPO hyperparameters from the Optuna trial."""
+    return dict(
+        n_steps=trial.suggest_int('n_steps', 1024, 8192, log=True),
+        gamma=trial.suggest_float('gamma', 0.9, 0.9999),
+        learning_rate=trial.suggest_float('learning_rate', 5e-5, 1e-4, log=True),
+        clip_range=trial.suggest_float('clip_range', 0.1, 0.4),
+        gae_lambda=trial.suggest_float('gae_lambda', 0.8, 0.99))
+
+def make_env(game, state):
+    """Return a factory producing the wrapped retro env for one game state."""
+    def _factory():
+        raw_env = retro.make(
+            game=game,
+            state=state,
+            use_restricted_actions=retro.Actions.FILTERED,
+            obs_type=retro.Observations.IMAGE,
+        )
+        return StreetFighterCustomWrapper(raw_env)
+    return _factory
+
+def optimize_agent(trial):
+    """Train and evaluate one PPO trial; return its mean evaluation reward."""
+    game = "StreetFighterIISpecialChampionEdition-Genesis"
+    state = "Champion.Level1.ChunLiVsGuile"#"ChampionX.Level1.ChunLiVsKen"
+    env = None
+    try:
+        model_params = optimize_ppo(trial)
+        env = Monitor(make_env(game, state)(), LOG_DIR)
+
+        # Create algo
+        model = PPO('CnnPolicy', env, tensorboard_log=LOG_DIR, verbose=1, **model_params)
+        model.learn(total_timesteps=100000)
+
+        # Evaluate model
+        mean_reward, _ = evaluate_policy(model, env, n_eval_episodes=30)
+        SAVE_PATH = os.path.join(OPT_DIR, 'trial_{}_best_model'.format(trial.number))
+        model.save(SAVE_PATH)
+        return mean_reward
+    except Exception as e:
+        # Rewards here are usually negative, so a -1 sentinel would make a
+        # crashed trial look like the best one; report it and prune instead.
+        print("Trial {} failed: {!r}".format(trial.number, e))
+        raise optuna.TrialPruned() from e
+    finally:
+        if env is not None:
+            env.close()
+    
+# Create a maximising study and run 10 trials sequentially (n_jobs=1)
+study = optuna.create_study(direction='maximize')
+study.optimize(optimize_agent, n_trials=10, n_jobs=1)
+# Print the best hyperparameters, then the best trial object
+print(study.best_params)
+print(study.best_trial)
diff --git a/001_image_stack/__pycache__/street_fighter_custom_wrapper.cpython-38.pyc b/001_image_stack/__pycache__/street_fighter_custom_wrapper.cpython-38.pyc
deleted file mode 100644
index 5ab9e2f..0000000
Binary files a/001_image_stack/__pycache__/street_fighter_custom_wrapper.cpython-38.pyc and /dev/null differ
diff --git a/001_image_stack/__pycache__/custom_cnn.cpython-38.pyc b/001_image_stack_vision_based_reward/__pycache__/custom_cnn.cpython-38.pyc
similarity index 100%
rename from 001_image_stack/__pycache__/custom_cnn.cpython-38.pyc
rename to 001_image_stack_vision_based_reward/__pycache__/custom_cnn.cpython-38.pyc
diff --git a/001_image_stack_vision_based_reward/__pycache__/street_fighter_custom_wrapper.cpython-38.pyc b/001_image_stack_vision_based_reward/__pycache__/street_fighter_custom_wrapper.cpython-38.pyc
new file mode 100644
index 0000000..d0c11f5
Binary files /dev/null and b/001_image_stack_vision_based_reward/__pycache__/street_fighter_custom_wrapper.cpython-38.pyc differ
diff --git a/001_image_stack_vision_based_reward/check_reward.py b/001_image_stack_vision_based_reward/check_reward.py
new file mode 100644
index 0000000..298cb6f
--- /dev/null
+++ b/001_image_stack_vision_based_reward/check_reward.py
@@ -0,0 +1,39 @@
+import time 
+
+import retro
+from stable_baselines3 import PPO
+from stable_baselines3.common.vec_env import DummyVecEnv
+
+from custom_cnn import CustomCNN
+from street_fighter_custom_wrapper import StreetFighterCustomWrapper
+    
+def make_env(game, state):
+    """Return a factory building the wrapped retro env with ``testing=True``."""
+    def _build():
+        emulator = retro.RetroEnv(
+            game=game,
+            state=state,
+            use_restricted_actions=retro.Actions.FILTERED,
+            obs_type=retro.Observations.IMAGE,
+        )
+        return StreetFighterCustomWrapper(emulator, testing=True)
+    return _build
+
+game = "StreetFighterIISpecialChampionEdition-Genesis"
+state = "Champion.Level1.ChunLiVsGuile"
+# Build the wrapped env and load the trained PPO checkpoint
+env = make_env(game, state)()
+model = PPO.load(r"trained_models_continued/ppo_chunli_6048000_steps")
+obs = env.reset()
+done = False
+# Run the policy until the env reports done, printing info on every step
+while not done:
+    timestamp = time.time()  # NOTE(review): assigned but never read in this script
+    action, _ = model.predict(obs)
+    obs, reward, done, info = env.step(action)
+    print(info)
+    if reward != 0:
+        print(reward, info['health'], info['enemy_health'])
+    env.render()
+
+env.close()
\ No newline at end of file
diff --git a/001_image_stack_vision_based_reward/custom_cnn.py b/001_image_stack_vision_based_reward/custom_cnn.py
new file mode 100644
index 0000000..25c50ea
--- /dev/null
+++ b/001_image_stack_vision_based_reward/custom_cnn.py
@@ -0,0 +1,24 @@
+import gym
+import torch
+import torch.nn as nn
+from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
+
+# Custom feature extractor (CNN)
+class CustomCNN(BaseFeaturesExtractor):
+    """Conv feature extractor emitting 512 features.
+
+    Sizes are fixed: 4 input channels; 16384 = 64*16*16, i.e. 84x84 inputs.
+    """
+    def __init__(self, observation_space: gym.Space):
+        super().__init__(observation_space, features_dim=512)
+        conv_stack = [
+            nn.Conv2d(4, 32, kernel_size=5, stride=2, padding=0), nn.ReLU(),
+            nn.Conv2d(32, 64, kernel_size=5, stride=2, padding=0), nn.ReLU(),
+            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=0), nn.ReLU(),
+        ]
+        head = [nn.Flatten(), nn.Linear(16384, self.features_dim), nn.ReLU()]
+        self.cnn = nn.Sequential(*conv_stack, *head)
+
+    def forward(self, observations: torch.Tensor) -> torch.Tensor:
+        return self.cnn(observations)
+    
\ No newline at end of file
diff --git a/001_image_stack_vision_based_reward/evaluate.py b/001_image_stack_vision_based_reward/evaluate.py
new file mode 100644
index 0000000..03da618
--- /dev/null
+++ b/001_image_stack_vision_based_reward/evaluate.py
@@ -0,0 +1,47 @@
+import retro
+
+from stable_baselines3 import PPO
+from stable_baselines3.common.vec_env import DummyVecEnv
+from stable_baselines3.common.monitor import Monitor
+from stable_baselines3.common.evaluation import evaluate_policy
+
+from street_fighter_custom_wrapper import StreetFighterCustomWrapper
+
+def make_env(game, state):
+    """Return a zero-argument factory for the wrapped retro environment."""
+    def _factory():
+        raw_env = retro.RetroEnv(
+            game=game,
+            state=state,
+            use_restricted_actions=retro.Actions.FILTERED,
+            obs_type=retro.Observations.IMAGE,
+        )
+        return StreetFighterCustomWrapper(raw_env)
+    return _factory
+
+game = "StreetFighterIISpecialChampionEdition-Genesis"
+state_stages = [
+    "Champion.Level1.ChunLiVsGuile",
+    "Champion.Level2.ChunLiVsKen",
+    "Champion.Level3.ChunLiVsChunLi",
+    "Champion.Level4.ChunLiVsZangief",
+    "Champion.Level5.ChunLiVsDhalsim",
+    "Champion.Level6.ChunLiVsRyu",
+    "Champion.Level7.ChunLiVsEHonda",
+    "Champion.Level8.ChunLiVsBlanka",
+    "Champion.Level9.ChunLiVsBalrog",
+    "Champion.Level10.ChunLiVsVega",
+    "Champion.Level11.ChunLiVsSagat",
+    "Champion.Level12.ChunLiVsBison"
+    # Add other stages as necessary
+]
+# Only the first stage in the list is used below
+env = make_env(game, state_stages[0])()
+
+# Wrap the environment with Monitor (path 'logs/') and a single-env DummyVecEnv
+env = Monitor(env, 'logs/')
+env = DummyVecEnv([lambda: env])
+# Load the checkpoint and print mean/std reward over 10 rendered episodes
+model = PPO.load('trained_models/ppo_chunli_1296000_steps')
+mean_reward, std_reward = evaluate_policy(model, env, render=True, n_eval_episodes=10)
+print(f"Mean reward: {mean_reward:.2f} +/- {std_reward:.2f}")
\ No newline at end of file
diff --git a/001_image_stack_vision_based_reward/logs/monitor.csv b/001_image_stack_vision_based_reward/logs/monitor.csv
new file mode 100644
index 0000000..671bb3b
--- /dev/null
+++ b/001_image_stack_vision_based_reward/logs/monitor.csv
@@ -0,0 +1,12 @@
+#{"t_start": 1680163278.6497958, "env_id": null}
+r,l,t
+-1115.766667,2842,13.829476
+-1115.766667,2842,22.367655
+-1115.766667,2842,32.010939
+-1115.766667,2842,41.401216
+-1115.766667,2842,50.451062
+-1115.766667,2842,59.522487
+-1115.766667,2842,68.723222
+-1115.766667,2842,78.205462
+-1115.766667,2842,88.455592
+-1115.766667,2842,97.656297
diff --git a/001_image_stack/street_fighter_custom_wrapper.py b/001_image_stack_vision_based_reward/street_fighter_custom_wrapper.py
similarity index 96%
rename from 001_image_stack/street_fighter_custom_wrapper.py
rename to 001_image_stack_vision_based_reward/street_fighter_custom_wrapper.py
index 5fd4d35..e2e4c53 100644
--- a/001_image_stack/street_fighter_custom_wrapper.py
+++ b/001_image_stack_vision_based_reward/street_fighter_custom_wrapper.py
@@ -12,8 +12,6 @@ class StreetFighterCustomWrapper(gym.Wrapper):
     def __init__(self, env, testing=False, threshold=0.65):
         super(StreetFighterCustomWrapper, self).__init__(env)
         
-        self.action_space = MultiBinary(12)
-        
         # Use a deque to store the last 4 frames
         self.frame_stack = collections.deque(maxlen=4)
 
@@ -89,7 +87,7 @@ class StreetFighterCustomWrapper(gym.Wrapper):
 
     def step(self, action):
         # observation, _, _, info = self.env.step(action)
-        observation, _reward, _done, info = self.env.step(self.env.action_space.sample())
+        observation, _reward, _done, info = self.env.step(action)
         custom_reward = self._get_reward()
         custom_reward -= 1.0 / 60.0 # penalty for each step (-1 points per second)
 
diff --git a/001_image_stack/test.py b/001_image_stack_vision_based_reward/test.py
similarity index 96%
rename from 001_image_stack/test.py
rename to 001_image_stack_vision_based_reward/test.py
index 614b247..db08ae4 100644
--- a/001_image_stack/test.py
+++ b/001_image_stack_vision_based_reward/test.py
@@ -53,7 +53,7 @@ model = PPO(
     policy_kwargs=policy_kwargs, 
     verbose=1
 )
-model.load(r"trained_models_continued/ppo_chunli_432000_steps")
+model.load(r"trained_models/ppo_chunli_1296000_steps")
 
 obs = env.reset()
 done = False
diff --git a/001_image_stack/train.py b/001_image_stack_vision_based_reward/train.py
similarity index 91%
rename from 001_image_stack/train.py
rename to 001_image_stack_vision_based_reward/train.py
index 4e2195f..9861457 100644
--- a/001_image_stack/train.py
+++ b/001_image_stack_vision_based_reward/train.py
@@ -1,13 +1,9 @@
 import os
 import random
 
-import gym
-import cv2
 import retro
-import numpy as np
 from stable_baselines3 import PPO
 from stable_baselines3.common.vec_env import SubprocVecEnv
-from stable_baselines3.common.preprocessing import is_image_space, is_image_space_channels_first
 from stable_baselines3.common.callbacks import BaseCallback, CheckpointCallback
 
 from custom_cnn import CustomCNN
@@ -77,20 +73,16 @@ def main():
         verbose=1,
         n_steps=5400,
         batch_size=64,
-        n_epochs=10,
         learning_rate=0.0003,
         ent_coef=0.01,
         clip_range=0.2,
-        clip_range_vf=None,
         gamma=0.99,
         gae_lambda=0.95,
-        max_grad_norm=0.5,
-        use_sde=False,
-        sde_sample_freq=-1
+        tensorboard_log="logs/"
     )
 
     # Set the save directory
-    save_dir = "trained_models_continued"
+    save_dir = "trained_models_continued_new"
     os.makedirs(save_dir, exist_ok=True)
 
      # Load the model from file
@@ -99,8 +91,7 @@ def main():
     
     # Load model and modify the learning rate and entropy coefficient
     custom_objects = {
-        "learning_rate": 0.00005,
-        "ent_coef": 0.2
+        "learning_rate": 0.0001
     }
     model = PPO.load(model_path, env=env, device="cuda", custom_objects=custom_objects)
 
@@ -110,7 +101,6 @@ def main():
     checkpoint_callback = CheckpointCallback(save_freq=checkpoint_interval, save_path=save_dir, name_prefix="ppo_chunli")
     stage_increase_callback = RandomOpponentChangeCallback(state_stages, opponent_interval, save_dir)
 
-    
     model.learn(
         total_timesteps=int(6048000), # total_timesteps = stage_interval * num_envs * num_stages (1120 rounds)
         callback=[checkpoint_callback, stage_increase_callback]
diff --git a/001_image_stack_vision_based_reward/trainging_log_continued.txt b/001_image_stack_vision_based_reward/trainging_log_continued.txt
new file mode 100644
index 0000000..b299f62
--- /dev/null
+++ b/001_image_stack_vision_based_reward/trainging_log_continued.txt
@@ -0,0 +1,2791 @@
+(StreetFighterAI) PS C:\Users\unitec\Documents\AIProjects\street-fighter-ai\001_image_stack> python .\train.py
+Using cuda device
+
+Current state: ChampionX.Level7.ChunLiVsBlanka
+------------------------------
+| time/              |       |
+|    fps             | 1534  |
+|    iterations      | 1     |
+|    time_elapsed    | 28    |
+|    total_timesteps | 43200 |
+------------------------------
+
+Current state: ChampionX.Level8.ChunLiVsGuile
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 696         |
+|    iterations           | 2           |
+|    time_elapsed         | 123         |
+|    total_timesteps      | 86400       |
+| train/                  |             |
+|    approx_kl            | 0.019640451 |
+|    clip_fraction        | 0.222       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.12       |
+|    explained_variance   | 0.731       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.529       |
+|    n_updates            | 300         |
+|    policy_gradient_loss | 0.0037      |
+|    value_loss           | 17.4        |
+-----------------------------------------
+
+Current state: ChampionX.Level1.ChunLiVsKen
+----------------------------------------
+| time/                   |            |
+|    fps                  | 587        |
+|    iterations           | 3          |
+|    time_elapsed         | 220        |
+|    total_timesteps      | 129600     |
+| train/                  |            |
+|    approx_kl            | 0.01716586 |
+|    clip_fraction        | 0.184      |
+|    clip_range           | 0.2        |
+|    entropy_loss         | -8.15      |
+|    explained_variance   | 0.681      |
+|    learning_rate        | 0.0002     |
+|    loss                 | 0.305      |
+|    n_updates            | 310        |
+|    policy_gradient_loss | -0.00363   |
+|    value_loss           | 12.6       |
+----------------------------------------
+
+Current state: ChampionX.Level12.ChunLiVsBison
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 545         |
+|    iterations           | 4           |
+|    time_elapsed         | 316         |
+|    total_timesteps      | 172800      |
+| train/                  |             |
+|    approx_kl            | 0.017642297 |
+|    clip_fraction        | 0.18        |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.14       |
+|    explained_variance   | 0.752       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.693       |
+|    n_updates            | 320         |
+|    policy_gradient_loss | -0.0013     |
+|    value_loss           | 15.3        |
+-----------------------------------------
+
+Current state: ChampionX.Level5.ChunLiVsRyu
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 523         |
+|    iterations           | 5           |
+|    time_elapsed         | 412         |
+|    total_timesteps      | 216000      |
+| train/                  |             |
+|    approx_kl            | 0.016423995 |
+|    clip_fraction        | 0.159       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.14       |
+|    explained_variance   | 0.769       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.238       |
+|    n_updates            | 330         |
+|    policy_gradient_loss | -0.00348    |
+|    value_loss           | 17.4        |
+-----------------------------------------
+
+Current state: ChampionX.Level4.ChunLiVsDhalsim
+----------------------------------------
+| time/                   |            |
+|    fps                  | 508        |
+|    iterations           | 6          |
+|    time_elapsed         | 509        |
+|    total_timesteps      | 259200     |
+| train/                  |            |
+|    approx_kl            | 0.01582943 |
+|    clip_fraction        | 0.155      |
+|    clip_range           | 0.2        |
+|    entropy_loss         | -8.15      |
+|    explained_variance   | 0.734      |
+|    learning_rate        | 0.0002     |
+|    loss                 | 0.688      |
+|    n_updates            | 340        |
+|    policy_gradient_loss | -0.00491   |
+|    value_loss           | 14.8       |
+----------------------------------------
+
+Current state: ChampionX.Level11.ChunLiVsSagat
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 498         |
+|    iterations           | 7           |
+|    time_elapsed         | 606         |
+|    total_timesteps      | 302400      |
+| train/                  |             |
+|    approx_kl            | 0.019045277 |
+|    clip_fraction        | 0.176       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.15       |
+|    explained_variance   | 0.778       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.729       |
+|    n_updates            | 350         |
+|    policy_gradient_loss | -0.00323    |
+|    value_loss           | 15.8        |
+-----------------------------------------
+
+Current state: ChampionX.Level3.ChunLiVsZangief
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 490         |
+|    iterations           | 8           |
+|    time_elapsed         | 705         |
+|    total_timesteps      | 345600      |
+| train/                  |             |
+|    approx_kl            | 0.018350422 |
+|    clip_fraction        | 0.177       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.16       |
+|    explained_variance   | 0.789       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 1.17        |
+|    n_updates            | 360         |
+|    policy_gradient_loss | -0.0043     |
+|    value_loss           | 12.4        |
+-----------------------------------------
+
+Current state: ChampionX.Level5.ChunLiVsRyu
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 484         |
+|    iterations           | 9           |
+|    time_elapsed         | 802         |
+|    total_timesteps      | 388800      |
+| train/                  |             |
+|    approx_kl            | 0.018348452 |
+|    clip_fraction        | 0.183       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.15       |
+|    explained_variance   | 0.797       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 1.45        |
+|    n_updates            | 370         |
+|    policy_gradient_loss | -0.000873   |
+|    value_loss           | 16          |
+-----------------------------------------
+
+Current state: ChampionX.Level3.ChunLiVsZangief
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 480         |
+|    iterations           | 10          |
+|    time_elapsed         | 899         |
+|    total_timesteps      | 432000      |
+| train/                  |             |
+|    approx_kl            | 0.017740099 |
+|    clip_fraction        | 0.175       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.14       |
+|    explained_variance   | 0.81        |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.596       |
+|    n_updates            | 380         |
+|    policy_gradient_loss | -0.00329    |
+|    value_loss           | 20.7        |
+-----------------------------------------
+
+Current state: ChampionX.Level9.ChunLiVsBalrog
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 475         |
+|    iterations           | 11          |
+|    time_elapsed         | 998         |
+|    total_timesteps      | 475200      |
+| train/                  |             |
+|    approx_kl            | 0.020382024 |
+|    clip_fraction        | 0.204       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.14       |
+|    explained_variance   | 0.783       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.51        |
+|    n_updates            | 390         |
+|    policy_gradient_loss | -0.0046     |
+|    value_loss           | 17.3        |
+-----------------------------------------
+
+Current state: ChampionX.Level12.ChunLiVsBison
+----------------------------------------
+| time/                   |            |
+|    fps                  | 473        |
+|    iterations           | 12         |
+|    time_elapsed         | 1095       |
+|    total_timesteps      | 518400     |
+| train/                  |            |
+|    approx_kl            | 0.01975372 |
+|    clip_fraction        | 0.192      |
+|    clip_range           | 0.2        |
+|    entropy_loss         | -8.12      |
+|    explained_variance   | 0.78       |
+|    learning_rate        | 0.0002     |
+|    loss                 | 0.59       |
+|    n_updates            | 400        |
+|    policy_gradient_loss | -0.00151   |
+|    value_loss           | 22.9       |
+----------------------------------------
+
+Current state: ChampionX.Level11.ChunLiVsSagat
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 470         |
+|    iterations           | 13          |
+|    time_elapsed         | 1192        |
+|    total_timesteps      | 561600      |
+| train/                  |             |
+|    approx_kl            | 0.019312538 |
+|    clip_fraction        | 0.199       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.13       |
+|    explained_variance   | 0.697       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 1.05        |
+|    n_updates            | 410         |
+|    policy_gradient_loss | -0.000962   |
+|    value_loss           | 21.6        |
+-----------------------------------------
+
+Current state: ChampionX.Level4.ChunLiVsDhalsim
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 468         |
+|    iterations           | 14          |
+|    time_elapsed         | 1290        |
+|    total_timesteps      | 604800      |
+| train/                  |             |
+|    approx_kl            | 0.018606355 |
+|    clip_fraction        | 0.189       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.12       |
+|    explained_variance   | 0.742       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.385       |
+|    n_updates            | 420         |
+|    policy_gradient_loss | -0.00191    |
+|    value_loss           | 18.1        |
+-----------------------------------------
+
+Current state: ChampionX.Level3.ChunLiVsZangief
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 467         |
+|    iterations           | 15          |
+|    time_elapsed         | 1387        |
+|    total_timesteps      | 648000      |
+| train/                  |             |
+|    approx_kl            | 0.017203132 |
+|    clip_fraction        | 0.179       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.12       |
+|    explained_variance   | 0.787       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.26        |
+|    n_updates            | 430         |
+|    policy_gradient_loss | -0.0021     |
+|    value_loss           | 15.2        |
+-----------------------------------------
+
+Current state: ChampionX.Level7.ChunLiVsBlanka
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 465         |
+|    iterations           | 16          |
+|    time_elapsed         | 1484        |
+|    total_timesteps      | 691200      |
+| train/                  |             |
+|    approx_kl            | 0.018841917 |
+|    clip_fraction        | 0.184       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.09       |
+|    explained_variance   | 0.791       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.811       |
+|    n_updates            | 440         |
+|    policy_gradient_loss | -0.00263    |
+|    value_loss           | 12.1        |
+-----------------------------------------
+
+Current state: ChampionX.Level4.ChunLiVsDhalsim
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 464         |
+|    iterations           | 17          |
+|    time_elapsed         | 1581        |
+|    total_timesteps      | 734400      |
+| train/                  |             |
+|    approx_kl            | 0.016460957 |
+|    clip_fraction        | 0.161       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.11       |
+|    explained_variance   | 0.809       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 1.47        |
+|    n_updates            | 450         |
+|    policy_gradient_loss | -0.00405    |
+|    value_loss           | 17.5        |
+-----------------------------------------
+
+Current state: ChampionX.Level7.ChunLiVsBlanka
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 463         |
+|    iterations           | 18          |
+|    time_elapsed         | 1678        |
+|    total_timesteps      | 777600      |
+| train/                  |             |
+|    approx_kl            | 0.018824814 |
+|    clip_fraction        | 0.187       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.1        |
+|    explained_variance   | 0.766       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.312       |
+|    n_updates            | 460         |
+|    policy_gradient_loss | -0.00269    |
+|    value_loss           | 15.2        |
+-----------------------------------------
+
+Current state: ChampionX.Level4.ChunLiVsDhalsim
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 462         |
+|    iterations           | 19          |
+|    time_elapsed         | 1776        |
+|    total_timesteps      | 820800      |
+| train/                  |             |
+|    approx_kl            | 0.017789861 |
+|    clip_fraction        | 0.168       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.09       |
+|    explained_variance   | 0.762       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 1.01        |
+|    n_updates            | 470         |
+|    policy_gradient_loss | -0.00204    |
+|    value_loss           | 16.2        |
+-----------------------------------------
+
+Current state: ChampionX.Level9.ChunLiVsBalrog
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 461         |
+|    iterations           | 20          |
+|    time_elapsed         | 1872        |
+|    total_timesteps      | 864000      |
+| train/                  |             |
+|    approx_kl            | 0.018345973 |
+|    clip_fraction        | 0.173       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.1        |
+|    explained_variance   | 0.79        |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.736       |
+|    n_updates            | 480         |
+|    policy_gradient_loss | -0.00369    |
+|    value_loss           | 12.8        |
+-----------------------------------------
+
+Current state: ChampionX.Level12.ChunLiVsBison
+----------------------------------------
+| time/                   |            |
+|    fps                  | 460        |
+|    iterations           | 21         |
+|    time_elapsed         | 1969       |
+|    total_timesteps      | 907200     |
+| train/                  |            |
+|    approx_kl            | 0.02151764 |
+|    clip_fraction        | 0.192      |
+|    clip_range           | 0.2        |
+|    entropy_loss         | -8.09      |
+|    explained_variance   | 0.782      |
+|    learning_rate        | 0.0002     |
+|    loss                 | 0.267      |
+|    n_updates            | 490        |
+|    policy_gradient_loss | -0.00102   |
+|    value_loss           | 13.8       |
+----------------------------------------
+
+Current state: ChampionX.Level4.ChunLiVsDhalsim
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 459         |
+|    iterations           | 22          |
+|    time_elapsed         | 2066        |
+|    total_timesteps      | 950400      |
+| train/                  |             |
+|    approx_kl            | 0.021028183 |
+|    clip_fraction        | 0.19        |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.13       |
+|    explained_variance   | 0.676       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.253       |
+|    n_updates            | 500         |
+|    policy_gradient_loss | -0.00186    |
+|    value_loss           | 20          |
+-----------------------------------------
+
+Current state: ChampionX.Level3.ChunLiVsZangief
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 459         |
+|    iterations           | 23          |
+|    time_elapsed         | 2163        |
+|    total_timesteps      | 993600      |
+| train/                  |             |
+|    approx_kl            | 0.019285567 |
+|    clip_fraction        | 0.18        |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.12       |
+|    explained_variance   | 0.729       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.329       |
+|    n_updates            | 510         |
+|    policy_gradient_loss | -0.00156    |
+|    value_loss           | 20.8        |
+-----------------------------------------
+
+Current state: ChampionX.Level10.ChunLiVsVega
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 458         |
+|    iterations           | 24          |
+|    time_elapsed         | 2260        |
+|    total_timesteps      | 1036800     |
+| train/                  |             |
+|    approx_kl            | 0.019038767 |
+|    clip_fraction        | 0.195       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.09       |
+|    explained_variance   | 0.665       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.685       |
+|    n_updates            | 520         |
+|    policy_gradient_loss | -0.000273   |
+|    value_loss           | 15.8        |
+-----------------------------------------
+
+Current state: ChampionX.Level7.ChunLiVsBlanka
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 458         |
+|    iterations           | 25          |
+|    time_elapsed         | 2357        |
+|    total_timesteps      | 1080000     |
+| train/                  |             |
+|    approx_kl            | 0.020219645 |
+|    clip_fraction        | 0.192       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.09       |
+|    explained_variance   | 0.776       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 1.49        |
+|    n_updates            | 530         |
+|    policy_gradient_loss | -0.00111    |
+|    value_loss           | 21.8        |
+-----------------------------------------
+
+Current state: ChampionX.Level11.ChunLiVsSagat
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 457         |
+|    iterations           | 26          |
+|    time_elapsed         | 2455        |
+|    total_timesteps      | 1123200     |
+| train/                  |             |
+|    approx_kl            | 0.018398428 |
+|    clip_fraction        | 0.179       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.12       |
+|    explained_variance   | 0.784       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.225       |
+|    n_updates            | 540         |
+|    policy_gradient_loss | -0.00625    |
+|    value_loss           | 12.3        |
+-----------------------------------------
+
+Current state: ChampionX.Level1.ChunLiVsKen
+----------------------------------------
+| time/                   |            |
+|    fps                  | 456        |
+|    iterations           | 27         |
+|    time_elapsed         | 2552       |
+|    total_timesteps      | 1166400    |
+| train/                  |            |
+|    approx_kl            | 0.02056862 |
+|    clip_fraction        | 0.178      |
+|    clip_range           | 0.2        |
+|    entropy_loss         | -8.1       |
+|    explained_variance   | 0.718      |
+|    learning_rate        | 0.0002     |
+|    loss                 | 0.265      |
+|    n_updates            | 550        |
+|    policy_gradient_loss | -0.00118   |
+|    value_loss           | 21.3       |
+----------------------------------------
+
+Current state: ChampionX.Level10.ChunLiVsVega
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 456         |
+|    iterations           | 28          |
+|    time_elapsed         | 2649        |
+|    total_timesteps      | 1209600     |
+| train/                  |             |
+|    approx_kl            | 0.018739836 |
+|    clip_fraction        | 0.182       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.09       |
+|    explained_variance   | 0.786       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.562       |
+|    n_updates            | 560         |
+|    policy_gradient_loss | -0.00141    |
+|    value_loss           | 16.7        |
+-----------------------------------------
+
+Current state: ChampionX.Level7.ChunLiVsBlanka
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 456         |
+|    iterations           | 29          |
+|    time_elapsed         | 2747        |
+|    total_timesteps      | 1252800     |
+| train/                  |             |
+|    approx_kl            | 0.019046063 |
+|    clip_fraction        | 0.178       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.1        |
+|    explained_variance   | 0.77        |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.655       |
+|    n_updates            | 570         |
+|    policy_gradient_loss | -0.00238    |
+|    value_loss           | 19          |
+-----------------------------------------
+
+Current state: ChampionX.Level11.ChunLiVsSagat
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 455         |
+|    iterations           | 30          |
+|    time_elapsed         | 2845        |
+|    total_timesteps      | 1296000     |
+| train/                  |             |
+|    approx_kl            | 0.017575732 |
+|    clip_fraction        | 0.181       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.12       |
+|    explained_variance   | 0.763       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.461       |
+|    n_updates            | 580         |
+|    policy_gradient_loss | -0.00471    |
+|    value_loss           | 12.1        |
+-----------------------------------------
+
+Current state: ChampionX.Level6.ChunLiVsEHonda
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 455         |
+|    iterations           | 31          |
+|    time_elapsed         | 2942        |
+|    total_timesteps      | 1339200     |
+| train/                  |             |
+|    approx_kl            | 0.020356499 |
+|    clip_fraction        | 0.179       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.12       |
+|    explained_variance   | 0.772       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 1.84        |
+|    n_updates            | 590         |
+|    policy_gradient_loss | -0.00473    |
+|    value_loss           | 11.5        |
+-----------------------------------------
+
+Current state: ChampionX.Level6.ChunLiVsEHonda
+----------------------------------------
+| time/                   |            |
+|    fps                  | 454        |
+|    iterations           | 32         |
+|    time_elapsed         | 3039       |
+|    total_timesteps      | 1382400    |
+| train/                  |            |
+|    approx_kl            | 0.02154484 |
+|    clip_fraction        | 0.186      |
+|    clip_range           | 0.2        |
+|    entropy_loss         | -8.09      |
+|    explained_variance   | 0.82       |
+|    learning_rate        | 0.0002     |
+|    loss                 | 2.06       |
+|    n_updates            | 600        |
+|    policy_gradient_loss | 0.00338    |
+|    value_loss           | 23.1       |
+----------------------------------------
+
+Current state: ChampionX.Level7.ChunLiVsBlanka
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 454         |
+|    iterations           | 33          |
+|    time_elapsed         | 3137        |
+|    total_timesteps      | 1425600     |
+| train/                  |             |
+|    approx_kl            | 0.022631256 |
+|    clip_fraction        | 0.196       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.03       |
+|    explained_variance   | 0.81        |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.664       |
+|    n_updates            | 610         |
+|    policy_gradient_loss | 0.0058      |
+|    value_loss           | 21.7        |
+-----------------------------------------
+
+Current state: ChampionX.Level8.ChunLiVsGuile
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 454         |
+|    iterations           | 34          |
+|    time_elapsed         | 3234        |
+|    total_timesteps      | 1468800     |
+| train/                  |             |
+|    approx_kl            | 0.019701418 |
+|    clip_fraction        | 0.172       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.07       |
+|    explained_variance   | 0.833       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 2.15        |
+|    n_updates            | 620         |
+|    policy_gradient_loss | 0.00112     |
+|    value_loss           | 22.3        |
+-----------------------------------------
+
+Current state: ChampionX.Level6.ChunLiVsEHonda
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 453         |
+|    iterations           | 35          |
+|    time_elapsed         | 3332        |
+|    total_timesteps      | 1512000     |
+| train/                  |             |
+|    approx_kl            | 0.020245243 |
+|    clip_fraction        | 0.183       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.05       |
+|    explained_variance   | 0.815       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.494       |
+|    n_updates            | 630         |
+|    policy_gradient_loss | -0.00146    |
+|    value_loss           | 13.8        |
+-----------------------------------------
+
+Current state: ChampionX.Level10.ChunLiVsVega
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 453         |
+|    iterations           | 36          |
+|    time_elapsed         | 3430        |
+|    total_timesteps      | 1555200     |
+| train/                  |             |
+|    approx_kl            | 0.022184841 |
+|    clip_fraction        | 0.187       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.02       |
+|    explained_variance   | 0.761       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.232       |
+|    n_updates            | 640         |
+|    policy_gradient_loss | 0.00242     |
+|    value_loss           | 18.8        |
+-----------------------------------------
+
+Current state: ChampionX.Level4.ChunLiVsDhalsim
+----------------------------------------
+| time/                   |            |
+|    fps                  | 453        |
+|    iterations           | 37         |
+|    time_elapsed         | 3526       |
+|    total_timesteps      | 1598400    |
+| train/                  |            |
+|    approx_kl            | 0.01909801 |
+|    clip_fraction        | 0.172      |
+|    clip_range           | 0.2        |
+|    entropy_loss         | -8.07      |
+|    explained_variance   | 0.82       |
+|    learning_rate        | 0.0002     |
+|    loss                 | 1.39       |
+|    n_updates            | 650        |
+|    policy_gradient_loss | 0.00125    |
+|    value_loss           | 18.5       |
+----------------------------------------
+
+Current state: ChampionX.Level11.ChunLiVsSagat
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 453         |
+|    iterations           | 38          |
+|    time_elapsed         | 3623        |
+|    total_timesteps      | 1641600     |
+| train/                  |             |
+|    approx_kl            | 0.019127825 |
+|    clip_fraction        | 0.175       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.1        |
+|    explained_variance   | 0.796       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.646       |
+|    n_updates            | 660         |
+|    policy_gradient_loss | -0.0033     |
+|    value_loss           | 15          |
+-----------------------------------------
+
+Current state: ChampionX.Level2.ChunLiVsChunLi
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 452         |
+|    iterations           | 39          |
+|    time_elapsed         | 3720        |
+|    total_timesteps      | 1684800     |
+| train/                  |             |
+|    approx_kl            | 0.018327592 |
+|    clip_fraction        | 0.179       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.1        |
+|    explained_variance   | 0.718       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.905       |
+|    n_updates            | 670         |
+|    policy_gradient_loss | 0.000898    |
+|    value_loss           | 14.6        |
+-----------------------------------------
+
+Current state: ChampionX.Level1.ChunLiVsKen
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 452         |
+|    iterations           | 40          |
+|    time_elapsed         | 3818        |
+|    total_timesteps      | 1728000     |
+| train/                  |             |
+|    approx_kl            | 0.019133803 |
+|    clip_fraction        | 0.173       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.03       |
+|    explained_variance   | 0.839       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.95        |
+|    n_updates            | 680         |
+|    policy_gradient_loss | -0.00206    |
+|    value_loss           | 17.9        |
+-----------------------------------------
+
+Current state: ChampionX.Level1.ChunLiVsKen
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 452         |
+|    iterations           | 41          |
+|    time_elapsed         | 3916        |
+|    total_timesteps      | 1771200     |
+| train/                  |             |
+|    approx_kl            | 0.021123584 |
+|    clip_fraction        | 0.202       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.06       |
+|    explained_variance   | 0.775       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.615       |
+|    n_updates            | 690         |
+|    policy_gradient_loss | 0.000314    |
+|    value_loss           | 14          |
+-----------------------------------------
+
+Current state: ChampionX.Level2.ChunLiVsChunLi
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 452         |
+|    iterations           | 42          |
+|    time_elapsed         | 4013        |
+|    total_timesteps      | 1814400     |
+| train/                  |             |
+|    approx_kl            | 0.018802634 |
+|    clip_fraction        | 0.164       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.04       |
+|    explained_variance   | 0.811       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.295       |
+|    n_updates            | 700         |
+|    policy_gradient_loss | 0.00141     |
+|    value_loss           | 19.3        |
+-----------------------------------------
+
+Current state: ChampionX.Level9.ChunLiVsBalrog
+----------------------------------------
+| time/                   |            |
+|    fps                  | 451        |
+|    iterations           | 43         |
+|    time_elapsed         | 4109       |
+|    total_timesteps      | 1857600    |
+| train/                  |            |
+|    approx_kl            | 0.01865595 |
+|    clip_fraction        | 0.169      |
+|    clip_range           | 0.2        |
+|    entropy_loss         | -8.04      |
+|    explained_variance   | 0.818      |
+|    learning_rate        | 0.0002     |
+|    loss                 | 0.357      |
+|    n_updates            | 710        |
+|    policy_gradient_loss | 0.000324   |
+|    value_loss           | 19.1       |
+----------------------------------------
+
+Current state: ChampionX.Level11.ChunLiVsSagat
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 451         |
+|    iterations           | 44          |
+|    time_elapsed         | 4205        |
+|    total_timesteps      | 1900800     |
+| train/                  |             |
+|    approx_kl            | 0.022585243 |
+|    clip_fraction        | 0.195       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.04       |
+|    explained_variance   | 0.768       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.515       |
+|    n_updates            | 720         |
+|    policy_gradient_loss | 0.00268     |
+|    value_loss           | 18.3        |
+-----------------------------------------
+
+Current state: ChampionX.Level9.ChunLiVsBalrog
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 451         |
+|    iterations           | 45          |
+|    time_elapsed         | 4301        |
+|    total_timesteps      | 1944000     |
+| train/                  |             |
+|    approx_kl            | 0.020417377 |
+|    clip_fraction        | 0.173       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.07       |
+|    explained_variance   | 0.683       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.654       |
+|    n_updates            | 730         |
+|    policy_gradient_loss | 0.00203     |
+|    value_loss           | 20.7        |
+-----------------------------------------
+
+Current state: ChampionX.Level8.ChunLiVsGuile
+----------------------------------------
+| time/                   |            |
+|    fps                  | 451        |
+|    iterations           | 46         |
+|    time_elapsed         | 4397       |
+|    total_timesteps      | 1987200    |
+| train/                  |            |
+|    approx_kl            | 0.01640241 |
+|    clip_fraction        | 0.136      |
+|    clip_range           | 0.2        |
+|    entropy_loss         | -8.09      |
+|    explained_variance   | 0.855      |
+|    learning_rate        | 0.0002     |
+|    loss                 | 0.681      |
+|    n_updates            | 740        |
+|    policy_gradient_loss | -0.00244   |
+|    value_loss           | 18.1       |
+----------------------------------------
+
+Current state: ChampionX.Level9.ChunLiVsBalrog
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 451         |
+|    iterations           | 47          |
+|    time_elapsed         | 4492        |
+|    total_timesteps      | 2030400     |
+| train/                  |             |
+|    approx_kl            | 0.020942345 |
+|    clip_fraction        | 0.155       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.09       |
+|    explained_variance   | 0.799       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.847       |
+|    n_updates            | 750         |
+|    policy_gradient_loss | 0.00232     |
+|    value_loss           | 20.4        |
+-----------------------------------------
+
+Current state: ChampionX.Level6.ChunLiVsEHonda
+----------------------------------------
+| time/                   |            |
+|    fps                  | 451        |
+|    iterations           | 48         |
+|    time_elapsed         | 4588       |
+|    total_timesteps      | 2073600    |
+| train/                  |            |
+|    approx_kl            | 0.02003836 |
+|    clip_fraction        | 0.168      |
+|    clip_range           | 0.2        |
+|    entropy_loss         | -8.07      |
+|    explained_variance   | 0.815      |
+|    learning_rate        | 0.0002     |
+|    loss                 | 0.501      |
+|    n_updates            | 760        |
+|    policy_gradient_loss | -0.0017    |
+|    value_loss           | 13.2       |
+----------------------------------------
+
+Current state: ChampionX.Level8.ChunLiVsGuile
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 451         |
+|    iterations           | 49          |
+|    time_elapsed         | 4684        |
+|    total_timesteps      | 2116800     |
+| train/                  |             |
+|    approx_kl            | 0.022403738 |
+|    clip_fraction        | 0.173       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.1        |
+|    explained_variance   | 0.735       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 4.1         |
+|    n_updates            | 770         |
+|    policy_gradient_loss | 0.00325     |
+|    value_loss           | 35.2        |
+-----------------------------------------
+
+Current state: ChampionX.Level1.ChunLiVsKen
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 451         |
+|    iterations           | 50          |
+|    time_elapsed         | 4780        |
+|    total_timesteps      | 2160000     |
+| train/                  |             |
+|    approx_kl            | 0.020465719 |
+|    clip_fraction        | 0.171       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.09       |
+|    explained_variance   | 0.814       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.346       |
+|    n_updates            | 780         |
+|    policy_gradient_loss | 0.00119     |
+|    value_loss           | 18.8        |
+-----------------------------------------
+
+Current state: ChampionX.Level6.ChunLiVsEHonda
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 451         |
+|    iterations           | 51          |
+|    time_elapsed         | 4877        |
+|    total_timesteps      | 2203200     |
+| train/                  |             |
+|    approx_kl            | 0.019918704 |
+|    clip_fraction        | 0.163       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.07       |
+|    explained_variance   | 0.823       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.223       |
+|    n_updates            | 790         |
+|    policy_gradient_loss | -0.0011     |
+|    value_loss           | 16          |
+-----------------------------------------
+
+Current state: ChampionX.Level8.ChunLiVsGuile
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 451         |
+|    iterations           | 52          |
+|    time_elapsed         | 4973        |
+|    total_timesteps      | 2246400     |
+| train/                  |             |
+|    approx_kl            | 0.026293177 |
+|    clip_fraction        | 0.189       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.01       |
+|    explained_variance   | 0.786       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 1.37        |
+|    n_updates            | 800         |
+|    policy_gradient_loss | 0.00725     |
+|    value_loss           | 21.2        |
+-----------------------------------------
+
+Current state: ChampionX.Level8.ChunLiVsGuile
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 451         |
+|    iterations           | 53          |
+|    time_elapsed         | 5068        |
+|    total_timesteps      | 2289600     |
+| train/                  |             |
+|    approx_kl            | 0.018323697 |
+|    clip_fraction        | 0.159       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.06       |
+|    explained_variance   | 0.822       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 1.02        |
+|    n_updates            | 810         |
+|    policy_gradient_loss | 0.000499    |
+|    value_loss           | 17.8        |
+-----------------------------------------
+
+Current state: ChampionX.Level12.ChunLiVsBison
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 451         |
+|    iterations           | 54          |
+|    time_elapsed         | 5164        |
+|    total_timesteps      | 2332800     |
+| train/                  |             |
+|    approx_kl            | 0.022256708 |
+|    clip_fraction        | 0.186       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.04       |
+|    explained_variance   | 0.781       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.717       |
+|    n_updates            | 820         |
+|    policy_gradient_loss | 0.00159     |
+|    value_loss           | 16.5        |
+-----------------------------------------
+
+Current state: ChampionX.Level7.ChunLiVsBlanka
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 451         |
+|    iterations           | 55          |
+|    time_elapsed         | 5260        |
+|    total_timesteps      | 2376000     |
+| train/                  |             |
+|    approx_kl            | 0.020457426 |
+|    clip_fraction        | 0.177       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -7.99       |
+|    explained_variance   | 0.791       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 2.93        |
+|    n_updates            | 830         |
+|    policy_gradient_loss | -0.00147    |
+|    value_loss           | 17.3        |
+-----------------------------------------
+
+Current state: ChampionX.Level7.ChunLiVsBlanka
+---------------------------------------
+| time/                   |           |
+|    fps                  | 451       |
+|    iterations           | 56        |
+|    time_elapsed         | 5356      |
+|    total_timesteps      | 2419200   |
+| train/                  |           |
+|    approx_kl            | 0.0213855 |
+|    clip_fraction        | 0.178     |
+|    clip_range           | 0.2       |
+|    entropy_loss         | -8.05     |
+|    explained_variance   | 0.728     |
+|    learning_rate        | 0.0002    |
+|    loss                 | 0.302     |
+|    n_updates            | 840       |
+|    policy_gradient_loss | 0.00053   |
+|    value_loss           | 17.1      |
+---------------------------------------
+
+Current state: ChampionX.Level10.ChunLiVsVega
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 451         |
+|    iterations           | 57          |
+|    time_elapsed         | 5451        |
+|    total_timesteps      | 2462400     |
+| train/                  |             |
+|    approx_kl            | 0.021137744 |
+|    clip_fraction        | 0.173       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.08       |
+|    explained_variance   | 0.788       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.303       |
+|    n_updates            | 850         |
+|    policy_gradient_loss | -0.00111    |
+|    value_loss           | 14.6        |
+-----------------------------------------
+
+Current state: ChampionX.Level9.ChunLiVsBalrog
+----------------------------------------
+| time/                   |            |
+|    fps                  | 451        |
+|    iterations           | 58         |
+|    time_elapsed         | 5547       |
+|    total_timesteps      | 2505600    |
+| train/                  |            |
+|    approx_kl            | 0.02023245 |
+|    clip_fraction        | 0.169      |
+|    clip_range           | 0.2        |
+|    entropy_loss         | -8.05      |
+|    explained_variance   | 0.816      |
+|    learning_rate        | 0.0002     |
+|    loss                 | 0.361      |
+|    n_updates            | 860        |
+|    policy_gradient_loss | 0.000275   |
+|    value_loss           | 16.8       |
+----------------------------------------
+
+Current state: ChampionX.Level12.ChunLiVsBison
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 451         |
+|    iterations           | 59          |
+|    time_elapsed         | 5643        |
+|    total_timesteps      | 2548800     |
+| train/                  |             |
+|    approx_kl            | 0.019979084 |
+|    clip_fraction        | 0.175       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.04       |
+|    explained_variance   | 0.791       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.204       |
+|    n_updates            | 870         |
+|    policy_gradient_loss | -0.00152    |
+|    value_loss           | 12.4        |
+-----------------------------------------
+
+Current state: ChampionX.Level7.ChunLiVsBlanka
+----------------------------------------
+| time/                   |            |
+|    fps                  | 451        |
+|    iterations           | 60         |
+|    time_elapsed         | 5740       |
+|    total_timesteps      | 2592000    |
+| train/                  |            |
+|    approx_kl            | 0.02290177 |
+|    clip_fraction        | 0.189      |
+|    clip_range           | 0.2        |
+|    entropy_loss         | -8.06      |
+|    explained_variance   | 0.744      |
+|    learning_rate        | 0.0002     |
+|    loss                 | 0.599      |
+|    n_updates            | 880        |
+|    policy_gradient_loss | 0.00403    |
+|    value_loss           | 22.4       |
+----------------------------------------
+
+Current state: ChampionX.Level8.ChunLiVsGuile
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 451         |
+|    iterations           | 61          |
+|    time_elapsed         | 5837        |
+|    total_timesteps      | 2635200     |
+| train/                  |             |
+|    approx_kl            | 0.019065047 |
+|    clip_fraction        | 0.172       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.06       |
+|    explained_variance   | 0.736       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.933       |
+|    n_updates            | 890         |
+|    policy_gradient_loss | -0.000417   |
+|    value_loss           | 20.4        |
+-----------------------------------------
+
+Current state: ChampionX.Level6.ChunLiVsEHonda
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 451         |
+|    iterations           | 62          |
+|    time_elapsed         | 5935        |
+|    total_timesteps      | 2678400     |
+| train/                  |             |
+|    approx_kl            | 0.018739864 |
+|    clip_fraction        | 0.173       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.08       |
+|    explained_variance   | 0.818       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 1.44        |
+|    n_updates            | 900         |
+|    policy_gradient_loss | -0.002      |
+|    value_loss           | 15.5        |
+-----------------------------------------
+
+Current state: ChampionX.Level5.ChunLiVsRyu
+----------------------------------------
+| time/                   |            |
+|    fps                  | 451        |
+|    iterations           | 63         |
+|    time_elapsed         | 6032       |
+|    total_timesteps      | 2721600    |
+| train/                  |            |
+|    approx_kl            | 0.02123648 |
+|    clip_fraction        | 0.172      |
+|    clip_range           | 0.2        |
+|    entropy_loss         | -8.03      |
+|    explained_variance   | 0.82       |
+|    learning_rate        | 0.0002     |
+|    loss                 | 0.792      |
+|    n_updates            | 910        |
+|    policy_gradient_loss | 8.58e-05   |
+|    value_loss           | 16         |
+----------------------------------------
+
+Current state: ChampionX.Level9.ChunLiVsBalrog
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 450         |
+|    iterations           | 64          |
+|    time_elapsed         | 6130        |
+|    total_timesteps      | 2764800     |
+| train/                  |             |
+|    approx_kl            | 0.024432074 |
+|    clip_fraction        | 0.209       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -7.99       |
+|    explained_variance   | 0.829       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.864       |
+|    n_updates            | 920         |
+|    policy_gradient_loss | 0.00649     |
+|    value_loss           | 20          |
+-----------------------------------------
+
+Current state: ChampionX.Level12.ChunLiVsBison
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 450         |
+|    iterations           | 65          |
+|    time_elapsed         | 6228        |
+|    total_timesteps      | 2808000     |
+| train/                  |             |
+|    approx_kl            | 0.022781633 |
+|    clip_fraction        | 0.184       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.07       |
+|    explained_variance   | 0.78        |
+|    learning_rate        | 0.0002      |
+|    loss                 | 2.75        |
+|    n_updates            | 930         |
+|    policy_gradient_loss | 0.00143     |
+|    value_loss           | 16.6        |
+-----------------------------------------
+
+Current state: ChampionX.Level7.ChunLiVsBlanka
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 450         |
+|    iterations           | 66          |
+|    time_elapsed         | 6327        |
+|    total_timesteps      | 2851200     |
+| train/                  |             |
+|    approx_kl            | 0.020004842 |
+|    clip_fraction        | 0.165       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.07       |
+|    explained_variance   | 0.784       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.68        |
+|    n_updates            | 940         |
+|    policy_gradient_loss | -0.000158   |
+|    value_loss           | 24.8        |
+-----------------------------------------
+
+Current state: ChampionX.Level4.ChunLiVsDhalsim
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 450         |
+|    iterations           | 67          |
+|    time_elapsed         | 6425        |
+|    total_timesteps      | 2894400     |
+| train/                  |             |
+|    approx_kl            | 0.019052736 |
+|    clip_fraction        | 0.177       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.07       |
+|    explained_variance   | 0.801       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.805       |
+|    n_updates            | 950         |
+|    policy_gradient_loss | -0.00147    |
+|    value_loss           | 16.7        |
+-----------------------------------------
+
+Current state: ChampionX.Level10.ChunLiVsVega
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 450         |
+|    iterations           | 68          |
+|    time_elapsed         | 6522        |
+|    total_timesteps      | 2937600     |
+| train/                  |             |
+|    approx_kl            | 0.018338915 |
+|    clip_fraction        | 0.166       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.07       |
+|    explained_variance   | 0.824       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.278       |
+|    n_updates            | 960         |
+|    policy_gradient_loss | -0.00394    |
+|    value_loss           | 14.7        |
+-----------------------------------------
+
+Current state: ChampionX.Level12.ChunLiVsBison
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 450         |
+|    iterations           | 69          |
+|    time_elapsed         | 6619        |
+|    total_timesteps      | 2980800     |
+| train/                  |             |
+|    approx_kl            | 0.022207119 |
+|    clip_fraction        | 0.203       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.03       |
+|    explained_variance   | 0.777       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 1.76        |
+|    n_updates            | 970         |
+|    policy_gradient_loss | 0.00349     |
+|    value_loss           | 21.1        |
+-----------------------------------------
+
+Current state: ChampionX.Level4.ChunLiVsDhalsim
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 450         |
+|    iterations           | 70          |
+|    time_elapsed         | 6717        |
+|    total_timesteps      | 3024000     |
+| train/                  |             |
+|    approx_kl            | 0.023251278 |
+|    clip_fraction        | 0.207       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8          |
+|    explained_variance   | 0.769       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.308       |
+|    n_updates            | 980         |
+|    policy_gradient_loss | 0.00178     |
+|    value_loss           | 16.2        |
+-----------------------------------------
+
+Current state: ChampionX.Level11.ChunLiVsSagat
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 450         |
+|    iterations           | 71          |
+|    time_elapsed         | 6815        |
+|    total_timesteps      | 3067200     |
+| train/                  |             |
+|    approx_kl            | 0.018753793 |
+|    clip_fraction        | 0.166       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.09       |
+|    explained_variance   | 0.772       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 1.91        |
+|    n_updates            | 990         |
+|    policy_gradient_loss | 0.000509    |
+|    value_loss           | 20          |
+-----------------------------------------
+
+Current state: ChampionX.Level4.ChunLiVsDhalsim
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 449         |
+|    iterations           | 72          |
+|    time_elapsed         | 6913        |
+|    total_timesteps      | 3110400     |
+| train/                  |             |
+|    approx_kl            | 0.018791752 |
+|    clip_fraction        | 0.185       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.01       |
+|    explained_variance   | 0.716       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.526       |
+|    n_updates            | 1000        |
+|    policy_gradient_loss | 0.00248     |
+|    value_loss           | 17.2        |
+-----------------------------------------
+
+Current state: ChampionX.Level3.ChunLiVsZangief
+----------------------------------------
+| time/                   |            |
+|    fps                  | 449        |
+|    iterations           | 73         |
+|    time_elapsed         | 7011       |
+|    total_timesteps      | 3153600    |
+| train/                  |            |
+|    approx_kl            | 0.02178302 |
+|    clip_fraction        | 0.18       |
+|    clip_range           | 0.2        |
+|    entropy_loss         | -8.03      |
+|    explained_variance   | 0.675      |
+|    learning_rate        | 0.0002     |
+|    loss                 | 2.77       |
+|    n_updates            | 1010       |
+|    policy_gradient_loss | 0.00759    |
+|    value_loss           | 30.3       |
+----------------------------------------
+
+Current state: ChampionX.Level11.ChunLiVsSagat
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 449         |
+|    iterations           | 74          |
+|    time_elapsed         | 7108        |
+|    total_timesteps      | 3196800     |
+| train/                  |             |
+|    approx_kl            | 0.019278381 |
+|    clip_fraction        | 0.171       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.05       |
+|    explained_variance   | 0.748       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.566       |
+|    n_updates            | 1020        |
+|    policy_gradient_loss | 0.000132    |
+|    value_loss           | 15.9        |
+-----------------------------------------
+
+Current state: ChampionX.Level6.ChunLiVsEHonda
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 449         |
+|    iterations           | 75          |
+|    time_elapsed         | 7206        |
+|    total_timesteps      | 3240000     |
+| train/                  |             |
+|    approx_kl            | 0.018280571 |
+|    clip_fraction        | 0.153       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.04       |
+|    explained_variance   | 0.716       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 2.45        |
+|    n_updates            | 1030        |
+|    policy_gradient_loss | 0.000711    |
+|    value_loss           | 23.1        |
+-----------------------------------------
+
+Current state: ChampionX.Level1.ChunLiVsKen
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 449         |
+|    iterations           | 76          |
+|    time_elapsed         | 7303        |
+|    total_timesteps      | 3283200     |
+| train/                  |             |
+|    approx_kl            | 0.017658442 |
+|    clip_fraction        | 0.154       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.07       |
+|    explained_variance   | 0.838       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 1.92        |
+|    n_updates            | 1040        |
+|    policy_gradient_loss | 0.000735    |
+|    value_loss           | 20          |
+-----------------------------------------
+
+Current state: ChampionX.Level8.ChunLiVsGuile
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 449         |
+|    iterations           | 77          |
+|    time_elapsed         | 7401        |
+|    total_timesteps      | 3326400     |
+| train/                  |             |
+|    approx_kl            | 0.019725492 |
+|    clip_fraction        | 0.176       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.05       |
+|    explained_variance   | 0.791       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 2.38        |
+|    n_updates            | 1050        |
+|    policy_gradient_loss | 0.00148     |
+|    value_loss           | 28.7        |
+-----------------------------------------
+
+Current state: ChampionX.Level4.ChunLiVsDhalsim
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 449         |
+|    iterations           | 78          |
+|    time_elapsed         | 7498        |
+|    total_timesteps      | 3369600     |
+| train/                  |             |
+|    approx_kl            | 0.016949095 |
+|    clip_fraction        | 0.152       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.09       |
+|    explained_variance   | 0.784       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.878       |
+|    n_updates            | 1060        |
+|    policy_gradient_loss | -0.00178    |
+|    value_loss           | 19.1        |
+-----------------------------------------
+
+Current state: ChampionX.Level11.ChunLiVsSagat
+----------------------------------------
+| time/                   |            |
+|    fps                  | 449        |
+|    iterations           | 79         |
+|    time_elapsed         | 7596       |
+|    total_timesteps      | 3412800    |
+| train/                  |            |
+|    approx_kl            | 0.02026636 |
+|    clip_fraction        | 0.181      |
+|    clip_range           | 0.2        |
+|    entropy_loss         | -8.05      |
+|    explained_variance   | 0.775      |
+|    learning_rate        | 0.0002     |
+|    loss                 | 3.35       |
+|    n_updates            | 1070       |
+|    policy_gradient_loss | 0.000907   |
+|    value_loss           | 15         |
+----------------------------------------
+
+Current state: ChampionX.Level7.ChunLiVsBlanka
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 449         |
+|    iterations           | 80          |
+|    time_elapsed         | 7693        |
+|    total_timesteps      | 3456000     |
+| train/                  |             |
+|    approx_kl            | 0.020292694 |
+|    clip_fraction        | 0.172       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.08       |
+|    explained_variance   | 0.729       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.937       |
+|    n_updates            | 1080        |
+|    policy_gradient_loss | -0.000479   |
+|    value_loss           | 14.2        |
+-----------------------------------------
+
+Current state: ChampionX.Level11.ChunLiVsSagat
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 449         |
+|    iterations           | 81          |
+|    time_elapsed         | 7790        |
+|    total_timesteps      | 3499200     |
+| train/                  |             |
+|    approx_kl            | 0.021046823 |
+|    clip_fraction        | 0.17        |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.04       |
+|    explained_variance   | 0.814       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 1.19        |
+|    n_updates            | 1090        |
+|    policy_gradient_loss | 0.00343     |
+|    value_loss           | 21.8        |
+-----------------------------------------
+
+Current state: ChampionX.Level5.ChunLiVsRyu
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 449         |
+|    iterations           | 82          |
+|    time_elapsed         | 7888        |
+|    total_timesteps      | 3542400     |
+| train/                  |             |
+|    approx_kl            | 0.018265078 |
+|    clip_fraction        | 0.16        |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.1        |
+|    explained_variance   | 0.771       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 1.41        |
+|    n_updates            | 1100        |
+|    policy_gradient_loss | -0.00154    |
+|    value_loss           | 17.9        |
+-----------------------------------------
+
+Current state: ChampionX.Level3.ChunLiVsZangief
+----------------------------------------
+| time/                   |            |
+|    fps                  | 448        |
+|    iterations           | 83         |
+|    time_elapsed         | 7986       |
+|    total_timesteps      | 3585600    |
+| train/                  |            |
+|    approx_kl            | 0.01761453 |
+|    clip_fraction        | 0.156      |
+|    clip_range           | 0.2        |
+|    entropy_loss         | -8.08      |
+|    explained_variance   | 0.848      |
+|    learning_rate        | 0.0002     |
+|    loss                 | 1.5        |
+|    n_updates            | 1110       |
+|    policy_gradient_loss | 4e-05      |
+|    value_loss           | 22.4       |
+----------------------------------------
+
+Current state: ChampionX.Level6.ChunLiVsEHonda
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 448         |
+|    iterations           | 84          |
+|    time_elapsed         | 8083        |
+|    total_timesteps      | 3628800     |
+| train/                  |             |
+|    approx_kl            | 0.019479048 |
+|    clip_fraction        | 0.167       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.13       |
+|    explained_variance   | 0.782       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 2.93        |
+|    n_updates            | 1120        |
+|    policy_gradient_loss | -0.00179    |
+|    value_loss           | 16.4        |
+-----------------------------------------
+
+Current state: ChampionX.Level4.ChunLiVsDhalsim
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 448         |
+|    iterations           | 85          |
+|    time_elapsed         | 8180        |
+|    total_timesteps      | 3672000     |
+| train/                  |             |
+|    approx_kl            | 0.017283197 |
+|    clip_fraction        | 0.149       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.12       |
+|    explained_variance   | 0.791       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 1.54        |
+|    n_updates            | 1130        |
+|    policy_gradient_loss | -0.00178    |
+|    value_loss           | 20.1        |
+-----------------------------------------
+
+Current state: ChampionX.Level3.ChunLiVsZangief
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 448         |
+|    iterations           | 86          |
+|    time_elapsed         | 8278        |
+|    total_timesteps      | 3715200     |
+| train/                  |             |
+|    approx_kl            | 0.019106768 |
+|    clip_fraction        | 0.178       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.09       |
+|    explained_variance   | 0.787       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 4.53        |
+|    n_updates            | 1140        |
+|    policy_gradient_loss | 0.00461     |
+|    value_loss           | 25.3        |
+-----------------------------------------
+
+Current state: ChampionX.Level2.ChunLiVsChunLi
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 448         |
+|    iterations           | 87          |
+|    time_elapsed         | 8376        |
+|    total_timesteps      | 3758400     |
+| train/                  |             |
+|    approx_kl            | 0.019611303 |
+|    clip_fraction        | 0.182       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.09       |
+|    explained_variance   | 0.782       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 1.7         |
+|    n_updates            | 1150        |
+|    policy_gradient_loss | 0.000516    |
+|    value_loss           | 16.3        |
+-----------------------------------------
+
+Current state: ChampionX.Level1.ChunLiVsKen
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 448         |
+|    iterations           | 88          |
+|    time_elapsed         | 8473        |
+|    total_timesteps      | 3801600     |
+| train/                  |             |
+|    approx_kl            | 0.017416934 |
+|    clip_fraction        | 0.168       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.05       |
+|    explained_variance   | 0.773       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 2.32        |
+|    n_updates            | 1160        |
+|    policy_gradient_loss | 0.000683    |
+|    value_loss           | 26.5        |
+-----------------------------------------
+
+Current state: ChampionX.Level10.ChunLiVsVega
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 448         |
+|    iterations           | 89          |
+|    time_elapsed         | 8570        |
+|    total_timesteps      | 3844800     |
+| train/                  |             |
+|    approx_kl            | 0.020442067 |
+|    clip_fraction        | 0.18        |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.11       |
+|    explained_variance   | 0.799       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.598       |
+|    n_updates            | 1170        |
+|    policy_gradient_loss | 0.00176     |
+|    value_loss           | 15          |
+-----------------------------------------
+
+Current state: ChampionX.Level12.ChunLiVsBison
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 448         |
+|    iterations           | 90          |
+|    time_elapsed         | 8668        |
+|    total_timesteps      | 3888000     |
+| train/                  |             |
+|    approx_kl            | 0.017660897 |
+|    clip_fraction        | 0.159       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.04       |
+|    explained_variance   | 0.795       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.928       |
+|    n_updates            | 1180        |
+|    policy_gradient_loss | 0.00123     |
+|    value_loss           | 17.6        |
+-----------------------------------------
+
+Current state: ChampionX.Level1.ChunLiVsKen
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 448         |
+|    iterations           | 91          |
+|    time_elapsed         | 8766        |
+|    total_timesteps      | 3931200     |
+| train/                  |             |
+|    approx_kl            | 0.016381918 |
+|    clip_fraction        | 0.163       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.06       |
+|    explained_variance   | 0.811       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.488       |
+|    n_updates            | 1190        |
+|    policy_gradient_loss | 0.000215    |
+|    value_loss           | 13.9        |
+-----------------------------------------
+
+Current state: ChampionX.Level11.ChunLiVsSagat
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 448         |
+|    iterations           | 92          |
+|    time_elapsed         | 8863        |
+|    total_timesteps      | 3974400     |
+| train/                  |             |
+|    approx_kl            | 0.017840233 |
+|    clip_fraction        | 0.167       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.03       |
+|    explained_variance   | 0.762       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.54        |
+|    n_updates            | 1200        |
+|    policy_gradient_loss | 0.00261     |
+|    value_loss           | 20.3        |
+-----------------------------------------
+
+Current state: ChampionX.Level10.ChunLiVsVega
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 448         |
+|    iterations           | 93          |
+|    time_elapsed         | 8961        |
+|    total_timesteps      | 4017600     |
+| train/                  |             |
+|    approx_kl            | 0.020303266 |
+|    clip_fraction        | 0.16        |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.05       |
+|    explained_variance   | 0.782       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.876       |
+|    n_updates            | 1210        |
+|    policy_gradient_loss | 0.00217     |
+|    value_loss           | 19.1        |
+-----------------------------------------
+
+Current state: ChampionX.Level1.ChunLiVsKen
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 448         |
+|    iterations           | 94          |
+|    time_elapsed         | 9058        |
+|    total_timesteps      | 4060800     |
+| train/                  |             |
+|    approx_kl            | 0.018209128 |
+|    clip_fraction        | 0.158       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.09       |
+|    explained_variance   | 0.827       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.47        |
+|    n_updates            | 1220        |
+|    policy_gradient_loss | 0.00344     |
+|    value_loss           | 22.1        |
+-----------------------------------------
+
+Current state: ChampionX.Level9.ChunLiVsBalrog
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 448         |
+|    iterations           | 95          |
+|    time_elapsed         | 9155        |
+|    total_timesteps      | 4104000     |
+| train/                  |             |
+|    approx_kl            | 0.016349936 |
+|    clip_fraction        | 0.16        |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.07       |
+|    explained_variance   | 0.816       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.436       |
+|    n_updates            | 1230        |
+|    policy_gradient_loss | -0.00384    |
+|    value_loss           | 12.7        |
+-----------------------------------------
+
+Current state: ChampionX.Level7.ChunLiVsBlanka
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 448         |
+|    iterations           | 96          |
+|    time_elapsed         | 9253        |
+|    total_timesteps      | 4147200     |
+| train/                  |             |
+|    approx_kl            | 0.016977612 |
+|    clip_fraction        | 0.148       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.09       |
+|    explained_variance   | 0.807       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.708       |
+|    n_updates            | 1240        |
+|    policy_gradient_loss | 0.000471    |
+|    value_loss           | 20.7        |
+-----------------------------------------
+
+Current state: ChampionX.Level6.ChunLiVsEHonda
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 448         |
+|    iterations           | 97          |
+|    time_elapsed         | 9349        |
+|    total_timesteps      | 4190400     |
+| train/                  |             |
+|    approx_kl            | 0.020063082 |
+|    clip_fraction        | 0.177       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.11       |
+|    explained_variance   | 0.751       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.891       |
+|    n_updates            | 1250        |
+|    policy_gradient_loss | 0.00348     |
+|    value_loss           | 21.2        |
+-----------------------------------------
+
+Current state: ChampionX.Level6.ChunLiVsEHonda
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 448         |
+|    iterations           | 98          |
+|    time_elapsed         | 9445        |
+|    total_timesteps      | 4233600     |
+| train/                  |             |
+|    approx_kl            | 0.019297507 |
+|    clip_fraction        | 0.163       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.12       |
+|    explained_variance   | 0.773       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.771       |
+|    n_updates            | 1260        |
+|    policy_gradient_loss | 0.0029      |
+|    value_loss           | 15.1        |
+-----------------------------------------
+
+Current state: ChampionX.Level11.ChunLiVsSagat
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 448         |
+|    iterations           | 99          |
+|    time_elapsed         | 9540        |
+|    total_timesteps      | 4276800     |
+| train/                  |             |
+|    approx_kl            | 0.017202292 |
+|    clip_fraction        | 0.154       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.08       |
+|    explained_variance   | 0.818       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 1.97        |
+|    n_updates            | 1270        |
+|    policy_gradient_loss | 0.00314     |
+|    value_loss           | 22.2        |
+-----------------------------------------
+
+Current state: ChampionX.Level8.ChunLiVsGuile
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 448         |
+|    iterations           | 100         |
+|    time_elapsed         | 9635        |
+|    total_timesteps      | 4320000     |
+| train/                  |             |
+|    approx_kl            | 0.019228933 |
+|    clip_fraction        | 0.172       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.04       |
+|    explained_variance   | 0.803       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 1.84        |
+|    n_updates            | 1280        |
+|    policy_gradient_loss | 0.00495     |
+|    value_loss           | 27.9        |
+-----------------------------------------
+
+Current state: ChampionX.Level7.ChunLiVsBlanka
+----------------------------------------
+| time/                   |            |
+|    fps                  | 448        |
+|    iterations           | 101        |
+|    time_elapsed         | 9732       |
+|    total_timesteps      | 4363200    |
+| train/                  |            |
+|    approx_kl            | 0.01626399 |
+|    clip_fraction        | 0.148      |
+|    clip_range           | 0.2        |
+|    entropy_loss         | -8.1       |
+|    explained_variance   | 0.863      |
+|    learning_rate        | 0.0002     |
+|    loss                 | 1.23       |
+|    n_updates            | 1290       |
+|    policy_gradient_loss | -0.000295  |
+|    value_loss           | 18.7       |
+----------------------------------------
+
+Current state: ChampionX.Level10.ChunLiVsVega
+----------------------------------------
+| time/                   |            |
+|    fps                  | 448        |
+|    iterations           | 102        |
+|    time_elapsed         | 9827       |
+|    total_timesteps      | 4406400    |
+| train/                  |            |
+|    approx_kl            | 0.01741675 |
+|    clip_fraction        | 0.167      |
+|    clip_range           | 0.2        |
+|    entropy_loss         | -8.06      |
+|    explained_variance   | 0.81       |
+|    learning_rate        | 0.0002     |
+|    loss                 | 0.693      |
+|    n_updates            | 1300       |
+|    policy_gradient_loss | 0.00085    |
+|    value_loss           | 16.9       |
+----------------------------------------
+
+Current state: ChampionX.Level3.ChunLiVsZangief
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 448         |
+|    iterations           | 103         |
+|    time_elapsed         | 9922        |
+|    total_timesteps      | 4449600     |
+| train/                  |             |
+|    approx_kl            | 0.017767375 |
+|    clip_fraction        | 0.146       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.12       |
+|    explained_variance   | 0.782       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.44        |
+|    n_updates            | 1310        |
+|    policy_gradient_loss | 0.000446    |
+|    value_loss           | 16.6        |
+-----------------------------------------
+
+Current state: ChampionX.Level7.ChunLiVsBlanka
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 448         |
+|    iterations           | 104         |
+|    time_elapsed         | 10018       |
+|    total_timesteps      | 4492800     |
+| train/                  |             |
+|    approx_kl            | 0.018537082 |
+|    clip_fraction        | 0.177       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.06       |
+|    explained_variance   | 0.782       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.594       |
+|    n_updates            | 1320        |
+|    policy_gradient_loss | 0.00192     |
+|    value_loss           | 16.5        |
+-----------------------------------------
+
+Current state: ChampionX.Level10.ChunLiVsVega
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 448         |
+|    iterations           | 105         |
+|    time_elapsed         | 10113       |
+|    total_timesteps      | 4536000     |
+| train/                  |             |
+|    approx_kl            | 0.016387263 |
+|    clip_fraction        | 0.151       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.08       |
+|    explained_variance   | 0.779       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.897       |
+|    n_updates            | 1330        |
+|    policy_gradient_loss | 0.00349     |
+|    value_loss           | 24          |
+-----------------------------------------
+
+Current state: ChampionX.Level3.ChunLiVsZangief
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 448         |
+|    iterations           | 106         |
+|    time_elapsed         | 10208       |
+|    total_timesteps      | 4579200     |
+| train/                  |             |
+|    approx_kl            | 0.016566757 |
+|    clip_fraction        | 0.168       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.1        |
+|    explained_variance   | 0.826       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.545       |
+|    n_updates            | 1340        |
+|    policy_gradient_loss | 0.00131     |
+|    value_loss           | 16.9        |
+-----------------------------------------
+
+Current state: ChampionX.Level7.ChunLiVsBlanka
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 448         |
+|    iterations           | 107         |
+|    time_elapsed         | 10304       |
+|    total_timesteps      | 4622400     |
+| train/                  |             |
+|    approx_kl            | 0.015347375 |
+|    clip_fraction        | 0.159       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.08       |
+|    explained_variance   | 0.81        |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.311       |
+|    n_updates            | 1350        |
+|    policy_gradient_loss | -0.00268    |
+|    value_loss           | 12.9        |
+-----------------------------------------
+
+Current state: ChampionX.Level1.ChunLiVsKen
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 448         |
+|    iterations           | 108         |
+|    time_elapsed         | 10400       |
+|    total_timesteps      | 4665600     |
+| train/                  |             |
+|    approx_kl            | 0.016015483 |
+|    clip_fraction        | 0.155       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.11       |
+|    explained_variance   | 0.797       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 2.26        |
+|    n_updates            | 1360        |
+|    policy_gradient_loss | -0.00208    |
+|    value_loss           | 20.3        |
+-----------------------------------------
+
+Current state: ChampionX.Level6.ChunLiVsEHonda
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 448         |
+|    iterations           | 109         |
+|    time_elapsed         | 10495       |
+|    total_timesteps      | 4708800     |
+| train/                  |             |
+|    approx_kl            | 0.016567804 |
+|    clip_fraction        | 0.155       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.15       |
+|    explained_variance   | 0.829       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 1.34        |
+|    n_updates            | 1370        |
+|    policy_gradient_loss | 0.0028      |
+|    value_loss           | 17.5        |
+-----------------------------------------
+
+Current state: ChampionX.Level3.ChunLiVsZangief
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 448         |
+|    iterations           | 110         |
+|    time_elapsed         | 10591       |
+|    total_timesteps      | 4752000     |
+| train/                  |             |
+|    approx_kl            | 0.018200098 |
+|    clip_fraction        | 0.168       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.13       |
+|    explained_variance   | 0.831       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.665       |
+|    n_updates            | 1380        |
+|    policy_gradient_loss | 0.00141     |
+|    value_loss           | 19.2        |
+-----------------------------------------
+
+Current state: ChampionX.Level7.ChunLiVsBlanka
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 448         |
+|    iterations           | 111         |
+|    time_elapsed         | 10686       |
+|    total_timesteps      | 4795200     |
+| train/                  |             |
+|    approx_kl            | 0.018930672 |
+|    clip_fraction        | 0.185       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.15       |
+|    explained_variance   | 0.833       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 1.09        |
+|    n_updates            | 1390        |
+|    policy_gradient_loss | 0.00529     |
+|    value_loss           | 19.5        |
+-----------------------------------------
+
+Current state: ChampionX.Level9.ChunLiVsBalrog
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 448         |
+|    iterations           | 112         |
+|    time_elapsed         | 10782       |
+|    total_timesteps      | 4838400     |
+| train/                  |             |
+|    approx_kl            | 0.015160192 |
+|    clip_fraction        | 0.158       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.12       |
+|    explained_variance   | 0.833       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 2.37        |
+|    n_updates            | 1400        |
+|    policy_gradient_loss | -0.000663   |
+|    value_loss           | 21          |
+-----------------------------------------
+
+Current state: ChampionX.Level7.ChunLiVsBlanka
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 448         |
+|    iterations           | 113         |
+|    time_elapsed         | 10878       |
+|    total_timesteps      | 4881600     |
+| train/                  |             |
+|    approx_kl            | 0.017860955 |
+|    clip_fraction        | 0.171       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.12       |
+|    explained_variance   | 0.82        |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.924       |
+|    n_updates            | 1410        |
+|    policy_gradient_loss | -0.000111   |
+|    value_loss           | 16.9        |
+-----------------------------------------
+
+Current state: ChampionX.Level7.ChunLiVsBlanka
+----------------------------------------
+| time/                   |            |
+|    fps                  | 448        |
+|    iterations           | 114        |
+|    time_elapsed         | 10974      |
+|    total_timesteps      | 4924800    |
+| train/                  |            |
+|    approx_kl            | 0.02072464 |
+|    clip_fraction        | 0.183      |
+|    clip_range           | 0.2        |
+|    entropy_loss         | -8.15      |
+|    explained_variance   | 0.788      |
+|    learning_rate        | 0.0002     |
+|    loss                 | 1.77       |
+|    n_updates            | 1420       |
+|    policy_gradient_loss | 0.00299    |
+|    value_loss           | 23.1       |
+----------------------------------------
+
+Current state: ChampionX.Level8.ChunLiVsGuile
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 448         |
+|    iterations           | 115         |
+|    time_elapsed         | 11069       |
+|    total_timesteps      | 4968000     |
+| train/                  |             |
+|    approx_kl            | 0.016052378 |
+|    clip_fraction        | 0.158       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.16       |
+|    explained_variance   | 0.845       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.692       |
+|    n_updates            | 1430        |
+|    policy_gradient_loss | -0.00267    |
+|    value_loss           | 16.4        |
+-----------------------------------------
+
+Current state: ChampionX.Level12.ChunLiVsBison
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 448         |
+|    iterations           | 116         |
+|    time_elapsed         | 11165       |
+|    total_timesteps      | 5011200     |
+| train/                  |             |
+|    approx_kl            | 0.019034935 |
+|    clip_fraction        | 0.177       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.15       |
+|    explained_variance   | 0.814       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 1.25        |
+|    n_updates            | 1440        |
+|    policy_gradient_loss | -0.000176   |
+|    value_loss           | 20.7        |
+-----------------------------------------
+
+Current state: ChampionX.Level7.ChunLiVsBlanka
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 448         |
+|    iterations           | 117         |
+|    time_elapsed         | 11260       |
+|    total_timesteps      | 5054400     |
+| train/                  |             |
+|    approx_kl            | 0.017005827 |
+|    clip_fraction        | 0.179       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.15       |
+|    explained_variance   | 0.811       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 2.66        |
+|    n_updates            | 1450        |
+|    policy_gradient_loss | 0.000235    |
+|    value_loss           | 14          |
+-----------------------------------------
+
+Current state: ChampionX.Level1.ChunLiVsKen
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 448         |
+|    iterations           | 118         |
+|    time_elapsed         | 11356       |
+|    total_timesteps      | 5097600     |
+| train/                  |             |
+|    approx_kl            | 0.016972119 |
+|    clip_fraction        | 0.169       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.16       |
+|    explained_variance   | 0.785       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.495       |
+|    n_updates            | 1460        |
+|    policy_gradient_loss | 0.00187     |
+|    value_loss           | 19.5        |
+-----------------------------------------
+
+Current state: ChampionX.Level2.ChunLiVsChunLi
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 448         |
+|    iterations           | 119         |
+|    time_elapsed         | 11451       |
+|    total_timesteps      | 5140800     |
+| train/                  |             |
+|    approx_kl            | 0.015783915 |
+|    clip_fraction        | 0.159       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.15       |
+|    explained_variance   | 0.812       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.603       |
+|    n_updates            | 1470        |
+|    policy_gradient_loss | -0.000571   |
+|    value_loss           | 21.2        |
+-----------------------------------------
+
+Current state: ChampionX.Level7.ChunLiVsBlanka
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 448         |
+|    iterations           | 120         |
+|    time_elapsed         | 11547       |
+|    total_timesteps      | 5184000     |
+| train/                  |             |
+|    approx_kl            | 0.017954912 |
+|    clip_fraction        | 0.186       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.14       |
+|    explained_variance   | 0.781       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.7         |
+|    n_updates            | 1480        |
+|    policy_gradient_loss | 0.00359     |
+|    value_loss           | 24.6        |
+-----------------------------------------
+
+Current state: ChampionX.Level5.ChunLiVsRyu
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 448         |
+|    iterations           | 121         |
+|    time_elapsed         | 11642       |
+|    total_timesteps      | 5227200     |
+| train/                  |             |
+|    approx_kl            | 0.017439196 |
+|    clip_fraction        | 0.182       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.15       |
+|    explained_variance   | 0.782       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.972       |
+|    n_updates            | 1490        |
+|    policy_gradient_loss | 0.0017      |
+|    value_loss           | 21.6        |
+-----------------------------------------
+
+Current state: ChampionX.Level3.ChunLiVsZangief
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 448         |
+|    iterations           | 122         |
+|    time_elapsed         | 11738       |
+|    total_timesteps      | 5270400     |
+| train/                  |             |
+|    approx_kl            | 0.016962286 |
+|    clip_fraction        | 0.173       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.16       |
+|    explained_variance   | 0.807       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.875       |
+|    n_updates            | 1500        |
+|    policy_gradient_loss | 0.000824    |
+|    value_loss           | 18.2        |
+-----------------------------------------
+
+Current state: ChampionX.Level12.ChunLiVsBison
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 449         |
+|    iterations           | 123         |
+|    time_elapsed         | 11833       |
+|    total_timesteps      | 5313600     |
+| train/                  |             |
+|    approx_kl            | 0.017236924 |
+|    clip_fraction        | 0.162       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.16       |
+|    explained_variance   | 0.779       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.853       |
+|    n_updates            | 1510        |
+|    policy_gradient_loss | 0.000141    |
+|    value_loss           | 18.9        |
+-----------------------------------------
+
+Current state: ChampionX.Level5.ChunLiVsRyu
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 449         |
+|    iterations           | 124         |
+|    time_elapsed         | 11928       |
+|    total_timesteps      | 5356800     |
+| train/                  |             |
+|    approx_kl            | 0.016021965 |
+|    clip_fraction        | 0.157       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.16       |
+|    explained_variance   | 0.83        |
+|    learning_rate        | 0.0002      |
+|    loss                 | 1           |
+|    n_updates            | 1520        |
+|    policy_gradient_loss | 0.00109     |
+|    value_loss           | 20          |
+-----------------------------------------
+
+Current state: ChampionX.Level10.ChunLiVsVega
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 449         |
+|    iterations           | 125         |
+|    time_elapsed         | 12024       |
+|    total_timesteps      | 5400000     |
+| train/                  |             |
+|    approx_kl            | 0.015824681 |
+|    clip_fraction        | 0.166       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.18       |
+|    explained_variance   | 0.803       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.51        |
+|    n_updates            | 1530        |
+|    policy_gradient_loss | 0.00165     |
+|    value_loss           | 17.8        |
+-----------------------------------------
+
+Current state: ChampionX.Level9.ChunLiVsBalrog
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 449         |
+|    iterations           | 126         |
+|    time_elapsed         | 12119       |
+|    total_timesteps      | 5443200     |
+| train/                  |             |
+|    approx_kl            | 0.014095656 |
+|    clip_fraction        | 0.14        |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.16       |
+|    explained_variance   | 0.809       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.666       |
+|    n_updates            | 1540        |
+|    policy_gradient_loss | -0.00077    |
+|    value_loss           | 20.5        |
+-----------------------------------------
+
+Current state: ChampionX.Level9.ChunLiVsBalrog
+----------------------------------------
+| time/                   |            |
+|    fps                  | 449        |
+|    iterations           | 127        |
+|    time_elapsed         | 12216      |
+|    total_timesteps      | 5486400    |
+| train/                  |            |
+|    approx_kl            | 0.01563808 |
+|    clip_fraction        | 0.154      |
+|    clip_range           | 0.2        |
+|    entropy_loss         | -8.15      |
+|    explained_variance   | 0.798      |
+|    learning_rate        | 0.0002     |
+|    loss                 | 0.739      |
+|    n_updates            | 1550       |
+|    policy_gradient_loss | -0.000601  |
+|    value_loss           | 17.6       |
+----------------------------------------
+
+Current state: ChampionX.Level5.ChunLiVsRyu
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 449         |
+|    iterations           | 128         |
+|    time_elapsed         | 12311       |
+|    total_timesteps      | 5529600     |
+| train/                  |             |
+|    approx_kl            | 0.016478073 |
+|    clip_fraction        | 0.159       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.15       |
+|    explained_variance   | 0.763       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 1.21        |
+|    n_updates            | 1560        |
+|    policy_gradient_loss | 0.000911    |
+|    value_loss           | 21.1        |
+-----------------------------------------
+
+Current state: ChampionX.Level6.ChunLiVsEHonda
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 449         |
+|    iterations           | 129         |
+|    time_elapsed         | 12407       |
+|    total_timesteps      | 5572800     |
+| train/                  |             |
+|    approx_kl            | 0.016799105 |
+|    clip_fraction        | 0.155       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.14       |
+|    explained_variance   | 0.795       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 1.04        |
+|    n_updates            | 1570        |
+|    policy_gradient_loss | 0.00415     |
+|    value_loss           | 33.2        |
+-----------------------------------------
+
+Current state: ChampionX.Level8.ChunLiVsGuile
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 449         |
+|    iterations           | 130         |
+|    time_elapsed         | 12503       |
+|    total_timesteps      | 5616000     |
+| train/                  |             |
+|    approx_kl            | 0.013092292 |
+|    clip_fraction        | 0.136       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.15       |
+|    explained_variance   | 0.801       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 1.22        |
+|    n_updates            | 1580        |
+|    policy_gradient_loss | -0.00466    |
+|    value_loss           | 16.9        |
+-----------------------------------------
+
+Current state: ChampionX.Level3.ChunLiVsZangief
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 449         |
+|    iterations           | 131         |
+|    time_elapsed         | 12598       |
+|    total_timesteps      | 5659200     |
+| train/                  |             |
+|    approx_kl            | 0.022095175 |
+|    clip_fraction        | 0.218       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.11       |
+|    explained_variance   | 0.767       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 1.75        |
+|    n_updates            | 1590        |
+|    policy_gradient_loss | 0.00969     |
+|    value_loss           | 28.1        |
+-----------------------------------------
+
+Current state: ChampionX.Level2.ChunLiVsChunLi
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 449         |
+|    iterations           | 132         |
+|    time_elapsed         | 12693       |
+|    total_timesteps      | 5702400     |
+| train/                  |             |
+|    approx_kl            | 0.015401343 |
+|    clip_fraction        | 0.155       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.13       |
+|    explained_variance   | 0.783       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.389       |
+|    n_updates            | 1600        |
+|    policy_gradient_loss | 0.00122     |
+|    value_loss           | 16.4        |
+-----------------------------------------
+
+Current state: ChampionX.Level3.ChunLiVsZangief
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 449         |
+|    iterations           | 133         |
+|    time_elapsed         | 12789       |
+|    total_timesteps      | 5745600     |
+| train/                  |             |
+|    approx_kl            | 0.013617316 |
+|    clip_fraction        | 0.135       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.11       |
+|    explained_variance   | 0.82        |
+|    learning_rate        | 0.0002      |
+|    loss                 | 1.51        |
+|    n_updates            | 1610        |
+|    policy_gradient_loss | -0.0011     |
+|    value_loss           | 18.3        |
+-----------------------------------------
+
+Current state: ChampionX.Level1.ChunLiVsKen
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 449         |
+|    iterations           | 134         |
+|    time_elapsed         | 12886       |
+|    total_timesteps      | 5788800     |
+| train/                  |             |
+|    approx_kl            | 0.018610569 |
+|    clip_fraction        | 0.2         |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.11       |
+|    explained_variance   | 0.72        |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.652       |
+|    n_updates            | 1620        |
+|    policy_gradient_loss | 0.00408     |
+|    value_loss           | 24.8        |
+-----------------------------------------
+
+Current state: ChampionX.Level5.ChunLiVsRyu
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 449         |
+|    iterations           | 135         |
+|    time_elapsed         | 12984       |
+|    total_timesteps      | 5832000     |
+| train/                  |             |
+|    approx_kl            | 0.013793538 |
+|    clip_fraction        | 0.135       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.12       |
+|    explained_variance   | 0.811       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 1.18        |
+|    n_updates            | 1630        |
+|    policy_gradient_loss | 3.8e-05     |
+|    value_loss           | 19.7        |
+-----------------------------------------
+
+Current state: ChampionX.Level3.ChunLiVsZangief
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 449         |
+|    iterations           | 136         |
+|    time_elapsed         | 13081       |
+|    total_timesteps      | 5875200     |
+| train/                  |             |
+|    approx_kl            | 0.015575893 |
+|    clip_fraction        | 0.164       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.13       |
+|    explained_variance   | 0.803       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.503       |
+|    n_updates            | 1640        |
+|    policy_gradient_loss | 0.000462    |
+|    value_loss           | 17.1        |
+-----------------------------------------
+
+Current state: ChampionX.Level7.ChunLiVsBlanka
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 449         |
+|    iterations           | 137         |
+|    time_elapsed         | 13178       |
+|    total_timesteps      | 5918400     |
+| train/                  |             |
+|    approx_kl            | 0.016451944 |
+|    clip_fraction        | 0.165       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.12       |
+|    explained_variance   | 0.802       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.83        |
+|    n_updates            | 1650        |
+|    policy_gradient_loss | 0.000427    |
+|    value_loss           | 19.6        |
+-----------------------------------------
+
+Current state: ChampionX.Level2.ChunLiVsChunLi
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 449         |
+|    iterations           | 138         |
+|    time_elapsed         | 13275       |
+|    total_timesteps      | 5961600     |
+| train/                  |             |
+|    approx_kl            | 0.013083423 |
+|    clip_fraction        | 0.132       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.13       |
+|    explained_variance   | 0.816       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 1.46        |
+|    n_updates            | 1660        |
+|    policy_gradient_loss | -0.000823   |
+|    value_loss           | 23.6        |
+-----------------------------------------
+
+Current state: ChampionX.Level5.ChunLiVsRyu
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 449         |
+|    iterations           | 139         |
+|    time_elapsed         | 13373       |
+|    total_timesteps      | 6004800     |
+| train/                  |             |
+|    approx_kl            | 0.016260127 |
+|    clip_fraction        | 0.16        |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.14       |
+|    explained_variance   | 0.805       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 1.25        |
+|    n_updates            | 1670        |
+|    policy_gradient_loss | -0.000364   |
+|    value_loss           | 16.2        |
+-----------------------------------------
+
+Current state: ChampionX.Level5.ChunLiVsRyu
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 448         |
+|    iterations           | 140         |
+|    time_elapsed         | 13470       |
+|    total_timesteps      | 6048000     |
+| train/                  |             |
+|    approx_kl            | 0.016119048 |
+|    clip_fraction        | 0.162       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -8.16       |
+|    explained_variance   | 0.796       |
+|    learning_rate        | 0.0002      |
+|    loss                 | 0.9         |
+|    n_updates            | 1680        |
+|    policy_gradient_loss | 0.000371    |
+|    value_loss           | 17.3        |
+-----------------------------------------
\ No newline at end of file
diff --git a/001_image_stack/training_log.txt b/001_image_stack_vision_based_reward/training_log.txt
similarity index 100%
rename from 001_image_stack/training_log.txt
rename to 001_image_stack_vision_based_reward/training_log.txt
diff --git a/001_image_stack_vision_based_reward/tune.py b/001_image_stack_vision_based_reward/tune.py
new file mode 100644
index 0000000..2c60de1
--- /dev/null
+++ b/001_image_stack_vision_based_reward/tune.py
@@ -0,0 +1,81 @@
+import gym
+import retro
+import optuna
+from stable_baselines3 import PPO
+from stable_baselines3.common.monitor import Monitor
+from stable_baselines3.common.evaluation import evaluate_policy
+
+from custom_cnn import CustomCNN
+from street_fighter_custom_wrapper import StreetFighterCustomWrapper
+
+def make_env(game, state, seed=0):
+    """Return a zero-argument factory that builds the wrapped, monitored, seeded retro env."""
+    def _init():
+        retro_kwargs = dict(
+            game=game,
+            state=state,
+            use_restricted_actions=retro.Actions.FILTERED,
+            obs_type=retro.Observations.IMAGE,
+        )
+        wrapped = Monitor(StreetFighterCustomWrapper(retro.RetroEnv(**retro_kwargs)))
+        wrapped.seed(seed)
+        return wrapped
+    return _init
+
+def objective(trial):
+    """Optuna objective: train PPO with sampled hyperparameters; return the mean eval reward.
+
+    Reports the evaluation reward after each of 10 training rounds so the
+    study's pruner can stop the trial early (raises optuna.TrialPruned).
+    """
+    game = "StreetFighterIISpecialChampionEdition-Genesis"
+    env = make_env(game, state="ChampionX.Level1.ChunLiVsKen")()
+
+    # Close the env on every exit path (return, prune, error). It was never
+    # closed before; gym-retro documents one open emulator per process.
+    try:
+        # Suggest hyperparameters
+        ppo_params = {
+            "learning_rate": trial.suggest_float("learning_rate", 5e-5, 1e-3, log=True),
+            "n_steps": trial.suggest_int("n_steps", 256, 8192, log=True),
+            "batch_size": trial.suggest_int("batch_size", 16, 128, log=True),
+            "gamma": trial.suggest_float("gamma", 0.9, 0.9999),
+            "gae_lambda": trial.suggest_float("gae_lambda", 0.9, 1.0),
+            "clip_range": trial.suggest_float("clip_range", 0.1, 0.4),
+            "ent_coef": trial.suggest_float("ent_coef", 1e-4, 1e-2, log=True),
+            "vf_coef": trial.suggest_float("vf_coef", 0.1, 1.0),
+        }
+
+        # Using CustomCNN as the feature extractor
+        model = PPO(
+            "CnnPolicy",
+            env,
+            device="cuda",
+            policy_kwargs={'features_extractor_class': CustomCNN},
+            verbose=1,
+            **ppo_params,
+        )
+
+        for iteration in range(10):
+            model.learn(total_timesteps=100000)
+            mean_reward, _std_reward = evaluate_policy(model, env, n_eval_episodes=10)
+
+            trial.report(mean_reward, iteration)
+
+            if trial.should_prune():
+                raise optuna.TrialPruned()
+
+        return mean_reward
+    finally:
+        env.close()
+
+# Maximise the mean evaluation reward returned by `objective`.
+study = optuna.create_study(direction="maximize")
+# Run optimization for 100 trials or 2 hours (7200 s), whichever comes first.
+study.optimize(objective, n_trials=100, timeout=7200)
+print("Best trial:")
+trial = study.best_trial
+print(" Value: ", trial.value)
+print(" Params: ")
+for key, value in trial.params.items():
+    print(f"{key}: {value}")
diff --git a/003_frame_delta_ram_based/__pycache__/custom_cnn.cpython-38.pyc b/003_frame_delta_ram_based/__pycache__/custom_cnn.cpython-38.pyc
new file mode 100644
index 0000000..73996e1
Binary files /dev/null and b/003_frame_delta_ram_based/__pycache__/custom_cnn.cpython-38.pyc differ
diff --git a/003_frame_delta_ram_based/__pycache__/street_fighter_custom_wrapper.cpython-38.pyc b/003_frame_delta_ram_based/__pycache__/street_fighter_custom_wrapper.cpython-38.pyc
new file mode 100644
index 0000000..de0618e
Binary files /dev/null and b/003_frame_delta_ram_based/__pycache__/street_fighter_custom_wrapper.cpython-38.pyc differ
diff --git a/003_frame_delta_ram_based/custom_cnn.py b/003_frame_delta_ram_based/custom_cnn.py
new file mode 100644
index 0000000..5ba84fa
--- /dev/null
+++ b/003_frame_delta_ram_based/custom_cnn.py
@@ -0,0 +1,25 @@
+import gym
+import torch
+import torch.nn as nn
+from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
+
+# Custom feature extractor (CNN)
+class CustomCNN(BaseFeaturesExtractor):
+    """Three-conv-layer extractor mapping a single-channel frame to a 512-d feature vector."""
+
+    def __init__(self, observation_space: gym.Space):
+        super().__init__(observation_space, features_dim=512)
+        conv_stack = [
+            nn.Conv2d(1, 32, kernel_size=5, stride=2), nn.ReLU(),
+            nn.Conv2d(32, 64, kernel_size=5, stride=2), nn.ReLU(),
+            nn.Conv2d(64, 64, kernel_size=3, stride=1), nn.ReLU(),
+        ]
+        # 16384 flattened conv features feed the final linear projection.
+        head = [nn.Flatten(), nn.Linear(16384, self.features_dim), nn.ReLU()]
+        self.cnn = nn.Sequential(*conv_stack, *head)
+
+    def forward(self, observations: torch.Tensor) -> torch.Tensor:
+        # Insert the channel axis at dim 1 before the convolutions.
+        with_channel = observations.unsqueeze(1)
+        return self.cnn(with_channel)
+    
\ No newline at end of file
diff --git a/003_frame_delta_ram_based/logs/monitor.csv b/003_frame_delta_ram_based/logs/monitor.csv
new file mode 100644
index 0000000..531e49e
--- /dev/null
+++ b/003_frame_delta_ram_based/logs/monitor.csv
@@ -0,0 +1,2 @@
+#{"t_start": 1680175884.8182795, "env_id": null}
+r,l,t
diff --git a/003_frame_delta_ram_based/street_fighter_custom_wrapper.py b/003_frame_delta_ram_based/street_fighter_custom_wrapper.py
new file mode 100644
index 0000000..65b9c75
--- /dev/null
+++ b/003_frame_delta_ram_based/street_fighter_custom_wrapper.py
@@ -0,0 +1,72 @@
+import gym
+import cv2
+import numpy as np
+
+# Custom environment wrapper
+class StreetFighterCustomWrapper(gym.Wrapper):
+    """Gym wrapper that emits grayscale frame deltas and a health-based reward."""
+
+    def __init__(self, env, testing=False):
+        super(StreetFighterCustomWrapper, self).__init__(env)
+        self.env = env
+        self.testing = testing  # when True, step() always reports done=False
+
+        # Last preprocessed frame; None until reset() or step() stores one.
+        self.prev_frame = None
+
+        self.full_hp = 176
+        self.prev_player_health = self.full_hp
+        self.prev_oppont_health = self.full_hp
+
+        # NOTE(review): declared as (84, 84, 1) uint8, but reset()/step() return
+        # 2-D float arrays (84, 84) scaled by 1/255 -- confirm which is intended.
+        self.observation_space = gym.spaces.Box(low=0, high=255, shape=(84, 84, 1), dtype=np.uint8)
+
+    def _preprocess_observation(self, observation):
+        """Convert a colour frame to an 84x84 grayscale float array scaled by 1/255."""
+        obs_gray = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY)
+        return cv2.resize(obs_gray, (84, 84), interpolation=cv2.INTER_AREA) / 255.0
+
+    def reset(self):
+        """Reset the env and health trackers; the first observation is an all-zero delta."""
+        self.prev_player_health = self.full_hp
+        self.prev_oppont_health = self.full_hp
+        self.prev_frame = self._preprocess_observation(self.env.reset())
+        return np.zeros_like(self.prev_frame)
+
+    def step(self, action):
+        """Step the env; return (frame_delta, reward, done, info) with custom reward/done."""
+        observation, _reward, _done, info = self.env.step(action)
+        obs_gray_resized = self._preprocess_observation(observation)
+        if self.prev_frame is not None:
+            frame_delta = obs_gray_resized - self.prev_frame
+        else:
+            frame_delta = np.zeros_like(obs_gray_resized)
+        self.prev_frame = obs_gray_resized
+
+        # Defaults: previously `done` was unbound on ordinary fighting frames and
+        # `reward` was unbound when neither health was positive (UnboundLocalError).
+        reward = 0
+        done = False
+
+        # During fighting, either player or opponent has positive health points.
+        if info['health'] > 0 or info['enemy_health'] > 0:
+            # Player Loses
+            if info['health'] < 0 and info['enemy_health'] > 0:
+                reward = (-self.full_hp) * info['enemy_health']
+                done = True
+            # Player Wins
+            elif info['enemy_health'] < 0 and info['health'] > 0:
+                reward = self.full_hp * info['health']
+                done = True
+            # During Fighting
+            else:
+                reward = (self.prev_oppont_health - info['enemy_health']) - (self.prev_player_health - info['health'])
+
+        self.prev_player_health = info['health']
+        self.prev_oppont_health = info['enemy_health']
+
+        if self.testing:
+            done = False
+        return frame_delta, reward, done, info
+    
\ No newline at end of file
diff --git a/003_frame_delta_ram_based/test.py b/003_frame_delta_ram_based/test.py
new file mode 100644
index 0000000..aaf494c
--- /dev/null
+++ b/003_frame_delta_ram_based/test.py
@@ -0,0 +1,70 @@
+import time 
+
+import cv2
+import retro
+from stable_baselines3 import PPO
+from stable_baselines3.common.vec_env import DummyVecEnv
+
+from custom_cnn import CustomCNN
+from street_fighter_custom_wrapper import StreetFighterCustomWrapper
+    
+def make_env(game, state):
+    """Return a zero-argument factory for the retro env wrapped in testing mode."""
+    def _init():
+        emulator = retro.RetroEnv(
+            game=game,
+            state=state,
+            use_restricted_actions=retro.Actions.FILTERED,
+            obs_type=retro.Observations.IMAGE,
+        )
+        return StreetFighterCustomWrapper(emulator, testing=True)
+    return _init
+
+game = "StreetFighterIISpecialChampionEdition-Genesis"
+state_stages = [
+    "Champion.Level1.ChunLiVsGuile",
+    "Champion.Level2.ChunLiVsKen",
+    "Champion.Level3.ChunLiVsChunLi",
+    "Champion.Level4.ChunLiVsZangief",
+    "Champion.Level5.ChunLiVsDhalsim",
+    "Champion.Level6.ChunLiVsRyu",
+    "Champion.Level7.ChunLiVsEHonda",
+    "Champion.Level8.ChunLiVsBlanka",
+    "Champion.Level9.ChunLiVsBalrog",
+    "Champion.Level10.ChunLiVsVega",
+    "Champion.Level11.ChunLiVsSagat",
+    "Champion.Level12.ChunLiVsBison"
+    # Add other stages as necessary
+]
+
+env = make_env(game, state_stages[0])()
+
+# Wrap the environment
+env = DummyVecEnv([lambda: env])
+
+# Load the trained checkpoint and play with it.
+# PPO.load() is a classmethod that returns a new model instead of updating
+# an existing one; the previous code built a fresh PPO, called
+# model.load(...) and dropped the result, so the randomly initialised
+# policy was the one being played.
+# NOTE(review): the CustomCNN policy settings are expected to be restored
+# from the checkpoint itself -- confirm on first run.
+model = PPO.load(
+    r"trained_models_continued/ppo_chunli_6048000_steps",
+    env=env,
+    device="cuda",
+)
+
+obs = env.reset()
+done = False
+
+while True:
+    timestamp = time.time()
+    action, _ = model.predict(obs)
+    obs, rewards, done, info = env.step(action)
+    env.render()
+    render_time = time.time() - timestamp
+    if render_time < 0.0111:
+        time.sleep(0.0111 - render_time)  # Add a delay for 90 FPS
+
+# env.close()
diff --git a/003_frame_delta_ram_based/train.py b/003_frame_delta_ram_based/train.py
new file mode 100644
index 0000000..e4d1bc2
--- /dev/null
+++ b/003_frame_delta_ram_based/train.py
@@ -0,0 +1,124 @@
+import os
+import random
+
+import retro
+from stable_baselines3 import PPO, A2C
+from stable_baselines3.common.vec_env import SubprocVecEnv
+from stable_baselines3.common.callbacks import BaseCallback, CheckpointCallback
+
+from custom_cnn import CustomCNN
+from street_fighter_custom_wrapper import StreetFighterCustomWrapper
+
+class RandomOpponentChangeCallback(BaseCallback):
+    """Every `opponent_interval` calls, load a randomly chosen stage state."""
+    def __init__(self, stages, opponent_interval, verbose=0):
+        super().__init__(verbose)
+        self.stages, self.opponent_interval = stages, opponent_interval
+
+    def _on_step(self) -> bool:
+        if self.n_calls % self.opponent_interval == 0:
+            chosen_state = random.choice(self.stages)
+            print("\nCurrent state:", chosen_state)
+            self.training_env.env_method("load_state", chosen_state, indices=None)
+        return True  # this callback always returns True
+    
+def make_env(game, state, seed=0):
+    """Return a zero-argument factory for one seeded, wrapped retro env."""
+    def _init():
+        base_env = retro.make(
+            game=game, state=state,
+            use_restricted_actions=retro.Actions.FILTERED,
+            obs_type=retro.Observations.IMAGE,
+        )
+        wrapped_env = StreetFighterCustomWrapper(base_env)
+        wrapped_env.seed(seed)
+        return wrapped_env
+    return _init
+
+def main():
+    """Train a PPO agent on Street Fighter II and save the final model.
+
+    Creates `num_envs` subprocess environments on the first stage, builds a
+    PPO model whose CnnPolicy uses CustomCNN as feature extractor, and trains
+    it with two callbacks: periodic checkpoints into `save_dir`, and a random
+    stage reload at a fixed interval. The environments are closed after
+    training and the model is written to
+    trained_models/ppo_sf2_chunli_final.zip.
+    """
+    game = "StreetFighterIISpecialChampionEdition-Genesis"
+    state_stages = [
+        "ChampionX.Level1.ChunLiVsKen",
+        "ChampionX.Level2.ChunLiVsChunLi",
+        "ChampionX.Level3.ChunLiVsZangief",
+        "ChampionX.Level4.ChunLiVsDhalsim",
+        "ChampionX.Level5.ChunLiVsRyu",
+        "ChampionX.Level6.ChunLiVsEHonda",
+        "ChampionX.Level7.ChunLiVsBlanka",
+        "ChampionX.Level8.ChunLiVsGuile",
+        "ChampionX.Level9.ChunLiVsBalrog",
+        "ChampionX.Level10.ChunLiVsVega",
+        "ChampionX.Level11.ChunLiVsSagat",
+        "ChampionX.Level12.ChunLiVsBison"
+        # Add other stages as necessary
+    ]
+    # Champion is at difficulty level 4, ChampionX is at difficulty level 8.
+
+    num_envs = 8
+
+    # One subprocess env per seed 0..num_envs-1, all starting on the first stage.
+    env = SubprocVecEnv([make_env(game, state_stages[0], seed=i) for i in range(num_envs)])
+
+    # Using CustomCNN as the feature extractor
+    policy_kwargs = {
+        'features_extractor_class': CustomCNN
+    }
+
+    model = PPO(
+        "CnnPolicy",
+        env,
+        device="cuda",
+        policy_kwargs=policy_kwargs,
+        verbose=1,
+        n_steps=5400,
+        batch_size=64,
+        learning_rate=0.0001,
+        ent_coef=0.01,
+        clip_range=0.2,
+        gamma=0.99,
+        gae_lambda=0.95,
+        tensorboard_log="logs/"
+    )
+
+    # Set the save directory
+    save_dir = "trained_models"
+    os.makedirs(save_dir, exist_ok=True)
+
+    # To continue training from a checkpoint instead of starting fresh, replace
+    # the model above with a loaded one, for example:
+    #   model_path = "trained_models/ppo_chunli_1296000_steps.zip"
+    #   custom_objects = {"learning_rate": 0.0002}  # optional override
+    #   model = PPO.load(model_path, env=env, device="cuda",
+    #                    custom_objects=custom_objects)
+
+    # Set up callbacks. RandomOpponentChangeCallback's signature is
+    # (stages, opponent_interval, verbose=0): it has no directory parameter, so
+    # only the stage list and the interval are passed. A third positional
+    # argument such as save_dir would silently become the verbosity level.
+    opponent_interval = 5400 # stage_interval * num_envs = total_steps_per_stage
+    checkpoint_interval = 54000 # checkpoint_interval * num_envs = total_steps_per_checkpoint (Every 80 rounds)
+    checkpoint_callback = CheckpointCallback(save_freq=checkpoint_interval, save_path=save_dir, name_prefix="ppo_chunli")
+    stage_increase_callback = RandomOpponentChangeCallback(state_stages, opponent_interval)
+
+    # Both callbacks are handed to learn(); checkpoints are written into
+    # save_dir with the "ppo_chunli" name prefix.
+    model.learn(
+        total_timesteps=int(6048000), # total_timesteps = stage_interval * num_envs * num_stages (1120 rounds)
+        callback=[checkpoint_callback, stage_increase_callback]
+    )
+    env.close()
+
+    # Save the final model
+    model.save(os.path.join(save_dir, "ppo_sf2_chunli_final.zip"))
+
+if __name__ == "__main__":
+    main()
diff --git a/003_frame_delta_ram_based/tune_ppo.py b/003_frame_delta_ram_based/tune_ppo.py
new file mode 100644
index 0000000..e5128e2
--- /dev/null
+++ b/003_frame_delta_ram_based/tune_ppo.py
@@ -0,0 +1,73 @@
+import os
+
+import retro
+import optuna
+from stable_baselines3 import PPO
+from stable_baselines3.common.evaluation import evaluate_policy
+from stable_baselines3.common.monitor import Monitor
+from stable_baselines3.common.vec_env import DummyVecEnv, VecFrameStack
+
+from street_fighter_custom_wrapper import StreetFighterCustomWrapper
+
+# Output directories, created up front: one for logs, one for saved trial models.
+LOG_DIR, OPT_DIR = 'logs/', 'optuna/'
+os.makedirs(LOG_DIR, exist_ok=True)
+os.makedirs(OPT_DIR, exist_ok=True)
+
+def optimize_ppo(trial):
+    """Sample one set of PPO hyperparameters from `trial` and return it as a dict."""
+    params = {'n_steps': trial.suggest_int('n_steps', 1024, 8192, log=True)}
+    params['gamma'] = trial.suggest_float('gamma', 0.9, 0.9999)
+    params['learning_rate'] = trial.suggest_float('learning_rate', 5e-5, 1e-4, log=True)
+    params['clip_range'] = trial.suggest_float('clip_range', 0.1, 0.4)
+    params['gae_lambda'] = trial.suggest_float('gae_lambda', 0.8, 0.99)
+    return params
+
+def make_env(game, state, seed=0):
+    """Build a factory that creates, wraps and seeds a single retro env."""
+    def _init():
+        retro_kwargs = {
+            'game': game, 'state': state,
+            'use_restricted_actions': retro.Actions.FILTERED,
+            'obs_type': retro.Observations.IMAGE,
+        }
+        wrapped_env = StreetFighterCustomWrapper(retro.make(**retro_kwargs))
+        wrapped_env.seed(seed)
+        return wrapped_env
+    return _init
+
+def optimize_agent(trial):
+    """Optuna objective: train PPO with sampled hyperparameters, return mean reward.
+
+    The trained model is saved under OPT_DIR as 'trial_<number>_best_model'.
+    """
+    game = "StreetFighterIISpecialChampionEdition-Genesis"
+    state = "ChampionX.Level1.ChunLiVsKen"
+
+    model_params = optimize_ppo(trial)
+
+    # Create environment
+    env = make_env(game, state)()
+    env = Monitor(env, LOG_DIR)
+    env = DummyVecEnv([lambda: env])
+    env = VecFrameStack(env, 4, channels_order='last')
+
+    # Close the env even when training or evaluation raises, so a failed trial
+    # does not leave the retro environment open behind it.
+    try:
+        model = PPO('CnnPolicy', env, tensorboard_log=LOG_DIR, verbose=0, **model_params)
+        model.learn(total_timesteps=100000)
+        mean_reward, _ = evaluate_policy(model, env, n_eval_episodes=5)
+    finally:
+        env.close()
+
+    SAVE_PATH = os.path.join(OPT_DIR, 'trial_{}_best_model'.format(trial.number))
+    model.save(SAVE_PATH)
+    return mean_reward
+    
+# Creating the experiment: maximise the objective over ten trials with n_jobs=1.
+study = optuna.create_study(direction='maximize')
+study.optimize(optimize_agent, n_jobs=1, n_trials=10)
+
+# Report the best hyperparameters first, then the best trial record.
+print(study.best_params, study.best_trial, sep='\n')