diff --git a/000_image_stack_ram_based_reward/.ipynb_checkpoints/street_fighter_notebook-checkpoint.ipynb b/000_image_stack_ram_based_reward/.ipynb_checkpoints/street_fighter_notebook-checkpoint.ipynb new file mode 100644 index 0000000..ccf2f64 --- /dev/null +++ b/000_image_stack_ram_based_reward/.ipynb_checkpoints/street_fighter_notebook-checkpoint.ipynb @@ -0,0 +1,234 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "10d267bb", + "metadata": {}, + "outputs": [], + "source": [ + "import retro" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "1ef8ff20", + "metadata": {}, + "outputs": [], + "source": [ + "game = \"StreetFighterIISpecialChampionEdition-Genesis\"\n", + "state = \"Champion.Level1.ChunLiVsGuile\"\n", + "env = retro.make(game=game, state=state)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "5ce656b8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1], dtype=int8)" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "env.action_space.sample()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "8c3f0a4d", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(200, 256, 3)" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "env.observation_space.sample().shape" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "46db7b05", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(200, 256, 3)\n", + "{'enemy_matches_won': 0, 'score': 0, 'matches_won': 0, 'continuetimer': 0, 'enemy_health': 176, 'health': 176}\n" + ] + } + ], + "source": [ + "observation = env.reset()\n", + "print(observation.shape)\n", + "\n", + "action = env.action_space.sample()\n", + "obs, rewards, done, info = 
env.step(action)\n", + "print(info)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "09f0c6b0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MultiBinary(12)\n" + ] + } + ], + "source": [ + "from gym.spaces import Box, MultiBinary\n", + "\n", + "print(MultiBinary(12))" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "97df18cf", + "metadata": {}, + "outputs": [], + "source": [ + "import gym\n", + "import numpy as np\n", + "from gym.spaces import Box, MultiBinary\n", + "\n", + "class StreetFighter(gym.Env):\n", + " def __init__(self):\n", + " super().__init__()\n", + " self.observation_space = Box(low=0, high=255, shape=(84, 84), dtype=np.uint8)\n", + " self.action_space = MultiBinary(12)\n", + " self.game = retro.make(game=\"StreetFighterIISpecialChampionEdition-Genesis\", use_restricted_actions=retro.Actions.FILTERED)\n", + " \n", + " self.full_hp = 176\n", + " self.player_health = self.full_hp\n", + " self.oppont_health = self.full_hp\n", + " \n", + " self.score = 0\n", + " \n", + " def __preprocess(self, observation):\n", + " gray = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY)\n", + " resize = cv2.resize(gray, (84,84), interpolation=cv2.INTER_CUBIC)\n", + " return resize\n", + "\n", + " def step(self, action):\n", + "\n", + " obs, reward, done, info = self.game.step(action)\n", + " custom_obs = self.__preprocess(obs) # It's just frame, not frame_delta\n", + "\n", + " # During fighting, either player or opponent has positive health points.\n", + " if info['health'] > 0 or info['enemy_health'] > 0:\n", + "\n", + " # Player Loses\n", + " if info['health'] < 0 and info['health'] != self.player_health and info['enemy_health'] != 0:\n", + " reward = (-self.full_hp) * info['enemy_health']\n", + "\n", + " # Player Wins\n", + " elif info['enemy_health'] < 0 and info['enemy_health'] != self.oppont_health and info['health'] != 0:\n", + " reward = self.full_hp * 
info['health']\n", + "\n", + " # During Fighting\n", + " else:\n", + " reward = (self.oppont_health - info['enemy_health']) - (self.player_health - info['health'])\n", + " \n", + " self.player_health = info['health']\n", + " self.oppont_health = info['enemy_health']\n", + " \n", + " return custom_obs, reward, done, info\n", + " \n", + " def render(self, *args, **kwargs):\n", + " self.game.render()\n", + " \n", + " def reset(self):\n", + " obs = self.game.reset()\n", + " custom_obs = self.__preprocess(obs)\n", + " self.previous_frame = obs\n", + " \n", + " self.player_health = self.full_hp\n", + " self.oppont_health = self.full_hp\n", + " return custom_obs\n", + "\n", + " def close(self):\n", + " self.game.close()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "0b137b88", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(84, 84, 1)\n" + ] + } + ], + "source": [ + "env.close()\n", + "env = StreetFighter()\n", + "print(env.observation_space.shape)\n", + "env.close()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2da50dbc", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/000_image_stack_ram_based_reward/__pycache__/custom_cnn.cpython-38.pyc b/000_image_stack_ram_based_reward/__pycache__/custom_cnn.cpython-38.pyc new file mode 100644 index 0000000..617ab55 Binary files /dev/null and b/000_image_stack_ram_based_reward/__pycache__/custom_cnn.cpython-38.pyc differ diff --git 
a/000_image_stack_ram_based_reward/__pycache__/rmsprop_optim.cpython-38.pyc b/000_image_stack_ram_based_reward/__pycache__/rmsprop_optim.cpython-38.pyc new file mode 100644 index 0000000..9a61bfd Binary files /dev/null and b/000_image_stack_ram_based_reward/__pycache__/rmsprop_optim.cpython-38.pyc differ diff --git a/000_image_stack_ram_based_reward/__pycache__/street_fighter_custom_wrapper.cpython-38.pyc b/000_image_stack_ram_based_reward/__pycache__/street_fighter_custom_wrapper.cpython-38.pyc new file mode 100644 index 0000000..5d8bbb4 Binary files /dev/null and b/000_image_stack_ram_based_reward/__pycache__/street_fighter_custom_wrapper.cpython-38.pyc differ diff --git a/000_image_stack_ram_based_reward/check_reward.py b/000_image_stack_ram_based_reward/check_reward.py new file mode 100644 index 0000000..7f46495 --- /dev/null +++ b/000_image_stack_ram_based_reward/check_reward.py @@ -0,0 +1,46 @@ +import time + +import retro +from stable_baselines3 import PPO +from stable_baselines3.common.monitor import Monitor +from stable_baselines3.common.vec_env import DummyVecEnv, VecFrameStack + +from street_fighter_custom_wrapper import StreetFighterCustomWrapper + +def make_env(game, state): + def _init(): + env = retro.make( + game=game, + state=state, + use_restricted_actions=retro.Actions.FILTERED, + obs_type=retro.Observations.IMAGE + ) + env = StreetFighterCustomWrapper(env) + return env + return _init + +game = "StreetFighterIISpecialChampionEdition-Genesis" +state = "Champion.Level1.ChunLiVsGuile"#"ChampionX.Level1.ChunLiVsKen" + +env = make_env(game, state)() +env = Monitor(env, 'logs/') + +num_episodes = 30 +episode_reward_sum = 0 +for _ in range(num_episodes): + done = False + obs = env.reset() + total_reward = 0 + while not done: + timestamp = time.time() + obs, reward, done, info = env.step(env.action_space.sample()) + + if reward != 0: + total_reward += reward + print("Reward: {}, playerHP: {}, enemyHP:{}".format(reward, info['health'], 
info['enemy_health'])) + env.render() + print("Total reward: {}".format(total_reward)) + episode_reward_sum += total_reward + +env.close() +print("Average reward for random strategy: {}".format(episode_reward_sum/num_episodes)) diff --git a/001_image_stack/custom_cnn.py b/000_image_stack_ram_based_reward/custom_cnn.py similarity index 100% rename from 001_image_stack/custom_cnn.py rename to 000_image_stack_ram_based_reward/custom_cnn.py diff --git a/000_image_stack_ram_based_reward/evaluate.py b/000_image_stack_ram_based_reward/evaluate.py new file mode 100644 index 0000000..c435f08 --- /dev/null +++ b/000_image_stack_ram_based_reward/evaluate.py @@ -0,0 +1,52 @@ +import retro + +from stable_baselines3 import PPO +from stable_baselines3.common.vec_env import DummyVecEnv +from stable_baselines3.common.monitor import Monitor +from stable_baselines3.common.evaluation import evaluate_policy + +from custom_cnn import CustomCNN +from street_fighter_custom_wrapper import StreetFighterCustomWrapper + +def make_env(game, state): + def _init(): + env = retro.make( + game=game, + state=state, + use_restricted_actions=retro.Actions.FILTERED, + obs_type=retro.Observations.IMAGE + ) + env = StreetFighterCustomWrapper(env) + return env + return _init + +game = "StreetFighterIISpecialChampionEdition-Genesis" +state_stages = [ + "Champion.Level1.ChunLiVsGuile", + "Champion.Level2.ChunLiVsKen", + "Champion.Level3.ChunLiVsChunLi", + "Champion.Level4.ChunLiVsZangief", + "Champion.Level5.ChunLiVsDhalsim", + "Champion.Level6.ChunLiVsRyu", + "Champion.Level7.ChunLiVsEHonda", + "Champion.Level8.ChunLiVsBlanka", + "Champion.Level9.ChunLiVsBalrog", + "Champion.Level10.ChunLiVsVega", + "Champion.Level11.ChunLiVsSagat", + "Champion.Level12.ChunLiVsBison" + # Add other stages as necessary +] + +env = make_env(game, state_stages[0])() + +# Wrap the environment +# env = Monitor(env, 'logs/') + +policy_kwargs = {'features_extractor_class': CustomCNN} +model = PPO("CnnPolicy", env, 
policy_kwargs=policy_kwargs) + +model = PPO.load(r"dummy_model_ppo_chunli") +# model.load(r"trained_models/ppo_chunli_864000_steps") + +mean_reward, std_reward = evaluate_policy(model, env, render=True, n_eval_episodes=10, deterministic=False) # default return_episode_rewards=False yields scalar (mean, std), which the :.2f format below requires +print(f"Mean reward: {mean_reward:.2f} +/- {std_reward:.2f}") diff --git a/000_image_stack_ram_based_reward/logs/PPO_1/events.out.tfevents.1680176551.DESKTOP-9E17TO7.25984.0 b/000_image_stack_ram_based_reward/logs/PPO_1/events.out.tfevents.1680176551.DESKTOP-9E17TO7.25984.0 new file mode 100644 index 0000000..9096b7c Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_1/events.out.tfevents.1680176551.DESKTOP-9E17TO7.25984.0 differ diff --git a/000_image_stack_ram_based_reward/logs/PPO_10/events.out.tfevents.1680180303.DESKTOP-9E17TO7.35284.0 b/000_image_stack_ram_based_reward/logs/PPO_10/events.out.tfevents.1680180303.DESKTOP-9E17TO7.35284.0 new file mode 100644 index 0000000..106794b Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_10/events.out.tfevents.1680180303.DESKTOP-9E17TO7.35284.0 differ diff --git a/000_image_stack_ram_based_reward/logs/PPO_11/events.out.tfevents.1680180514.DESKTOP-9E17TO7.11796.0 b/000_image_stack_ram_based_reward/logs/PPO_11/events.out.tfevents.1680180514.DESKTOP-9E17TO7.11796.0 new file mode 100644 index 0000000..6fab041 Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_11/events.out.tfevents.1680180514.DESKTOP-9E17TO7.11796.0 differ diff --git a/000_image_stack_ram_based_reward/logs/PPO_12/events.out.tfevents.1680180894.DESKTOP-9E17TO7.20548.0 b/000_image_stack_ram_based_reward/logs/PPO_12/events.out.tfevents.1680180894.DESKTOP-9E17TO7.20548.0 new file mode 100644 index 0000000..8ac9b2f Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_12/events.out.tfevents.1680180894.DESKTOP-9E17TO7.20548.0 differ diff --git 
a/000_image_stack_ram_based_reward/logs/PPO_13/events.out.tfevents.1680182153.DESKTOP-9E17TO7.30948.0 b/000_image_stack_ram_based_reward/logs/PPO_13/events.out.tfevents.1680182153.DESKTOP-9E17TO7.30948.0 new file mode 100644 index 0000000..74c786c Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_13/events.out.tfevents.1680182153.DESKTOP-9E17TO7.30948.0 differ diff --git a/000_image_stack_ram_based_reward/logs/PPO_14/events.out.tfevents.1680182468.DESKTOP-9E17TO7.30948.1 b/000_image_stack_ram_based_reward/logs/PPO_14/events.out.tfevents.1680182468.DESKTOP-9E17TO7.30948.1 new file mode 100644 index 0000000..39f8c7f Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_14/events.out.tfevents.1680182468.DESKTOP-9E17TO7.30948.1 differ diff --git a/000_image_stack_ram_based_reward/logs/PPO_15/events.out.tfevents.1680182795.DESKTOP-9E17TO7.30948.2 b/000_image_stack_ram_based_reward/logs/PPO_15/events.out.tfevents.1680182795.DESKTOP-9E17TO7.30948.2 new file mode 100644 index 0000000..23d3259 Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_15/events.out.tfevents.1680182795.DESKTOP-9E17TO7.30948.2 differ diff --git a/000_image_stack_ram_based_reward/logs/PPO_16/events.out.tfevents.1680183136.DESKTOP-9E17TO7.30948.3 b/000_image_stack_ram_based_reward/logs/PPO_16/events.out.tfevents.1680183136.DESKTOP-9E17TO7.30948.3 new file mode 100644 index 0000000..f357854 Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_16/events.out.tfevents.1680183136.DESKTOP-9E17TO7.30948.3 differ diff --git a/000_image_stack_ram_based_reward/logs/PPO_17/events.out.tfevents.1680183432.DESKTOP-9E17TO7.30948.4 b/000_image_stack_ram_based_reward/logs/PPO_17/events.out.tfevents.1680183432.DESKTOP-9E17TO7.30948.4 new file mode 100644 index 0000000..ac83234 Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_17/events.out.tfevents.1680183432.DESKTOP-9E17TO7.30948.4 differ diff --git 
a/000_image_stack_ram_based_reward/logs/PPO_18/events.out.tfevents.1680183612.DESKTOP-9E17TO7.32692.0 b/000_image_stack_ram_based_reward/logs/PPO_18/events.out.tfevents.1680183612.DESKTOP-9E17TO7.32692.0 new file mode 100644 index 0000000..8e7b54d Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_18/events.out.tfevents.1680183612.DESKTOP-9E17TO7.32692.0 differ diff --git a/000_image_stack_ram_based_reward/logs/PPO_19/events.out.tfevents.1680183923.DESKTOP-9E17TO7.32692.1 b/000_image_stack_ram_based_reward/logs/PPO_19/events.out.tfevents.1680183923.DESKTOP-9E17TO7.32692.1 new file mode 100644 index 0000000..4627ee2 Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_19/events.out.tfevents.1680183923.DESKTOP-9E17TO7.32692.1 differ diff --git a/000_image_stack_ram_based_reward/logs/PPO_2/events.out.tfevents.1680177234.DESKTOP-9E17TO7.2364.0 b/000_image_stack_ram_based_reward/logs/PPO_2/events.out.tfevents.1680177234.DESKTOP-9E17TO7.2364.0 new file mode 100644 index 0000000..9b22664 Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_2/events.out.tfevents.1680177234.DESKTOP-9E17TO7.2364.0 differ diff --git a/000_image_stack_ram_based_reward/logs/PPO_20/events.out.tfevents.1680184256.DESKTOP-9E17TO7.32692.2 b/000_image_stack_ram_based_reward/logs/PPO_20/events.out.tfevents.1680184256.DESKTOP-9E17TO7.32692.2 new file mode 100644 index 0000000..5a2dda3 Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_20/events.out.tfevents.1680184256.DESKTOP-9E17TO7.32692.2 differ diff --git a/000_image_stack_ram_based_reward/logs/PPO_21/events.out.tfevents.1680184590.DESKTOP-9E17TO7.32692.3 b/000_image_stack_ram_based_reward/logs/PPO_21/events.out.tfevents.1680184590.DESKTOP-9E17TO7.32692.3 new file mode 100644 index 0000000..55c5bef Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_21/events.out.tfevents.1680184590.DESKTOP-9E17TO7.32692.3 differ diff --git 
a/000_image_stack_ram_based_reward/logs/PPO_22/events.out.tfevents.1680184934.DESKTOP-9E17TO7.32692.4 b/000_image_stack_ram_based_reward/logs/PPO_22/events.out.tfevents.1680184934.DESKTOP-9E17TO7.32692.4 new file mode 100644 index 0000000..0210587 Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_22/events.out.tfevents.1680184934.DESKTOP-9E17TO7.32692.4 differ diff --git a/000_image_stack_ram_based_reward/logs/PPO_23/events.out.tfevents.1680185250.DESKTOP-9E17TO7.32692.5 b/000_image_stack_ram_based_reward/logs/PPO_23/events.out.tfevents.1680185250.DESKTOP-9E17TO7.32692.5 new file mode 100644 index 0000000..528228a Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_23/events.out.tfevents.1680185250.DESKTOP-9E17TO7.32692.5 differ diff --git a/000_image_stack_ram_based_reward/logs/PPO_24/events.out.tfevents.1680185584.DESKTOP-9E17TO7.32692.6 b/000_image_stack_ram_based_reward/logs/PPO_24/events.out.tfevents.1680185584.DESKTOP-9E17TO7.32692.6 new file mode 100644 index 0000000..c4c9faa Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_24/events.out.tfevents.1680185584.DESKTOP-9E17TO7.32692.6 differ diff --git a/000_image_stack_ram_based_reward/logs/PPO_25/events.out.tfevents.1680185591.DESKTOP-9E17TO7.32692.7 b/000_image_stack_ram_based_reward/logs/PPO_25/events.out.tfevents.1680185591.DESKTOP-9E17TO7.32692.7 new file mode 100644 index 0000000..a1a8e5d Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_25/events.out.tfevents.1680185591.DESKTOP-9E17TO7.32692.7 differ diff --git a/000_image_stack_ram_based_reward/logs/PPO_26/events.out.tfevents.1680185938.DESKTOP-9E17TO7.32692.8 b/000_image_stack_ram_based_reward/logs/PPO_26/events.out.tfevents.1680185938.DESKTOP-9E17TO7.32692.8 new file mode 100644 index 0000000..37dacc8 Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_26/events.out.tfevents.1680185938.DESKTOP-9E17TO7.32692.8 differ diff --git 
a/000_image_stack_ram_based_reward/logs/PPO_27/events.out.tfevents.1680186251.DESKTOP-9E17TO7.32692.9 b/000_image_stack_ram_based_reward/logs/PPO_27/events.out.tfevents.1680186251.DESKTOP-9E17TO7.32692.9 new file mode 100644 index 0000000..4299711 Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_27/events.out.tfevents.1680186251.DESKTOP-9E17TO7.32692.9 differ diff --git a/000_image_stack_ram_based_reward/logs/PPO_3/events.out.tfevents.1680177334.DESKTOP-9E17TO7.35060.0 b/000_image_stack_ram_based_reward/logs/PPO_3/events.out.tfevents.1680177334.DESKTOP-9E17TO7.35060.0 new file mode 100644 index 0000000..a51dfe5 Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_3/events.out.tfevents.1680177334.DESKTOP-9E17TO7.35060.0 differ diff --git a/000_image_stack_ram_based_reward/logs/PPO_4/events.out.tfevents.1680177771.DESKTOP-9E17TO7.35060.1 b/000_image_stack_ram_based_reward/logs/PPO_4/events.out.tfevents.1680177771.DESKTOP-9E17TO7.35060.1 new file mode 100644 index 0000000..4fee189 Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_4/events.out.tfevents.1680177771.DESKTOP-9E17TO7.35060.1 differ diff --git a/000_image_stack_ram_based_reward/logs/PPO_5/events.out.tfevents.1680178207.DESKTOP-9E17TO7.35060.2 b/000_image_stack_ram_based_reward/logs/PPO_5/events.out.tfevents.1680178207.DESKTOP-9E17TO7.35060.2 new file mode 100644 index 0000000..6741459 Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_5/events.out.tfevents.1680178207.DESKTOP-9E17TO7.35060.2 differ diff --git a/000_image_stack_ram_based_reward/logs/PPO_6/events.out.tfevents.1680178663.DESKTOP-9E17TO7.35060.3 b/000_image_stack_ram_based_reward/logs/PPO_6/events.out.tfevents.1680178663.DESKTOP-9E17TO7.35060.3 new file mode 100644 index 0000000..ddc6b02 Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_6/events.out.tfevents.1680178663.DESKTOP-9E17TO7.35060.3 differ diff --git 
a/000_image_stack_ram_based_reward/logs/PPO_7/events.out.tfevents.1680179100.DESKTOP-9E17TO7.35060.4 b/000_image_stack_ram_based_reward/logs/PPO_7/events.out.tfevents.1680179100.DESKTOP-9E17TO7.35060.4 new file mode 100644 index 0000000..f1b8b23 Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_7/events.out.tfevents.1680179100.DESKTOP-9E17TO7.35060.4 differ diff --git a/000_image_stack_ram_based_reward/logs/PPO_8/events.out.tfevents.1680179576.DESKTOP-9E17TO7.35060.5 b/000_image_stack_ram_based_reward/logs/PPO_8/events.out.tfevents.1680179576.DESKTOP-9E17TO7.35060.5 new file mode 100644 index 0000000..89b6ec0 Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_8/events.out.tfevents.1680179576.DESKTOP-9E17TO7.35060.5 differ diff --git a/000_image_stack_ram_based_reward/logs/PPO_9/events.out.tfevents.1680180040.DESKTOP-9E17TO7.35060.6 b/000_image_stack_ram_based_reward/logs/PPO_9/events.out.tfevents.1680180040.DESKTOP-9E17TO7.35060.6 new file mode 100644 index 0000000..755d4cc Binary files /dev/null and b/000_image_stack_ram_based_reward/logs/PPO_9/events.out.tfevents.1680180040.DESKTOP-9E17TO7.35060.6 differ diff --git a/000_image_stack_ram_based_reward/logs/monitor.csv b/000_image_stack_ram_based_reward/logs/monitor.csv new file mode 100644 index 0000000..a7eb38b --- /dev/null +++ b/000_image_stack_ram_based_reward/logs/monitor.csv @@ -0,0 +1,53 @@ +#{"t_start": 1680186251.3110938, "env_id": null} +r,l,t +-50,2150,3.695703 +-40,2886,12.564373 +-128,2196,20.599987 +-217,3000,25.620172 +-210,2753,34.631877 +27,2177,42.807461 +-161,2502,46.870715 +-227,2122,54.492589 +-289,1567,61.321581 +1,2075,64.463465 +130,2465,72.662509 +-192,3007,82.093462 +3927.0,6468,97.611361 +-109,1823,104.996175 +200,1820,112.333123 +-300,2478,116.020238 +-42,2351,124.010789 +-263,1990,127.212089 +-351,1486,134.405471 +-225,2611,143.112158 +-56,3290,153.69294 +-65,2138,157.640509 +62,3161,167.244644 +-189,2652,175.720904 +224,2138,179.193385 
+-48,3706,189.4923 +-209,3172,199.319699 +-98,2059,207.148574 +51,2787,216.523835 +-88,3218,225.952495 +-263,1828,228.707771 +-38,2328,236.642072 +7,3179,245.83899 +-133,2421,249.558141 +-296,1684,256.702009 +-211,2881,266.1996 +-261,1710,269.33675 +-176,1974,277.229695 +184,1310,279.58493 +218,2222,288.236686 +-229,2460,291.904952 +-345,2510,299.876746 +-345,2510,302.781091 +-345,2510,305.701696 +-345,2510,308.687105 +-345,2510,311.624716 +-345,2510,314.566203 +-345,2510,317.608539 +-345,2510,320.618201 +-345,2510,323.649133 +-345,2510,326.561072 diff --git a/000_image_stack_ram_based_reward/optuna/tuning_log.txt b/000_image_stack_ram_based_reward/optuna/tuning_log.txt new file mode 100644 index 0000000..9a55088 --- /dev/null +++ b/000_image_stack_ram_based_reward/optuna/tuning_log.txt @@ -0,0 +1,8947 @@ +| value_loss | 20 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.64e+03 | +| ep_rew_mean | -99 | +| time/ | | +| fps | 353 | +| iterations | 6 | +| time_elapsed | 23 | +| total_timesteps | 8226 | +| train/ | | +| approx_kl | 0.014875706 | +| clip_fraction | 0.148 | +| clip_range | 0.26 | +| entropy_loss | -8.24 | +| explained_variance | 0.151 | +| learning_rate | 8.14e-05 | +| loss | 2.44 | +| n_updates | 50 | +| policy_gradient_loss | -0.00671 | +| value_loss | 10.3 | +----------------------------------------- +--------------------------------------- +| rollout/ | | +| ep_len_mean | 2.64e+03 | +| ep_rew_mean | -99 | +| time/ | | +| fps | 350 | +| iterations | 7 | +| time_elapsed | 27 | +| total_timesteps | 9597 | +| train/ | | +| approx_kl | 0.0164865 | +| clip_fraction | 0.162 | +| clip_range | 0.26 | +| entropy_loss | -8.21 | +| explained_variance | -0.272 | +| learning_rate | 8.14e-05 | +| loss | 4.19 | +| n_updates | 60 | +| policy_gradient_loss | -0.0113 | +| value_loss | 16.8 | +--------------------------------------- +----------------------------------------- +| rollout/ | | +| 
ep_len_mean | 2.73e+03 | +| ep_rew_mean | -88.5 | +| time/ | | +| fps | 349 | +| iterations | 8 | +| time_elapsed | 31 | +| total_timesteps | 10968 | +| train/ | | +| approx_kl | 0.014885512 | +| clip_fraction | 0.162 | +| clip_range | 0.26 | +| entropy_loss | -8.18 | +| explained_variance | 0.0707 | +| learning_rate | 8.14e-05 | +| loss | 1.46 | +| n_updates | 70 | +| policy_gradient_loss | -0.017 | +| value_loss | 8.77 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.73e+03 | +| ep_rew_mean | -88.5 | +| time/ | | +| fps | 349 | +| iterations | 9 | +| time_elapsed | 35 | +| total_timesteps | 12339 | +| train/ | | +| approx_kl | 0.018109197 | +| clip_fraction | 0.118 | +| clip_range | 0.26 | +| entropy_loss | -8.17 | +| explained_variance | 0.0377 | +| learning_rate | 8.14e-05 | +| loss | 2.68 | +| n_updates | 80 | +| policy_gradient_loss | -0.0208 | +| value_loss | 10 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.47e+03 | +| ep_rew_mean | -121 | +| time/ | | +| fps | 347 | +| iterations | 10 | +| time_elapsed | 39 | +| total_timesteps | 13710 | +| train/ | | +| approx_kl | 0.02112376 | +| clip_fraction | 0.154 | +| clip_range | 0.26 | +| entropy_loss | -8.17 | +| explained_variance | 0.0154 | +| learning_rate | 8.14e-05 | +| loss | 4.45 | +| n_updates | 90 | +| policy_gradient_loss | -0.0182 | +| value_loss | 13.2 | +---------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.43e+03 | +| ep_rew_mean | -94 | +| time/ | | +| fps | 347 | +| iterations | 11 | +| time_elapsed | 43 | +| total_timesteps | 15081 | +| train/ | | +| approx_kl | 0.03655843 | +| clip_fraction | 0.271 | +| clip_range | 0.26 | +| entropy_loss | -8.1 | +| explained_variance | -0.0637 | +| learning_rate | 8.14e-05 | +| loss | 2.29 | +| n_updates | 100 | +| policy_gradient_loss | -0.00361 | +| 
value_loss | 31.6 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.19e+03 | +| ep_rew_mean | -45.3 | +| time/ | | +| fps | 349 | +| iterations | 12 | +| time_elapsed | 47 | +| total_timesteps | 16452 | +| train/ | | +| approx_kl | 0.037120674 | +| clip_fraction | 0.245 | +| clip_range | 0.26 | +| entropy_loss | -8.08 | +| explained_variance | 0.104 | +| learning_rate | 8.14e-05 | +| loss | 7.71 | +| n_updates | 110 | +| policy_gradient_loss | -0.00649 | +| value_loss | 22.7 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.21e+03 | +| ep_rew_mean | -65 | +| time/ | | +| fps | 350 | +| iterations | 13 | +| time_elapsed | 50 | +| total_timesteps | 17823 | +| train/ | | +| approx_kl | 0.027819885 | +| clip_fraction | 0.228 | +| clip_range | 0.26 | +| entropy_loss | -7.98 | +| explained_variance | 0.0436 | +| learning_rate | 8.14e-05 | +| loss | 7.06 | +| n_updates | 120 | +| policy_gradient_loss | 0.00299 | +| value_loss | 59.9 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.21e+03 | +| ep_rew_mean | -65 | +| time/ | | +| fps | 351 | +| iterations | 14 | +| time_elapsed | 54 | +| total_timesteps | 19194 | +| train/ | | +| approx_kl | 0.027972419 | +| clip_fraction | 0.197 | +| clip_range | 0.26 | +| entropy_loss | -7.94 | +| explained_variance | 0.0199 | +| learning_rate | 8.14e-05 | +| loss | 4.1 | +| n_updates | 130 | +| policy_gradient_loss | -0.00526 | +| value_loss | 26.3 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.21e+03 | +| ep_rew_mean | -65 | +| time/ | | +| fps | 352 | +| iterations | 15 | +| time_elapsed | 58 | +| total_timesteps | 20565 | +| train/ | | +| approx_kl | 0.035860673 | +| clip_fraction | 0.236 | +| clip_range | 0.26 | +| entropy_loss | -7.89 | +| 
explained_variance | -0.13 | +| learning_rate | 8.14e-05 | +| loss | 4.36 | +| n_updates | 140 | +| policy_gradient_loss | -0.0122 | +| value_loss | 10.5 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.3e+03 | +| ep_rew_mean | -57.8 | +| time/ | | +| fps | 352 | +| iterations | 16 | +| time_elapsed | 62 | +| total_timesteps | 21936 | +| train/ | | +| approx_kl | 0.020882078 | +| clip_fraction | 0.207 | +| clip_range | 0.26 | +| entropy_loss | -7.97 | +| explained_variance | -0.266 | +| learning_rate | 8.14e-05 | +| loss | 17.1 | +| n_updates | 150 | +| policy_gradient_loss | -0.00845 | +| value_loss | 12.5 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.32e+03 | +| ep_rew_mean | -76.7 | +| time/ | | +| fps | 353 | +| iterations | 17 | +| time_elapsed | 65 | +| total_timesteps | 23307 | +| train/ | | +| approx_kl | 0.017862184 | +| clip_fraction | 0.112 | +| clip_range | 0.26 | +| entropy_loss | -8.11 | +| explained_variance | -0.152 | +| learning_rate | 8.14e-05 | +| loss | 0.52 | +| n_updates | 160 | +| policy_gradient_loss | -0.014 | +| value_loss | 5 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.32e+03 | +| ep_rew_mean | -76.7 | +| time/ | | +| fps | 354 | +| iterations | 18 | +| time_elapsed | 69 | +| total_timesteps | 24678 | +| train/ | | +| approx_kl | 0.02715041 | +| clip_fraction | 0.179 | +| clip_range | 0.26 | +| entropy_loss | -8.03 | +| explained_variance | 0.0571 | +| learning_rate | 8.14e-05 | +| loss | 6.35 | +| n_updates | 170 | +| policy_gradient_loss | -0.00481 | +| value_loss | 43 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.26e+03 | +| ep_rew_mean | -52.5 | +| time/ | | +| fps | 354 | +| iterations | 19 | +| time_elapsed | 73 | +| total_timesteps 
| 26049 | +| train/ | | +| approx_kl | 0.020203596 | +| clip_fraction | 0.15 | +| clip_range | 0.26 | +| entropy_loss | -8.08 | +| explained_variance | -0.578 | +| learning_rate | 8.14e-05 | +| loss | 0.888 | +| n_updates | 180 | +| policy_gradient_loss | -0.011 | +| value_loss | 11.7 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.28e+03 | +| ep_rew_mean | -57.9 | +| time/ | | +| fps | 354 | +| iterations | 20 | +| time_elapsed | 77 | +| total_timesteps | 27420 | +| train/ | | +| approx_kl | 0.02579885 | +| clip_fraction | 0.181 | +| clip_range | 0.26 | +| entropy_loss | -7.96 | +| explained_variance | -0.0316 | +| learning_rate | 8.14e-05 | +| loss | 1.58 | +| n_updates | 190 | +| policy_gradient_loss | -0.0102 | +| value_loss | 34.6 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.28e+03 | +| ep_rew_mean | -57.9 | +| time/ | | +| fps | 354 | +| iterations | 21 | +| time_elapsed | 81 | +| total_timesteps | 28791 | +| train/ | | +| approx_kl | 0.016173096 | +| clip_fraction | 0.172 | +| clip_range | 0.26 | +| entropy_loss | -7.84 | +| explained_variance | -0.369 | +| learning_rate | 8.14e-05 | +| loss | 0.632 | +| n_updates | 200 | +| policy_gradient_loss | -0.0133 | +| value_loss | 11.3 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.28e+03 | +| ep_rew_mean | -57.9 | +| time/ | | +| fps | 354 | +| iterations | 22 | +| time_elapsed | 84 | +| total_timesteps | 30162 | +| train/ | | +| approx_kl | 0.018948458 | +| clip_fraction | 0.159 | +| clip_range | 0.26 | +| entropy_loss | -7.89 | +| explained_variance | -0.091 | +| learning_rate | 8.14e-05 | +| loss | 5.72 | +| n_updates | 210 | +| policy_gradient_loss | -0.0145 | +| value_loss | 7.36 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| 
ep_len_mean | 2.36e+03 | +| ep_rew_mean | -56.1 | +| time/ | | +| fps | 354 | +| iterations | 23 | +| time_elapsed | 88 | +| total_timesteps | 31533 | +| train/ | | +| approx_kl | 0.018955443 | +| clip_fraction | 0.145 | +| clip_range | 0.26 | +| entropy_loss | -7.97 | +| explained_variance | -0.269 | +| learning_rate | 8.14e-05 | +| loss | 1.12 | +| n_updates | 220 | +| policy_gradient_loss | -0.0227 | +| value_loss | 7.03 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.36e+03 | +| ep_rew_mean | -56.1 | +| time/ | | +| fps | 354 | +| iterations | 24 | +| time_elapsed | 92 | +| total_timesteps | 32904 | +| train/ | | +| approx_kl | 0.017530933 | +| clip_fraction | 0.151 | +| clip_range | 0.26 | +| entropy_loss | -8.05 | +| explained_variance | -0.11 | +| learning_rate | 8.14e-05 | +| loss | 0.575 | +| n_updates | 230 | +| policy_gradient_loss | -0.0203 | +| value_loss | 11 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.36e+03 | +| ep_rew_mean | -49.5 | +| time/ | | +| fps | 354 | +| iterations | 25 | +| time_elapsed | 96 | +| total_timesteps | 34275 | +| train/ | | +| approx_kl | 0.025710236 | +| clip_fraction | 0.166 | +| clip_range | 0.26 | +| entropy_loss | -8.04 | +| explained_variance | 0.00206 | +| learning_rate | 8.14e-05 | +| loss | 1.7 | +| n_updates | 240 | +| policy_gradient_loss | -0.0246 | +| value_loss | 11.2 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.36e+03 | +| ep_rew_mean | -60.1 | +| time/ | | +| fps | 354 | +| iterations | 26 | +| time_elapsed | 100 | +| total_timesteps | 35646 | +| train/ | | +| approx_kl | 0.026275737 | +| clip_fraction | 0.21 | +| clip_range | 0.26 | +| entropy_loss | -8.1 | +| explained_variance | -0.414 | +| learning_rate | 8.14e-05 | +| loss | 0.556 | +| n_updates | 250 | +| policy_gradient_loss | 
-0.0248 | +| value_loss | 5.13 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.36e+03 | +| ep_rew_mean | -60.1 | +| time/ | | +| fps | 354 | +| iterations | 27 | +| time_elapsed | 104 | +| total_timesteps | 37017 | +| train/ | | +| approx_kl | 0.026121318 | +| clip_fraction | 0.171 | +| clip_range | 0.26 | +| entropy_loss | -8.12 | +| explained_variance | -0.0283 | +| learning_rate | 8.14e-05 | +| loss | 2.91 | +| n_updates | 260 | +| policy_gradient_loss | -0.00495 | +| value_loss | 30 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.35e+03 | +| ep_rew_mean | -60.1 | +| time/ | | +| fps | 354 | +| iterations | 28 | +| time_elapsed | 108 | +| total_timesteps | 38388 | +| train/ | | +| approx_kl | 0.02375033 | +| clip_fraction | 0.177 | +| clip_range | 0.26 | +| entropy_loss | -8.01 | +| explained_variance | -0.12 | +| learning_rate | 8.14e-05 | +| loss | 4.69 | +| n_updates | 270 | +| policy_gradient_loss | -0.0175 | +| value_loss | 11.3 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.35e+03 | +| ep_rew_mean | -60.1 | +| time/ | | +| fps | 354 | +| iterations | 29 | +| time_elapsed | 112 | +| total_timesteps | 39759 | +| train/ | | +| approx_kl | 0.025788946 | +| clip_fraction | 0.206 | +| clip_range | 0.26 | +| entropy_loss | -8.1 | +| explained_variance | 0.122 | +| learning_rate | 8.14e-05 | +| loss | 1.84 | +| n_updates | 280 | +| policy_gradient_loss | -0.0214 | +| value_loss | 13.7 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.35e+03 | +| ep_rew_mean | -57.4 | +| time/ | | +| fps | 354 | +| iterations | 30 | +| time_elapsed | 115 | +| total_timesteps | 41130 | +| train/ | | +| approx_kl | 0.035159614 | +| clip_fraction | 0.199 | +| clip_range | 0.26 | +| 
entropy_loss | -8.01 | +| explained_variance | -0.0443 | +| learning_rate | 8.14e-05 | +| loss | 0.494 | +| n_updates | 290 | +| policy_gradient_loss | -0.0228 | +| value_loss | 9.62 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.35e+03 | +| ep_rew_mean | -55.4 | +| time/ | | +| fps | 354 | +| iterations | 31 | +| time_elapsed | 119 | +| total_timesteps | 42501 | +| train/ | | +| approx_kl | 0.03578476 | +| clip_fraction | 0.198 | +| clip_range | 0.26 | +| entropy_loss | -7.9 | +| explained_variance | 0.0155 | +| learning_rate | 8.14e-05 | +| loss | 0.58 | +| n_updates | 300 | +| policy_gradient_loss | -0.0134 | +| value_loss | 7.94 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.3e+03 | +| ep_rew_mean | -41.5 | +| time/ | | +| fps | 355 | +| iterations | 32 | +| time_elapsed | 123 | +| total_timesteps | 43872 | +| train/ | | +| approx_kl | 0.027321111 | +| clip_fraction | 0.229 | +| clip_range | 0.26 | +| entropy_loss | -7.84 | +| explained_variance | -0.272 | +| learning_rate | 8.14e-05 | +| loss | 1.25 | +| n_updates | 310 | +| policy_gradient_loss | -0.0237 | +| value_loss | 8.41 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.24e+03 | +| ep_rew_mean | -28.3 | +| time/ | | +| fps | 355 | +| iterations | 33 | +| time_elapsed | 127 | +| total_timesteps | 45243 | +| train/ | | +| approx_kl | 0.032422796 | +| clip_fraction | 0.232 | +| clip_range | 0.26 | +| entropy_loss | -7.83 | +| explained_variance | 0.101 | +| learning_rate | 8.14e-05 | +| loss | 6.39 | +| n_updates | 320 | +| policy_gradient_loss | -0.00398 | +| value_loss | 30.1 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.2e+03 | +| ep_rew_mean | -20.8 | +| time/ | | +| fps | 356 | +| iterations | 34 | +| 
time_elapsed | 130 | +| total_timesteps | 46614 | +| train/ | | +| approx_kl | 0.031185307 | +| clip_fraction | 0.234 | +| clip_range | 0.26 | +| entropy_loss | -7.86 | +| explained_variance | -0.055 | +| learning_rate | 8.14e-05 | +| loss | 3.37 | +| n_updates | 330 | +| policy_gradient_loss | -0.0114 | +| value_loss | 35.8 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.2e+03 | +| ep_rew_mean | -20.8 | +| time/ | | +| fps | 355 | +| iterations | 35 | +| time_elapsed | 134 | +| total_timesteps | 47985 | +| train/ | | +| approx_kl | 0.030157859 | +| clip_fraction | 0.268 | +| clip_range | 0.26 | +| entropy_loss | -7.92 | +| explained_variance | -0.443 | +| learning_rate | 8.14e-05 | +| loss | 1.93 | +| n_updates | 340 | +| policy_gradient_loss | -0.0119 | +| value_loss | 16.3 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.23e+03 | +| ep_rew_mean | -15.9 | +| time/ | | +| fps | 355 | +| iterations | 36 | +| time_elapsed | 138 | +| total_timesteps | 49356 | +| train/ | | +| approx_kl | 0.028865792 | +| clip_fraction | 0.237 | +| clip_range | 0.26 | +| entropy_loss | -7.59 | +| explained_variance | 0.14 | +| learning_rate | 8.14e-05 | +| loss | 0.794 | +| n_updates | 350 | +| policy_gradient_loss | -0.0105 | +| value_loss | 9.95 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.23e+03 | +| ep_rew_mean | -15.9 | +| time/ | | +| fps | 355 | +| iterations | 37 | +| time_elapsed | 142 | +| total_timesteps | 50727 | +| train/ | | +| approx_kl | 0.02842192 | +| clip_fraction | 0.236 | +| clip_range | 0.26 | +| entropy_loss | -7.84 | +| explained_variance | -0.549 | +| learning_rate | 8.14e-05 | +| loss | 0.895 | +| n_updates | 360 | +| policy_gradient_loss | -0.0193 | +| value_loss | 10.1 | +---------------------------------------- 
+---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.22e+03 | +| ep_rew_mean | -20.1 | +| time/ | | +| fps | 355 | +| iterations | 38 | +| time_elapsed | 146 | +| total_timesteps | 52098 | +| train/ | | +| approx_kl | 0.03672131 | +| clip_fraction | 0.237 | +| clip_range | 0.26 | +| entropy_loss | -8.07 | +| explained_variance | 0.0648 | +| learning_rate | 8.14e-05 | +| loss | 4.86 | +| n_updates | 370 | +| policy_gradient_loss | -0.0211 | +| value_loss | 6.36 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.22e+03 | +| ep_rew_mean | -30.7 | +| time/ | | +| fps | 355 | +| iterations | 39 | +| time_elapsed | 150 | +| total_timesteps | 53469 | +| train/ | | +| approx_kl | 0.035383318 | +| clip_fraction | 0.218 | +| clip_range | 0.26 | +| entropy_loss | -7.98 | +| explained_variance | 0.0373 | +| learning_rate | 8.14e-05 | +| loss | 0.993 | +| n_updates | 380 | +| policy_gradient_loss | -0.0212 | +| value_loss | 13.9 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.19e+03 | +| ep_rew_mean | -23.8 | +| time/ | | +| fps | 355 | +| iterations | 40 | +| time_elapsed | 154 | +| total_timesteps | 54840 | +| train/ | | +| approx_kl | 0.035663478 | +| clip_fraction | 0.313 | +| clip_range | 0.26 | +| entropy_loss | -7.36 | +| explained_variance | 0.0586 | +| learning_rate | 8.14e-05 | +| loss | 2.23 | +| n_updates | 390 | +| policy_gradient_loss | -0.00997 | +| value_loss | 46.3 | +----------------------------------------- +-------------------------------------- +| rollout/ | | +| ep_len_mean | 2.19e+03 | +| ep_rew_mean | -23.8 | +| time/ | | +| fps | 354 | +| iterations | 41 | +| time_elapsed | 158 | +| total_timesteps | 56211 | +| train/ | | +| approx_kl | 0.052496 | +| clip_fraction | 0.338 | +| clip_range | 0.26 | +| entropy_loss | -7.89 | +| explained_variance | -0.49 | +| learning_rate | 8.14e-05 | +| 
loss | 1.45 | +| n_updates | 400 | +| policy_gradient_loss | -0.00552 | +| value_loss | 23.9 | +-------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.18e+03 | +| ep_rew_mean | -25.5 | +| time/ | | +| fps | 354 | +| iterations | 42 | +| time_elapsed | 162 | +| total_timesteps | 57582 | +| train/ | | +| approx_kl | 0.032533452 | +| clip_fraction | 0.26 | +| clip_range | 0.26 | +| entropy_loss | -7.35 | +| explained_variance | -0.0372 | +| learning_rate | 8.14e-05 | +| loss | 1.11 | +| n_updates | 410 | +| policy_gradient_loss | -0.0189 | +| value_loss | 9.41 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.18e+03 | +| ep_rew_mean | -25.5 | +| time/ | | +| fps | 355 | +| iterations | 43 | +| time_elapsed | 166 | +| total_timesteps | 58953 | +| train/ | | +| approx_kl | 0.033652484 | +| clip_fraction | 0.245 | +| clip_range | 0.26 | +| entropy_loss | -7.66 | +| explained_variance | -0.0541 | +| learning_rate | 8.14e-05 | +| loss | 1.85 | +| n_updates | 420 | +| policy_gradient_loss | -0.013 | +| value_loss | 14.4 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.14e+03 | +| ep_rew_mean | -12.6 | +| time/ | | +| fps | 355 | +| iterations | 44 | +| time_elapsed | 169 | +| total_timesteps | 60324 | +| train/ | | +| approx_kl | 0.029627763 | +| clip_fraction | 0.287 | +| clip_range | 0.26 | +| entropy_loss | -7.78 | +| explained_variance | -0.173 | +| learning_rate | 8.14e-05 | +| loss | 4.15 | +| n_updates | 430 | +| policy_gradient_loss | -0.0145 | +| value_loss | 11.2 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.1e+03 | +| ep_rew_mean | -3.31 | +| time/ | | +| fps | 355 | +| iterations | 45 | +| time_elapsed | 173 | +| total_timesteps | 61695 | +| train/ | | +| approx_kl | 0.053978715 | 
+| clip_fraction | 0.301 | +| clip_range | 0.26 | +| entropy_loss | -7.78 | +| explained_variance | 0.0145 | +| learning_rate | 8.14e-05 | +| loss | 5.25 | +| n_updates | 440 | +| policy_gradient_loss | -0.00936 | +| value_loss | 83.2 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.1e+03 | +| ep_rew_mean | -3.31 | +| time/ | | +| fps | 355 | +| iterations | 46 | +| time_elapsed | 177 | +| total_timesteps | 63066 | +| train/ | | +| approx_kl | 0.04385848 | +| clip_fraction | 0.309 | +| clip_range | 0.26 | +| entropy_loss | -7.72 | +| explained_variance | -0.142 | +| learning_rate | 8.14e-05 | +| loss | 1.44 | +| n_updates | 450 | +| policy_gradient_loss | -0.00675 | +| value_loss | 32.4 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.12e+03 | +| ep_rew_mean | -1.23 | +| time/ | | +| fps | 356 | +| iterations | 47 | +| time_elapsed | 180 | +| total_timesteps | 64437 | +| train/ | | +| approx_kl | 0.034602597 | +| clip_fraction | 0.301 | +| clip_range | 0.26 | +| entropy_loss | -7.3 | +| explained_variance | -0.657 | +| learning_rate | 8.14e-05 | +| loss | 1.27 | +| n_updates | 460 | +| policy_gradient_loss | -0.0142 | +| value_loss | 9.68 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.12e+03 | +| ep_rew_mean | -1.23 | +| time/ | | +| fps | 356 | +| iterations | 48 | +| time_elapsed | 184 | +| total_timesteps | 65808 | +| train/ | | +| approx_kl | 0.034854636 | +| clip_fraction | 0.264 | +| clip_range | 0.26 | +| entropy_loss | -7.11 | +| explained_variance | 0.0247 | +| learning_rate | 8.14e-05 | +| loss | 11.7 | +| n_updates | 470 | +| policy_gradient_loss | -0.0175 | +| value_loss | 20.1 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.13e+03 | +| ep_rew_mean | -1.26 | +| 
time/ | | +| fps | 356 | +| iterations | 49 | +| time_elapsed | 188 | +| total_timesteps | 67179 | +| train/ | | +| approx_kl | 0.050826874 | +| clip_fraction | 0.328 | +| clip_range | 0.26 | +| entropy_loss | -7.92 | +| explained_variance | -0.296 | +| learning_rate | 8.14e-05 | +| loss | 5.44 | +| n_updates | 480 | +| policy_gradient_loss | -0.0181 | +| value_loss | 13 | +----------------------------------------- +--------------------------------------- +| rollout/ | | +| ep_len_mean | 2.12e+03 | +| ep_rew_mean | -2.72 | +| time/ | | +| fps | 356 | +| iterations | 50 | +| time_elapsed | 192 | +| total_timesteps | 68550 | +| train/ | | +| approx_kl | 0.0387544 | +| clip_fraction | 0.284 | +| clip_range | 0.26 | +| entropy_loss | -7.56 | +| explained_variance | -0.016 | +| learning_rate | 8.14e-05 | +| loss | 1.01 | +| n_updates | 490 | +| policy_gradient_loss | -0.0124 | +| value_loss | 10.5 | +--------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.09e+03 | +| ep_rew_mean | 2.79 | +| time/ | | +| fps | 356 | +| iterations | 51 | +| time_elapsed | 196 | +| total_timesteps | 69921 | +| train/ | | +| approx_kl | 0.033755988 | +| clip_fraction | 0.261 | +| clip_range | 0.26 | +| entropy_loss | -6.82 | +| explained_variance | 0.0437 | +| learning_rate | 8.14e-05 | +| loss | 1.73 | +| n_updates | 500 | +| policy_gradient_loss | -0.019 | +| value_loss | 10.5 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.08e+03 | +| ep_rew_mean | 6.5 | +| time/ | | +| fps | 356 | +| iterations | 52 | +| time_elapsed | 200 | +| total_timesteps | 71292 | +| train/ | | +| approx_kl | 0.028060019 | +| clip_fraction | 0.276 | +| clip_range | 0.26 | +| entropy_loss | -7.04 | +| explained_variance | 0.0647 | +| learning_rate | 8.14e-05 | +| loss | 1.4 | +| n_updates | 510 | +| policy_gradient_loss | -0.00959 | +| value_loss | 33.6 | 
+----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.08e+03 | +| ep_rew_mean | 6.5 | +| time/ | | +| fps | 355 | +| iterations | 53 | +| time_elapsed | 204 | +| total_timesteps | 72663 | +| train/ | | +| approx_kl | 0.029590033 | +| clip_fraction | 0.232 | +| clip_range | 0.26 | +| entropy_loss | -6.96 | +| explained_variance | 0.174 | +| learning_rate | 8.14e-05 | +| loss | 7.49 | +| n_updates | 520 | +| policy_gradient_loss | -0.00783 | +| value_loss | 14.9 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.08e+03 | +| ep_rew_mean | 6.5 | +| time/ | | +| fps | 355 | +| iterations | 54 | +| time_elapsed | 208 | +| total_timesteps | 74034 | +| train/ | | +| approx_kl | 0.044851318 | +| clip_fraction | 0.327 | +| clip_range | 0.26 | +| entropy_loss | -7.81 | +| explained_variance | 0.106 | +| learning_rate | 8.14e-05 | +| loss | 1.97 | +| n_updates | 530 | +| policy_gradient_loss | -0.00695 | +| value_loss | 14.4 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.12e+03 | +| ep_rew_mean | 6 | +| time/ | | +| fps | 355 | +| iterations | 55 | +| time_elapsed | 212 | +| total_timesteps | 75405 | +| train/ | | +| approx_kl | 0.042934623 | +| clip_fraction | 0.296 | +| clip_range | 0.26 | +| entropy_loss | -7.69 | +| explained_variance | -0.881 | +| learning_rate | 8.14e-05 | +| loss | 0.152 | +| n_updates | 540 | +| policy_gradient_loss | -0.023 | +| value_loss | 3.2 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.12e+03 | +| ep_rew_mean | 6 | +| time/ | | +| fps | 355 | +| iterations | 56 | +| time_elapsed | 216 | +| total_timesteps | 76776 | +| train/ | | +| approx_kl | 0.031715214 | +| clip_fraction | 0.266 | +| clip_range | 0.26 | +| entropy_loss | -7.55 | +| explained_variance | 
0.163 | +| learning_rate | 8.14e-05 | +| loss | 0.161 | +| n_updates | 550 | +| policy_gradient_loss | -0.0273 | +| value_loss | 4.01 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.14e+03 | +| ep_rew_mean | 5.56 | +| time/ | | +| fps | 355 | +| iterations | 57 | +| time_elapsed | 219 | +| total_timesteps | 78147 | +| train/ | | +| approx_kl | 0.043580677 | +| clip_fraction | 0.346 | +| clip_range | 0.26 | +| entropy_loss | -7.62 | +| explained_variance | -0.127 | +| learning_rate | 8.14e-05 | +| loss | 1.73 | +| n_updates | 560 | +| policy_gradient_loss | -0.0176 | +| value_loss | 11 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.13e+03 | +| ep_rew_mean | 11.6 | +| time/ | | +| fps | 355 | +| iterations | 58 | +| time_elapsed | 223 | +| total_timesteps | 79518 | +| train/ | | +| approx_kl | 0.038065173 | +| clip_fraction | 0.272 | +| clip_range | 0.26 | +| entropy_loss | -7.52 | +| explained_variance | 0.234 | +| learning_rate | 8.14e-05 | +| loss | 4.84 | +| n_updates | 570 | +| policy_gradient_loss | -0.018 | +| value_loss | 8.44 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.13e+03 | +| ep_rew_mean | 11.6 | +| time/ | | +| fps | 355 | +| iterations | 59 | +| time_elapsed | 227 | +| total_timesteps | 80889 | +| train/ | | +| approx_kl | 0.049862172 | +| clip_fraction | 0.31 | +| clip_range | 0.26 | +| entropy_loss | -7.6 | +| explained_variance | 0.0943 | +| learning_rate | 8.14e-05 | +| loss | 0.524 | +| n_updates | 580 | +| policy_gradient_loss | -0.015 | +| value_loss | 14.5 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.13e+03 | +| ep_rew_mean | 11.2 | +| time/ | | +| fps | 355 | +| iterations | 60 | +| time_elapsed | 231 | +| total_timesteps | 82260 | +| 
train/ | | +| approx_kl | 0.040924706 | +| clip_fraction | 0.313 | +| clip_range | 0.26 | +| entropy_loss | -7.74 | +| explained_variance | -0.255 | +| learning_rate | 8.14e-05 | +| loss | 0.218 | +| n_updates | 590 | +| policy_gradient_loss | -0.0118 | +| value_loss | 10.8 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.14e+03 | +| ep_rew_mean | 5.38 | +| time/ | | +| fps | 355 | +| iterations | 61 | +| time_elapsed | 234 | +| total_timesteps | 83631 | +| train/ | | +| approx_kl | 0.031327777 | +| clip_fraction | 0.273 | +| clip_range | 0.26 | +| entropy_loss | -7.33 | +| explained_variance | -0.235 | +| learning_rate | 8.14e-05 | +| loss | 1.87 | +| n_updates | 600 | +| policy_gradient_loss | -0.00756 | +| value_loss | 9.36 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.11e+03 | +| ep_rew_mean | 9.3 | +| time/ | | +| fps | 356 | +| iterations | 62 | +| time_elapsed | 238 | +| total_timesteps | 85002 | +| train/ | | +| approx_kl | 0.077066906 | +| clip_fraction | 0.375 | +| clip_range | 0.26 | +| entropy_loss | -7.63 | +| explained_variance | -6.45e-05 | +| learning_rate | 8.14e-05 | +| loss | 4.58 | +| n_updates | 610 | +| policy_gradient_loss | -0.00174 | +| value_loss | 24.8 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.08e+03 | +| ep_rew_mean | 12.6 | +| time/ | | +| fps | 356 | +| iterations | 63 | +| time_elapsed | 242 | +| total_timesteps | 86373 | +| train/ | | +| approx_kl | 0.047113765 | +| clip_fraction | 0.325 | +| clip_range | 0.26 | +| entropy_loss | -7.27 | +| explained_variance | 0.345 | +| learning_rate | 8.14e-05 | +| loss | 2.71 | +| n_updates | 620 | +| policy_gradient_loss | -0.0105 | +| value_loss | 22 | +----------------------------------------- +-------------------------------------- +| rollout/ | | +| ep_len_mean | 
2.07e+03 | +| ep_rew_mean | 7.55 | +| time/ | | +| fps | 356 | +| iterations | 64 | +| time_elapsed | 246 | +| total_timesteps | 87744 | +| train/ | | +| approx_kl | 0.051026 | +| clip_fraction | 0.328 | +| clip_range | 0.26 | +| entropy_loss | -7.45 | +| explained_variance | -0.0504 | +| learning_rate | 8.14e-05 | +| loss | 8.05 | +| n_updates | 630 | +| policy_gradient_loss | -0.0202 | +| value_loss | 21.4 | +-------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.07e+03 | +| ep_rew_mean | 7.55 | +| time/ | | +| fps | 356 | +| iterations | 65 | +| time_elapsed | 249 | +| total_timesteps | 89115 | +| train/ | | +| approx_kl | 0.08706577 | +| clip_fraction | 0.408 | +| clip_range | 0.26 | +| entropy_loss | -7.05 | +| explained_variance | -0.281 | +| learning_rate | 8.14e-05 | +| loss | 6.06 | +| n_updates | 640 | +| policy_gradient_loss | -0.0151 | +| value_loss | 24.7 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.07e+03 | +| ep_rew_mean | 7.37 | +| time/ | | +| fps | 356 | +| iterations | 66 | +| time_elapsed | 253 | +| total_timesteps | 90486 | +| train/ | | +| approx_kl | 0.060183015 | +| clip_fraction | 0.353 | +| clip_range | 0.26 | +| entropy_loss | -7.27 | +| explained_variance | -0.235 | +| learning_rate | 8.14e-05 | +| loss | 2.65 | +| n_updates | 650 | +| policy_gradient_loss | -0.00283 | +| value_loss | 11.1 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.07e+03 | +| ep_rew_mean | 6.11 | +| time/ | | +| fps | 356 | +| iterations | 67 | +| time_elapsed | 257 | +| total_timesteps | 91857 | +| train/ | | +| approx_kl | 0.03188397 | +| clip_fraction | 0.266 | +| clip_range | 0.26 | +| entropy_loss | -7.28 | +| explained_variance | 0.319 | +| learning_rate | 8.14e-05 | +| loss | 0.867 | +| n_updates | 660 | +| policy_gradient_loss | -0.022 | +| 
value_loss | 7.59 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.07e+03 | +| ep_rew_mean | 6.11 | +| time/ | | +| fps | 356 | +| iterations | 68 | +| time_elapsed | 261 | +| total_timesteps | 93228 | +| train/ | | +| approx_kl | 0.049166773 | +| clip_fraction | 0.343 | +| clip_range | 0.26 | +| entropy_loss | -7.11 | +| explained_variance | -0.196 | +| learning_rate | 8.14e-05 | +| loss | 0.647 | +| n_updates | 670 | +| policy_gradient_loss | -0.0119 | +| value_loss | 11.7 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.08e+03 | +| ep_rew_mean | 2.13 | +| time/ | | +| fps | 355 | +| iterations | 69 | +| time_elapsed | 265 | +| total_timesteps | 94599 | +| train/ | | +| approx_kl | 0.03328535 | +| clip_fraction | 0.278 | +| clip_range | 0.26 | +| entropy_loss | -6.78 | +| explained_variance | -0.396 | +| learning_rate | 8.14e-05 | +| loss | 6.24 | +| n_updates | 680 | +| policy_gradient_loss | -0.0167 | +| value_loss | 7.49 | +---------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.08e+03 | +| ep_rew_mean | 2.13 | +| time/ | | +| fps | 355 | +| iterations | 70 | +| time_elapsed | 269 | +| total_timesteps | 95970 | +| train/ | | +| approx_kl | 0.08318052 | +| clip_fraction | 0.38 | +| clip_range | 0.26 | +| entropy_loss | -7.26 | +| explained_variance | -0.267 | +| learning_rate | 8.14e-05 | +| loss | 4.04 | +| n_updates | 690 | +| policy_gradient_loss | 0.00117 | +| value_loss | 15 | +---------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.09e+03 | +| ep_rew_mean | 1.28 | +| time/ | | +| fps | 355 | +| iterations | 71 | +| time_elapsed | 274 | +| total_timesteps | 97341 | +| train/ | | +| approx_kl | 0.05132381 | +| clip_fraction | 0.372 | +| clip_range | 0.26 | +| entropy_loss | -7.08 | +| 
explained_variance | -0.574 | +| learning_rate | 8.14e-05 | +| loss | 14.8 | +| n_updates | 700 | +| policy_gradient_loss | 0.0229 | +| value_loss | 15.2 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.09e+03 | +| ep_rew_mean | 0.298 | +| time/ | | +| fps | 354 | +| iterations | 72 | +| time_elapsed | 278 | +| total_timesteps | 98712 | +| train/ | | +| approx_kl | 0.049137857 | +| clip_fraction | 0.354 | +| clip_range | 0.26 | +| entropy_loss | -6.89 | +| explained_variance | 0.0478 | +| learning_rate | 8.14e-05 | +| loss | 1.22 | +| n_updates | 710 | +| policy_gradient_loss | -0.0197 | +| value_loss | 7.75 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.09e+03 | +| ep_rew_mean | 0.298 | +| time/ | | +| fps | 354 | +| iterations | 73 | +| time_elapsed | 282 | +| total_timesteps | 100083 | +| train/ | | +| approx_kl | 0.04495397 | +| clip_fraction | 0.278 | +| clip_range | 0.26 | +| entropy_loss | -6.95 | +| explained_variance | 0.0999 | +| learning_rate | 8.14e-05 | +| loss | 0.888 | +| n_updates | 720 | +| policy_gradient_loss | -0.0123 | +| value_loss | 11.4 | +---------------------------------------- +[I 2023-03-30 21:45:23,843] Trial 0 finished with value: -347.0 and parameters: {'n_steps': 1371, 'gamma': 0.9373200020810921, 'learning_rate': 8.141042840141496e-05, 'clip_range': 0.2600128459343352, 'gae_lambda': 0.9415709130298376}. Best is trial 0 with value: -347.0. +Using cuda device +Wrapping the env in a DummyVecEnv. +Wrapping the env in a VecTransposeImage. 
+C:\ProgramData\Anaconda3\envs\StreetFighterAI\lib\site-packages\stable_baselines3\ppo\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 3294`, after every 51 untruncated mini-batches, there will be a truncated mini-batch of size 30 +We recommend using a `batch_size` that is a factor of `n_steps * n_envs`. +Info: (n_steps=3294 and n_envs=1) + warnings.warn( +Logging to logs/PPO_19 +--------------------------------- +| rollout/ | | +| ep_len_mean | 2.44e+03 | +| ep_rew_mean | -47 | +| time/ | | +| fps | 550 | +| iterations | 1 | +| time_elapsed | 5 | +| total_timesteps | 3294 | +--------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2e+03 | +| ep_rew_mean | -140 | +| time/ | | +| fps | 417 | +| iterations | 2 | +| time_elapsed | 15 | +| total_timesteps | 6588 | +| train/ | | +| approx_kl | 0.008250391 | +| clip_fraction | 0.13 | +| clip_range | 0.193 | +| entropy_loss | -8.31 | +| explained_variance | -0.00164 | +| learning_rate | 5.11e-05 | +| loss | 0.766 | +| n_updates | 10 | +| policy_gradient_loss | -0.00627 | +| value_loss | 8.03 | +----------------------------------------- +------------------------------------------ +| rollout/ | | +| ep_len_mean | 2.31e+03 | +| ep_rew_mean | -95.8 | +| time/ | | +| fps | 392 | +| iterations | 3 | +| time_elapsed | 25 | +| total_timesteps | 9882 | +| train/ | | +| approx_kl | 0.0076712077 | +| clip_fraction | 0.113 | +| clip_range | 0.193 | +| entropy_loss | -8.3 | +| explained_variance | -0.0133 | +| learning_rate | 5.11e-05 | +| loss | 8.2 | +| n_updates | 20 | +| policy_gradient_loss | -0.00919 | +| value_loss | 22.1 | +------------------------------------------ +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.32e+03 | +| ep_rew_mean | -75.8 | +| time/ | | +| fps | 379 | +| iterations | 4 | +| time_elapsed | 34 | +| total_timesteps | 13176 | +| train/ | | +| 
approx_kl | 0.009143725 | +| clip_fraction | 0.1 | +| clip_range | 0.193 | +| entropy_loss | -8.29 | +| explained_variance | -0.0603 | +| learning_rate | 5.11e-05 | +| loss | 2.06 | +| n_updates | 30 | +| policy_gradient_loss | -0.0105 | +| value_loss | 7.27 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.32e+03 | +| ep_rew_mean | -68.2 | +| time/ | | +| fps | 371 | +| iterations | 5 | +| time_elapsed | 44 | +| total_timesteps | 16470 | +| train/ | | +| approx_kl | 0.009427849 | +| clip_fraction | 0.108 | +| clip_range | 0.193 | +| entropy_loss | -8.28 | +| explained_variance | 0.00375 | +| learning_rate | 5.11e-05 | +| loss | 1.5 | +| n_updates | 40 | +| policy_gradient_loss | -0.0116 | +| value_loss | 5.16 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.4e+03 | +| ep_rew_mean | -60.1 | +| time/ | | +| fps | 367 | +| iterations | 6 | +| time_elapsed | 53 | +| total_timesteps | 19764 | +| train/ | | +| approx_kl | 0.008516062 | +| clip_fraction | 0.102 | +| clip_range | 0.193 | +| entropy_loss | -8.27 | +| explained_variance | 0.0201 | +| learning_rate | 5.11e-05 | +| loss | 1.73 | +| n_updates | 50 | +| policy_gradient_loss | -0.0116 | +| value_loss | 7.34 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.44e+03 | +| ep_rew_mean | -73.8 | +| time/ | | +| fps | 364 | +| iterations | 7 | +| time_elapsed | 63 | +| total_timesteps | 23058 | +| train/ | | +| approx_kl | 0.009343307 | +| clip_fraction | 0.105 | +| clip_range | 0.193 | +| entropy_loss | -8.27 | +| explained_variance | 0.0736 | +| learning_rate | 5.11e-05 | +| loss | 2.54 | +| n_updates | 60 | +| policy_gradient_loss | -0.0116 | +| value_loss | 5.97 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.45e+03 | +| 
ep_rew_mean | -94.5 | +| time/ | | +| fps | 359 | +| iterations | 8 | +| time_elapsed | 73 | +| total_timesteps | 26352 | +| train/ | | +| approx_kl | 0.008755345 | +| clip_fraction | 0.103 | +| clip_range | 0.193 | +| entropy_loss | -8.26 | +| explained_variance | -0.0946 | +| learning_rate | 5.11e-05 | +| loss | 13.2 | +| n_updates | 70 | +| policy_gradient_loss | -0.0106 | +| value_loss | 18.1 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.46e+03 | +| ep_rew_mean | -100 | +| time/ | | +| fps | 357 | +| iterations | 9 | +| time_elapsed | 82 | +| total_timesteps | 29646 | +| train/ | | +| approx_kl | 0.007682183 | +| clip_fraction | 0.0851 | +| clip_range | 0.193 | +| entropy_loss | -8.25 | +| explained_variance | 0.0101 | +| learning_rate | 5.11e-05 | +| loss | 0.305 | +| n_updates | 80 | +| policy_gradient_loss | -0.0103 | +| value_loss | 18.6 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.34e+03 | +| ep_rew_mean | -116 | +| time/ | | +| fps | 355 | +| iterations | 10 | +| time_elapsed | 92 | +| total_timesteps | 32940 | +| train/ | | +| approx_kl | 0.010493592 | +| clip_fraction | 0.12 | +| clip_range | 0.193 | +| entropy_loss | -8.25 | +| explained_variance | -0.0487 | +| learning_rate | 5.11e-05 | +| loss | 0.736 | +| n_updates | 90 | +| policy_gradient_loss | -0.0141 | +| value_loss | 10.8 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.34e+03 | +| ep_rew_mean | -116 | +| time/ | | +| fps | 355 | +| iterations | 11 | +| time_elapsed | 102 | +| total_timesteps | 36234 | +| train/ | | +| approx_kl | 0.014990667 | +| clip_fraction | 0.177 | +| clip_range | 0.193 | +| entropy_loss | -8.24 | +| explained_variance | -0.0262 | +| learning_rate | 5.11e-05 | +| loss | 6.56 | +| n_updates | 100 | +| policy_gradient_loss | -0.00923 | +| 
value_loss | 17.9 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.42e+03 | +| ep_rew_mean | -95.2 | +| time/ | | +| fps | 355 | +| iterations | 12 | +| time_elapsed | 111 | +| total_timesteps | 39528 | +| train/ | | +| approx_kl | 0.012156485 | +| clip_fraction | 0.158 | +| clip_range | 0.193 | +| entropy_loss | -8.22 | +| explained_variance | 0.0904 | +| learning_rate | 5.11e-05 | +| loss | 11.6 | +| n_updates | 110 | +| policy_gradient_loss | -0.0144 | +| value_loss | 4.6 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.43e+03 | +| ep_rew_mean | -96.6 | +| time/ | | +| fps | 354 | +| iterations | 13 | +| time_elapsed | 120 | +| total_timesteps | 42822 | +| train/ | | +| approx_kl | 0.009144909 | +| clip_fraction | 0.116 | +| clip_range | 0.193 | +| entropy_loss | -8.21 | +| explained_variance | 0.0377 | +| learning_rate | 5.11e-05 | +| loss | 0.243 | +| n_updates | 120 | +| policy_gradient_loss | -0.0126 | +| value_loss | 7.32 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.47e+03 | +| ep_rew_mean | -93.6 | +| time/ | | +| fps | 353 | +| iterations | 14 | +| time_elapsed | 130 | +| total_timesteps | 46116 | +| train/ | | +| approx_kl | 0.010074705 | +| clip_fraction | 0.143 | +| clip_range | 0.193 | +| entropy_loss | -8.21 | +| explained_variance | 0.0431 | +| learning_rate | 5.11e-05 | +| loss | 0.422 | +| n_updates | 130 | +| policy_gradient_loss | -0.0116 | +| value_loss | 8.13 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.46e+03 | +| ep_rew_mean | -89.6 | +| time/ | | +| fps | 352 | +| iterations | 15 | +| time_elapsed | 140 | +| total_timesteps | 49410 | +| train/ | | +| approx_kl | 0.00962226 | +| clip_fraction | 0.141 | +| clip_range | 0.193 | +| entropy_loss | 
-8.19 | +| explained_variance | -0.127 | +| learning_rate | 5.11e-05 | +| loss | 4.31 | +| n_updates | 140 | +| policy_gradient_loss | -0.0141 | +| value_loss | 7.67 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.51e+03 | +| ep_rew_mean | -95.2 | +| time/ | | +| fps | 352 | +| iterations | 16 | +| time_elapsed | 149 | +| total_timesteps | 52704 | +| train/ | | +| approx_kl | 0.010282748 | +| clip_fraction | 0.149 | +| clip_range | 0.193 | +| entropy_loss | -8.18 | +| explained_variance | 0.0186 | +| learning_rate | 5.11e-05 | +| loss | 0.291 | +| n_updates | 150 | +| policy_gradient_loss | -0.0157 | +| value_loss | 3.42 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.49e+03 | +| ep_rew_mean | -86.3 | +| time/ | | +| fps | 352 | +| iterations | 17 | +| time_elapsed | 158 | +| total_timesteps | 55998 | +| train/ | | +| approx_kl | 0.011693283 | +| clip_fraction | 0.129 | +| clip_range | 0.193 | +| entropy_loss | -8.17 | +| explained_variance | 0.038 | +| learning_rate | 5.11e-05 | +| loss | 2.92 | +| n_updates | 160 | +| policy_gradient_loss | -0.0117 | +| value_loss | 12.6 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.47e+03 | +| ep_rew_mean | -91.1 | +| time/ | | +| fps | 353 | +| iterations | 18 | +| time_elapsed | 167 | +| total_timesteps | 59292 | +| train/ | | +| approx_kl | 0.011810733 | +| clip_fraction | 0.155 | +| clip_range | 0.193 | +| entropy_loss | -8.16 | +| explained_variance | 0.0673 | +| learning_rate | 5.11e-05 | +| loss | 0.93 | +| n_updates | 170 | +| policy_gradient_loss | -0.0134 | +| value_loss | 9.58 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.5e+03 | +| ep_rew_mean | -88.9 | +| time/ | | +| fps | 353 | +| iterations | 19 | +| time_elapsed | 
177 | +| total_timesteps | 62586 | +| train/ | | +| approx_kl | 0.012234138 | +| clip_fraction | 0.167 | +| clip_range | 0.193 | +| entropy_loss | -8.17 | +| explained_variance | 0.033 | +| learning_rate | 5.11e-05 | +| loss | 0.243 | +| n_updates | 180 | +| policy_gradient_loss | -0.0139 | +| value_loss | 8.98 | +----------------------------------------- +------------------------------------------ +| rollout/ | | +| ep_len_mean | 2.5e+03 | +| ep_rew_mean | -88.9 | +| time/ | | +| fps | 352 | +| iterations | 20 | +| time_elapsed | 186 | +| total_timesteps | 65880 | +| train/ | | +| approx_kl | 0.0135463225 | +| clip_fraction | 0.167 | +| clip_range | 0.193 | +| entropy_loss | -8.15 | +| explained_variance | 0.104 | +| learning_rate | 5.11e-05 | +| loss | 1.94 | +| n_updates | 190 | +| policy_gradient_loss | -0.0105 | +| value_loss | 8.12 | +------------------------------------------ +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.59e+03 | +| ep_rew_mean | 114 | +| time/ | | +| fps | 351 | +| iterations | 21 | +| time_elapsed | 196 | +| total_timesteps | 69174 | +| train/ | | +| approx_kl | 0.012275431 | +| clip_fraction | 0.164 | +| clip_range | 0.193 | +| entropy_loss | -8.14 | +| explained_variance | -0.00593 | +| learning_rate | 5.11e-05 | +| loss | 15.6 | +| n_updates | 200 | +| policy_gradient_loss | -0.00313 | +| value_loss | 1.08e+04 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.54e+03 | +| ep_rew_mean | 104 | +| time/ | | +| fps | 351 | +| iterations | 22 | +| time_elapsed | 206 | +| total_timesteps | 72468 | +| train/ | | +| approx_kl | 0.014285704 | +| clip_fraction | 0.189 | +| clip_range | 0.193 | +| entropy_loss | -8.14 | +| explained_variance | 0.172 | +| learning_rate | 5.11e-05 | +| loss | 0.239 | +| n_updates | 210 | +| policy_gradient_loss | -0.0122 | +| value_loss | 8.08 | +----------------------------------------- 
+---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.53e+03 | +| ep_rew_mean | 91.4 | +| time/ | | +| fps | 351 | +| iterations | 23 | +| time_elapsed | 215 | +| total_timesteps | 75762 | +| train/ | | +| approx_kl | 0.01594875 | +| clip_fraction | 0.212 | +| clip_range | 0.193 | +| entropy_loss | -8.11 | +| explained_variance | -0.0207 | +| learning_rate | 5.11e-05 | +| loss | 19.3 | +| n_updates | 220 | +| policy_gradient_loss | -0.0113 | +| value_loss | 12.8 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.52e+03 | +| ep_rew_mean | 98.8 | +| time/ | | +| fps | 351 | +| iterations | 24 | +| time_elapsed | 225 | +| total_timesteps | 79056 | +| train/ | | +| approx_kl | 0.016312802 | +| clip_fraction | 0.236 | +| clip_range | 0.193 | +| entropy_loss | -8.03 | +| explained_variance | 0.118 | +| learning_rate | 5.11e-05 | +| loss | 0.819 | +| n_updates | 230 | +| policy_gradient_loss | -0.0082 | +| value_loss | 15.8 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.49e+03 | +| ep_rew_mean | 77.5 | +| time/ | | +| fps | 351 | +| iterations | 25 | +| time_elapsed | 234 | +| total_timesteps | 82350 | +| train/ | | +| approx_kl | 0.012337481 | +| clip_fraction | 0.163 | +| clip_range | 0.193 | +| entropy_loss | -8.13 | +| explained_variance | 0.0669 | +| learning_rate | 5.11e-05 | +| loss | 16.1 | +| n_updates | 240 | +| policy_gradient_loss | -0.00899 | +| value_loss | 16.1 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.46e+03 | +| ep_rew_mean | 66.2 | +| time/ | | +| fps | 347 | +| iterations | 26 | +| time_elapsed | 246 | +| total_timesteps | 85644 | +| train/ | | +| approx_kl | 0.018887786 | +| clip_fraction | 0.241 | +| clip_range | 0.193 | +| entropy_loss | -8 | +| explained_variance | 0.107 | +| learning_rate | 5.11e-05 | 
+| loss | 3.57 | +| n_updates | 250 | +| policy_gradient_loss | -0.00903 | +| value_loss | 26.9 | +----------------------------------------- +--------------------------------------- +| rollout/ | | +| ep_len_mean | 2.47e+03 | +| ep_rew_mean | 58 | +| time/ | | +| fps | 346 | +| iterations | 27 | +| time_elapsed | 256 | +| total_timesteps | 88938 | +| train/ | | +| approx_kl | 0.0155593 | +| clip_fraction | 0.219 | +| clip_range | 0.193 | +| entropy_loss | -8.07 | +| explained_variance | 0.0512 | +| learning_rate | 5.11e-05 | +| loss | 3.59 | +| n_updates | 260 | +| policy_gradient_loss | -0.0133 | +| value_loss | 21 | +--------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.44e+03 | +| ep_rew_mean | 54.1 | +| time/ | | +| fps | 346 | +| iterations | 28 | +| time_elapsed | 266 | +| total_timesteps | 92232 | +| train/ | | +| approx_kl | 0.015150225 | +| clip_fraction | 0.198 | +| clip_range | 0.193 | +| entropy_loss | -8.1 | +| explained_variance | 0.149 | +| learning_rate | 5.11e-05 | +| loss | 3.1 | +| n_updates | 270 | +| policy_gradient_loss | -0.00993 | +| value_loss | 9.94 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.47e+03 | +| ep_rew_mean | 52.1 | +| time/ | | +| fps | 346 | +| iterations | 29 | +| time_elapsed | 275 | +| total_timesteps | 95526 | +| train/ | | +| approx_kl | 0.016742641 | +| clip_fraction | 0.223 | +| clip_range | 0.193 | +| entropy_loss | -7.91 | +| explained_variance | 0.276 | +| learning_rate | 5.11e-05 | +| loss | 4.49 | +| n_updates | 280 | +| policy_gradient_loss | -0.015 | +| value_loss | 7.73 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.44e+03 | +| ep_rew_mean | 41.7 | +| time/ | | +| fps | 346 | +| iterations | 30 | +| time_elapsed | 284 | +| total_timesteps | 98820 | +| train/ | | +| approx_kl | 0.016321812 | +| 
clip_fraction | 0.238 | +| clip_range | 0.193 | +| entropy_loss | -8.08 | +| explained_variance | 0.104 | +| learning_rate | 5.11e-05 | +| loss | 0.345 | +| n_updates | 290 | +| policy_gradient_loss | -0.0153 | +| value_loss | 5.16 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.43e+03 | +| ep_rew_mean | 40 | +| time/ | | +| fps | 346 | +| iterations | 31 | +| time_elapsed | 294 | +| total_timesteps | 102114 | +| train/ | | +| approx_kl | 0.016819764 | +| clip_fraction | 0.22 | +| clip_range | 0.193 | +| entropy_loss | -7.89 | +| explained_variance | -0.0187 | +| learning_rate | 5.11e-05 | +| loss | 1.07 | +| n_updates | 300 | +| policy_gradient_loss | -0.00886 | +| value_loss | 15.6 | +----------------------------------------- +[I 2023-03-30 21:50:56,850] Trial 1 finished with value: -205.0 and parameters: {'n_steps': 3294, 'gamma': 0.9019828232975781, 'learning_rate': 5.112209134805487e-05, 'clip_range': 0.1926590966798606, 'gae_lambda': 0.8670673597089896}. Best is trial 1 with value: -205.0. +Using cuda device +Wrapping the env in a DummyVecEnv. +Wrapping the env in a VecTransposeImage. +C:\ProgramData\Anaconda3\envs\StreetFighterAI\lib\site-packages\stable_baselines3\ppo\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 1586`, after every 24 untruncated mini-batches, there will be a truncated mini-batch of size 50 +We recommend using a `batch_size` that is a factor of `n_steps * n_envs`. 
+Info: (n_steps=1586 and n_envs=1) + warnings.warn( +Logging to logs/PPO_20 +----------------------------- +| time/ | | +| fps | 611 | +| iterations | 1 | +| time_elapsed | 2 | +| total_timesteps | 1586 | +----------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.1e+03 | +| ep_rew_mean | -13 | +| time/ | | +| fps | 448 | +| iterations | 2 | +| time_elapsed | 7 | +| total_timesteps | 3172 | +| train/ | | +| approx_kl | 0.007479368 | +| clip_fraction | 0.211 | +| clip_range | 0.161 | +| entropy_loss | -8.31 | +| explained_variance | -0.00514 | +| learning_rate | 5.02e-05 | +| loss | 5.13 | +| n_updates | 10 | +| policy_gradient_loss | -0.0115 | +| value_loss | 11.6 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.21e+03 | +| ep_rew_mean | -115 | +| time/ | | +| fps | 413 | +| iterations | 3 | +| time_elapsed | 11 | +| total_timesteps | 4758 | +| train/ | | +| approx_kl | 0.009029714 | +| clip_fraction | 0.168 | +| clip_range | 0.161 | +| entropy_loss | -8.3 | +| explained_variance | 0.0171 | +| learning_rate | 5.02e-05 | +| loss | 1.61 | +| n_updates | 20 | +| policy_gradient_loss | -0.0118 | +| value_loss | 15 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.21e+03 | +| ep_rew_mean | -115 | +| time/ | | +| fps | 395 | +| iterations | 4 | +| time_elapsed | 16 | +| total_timesteps | 6344 | +| train/ | | +| approx_kl | 0.011066959 | +| clip_fraction | 0.22 | +| clip_range | 0.161 | +| entropy_loss | -8.28 | +| explained_variance | 0.138 | +| learning_rate | 5.02e-05 | +| loss | 4.3 | +| n_updates | 30 | +| policy_gradient_loss | -0.0035 | +| value_loss | 26.5 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.13e+03 | +| ep_rew_mean | -64.3 | +| time/ | | +| fps | 385 | +| iterations | 5 | +| 
time_elapsed | 20 | +| total_timesteps | 7930 | +| train/ | | +| approx_kl | 0.006398973 | +| clip_fraction | 0.123 | +| clip_range | 0.161 | +| entropy_loss | -8.29 | +| explained_variance | 0.0506 | +| learning_rate | 5.02e-05 | +| loss | 7.38 | +| n_updates | 40 | +| policy_gradient_loss | -0.00387 | +| value_loss | 13.7 | +----------------------------------------- +------------------------------------------ +| rollout/ | | +| ep_len_mean | 2.07e+03 | +| ep_rew_mean | -124 | +| time/ | | +| fps | 378 | +| iterations | 6 | +| time_elapsed | 25 | +| total_timesteps | 9516 | +| train/ | | +| approx_kl | 0.0068364535 | +| clip_fraction | 0.234 | +| clip_range | 0.161 | +| entropy_loss | -8.28 | +| explained_variance | 0.0926 | +| learning_rate | 5.02e-05 | +| loss | 1.79 | +| n_updates | 50 | +| policy_gradient_loss | -0.00452 | +| value_loss | 8.46 | +------------------------------------------ +------------------------------------------ +| rollout/ | | +| ep_len_mean | 2.2e+03 | +| ep_rew_mean | -129 | +| time/ | | +| fps | 373 | +| iterations | 7 | +| time_elapsed | 29 | +| total_timesteps | 11102 | +| train/ | | +| approx_kl | 0.0063208304 | +| clip_fraction | 0.139 | +| clip_range | 0.161 | +| entropy_loss | -8.29 | +| explained_variance | 0.225 | +| learning_rate | 5.02e-05 | +| loss | 11 | +| n_updates | 60 | +| policy_gradient_loss | -0.00482 | +| value_loss | 55.3 | +------------------------------------------ +------------------------------------------ +| rollout/ | | +| ep_len_mean | 2.2e+03 | +| ep_rew_mean | -129 | +| time/ | | +| fps | 369 | +| iterations | 8 | +| time_elapsed | 34 | +| total_timesteps | 12688 | +| train/ | | +| approx_kl | 0.0049916673 | +| clip_fraction | 0.146 | +| clip_range | 0.161 | +| entropy_loss | -8.29 | +| explained_variance | 0.341 | +| learning_rate | 5.02e-05 | +| loss | 4.11 | +| n_updates | 70 | +| policy_gradient_loss | -0.00677 | +| value_loss | 21.2 | +------------------------------------------ 
+---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.25e+03 | +| ep_rew_mean | -140 | +| time/ | | +| fps | 368 | +| iterations | 9 | +| time_elapsed | 38 | +| total_timesteps | 14274 | +| train/ | | +| approx_kl | 0.00807819 | +| clip_fraction | 0.222 | +| clip_range | 0.161 | +| entropy_loss | -8.29 | +| explained_variance | 0.368 | +| learning_rate | 5.02e-05 | +| loss | 0.672 | +| n_updates | 80 | +| policy_gradient_loss | -0.00792 | +| value_loss | 6.29 | +---------------------------------------- +------------------------------------------ +| rollout/ | | +| ep_len_mean | 2.09e+03 | +| ep_rew_mean | -96.1 | +| time/ | | +| fps | 366 | +| iterations | 10 | +| time_elapsed | 43 | +| total_timesteps | 15860 | +| train/ | | +| approx_kl | 0.0066149407 | +| clip_fraction | 0.157 | +| clip_range | 0.161 | +| entropy_loss | -8.29 | +| explained_variance | 0.102 | +| learning_rate | 5.02e-05 | +| loss | 47.7 | +| n_updates | 90 | +| policy_gradient_loss | -0.00711 | +| value_loss | 24.1 | +------------------------------------------ +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.09e+03 | +| ep_rew_mean | -96.1 | +| time/ | | +| fps | 366 | +| iterations | 11 | +| time_elapsed | 47 | +| total_timesteps | 17446 | +| train/ | | +| approx_kl | 0.009226098 | +| clip_fraction | 0.162 | +| clip_range | 0.161 | +| entropy_loss | -8.29 | +| explained_variance | 0.103 | +| learning_rate | 5.02e-05 | +| loss | 32.2 | +| n_updates | 100 | +| policy_gradient_loss | -0.00681 | +| value_loss | 30 | +----------------------------------------- +------------------------------------------ +| rollout/ | | +| ep_len_mean | 2.23e+03 | +| ep_rew_mean | -84.8 | +| time/ | | +| fps | 365 | +| iterations | 12 | +| time_elapsed | 52 | +| total_timesteps | 19032 | +| train/ | | +| approx_kl | 0.0071024043 | +| clip_fraction | 0.202 | +| clip_range | 0.161 | +| entropy_loss | -8.28 | +| explained_variance | 0.477 | +| learning_rate | 5.02e-05 | 
+| loss | 1.68 | +| n_updates | 110 | +| policy_gradient_loss | -0.00699 | +| value_loss | 8.59 | +------------------------------------------ +------------------------------------------ +| rollout/ | | +| ep_len_mean | 2.29e+03 | +| ep_rew_mean | -80.7 | +| time/ | | +| fps | 364 | +| iterations | 13 | +| time_elapsed | 56 | +| total_timesteps | 20618 | +| train/ | | +| approx_kl | 0.0079917265 | +| clip_fraction | 0.177 | +| clip_range | 0.161 | +| entropy_loss | -8.28 | +| explained_variance | 0.499 | +| learning_rate | 5.02e-05 | +| loss | 2.03 | +| n_updates | 120 | +| policy_gradient_loss | -0.00561 | +| value_loss | 9.53 | +------------------------------------------ +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.29e+03 | +| ep_rew_mean | -80.7 | +| time/ | | +| fps | 364 | +| iterations | 14 | +| time_elapsed | 60 | +| total_timesteps | 22204 | +| train/ | | +| approx_kl | 0.008118922 | +| clip_fraction | 0.183 | +| clip_range | 0.161 | +| entropy_loss | -8.28 | +| explained_variance | 0.592 | +| learning_rate | 5.02e-05 | +| loss | 1.45 | +| n_updates | 130 | +| policy_gradient_loss | -0.00841 | +| value_loss | 8.69 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.35e+03 | +| ep_rew_mean | -77.8 | +| time/ | | +| fps | 363 | +| iterations | 15 | +| time_elapsed | 65 | +| total_timesteps | 23790 | +| train/ | | +| approx_kl | 0.009747963 | +| clip_fraction | 0.177 | +| clip_range | 0.161 | +| entropy_loss | -8.27 | +| explained_variance | 0.568 | +| learning_rate | 5.02e-05 | +| loss | 3.4 | +| n_updates | 140 | +| policy_gradient_loss | -0.00655 | +| value_loss | 12.6 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.35e+03 | +| ep_rew_mean | -77.8 | +| time/ | | +| fps | 363 | +| iterations | 16 | +| time_elapsed | 69 | +| total_timesteps | 25376 | +| train/ | | +| approx_kl | 
0.011058032 | +| clip_fraction | 0.209 | +| clip_range | 0.161 | +| entropy_loss | -8.25 | +| explained_variance | 0.54 | +| learning_rate | 5.02e-05 | +| loss | 0.934 | +| n_updates | 150 | +| policy_gradient_loss | -0.00562 | +| value_loss | 8.9 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.4e+03 | +| ep_rew_mean | -70.5 | +| time/ | | +| fps | 364 | +| iterations | 17 | +| time_elapsed | 74 | +| total_timesteps | 26962 | +| train/ | | +| approx_kl | 0.007287364 | +| clip_fraction | 0.151 | +| clip_range | 0.161 | +| entropy_loss | -8.26 | +| explained_variance | 0.473 | +| learning_rate | 5.02e-05 | +| loss | 0.892 | +| n_updates | 160 | +| policy_gradient_loss | -0.00825 | +| value_loss | 7.3 | +----------------------------------------- +------------------------------------------ +| rollout/ | | +| ep_len_mean | 2.4e+03 | +| ep_rew_mean | -70.5 | +| time/ | | +| fps | 364 | +| iterations | 18 | +| time_elapsed | 78 | +| total_timesteps | 28548 | +| train/ | | +| approx_kl | 0.0059617176 | +| clip_fraction | 0.119 | +| clip_range | 0.161 | +| entropy_loss | -8.27 | +| explained_variance | 0.193 | +| learning_rate | 5.02e-05 | +| loss | 5.01 | +| n_updates | 170 | +| policy_gradient_loss | -0.00776 | +| value_loss | 15.3 | +------------------------------------------ +------------------------------------------ +| rollout/ | | +| ep_len_mean | 2.49e+03 | +| ep_rew_mean | -64.2 | +| time/ | | +| fps | 364 | +| iterations | 19 | +| time_elapsed | 82 | +| total_timesteps | 30134 | +| train/ | | +| approx_kl | 0.0068875425 | +| clip_fraction | 0.143 | +| clip_range | 0.161 | +| entropy_loss | -8.27 | +| explained_variance | 0.208 | +| learning_rate | 5.02e-05 | +| loss | 0.754 | +| n_updates | 180 | +| policy_gradient_loss | -0.00782 | +| value_loss | 8.96 | +------------------------------------------ +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.49e+03 | +| 
ep_rew_mean | -64.2 | +| time/ | | +| fps | 364 | +| iterations | 20 | +| time_elapsed | 86 | +| total_timesteps | 31720 | +| train/ | | +| approx_kl | 0.006500314 | +| clip_fraction | 0.141 | +| clip_range | 0.161 | +| entropy_loss | -8.28 | +| explained_variance | 0.431 | +| learning_rate | 5.02e-05 | +| loss | 0.835 | +| n_updates | 190 | +| policy_gradient_loss | -0.0125 | +| value_loss | 4.6 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.47e+03 | +| ep_rew_mean | -59.9 | +| time/ | | +| fps | 363 | +| iterations | 21 | +| time_elapsed | 91 | +| total_timesteps | 33306 | +| train/ | | +| approx_kl | 0.00925716 | +| clip_fraction | 0.167 | +| clip_range | 0.161 | +| entropy_loss | -8.28 | +| explained_variance | 0.166 | +| learning_rate | 5.02e-05 | +| loss | 4.07 | +| n_updates | 200 | +| policy_gradient_loss | -0.0126 | +| value_loss | 13.9 | +---------------------------------------- +------------------------------------------ +| rollout/ | | +| ep_len_mean | 2.47e+03 | +| ep_rew_mean | -59.9 | +| time/ | | +| fps | 362 | +| iterations | 22 | +| time_elapsed | 96 | +| total_timesteps | 34892 | +| train/ | | +| approx_kl | 0.0061101955 | +| clip_fraction | 0.121 | +| clip_range | 0.161 | +| entropy_loss | -8.27 | +| explained_variance | 0.192 | +| learning_rate | 5.02e-05 | +| loss | 0.275 | +| n_updates | 210 | +| policy_gradient_loss | -0.0125 | +| value_loss | 6.15 | +------------------------------------------ +------------------------------------------ +| rollout/ | | +| ep_len_mean | 2.5e+03 | +| ep_rew_mean | -68.9 | +| time/ | | +| fps | 361 | +| iterations | 23 | +| time_elapsed | 100 | +| total_timesteps | 36478 | +| train/ | | +| approx_kl | 0.0070993374 | +| clip_fraction | 0.127 | +| clip_range | 0.161 | +| entropy_loss | -8.29 | +| explained_variance | 0.0764 | +| learning_rate | 5.02e-05 | +| loss | 1.86 | +| n_updates | 220 | +| policy_gradient_loss | -0.0111 | +| 
value_loss | 11.7 | +------------------------------------------ +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.5e+03 | +| ep_rew_mean | -65.4 | +| time/ | | +| fps | 360 | +| iterations | 24 | +| time_elapsed | 105 | +| total_timesteps | 38064 | +| train/ | | +| approx_kl | 0.010024515 | +| clip_fraction | 0.182 | +| clip_range | 0.161 | +| entropy_loss | -8.27 | +| explained_variance | 0.335 | +| learning_rate | 5.02e-05 | +| loss | 30 | +| n_updates | 230 | +| policy_gradient_loss | -0.012 | +| value_loss | 25.5 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.5e+03 | +| ep_rew_mean | -65.4 | +| time/ | | +| fps | 358 | +| iterations | 25 | +| time_elapsed | 110 | +| total_timesteps | 39650 | +| train/ | | +| approx_kl | 0.009245104 | +| clip_fraction | 0.179 | +| clip_range | 0.161 | +| entropy_loss | -8.27 | +| explained_variance | 0.325 | +| learning_rate | 5.02e-05 | +| loss | 9.09 | +| n_updates | 240 | +| policy_gradient_loss | -0.012 | +| value_loss | 9.09 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.5e+03 | +| ep_rew_mean | -73.7 | +| time/ | | +| fps | 355 | +| iterations | 26 | +| time_elapsed | 115 | +| total_timesteps | 41236 | +| train/ | | +| approx_kl | 0.007121284 | +| clip_fraction | 0.155 | +| clip_range | 0.161 | +| entropy_loss | -8.26 | +| explained_variance | 0.297 | +| learning_rate | 5.02e-05 | +| loss | 0.498 | +| n_updates | 250 | +| policy_gradient_loss | -0.00963 | +| value_loss | 5.33 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.48e+03 | +| ep_rew_mean | -70.9 | +| time/ | | +| fps | 353 | +| iterations | 27 | +| time_elapsed | 121 | +| total_timesteps | 42822 | +| train/ | | +| approx_kl | 0.006750791 | +| clip_fraction | 0.22 | +| clip_range | 0.161 | +| entropy_loss | -8.24 | 
+| explained_variance | 0.0803 | +| learning_rate | 5.02e-05 | +| loss | 11.9 | +| n_updates | 260 | +| policy_gradient_loss | -0.00203 | +| value_loss | 36.9 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.41e+03 | +| ep_rew_mean | -58.2 | +| time/ | | +| fps | 353 | +| iterations | 28 | +| time_elapsed | 125 | +| total_timesteps | 44408 | +| train/ | | +| approx_kl | 0.010176781 | +| clip_fraction | 0.163 | +| clip_range | 0.161 | +| entropy_loss | -8.23 | +| explained_variance | 0.524 | +| learning_rate | 5.02e-05 | +| loss | 3.07 | +| n_updates | 270 | +| policy_gradient_loss | -0.0123 | +| value_loss | 12.7 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.41e+03 | +| ep_rew_mean | -65.8 | +| time/ | | +| fps | 353 | +| iterations | 29 | +| time_elapsed | 130 | +| total_timesteps | 45994 | +| train/ | | +| approx_kl | 0.009089488 | +| clip_fraction | 0.196 | +| clip_range | 0.161 | +| entropy_loss | -8.25 | +| explained_variance | -0.0364 | +| learning_rate | 5.02e-05 | +| loss | 6.55 | +| n_updates | 280 | +| policy_gradient_loss | -0.00565 | +| value_loss | 31.2 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.41e+03 | +| ep_rew_mean | -65.8 | +| time/ | | +| fps | 353 | +| iterations | 30 | +| time_elapsed | 134 | +| total_timesteps | 47580 | +| train/ | | +| approx_kl | 0.010195761 | +| clip_fraction | 0.168 | +| clip_range | 0.161 | +| entropy_loss | -8.21 | +| explained_variance | 0.248 | +| learning_rate | 5.02e-05 | +| loss | 2.92 | +| n_updates | 290 | +| policy_gradient_loss | -0.00979 | +| value_loss | 21.1 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.41e+03 | +| ep_rew_mean | -62.6 | +| time/ | | +| fps | 354 | +| iterations | 31 | +| time_elapsed | 138 | 
+| total_timesteps | 49166 | +| train/ | | +| approx_kl | 0.00898233 | +| clip_fraction | 0.207 | +| clip_range | 0.161 | +| entropy_loss | -8.21 | +| explained_variance | 0.268 | +| learning_rate | 5.02e-05 | +| loss | 2.67 | +| n_updates | 300 | +| policy_gradient_loss | -0.00882 | +| value_loss | 9.28 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.39e+03 | +| ep_rew_mean | -61.5 | +| time/ | | +| fps | 353 | +| iterations | 32 | +| time_elapsed | 143 | +| total_timesteps | 50752 | +| train/ | | +| approx_kl | 0.011192194 | +| clip_fraction | 0.247 | +| clip_range | 0.161 | +| entropy_loss | -8.2 | +| explained_variance | 0.513 | +| learning_rate | 5.02e-05 | +| loss | 2.01 | +| n_updates | 310 | +| policy_gradient_loss | -0.00966 | +| value_loss | 5.51 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.39e+03 | +| ep_rew_mean | -61.5 | +| time/ | | +| fps | 351 | +| iterations | 33 | +| time_elapsed | 148 | +| total_timesteps | 52338 | +| train/ | | +| approx_kl | 0.010591626 | +| clip_fraction | 0.226 | +| clip_range | 0.161 | +| entropy_loss | -8.2 | +| explained_variance | 0.328 | +| learning_rate | 5.02e-05 | +| loss | 1.02 | +| n_updates | 320 | +| policy_gradient_loss | -0.0127 | +| value_loss | 8.93 | +----------------------------------------- +------------------------------------------ +| rollout/ | | +| ep_len_mean | 2.41e+03 | +| ep_rew_mean | -63 | +| time/ | | +| fps | 350 | +| iterations | 34 | +| time_elapsed | 154 | +| total_timesteps | 53924 | +| train/ | | +| approx_kl | 0.0077109425 | +| clip_fraction | 0.228 | +| clip_range | 0.161 | +| entropy_loss | -8.17 | +| explained_variance | 0.17 | +| learning_rate | 5.02e-05 | +| loss | 2.71 | +| n_updates | 330 | +| policy_gradient_loss | -0.00697 | +| value_loss | 8.1 | +------------------------------------------ +----------------------------------------- 
+| rollout/ | | +| ep_len_mean | 2.37e+03 | +| ep_rew_mean | -71.1 | +| time/ | | +| fps | 348 | +| iterations | 35 | +| time_elapsed | 159 | +| total_timesteps | 55510 | +| train/ | | +| approx_kl | 0.009447264 | +| clip_fraction | 0.209 | +| clip_range | 0.161 | +| entropy_loss | -8.2 | +| explained_variance | 0.244 | +| learning_rate | 5.02e-05 | +| loss | 13.6 | +| n_updates | 340 | +| policy_gradient_loss | -0.00654 | +| value_loss | 11.1 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.34e+03 | +| ep_rew_mean | -77.4 | +| time/ | | +| fps | 347 | +| iterations | 36 | +| time_elapsed | 164 | +| total_timesteps | 57096 | +| train/ | | +| approx_kl | 0.011554491 | +| clip_fraction | 0.213 | +| clip_range | 0.161 | +| entropy_loss | -8.18 | +| explained_variance | 0.243 | +| learning_rate | 5.02e-05 | +| loss | 1.56 | +| n_updates | 350 | +| policy_gradient_loss | -0.00918 | +| value_loss | 35.2 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.32e+03 | +| ep_rew_mean | -65.5 | +| time/ | | +| fps | 346 | +| iterations | 37 | +| time_elapsed | 169 | +| total_timesteps | 58682 | +| train/ | | +| approx_kl | 0.014401031 | +| clip_fraction | 0.277 | +| clip_range | 0.161 | +| entropy_loss | -8.09 | +| explained_variance | 0.676 | +| learning_rate | 5.02e-05 | +| loss | 3.44 | +| n_updates | 360 | +| policy_gradient_loss | -0.0114 | +| value_loss | 29 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.32e+03 | +| ep_rew_mean | -65.5 | +| time/ | | +| fps | 345 | +| iterations | 38 | +| time_elapsed | 174 | +| total_timesteps | 60268 | +| train/ | | +| approx_kl | 0.01188478 | +| clip_fraction | 0.218 | +| clip_range | 0.161 | +| entropy_loss | -8.03 | +| explained_variance | 0.506 | +| learning_rate | 5.02e-05 | +| loss | 3.72 | +| n_updates | 370 | +| 
policy_gradient_loss | -0.00428 | +| value_loss | 51.7 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.33e+03 | +| ep_rew_mean | -69 | +| time/ | | +| fps | 346 | +| iterations | 39 | +| time_elapsed | 178 | +| total_timesteps | 61854 | +| train/ | | +| approx_kl | 0.010957578 | +| clip_fraction | 0.247 | +| clip_range | 0.161 | +| entropy_loss | -8.11 | +| explained_variance | 0.534 | +| learning_rate | 5.02e-05 | +| loss | 1.98 | +| n_updates | 380 | +| policy_gradient_loss | -0.0112 | +| value_loss | 8.17 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.34e+03 | +| ep_rew_mean | -74.6 | +| time/ | | +| fps | 346 | +| iterations | 40 | +| time_elapsed | 182 | +| total_timesteps | 63440 | +| train/ | | +| approx_kl | 0.012994195 | +| clip_fraction | 0.245 | +| clip_range | 0.161 | +| entropy_loss | -8.05 | +| explained_variance | 0.572 | +| learning_rate | 5.02e-05 | +| loss | 2.95 | +| n_updates | 390 | +| policy_gradient_loss | -0.0108 | +| value_loss | 13.3 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.34e+03 | +| ep_rew_mean | -74.6 | +| time/ | | +| fps | 337 | +| iterations | 41 | +| time_elapsed | 192 | +| total_timesteps | 65026 | +| train/ | | +| approx_kl | 0.01015701 | +| clip_fraction | 0.236 | +| clip_range | 0.161 | +| entropy_loss | -8.1 | +| explained_variance | 0.263 | +| learning_rate | 5.02e-05 | +| loss | 4.03 | +| n_updates | 400 | +| policy_gradient_loss | -0.012 | +| value_loss | 27.9 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.33e+03 | +| ep_rew_mean | -82 | +| time/ | | +| fps | 338 | +| iterations | 42 | +| time_elapsed | 196 | +| total_timesteps | 66612 | +| train/ | | +| approx_kl | 0.013143239 | +| clip_fraction | 0.255 | +| clip_range | 
0.161 | +| entropy_loss | -8.09 | +| explained_variance | 0.672 | +| learning_rate | 5.02e-05 | +| loss | 2.09 | +| n_updates | 410 | +| policy_gradient_loss | -0.0143 | +| value_loss | 3.7 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.31e+03 | +| ep_rew_mean | -78.7 | +| time/ | | +| fps | 338 | +| iterations | 43 | +| time_elapsed | 201 | +| total_timesteps | 68198 | +| train/ | | +| approx_kl | 0.019480813 | +| clip_fraction | 0.326 | +| clip_range | 0.161 | +| entropy_loss | -8.01 | +| explained_variance | 0.28 | +| learning_rate | 5.02e-05 | +| loss | 3.27 | +| n_updates | 420 | +| policy_gradient_loss | -0.000208 | +| value_loss | 31.3 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.31e+03 | +| ep_rew_mean | -78.7 | +| time/ | | +| fps | 339 | +| iterations | 44 | +| time_elapsed | 205 | +| total_timesteps | 69784 | +| train/ | | +| approx_kl | 0.01460914 | +| clip_fraction | 0.273 | +| clip_range | 0.161 | +| entropy_loss | -7.99 | +| explained_variance | 0.66 | +| learning_rate | 5.02e-05 | +| loss | 3.57 | +| n_updates | 430 | +| policy_gradient_loss | -0.0082 | +| value_loss | 15.4 | +---------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.35e+03 | +| ep_rew_mean | -76.4 | +| time/ | | +| fps | 338 | +| iterations | 45 | +| time_elapsed | 210 | +| total_timesteps | 71370 | +| train/ | | +| approx_kl | 0.01634773 | +| clip_fraction | 0.303 | +| clip_range | 0.161 | +| entropy_loss | -7.99 | +| explained_variance | 0.729 | +| learning_rate | 5.02e-05 | +| loss | 4.86 | +| n_updates | 440 | +| policy_gradient_loss | -0.00564 | +| value_loss | 13.7 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.34e+03 | +| ep_rew_mean | -79.5 | +| time/ | | +| fps | 337 | +| iterations | 46 | 
+| time_elapsed | 216 | +| total_timesteps | 72956 | +| train/ | | +| approx_kl | 0.013031598 | +| clip_fraction | 0.306 | +| clip_range | 0.161 | +| entropy_loss | -7.89 | +| explained_variance | 0.513 | +| learning_rate | 5.02e-05 | +| loss | 0.69 | +| n_updates | 450 | +| policy_gradient_loss | -0.00602 | +| value_loss | 7.02 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.34e+03 | +| ep_rew_mean | -79.5 | +| time/ | | +| fps | 336 | +| iterations | 47 | +| time_elapsed | 221 | +| total_timesteps | 74542 | +| train/ | | +| approx_kl | 0.012420004 | +| clip_fraction | 0.261 | +| clip_range | 0.161 | +| entropy_loss | -7.91 | +| explained_variance | 0.314 | +| learning_rate | 5.02e-05 | +| loss | 3.91 | +| n_updates | 460 | +| policy_gradient_loss | -0.00565 | +| value_loss | 20.3 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.36e+03 | +| ep_rew_mean | -76.7 | +| time/ | | +| fps | 335 | +| iterations | 48 | +| time_elapsed | 226 | +| total_timesteps | 76128 | +| train/ | | +| approx_kl | 0.012879474 | +| clip_fraction | 0.28 | +| clip_range | 0.161 | +| entropy_loss | -7.89 | +| explained_variance | 0.787 | +| learning_rate | 5.02e-05 | +| loss | 4.51 | +| n_updates | 470 | +| policy_gradient_loss | -0.0114 | +| value_loss | 7.77 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.36e+03 | +| ep_rew_mean | -76.7 | +| time/ | | +| fps | 334 | +| iterations | 49 | +| time_elapsed | 232 | +| total_timesteps | 77714 | +| train/ | | +| approx_kl | 0.015691841 | +| clip_fraction | 0.268 | +| clip_range | 0.161 | +| entropy_loss | -7.97 | +| explained_variance | 0.792 | +| learning_rate | 5.02e-05 | +| loss | 2.4 | +| n_updates | 480 | +| policy_gradient_loss | -0.00649 | +| value_loss | 13.2 | +----------------------------------------- 
+---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.36e+03 | +| ep_rew_mean | -75.5 | +| time/ | | +| fps | 334 | +| iterations | 50 | +| time_elapsed | 237 | +| total_timesteps | 79300 | +| train/ | | +| approx_kl | 0.01415793 | +| clip_fraction | 0.271 | +| clip_range | 0.161 | +| entropy_loss | -7.91 | +| explained_variance | 0.663 | +| learning_rate | 5.02e-05 | +| loss | 3.25 | +| n_updates | 490 | +| policy_gradient_loss | -0.0147 | +| value_loss | 10.4 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.34e+03 | +| ep_rew_mean | -76.9 | +| time/ | | +| fps | 333 | +| iterations | 51 | +| time_elapsed | 242 | +| total_timesteps | 80886 | +| train/ | | +| approx_kl | 0.011811551 | +| clip_fraction | 0.309 | +| clip_range | 0.161 | +| entropy_loss | -7.68 | +| explained_variance | 0.634 | +| learning_rate | 5.02e-05 | +| loss | 1.68 | +| n_updates | 500 | +| policy_gradient_loss | -0.00301 | +| value_loss | 10.3 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.34e+03 | +| ep_rew_mean | -83.2 | +| time/ | | +| fps | 332 | +| iterations | 52 | +| time_elapsed | 247 | +| total_timesteps | 82472 | +| train/ | | +| approx_kl | 0.015533115 | +| clip_fraction | 0.25 | +| clip_range | 0.161 | +| entropy_loss | -7.86 | +| explained_variance | 0.652 | +| learning_rate | 5.02e-05 | +| loss | 5.43 | +| n_updates | 510 | +| policy_gradient_loss | -0.0107 | +| value_loss | 10.6 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.33e+03 | +| ep_rew_mean | -87.6 | +| time/ | | +| fps | 333 | +| iterations | 53 | +| time_elapsed | 252 | +| total_timesteps | 84058 | +| train/ | | +| approx_kl | 0.017240252 | +| clip_fraction | 0.316 | +| clip_range | 0.161 | +| entropy_loss | -8 | +| explained_variance | 0.623 | +| learning_rate | 5.02e-05 | 
+| loss | 14.6 | +| n_updates | 520 | +| policy_gradient_loss | -0.00709 | +| value_loss | 33 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.31e+03 | +| ep_rew_mean | -82.3 | +| time/ | | +| fps | 333 | +| iterations | 54 | +| time_elapsed | 256 | +| total_timesteps | 85644 | +| train/ | | +| approx_kl | 0.015610819 | +| clip_fraction | 0.297 | +| clip_range | 0.161 | +| entropy_loss | -7.9 | +| explained_variance | 0.506 | +| learning_rate | 5.02e-05 | +| loss | 79.8 | +| n_updates | 530 | +| policy_gradient_loss | -0.0053 | +| value_loss | 30.7 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.31e+03 | +| ep_rew_mean | -82.3 | +| time/ | | +| fps | 334 | +| iterations | 55 | +| time_elapsed | 261 | +| total_timesteps | 87230 | +| train/ | | +| approx_kl | 0.01877381 | +| clip_fraction | 0.33 | +| clip_range | 0.161 | +| entropy_loss | -7.88 | +| explained_variance | 0.388 | +| learning_rate | 5.02e-05 | +| loss | 2.4 | +| n_updates | 540 | +| policy_gradient_loss | -0.00414 | +| value_loss | 19.6 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.3e+03 | +| ep_rew_mean | -89.3 | +| time/ | | +| fps | 334 | +| iterations | 56 | +| time_elapsed | 265 | +| total_timesteps | 88816 | +| train/ | | +| approx_kl | 0.018082947 | +| clip_fraction | 0.339 | +| clip_range | 0.161 | +| entropy_loss | -7.79 | +| explained_variance | 0.742 | +| learning_rate | 5.02e-05 | +| loss | 1.75 | +| n_updates | 550 | +| policy_gradient_loss | -0.00747 | +| value_loss | 8.59 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.31e+03 | +| ep_rew_mean | -89.3 | +| time/ | | +| fps | 334 | +| iterations | 57 | +| time_elapsed | 269 | +| total_timesteps | 90402 | +| train/ | | +| approx_kl | 0.033854794 | 
+| clip_fraction | 0.4 | +| clip_range | 0.161 | +| entropy_loss | -7.85 | +| explained_variance | 0.145 | +| learning_rate | 5.02e-05 | +| loss | 2.04 | +| n_updates | 560 | +| policy_gradient_loss | 0.00031 | +| value_loss | 43.8 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.28e+03 | +| ep_rew_mean | -81.3 | +| time/ | | +| fps | 335 | +| iterations | 58 | +| time_elapsed | 274 | +| total_timesteps | 91988 | +| train/ | | +| approx_kl | 0.02308767 | +| clip_fraction | 0.369 | +| clip_range | 0.161 | +| entropy_loss | -7.65 | +| explained_variance | 0.836 | +| learning_rate | 5.02e-05 | +| loss | 1.59 | +| n_updates | 570 | +| policy_gradient_loss | -0.00735 | +| value_loss | 12.1 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.28e+03 | +| ep_rew_mean | -81.3 | +| time/ | | +| fps | 335 | +| iterations | 59 | +| time_elapsed | 279 | +| total_timesteps | 93574 | +| train/ | | +| approx_kl | 0.020519579 | +| clip_fraction | 0.345 | +| clip_range | 0.161 | +| entropy_loss | -7.8 | +| explained_variance | 0.278 | +| learning_rate | 5.02e-05 | +| loss | 9.11 | +| n_updates | 580 | +| policy_gradient_loss | -0.00419 | +| value_loss | 45 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.28e+03 | +| ep_rew_mean | -81.3 | +| time/ | | +| fps | 335 | +| iterations | 60 | +| time_elapsed | 283 | +| total_timesteps | 95160 | +| train/ | | +| approx_kl | 0.021029348 | +| clip_fraction | 0.365 | +| clip_range | 0.161 | +| entropy_loss | -7.82 | +| explained_variance | 0.839 | +| learning_rate | 5.02e-05 | +| loss | 1.28 | +| n_updates | 590 | +| policy_gradient_loss | -0.00539 | +| value_loss | 4.64 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.28e+03 | +| ep_rew_mean | -81.3 | +| 
time/ | | +| fps | 335 | +| iterations | 61 | +| time_elapsed | 288 | +| total_timesteps | 96746 | +| train/ | | +| approx_kl | 0.017239623 | +| clip_fraction | 0.328 | +| clip_range | 0.161 | +| entropy_loss | -7.65 | +| explained_variance | 0.762 | +| learning_rate | 5.02e-05 | +| loss | 3.36 | +| n_updates | 600 | +| policy_gradient_loss | -0.00336 | +| value_loss | 13.7 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.39e+03 | +| ep_rew_mean | -84.9 | +| time/ | | +| fps | 335 | +| iterations | 62 | +| time_elapsed | 292 | +| total_timesteps | 98332 | +| train/ | | +| approx_kl | 0.014254608 | +| clip_fraction | 0.358 | +| clip_range | 0.161 | +| entropy_loss | -7.61 | +| explained_variance | 0.69 | +| learning_rate | 5.02e-05 | +| loss | 10.9 | +| n_updates | 610 | +| policy_gradient_loss | -0.00208 | +| value_loss | 5.69 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.39e+03 | +| ep_rew_mean | -84.9 | +| time/ | | +| fps | 336 | +| iterations | 63 | +| time_elapsed | 297 | +| total_timesteps | 99918 | +| train/ | | +| approx_kl | 0.012963827 | +| clip_fraction | 0.302 | +| clip_range | 0.161 | +| entropy_loss | -7.63 | +| explained_variance | 0.322 | +| learning_rate | 5.02e-05 | +| loss | 9.69 | +| n_updates | 620 | +| policy_gradient_loss | -0.00529 | +| value_loss | 29.8 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.41e+03 | +| ep_rew_mean | -83.5 | +| time/ | | +| fps | 336 | +| iterations | 64 | +| time_elapsed | 301 | +| total_timesteps | 101504 | +| train/ | | +| approx_kl | 0.018386848 | +| clip_fraction | 0.319 | +| clip_range | 0.161 | +| entropy_loss | -7.67 | +| explained_variance | 0.638 | +| learning_rate | 5.02e-05 | +| loss | 1.82 | +| n_updates | 630 | +| policy_gradient_loss | -0.00643 | +| value_loss | 6.2 | 
+----------------------------------------- +[I 2023-03-30 21:56:30,174] Trial 2 finished with value: -348.0 and parameters: {'n_steps': 1586, 'gamma': 0.9956348644941185, 'learning_rate': 5.0170841536324054e-05, 'clip_range': 0.16056638694970846, 'gae_lambda': 0.8608765423049661}. Best is trial 1 with value: -205.0. +Using cuda device +Wrapping the env in a DummyVecEnv. +Wrapping the env in a VecTransposeImage. +C:\ProgramData\Anaconda3\envs\StreetFighterAI\lib\site-packages\stable_baselines3\ppo\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 3813`, after every 59 untruncated mini-batches, there will be a truncated mini-batch of size 37 +We recommend using a `batch_size` that is a factor of `n_steps * n_envs`. +Info: (n_steps=3813 and n_envs=1) + warnings.warn( +Logging to logs/PPO_21 +--------------------------------- +| rollout/ | | +| ep_len_mean | 3.42e+03 | +| ep_rew_mean | -8 | +| time/ | | +| fps | 534 | +| iterations | 1 | +| time_elapsed | 7 | +| total_timesteps | 3813 | +--------------------------------- +------------------------------------------ +| rollout/ | | +| ep_len_mean | 3.23e+03 | +| ep_rew_mean | -32 | +| time/ | | +| fps | 404 | +| iterations | 2 | +| time_elapsed | 18 | +| total_timesteps | 7626 | +| train/ | | +| approx_kl | 0.0036819936 | +| clip_fraction | 0.136 | +| clip_range | 0.123 | +| entropy_loss | -8.31 | +| explained_variance | 0.000731 | +| learning_rate | 5.81e-05 | +| loss | 0.479 | +| n_updates | 10 | +| policy_gradient_loss | -0.00524 | +| value_loss | 9.78 | +------------------------------------------ +------------------------------------------ +| rollout/ | | +| ep_len_mean | 2.82e+03 | +| ep_rew_mean | -104 | +| time/ | | +| fps | 365 | +| iterations | 3 | +| time_elapsed | 31 | +| total_timesteps | 11439 | +| train/ | | +| approx_kl | 0.0041210777 | +| clip_fraction | 0.136 | +| clip_range | 0.123 | +| entropy_loss | -8.31 | +| 
explained_variance | -0.119 | +| learning_rate | 5.81e-05 | +| loss | 0.859 | +| n_updates | 20 | +| policy_gradient_loss | -0.0063 | +| value_loss | 8.05 | +------------------------------------------ +------------------------------------------ +| rollout/ | | +| ep_len_mean | 2.65e+03 | +| ep_rew_mean | -75.4 | +| time/ | | +| fps | 348 | +| iterations | 4 | +| time_elapsed | 43 | +| total_timesteps | 15252 | +| train/ | | +| approx_kl | 0.0068013067 | +| clip_fraction | 0.186 | +| clip_range | 0.123 | +| entropy_loss | -8.3 | +| explained_variance | -0.164 | +| learning_rate | 5.81e-05 | +| loss | 125 | +| n_updates | 30 | +| policy_gradient_loss | -0.00652 | +| value_loss | 27.2 | +------------------------------------------ +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.58e+03 | +| ep_rew_mean | -83 | +| time/ | | +| fps | 337 | +| iterations | 5 | +| time_elapsed | 56 | +| total_timesteps | 19065 | +| train/ | | +| approx_kl | 0.00511329 | +| clip_fraction | 0.199 | +| clip_range | 0.123 | +| entropy_loss | -8.3 | +| explained_variance | -0.527 | +| learning_rate | 5.81e-05 | +| loss | 1.5 | +| n_updates | 40 | +| policy_gradient_loss | -0.00346 | +| value_loss | 14.6 | +---------------------------------------- +------------------------------------------ +| rollout/ | | +| ep_len_mean | 2.54e+03 | +| ep_rew_mean | -92.9 | +| time/ | | +| fps | 336 | +| iterations | 6 | +| time_elapsed | 68 | +| total_timesteps | 22878 | +| train/ | | +| approx_kl | 0.0047720987 | +| clip_fraction | 0.186 | +| clip_range | 0.123 | +| entropy_loss | -8.3 | +| explained_variance | -0.222 | +| learning_rate | 5.81e-05 | +| loss | 3.05 | +| n_updates | 50 | +| policy_gradient_loss | -0.00716 | +| value_loss | 13.6 | +------------------------------------------ +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.56e+03 | +| ep_rew_mean | -122 | +| time/ | | +| fps | 336 | +| iterations | 7 | +| time_elapsed | 79 | +| 
total_timesteps | 26691 | +| train/ | | +| approx_kl | 0.005124747 | +| clip_fraction | 0.208 | +| clip_range | 0.123 | +| entropy_loss | -8.3 | +| explained_variance | -0.166 | +| learning_rate | 5.81e-05 | +| loss | 2.19 | +| n_updates | 60 | +| policy_gradient_loss | -0.00516 | +| value_loss | 15.4 | +----------------------------------------- +------------------------------------------ +| rollout/ | | +| ep_len_mean | 2.46e+03 | +| ep_rew_mean | -90 | +| time/ | | +| fps | 331 | +| iterations | 8 | +| time_elapsed | 92 | +| total_timesteps | 30504 | +| train/ | | +| approx_kl | 0.0077587436 | +| clip_fraction | 0.243 | +| clip_range | 0.123 | +| entropy_loss | -8.29 | +| explained_variance | -0.0477 | +| learning_rate | 5.81e-05 | +| loss | 1.42 | +| n_updates | 70 | +| policy_gradient_loss | -0.00297 | +| value_loss | 31.2 | +------------------------------------------ +------------------------------------------ +| rollout/ | | +| ep_len_mean | 2.46e+03 | +| ep_rew_mean | -78.2 | +| time/ | | +| fps | 327 | +| iterations | 9 | +| time_elapsed | 104 | +| total_timesteps | 34317 | +| train/ | | +| approx_kl | 0.0055853897 | +| clip_fraction | 0.222 | +| clip_range | 0.123 | +| entropy_loss | -8.29 | +| explained_variance | -0.242 | +| learning_rate | 5.81e-05 | +| loss | 0.979 | +| n_updates | 80 | +| policy_gradient_loss | -0.00466 | +| value_loss | 18.7 | +------------------------------------------ +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.45e+03 | +| ep_rew_mean | -81.7 | +| time/ | | +| fps | 326 | +| iterations | 10 | +| time_elapsed | 116 | +| total_timesteps | 38130 | +| train/ | | +| approx_kl | 0.005408008 | +| clip_fraction | 0.217 | +| clip_range | 0.123 | +| entropy_loss | -8.29 | +| explained_variance | -1.29 | +| learning_rate | 5.81e-05 | +| loss | 4.61 | +| n_updates | 90 | +| policy_gradient_loss | -0.00827 | +| value_loss | 5.36 | +----------------------------------------- 
+------------------------------------------ +| rollout/ | | +| ep_len_mean | 2.45e+03 | +| ep_rew_mean | -76.2 | +| time/ | | +| fps | 326 | +| iterations | 11 | +| time_elapsed | 128 | +| total_timesteps | 41943 | +| train/ | | +| approx_kl | 0.0057736286 | +| clip_fraction | 0.19 | +| clip_range | 0.123 | +| entropy_loss | -8.28 | +| explained_variance | -0.141 | +| learning_rate | 5.81e-05 | +| loss | 3.36 | +| n_updates | 100 | +| policy_gradient_loss | -0.00708 | +| value_loss | 19.9 | +------------------------------------------ +------------------------------------------ +| rollout/ | | +| ep_len_mean | 2.6e+03 | +| ep_rew_mean | -88.4 | +| time/ | | +| fps | 326 | +| iterations | 12 | +| time_elapsed | 140 | +| total_timesteps | 45756 | +| train/ | | +| approx_kl | 0.0061197034 | +| clip_fraction | 0.221 | +| clip_range | 0.123 | +| entropy_loss | -8.27 | +| explained_variance | -0.275 | +| learning_rate | 5.81e-05 | +| loss | 1.75 | +| n_updates | 110 | +| policy_gradient_loss | -0.00862 | +| value_loss | 9.48 | +------------------------------------------ +------------------------------------------ +| rollout/ | | +| ep_len_mean | 2.58e+03 | +| ep_rew_mean | -86.6 | +| time/ | | +| fps | 329 | +| iterations | 13 | +| time_elapsed | 150 | +| total_timesteps | 49569 | +| train/ | | +| approx_kl | 0.0073136846 | +| clip_fraction | 0.233 | +| clip_range | 0.123 | +| entropy_loss | -8.27 | +| explained_variance | -0.0933 | +| learning_rate | 5.81e-05 | +| loss | 0.466 | +| n_updates | 120 | +| policy_gradient_loss | -0.00841 | +| value_loss | 18 | +------------------------------------------ +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.8e+03 | +| ep_rew_mean | -18.5 | +| time/ | | +| fps | 330 | +| iterations | 14 | +| time_elapsed | 161 | +| total_timesteps | 53382 | +| train/ | | +| approx_kl | 0.012948585 | +| clip_fraction | 0.348 | +| clip_range | 0.123 | +| entropy_loss | -8.26 | +| explained_variance | 0.0057 | +| 
learning_rate | 5.81e-05 | +| loss | 2.17 | +| n_updates | 130 | +| policy_gradient_loss | 0.00365 | +| value_loss | 1.38e+03 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.79e+03 | +| ep_rew_mean | -17.8 | +| time/ | | +| fps | 332 | +| iterations | 15 | +| time_elapsed | 172 | +| total_timesteps | 57195 | +| train/ | | +| approx_kl | 0.010128591 | +| clip_fraction | 0.417 | +| clip_range | 0.123 | +| entropy_loss | -8.25 | +| explained_variance | -4.29 | +| learning_rate | 5.81e-05 | +| loss | 3.52 | +| n_updates | 140 | +| policy_gradient_loss | 0.00333 | +| value_loss | 7.59 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.69e+03 | +| ep_rew_mean | -25 | +| time/ | | +| fps | 333 | +| iterations | 16 | +| time_elapsed | 182 | +| total_timesteps | 61008 | +| train/ | | +| approx_kl | 0.009500639 | +| clip_fraction | 0.306 | +| clip_range | 0.123 | +| entropy_loss | -8.27 | +| explained_variance | -0.37 | +| learning_rate | 5.81e-05 | +| loss | 3.36 | +| n_updates | 150 | +| policy_gradient_loss | -0.00701 | +| value_loss | 9.43 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.69e+03 | +| ep_rew_mean | -35.8 | +| time/ | | +| fps | 334 | +| iterations | 17 | +| time_elapsed | 193 | +| total_timesteps | 64821 | +| train/ | | +| approx_kl | 0.013091733 | +| clip_fraction | 0.352 | +| clip_range | 0.123 | +| entropy_loss | -8.25 | +| explained_variance | -0.138 | +| learning_rate | 5.81e-05 | +| loss | 0.699 | +| n_updates | 160 | +| policy_gradient_loss | -0.00203 | +| value_loss | 25.9 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.67e+03 | +| ep_rew_mean | -34 | +| time/ | | +| fps | 335 | +| iterations | 18 | +| time_elapsed | 204 | +| total_timesteps | 68634 | +| 
train/ | | +| approx_kl | 0.013554989 | +| clip_fraction | 0.341 | +| clip_range | 0.123 | +| entropy_loss | -8.24 | +| explained_variance | -0.24 | +| learning_rate | 5.81e-05 | +| loss | 0.791 | +| n_updates | 170 | +| policy_gradient_loss | -0.00409 | +| value_loss | 18.6 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.64e+03 | +| ep_rew_mean | -52.1 | +| time/ | | +| fps | 335 | +| iterations | 19 | +| time_elapsed | 215 | +| total_timesteps | 72447 | +| train/ | | +| approx_kl | 0.010819951 | +| clip_fraction | 0.347 | +| clip_range | 0.123 | +| entropy_loss | -8.23 | +| explained_variance | -0.575 | +| learning_rate | 5.81e-05 | +| loss | 0.601 | +| n_updates | 180 | +| policy_gradient_loss | -0.00691 | +| value_loss | 9.12 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.63e+03 | +| ep_rew_mean | -60.8 | +| time/ | | +| fps | 333 | +| iterations | 20 | +| time_elapsed | 228 | +| total_timesteps | 76260 | +| train/ | | +| approx_kl | 0.015728015 | +| clip_fraction | 0.39 | +| clip_range | 0.123 | +| entropy_loss | -8.24 | +| explained_variance | -0.0299 | +| learning_rate | 5.81e-05 | +| loss | 1.85 | +| n_updates | 190 | +| policy_gradient_loss | -0.00169 | +| value_loss | 27.5 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.62e+03 | +| ep_rew_mean | -65.9 | +| time/ | | +| fps | 331 | +| iterations | 21 | +| time_elapsed | 241 | +| total_timesteps | 80073 | +| train/ | | +| approx_kl | 0.013783906 | +| clip_fraction | 0.427 | +| clip_range | 0.123 | +| entropy_loss | -8.22 | +| explained_variance | -0.214 | +| learning_rate | 5.81e-05 | +| loss | 1.94 | +| n_updates | 200 | +| policy_gradient_loss | 0.000583 | +| value_loss | 35.7 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| 
ep_len_mean | 2.63e+03 | +| ep_rew_mean | -65.4 | +| time/ | | +| fps | 329 | +| iterations | 22 | +| time_elapsed | 254 | +| total_timesteps | 83886 | +| train/ | | +| approx_kl | 0.014645203 | +| clip_fraction | 0.43 | +| clip_range | 0.123 | +| entropy_loss | -8.21 | +| explained_variance | -0.164 | +| learning_rate | 5.81e-05 | +| loss | 2.59 | +| n_updates | 210 | +| policy_gradient_loss | 0.00338 | +| value_loss | 16.1 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.55e+03 | +| ep_rew_mean | -75.3 | +| time/ | | +| fps | 327 | +| iterations | 23 | +| time_elapsed | 267 | +| total_timesteps | 87699 | +| train/ | | +| approx_kl | 0.015013908 | +| clip_fraction | 0.426 | +| clip_range | 0.123 | +| entropy_loss | -8.19 | +| explained_variance | -0.904 | +| learning_rate | 5.81e-05 | +| loss | 1.16 | +| n_updates | 220 | +| policy_gradient_loss | -0.000645 | +| value_loss | 7.32 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.53e+03 | +| ep_rew_mean | -73.4 | +| time/ | | +| fps | 326 | +| iterations | 24 | +| time_elapsed | 280 | +| total_timesteps | 91512 | +| train/ | | +| approx_kl | 0.02647818 | +| clip_fraction | 0.481 | +| clip_range | 0.123 | +| entropy_loss | -8.11 | +| explained_variance | -0.0693 | +| learning_rate | 5.81e-05 | +| loss | 3.85 | +| n_updates | 230 | +| policy_gradient_loss | 0.00305 | +| value_loss | 32.6 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.5e+03 | +| ep_rew_mean | -71.4 | +| time/ | | +| fps | 325 | +| iterations | 25 | +| time_elapsed | 293 | +| total_timesteps | 95325 | +| train/ | | +| approx_kl | 0.019708665 | +| clip_fraction | 0.482 | +| clip_range | 0.123 | +| entropy_loss | -8.12 | +| explained_variance | -0.496 | +| learning_rate | 5.81e-05 | +| loss | 3.13 | +| n_updates | 240 | +| 
policy_gradient_loss | 0.0037 | +| value_loss | 13.6 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.47e+03 | +| ep_rew_mean | -78.4 | +| time/ | | +| fps | 323 | +| iterations | 26 | +| time_elapsed | 306 | +| total_timesteps | 99138 | +| train/ | | +| approx_kl | 0.017824553 | +| clip_fraction | 0.454 | +| clip_range | 0.123 | +| entropy_loss | -8.11 | +| explained_variance | -0.288 | +| learning_rate | 5.81e-05 | +| loss | 0.948 | +| n_updates | 250 | +| policy_gradient_loss | 0.000339 | +| value_loss | 15.5 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.41e+03 | +| ep_rew_mean | -63.9 | +| time/ | | +| fps | 322 | +| iterations | 27 | +| time_elapsed | 319 | +| total_timesteps | 102951 | +| train/ | | +| approx_kl | 0.01960509 | +| clip_fraction | 0.495 | +| clip_range | 0.123 | +| entropy_loss | -8.1 | +| explained_variance | -0.318 | +| learning_rate | 5.81e-05 | +| loss | 0.768 | +| n_updates | 260 | +| policy_gradient_loss | 0.00504 | +| value_loss | 22.9 | +---------------------------------------- +[I 2023-03-30 22:02:13,996] Trial 3 finished with value: -296.0 and parameters: {'n_steps': 3813, 'gamma': 0.904141731391283, 'learning_rate': 5.8079041373677925e-05, 'clip_range': 0.12331848349559273, 'gae_lambda': 0.9741511540746485}. Best is trial 1 with value: -205.0. +Using cuda device +Wrapping the env in a DummyVecEnv. +Wrapping the env in a VecTransposeImage. +C:\ProgramData\Anaconda3\envs\StreetFighterAI\lib\site-packages\stable_baselines3\ppo\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 2255`, after every 35 untruncated mini-batches, there will be a truncated mini-batch of size 15 +We recommend using a `batch_size` that is a factor of `n_steps * n_envs`. 
+Info: (n_steps=2255 and n_envs=1) + warnings.warn( +Logging to logs/PPO_22 +--------------------------------- +| rollout/ | | +| ep_len_mean | 1.96e+03 | +| ep_rew_mean | -277 | +| time/ | | +| fps | 612 | +| iterations | 1 | +| time_elapsed | 3 | +| total_timesteps | 2255 | +--------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 1.76e+03 | +| ep_rew_mean | -312 | +| time/ | | +| fps | 449 | +| iterations | 2 | +| time_elapsed | 10 | +| total_timesteps | 4510 | +| train/ | | +| approx_kl | 0.020538189 | +| clip_fraction | 0.184 | +| clip_range | 0.246 | +| entropy_loss | -8.3 | +| explained_variance | 0.00102 | +| learning_rate | 6.18e-05 | +| loss | 0.312 | +| n_updates | 10 | +| policy_gradient_loss | -0.0141 | +| value_loss | 28.1 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.02e+03 | +| ep_rew_mean | -205 | +| time/ | | +| fps | 397 | +| iterations | 3 | +| time_elapsed | 17 | +| total_timesteps | 6765 | +| train/ | | +| approx_kl | 0.014282044 | +| clip_fraction | 0.149 | +| clip_range | 0.246 | +| entropy_loss | -8.29 | +| explained_variance | 0.0389 | +| learning_rate | 6.18e-05 | +| loss | 3.57 | +| n_updates | 20 | +| policy_gradient_loss | -0.00883 | +| value_loss | 41.2 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.02e+03 | +| ep_rew_mean | -205 | +| time/ | | +| fps | 378 | +| iterations | 4 | +| time_elapsed | 23 | +| total_timesteps | 9020 | +| train/ | | +| approx_kl | 0.010472495 | +| clip_fraction | 0.124 | +| clip_range | 0.246 | +| entropy_loss | -8.27 | +| explained_variance | -0.182 | +| learning_rate | 6.18e-05 | +| loss | 0.526 | +| n_updates | 30 | +| policy_gradient_loss | -0.0058 | +| value_loss | 6.26 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.29e+03 | 
+| ep_rew_mean | -158 | +| time/ | | +| fps | 368 | +| iterations | 5 | +| time_elapsed | 30 | +| total_timesteps | 11275 | +| train/ | | +| approx_kl | 0.014202305 | +| clip_fraction | 0.116 | +| clip_range | 0.246 | +| entropy_loss | -8.26 | +| explained_variance | -0.0118 | +| learning_rate | 6.18e-05 | +| loss | 0.206 | +| n_updates | 40 | +| policy_gradient_loss | -0.00424 | +| value_loss | 8.23 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.4e+03 | +| ep_rew_mean | -160 | +| time/ | | +| fps | 361 | +| iterations | 6 | +| time_elapsed | 37 | +| total_timesteps | 13530 | +| train/ | | +| approx_kl | 0.009082135 | +| clip_fraction | 0.0682 | +| clip_range | 0.246 | +| entropy_loss | -8.25 | +| explained_variance | 0.0728 | +| learning_rate | 6.18e-05 | +| loss | 3.01 | +| n_updates | 50 | +| policy_gradient_loss | -0.0105 | +| value_loss | 4.79 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.38e+03 | +| ep_rew_mean | -162 | +| time/ | | +| fps | 360 | +| iterations | 7 | +| time_elapsed | 43 | +| total_timesteps | 15785 | +| train/ | | +| approx_kl | 0.016703699 | +| clip_fraction | 0.153 | +| clip_range | 0.246 | +| entropy_loss | -8.22 | +| explained_variance | 0.0113 | +| learning_rate | 6.18e-05 | +| loss | 5.27 | +| n_updates | 60 | +| policy_gradient_loss | -0.0144 | +| value_loss | 13 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.25e+03 | +| ep_rew_mean | -128 | +| time/ | | +| fps | 360 | +| iterations | 8 | +| time_elapsed | 49 | +| total_timesteps | 18040 | +| train/ | | +| approx_kl | 0.024009299 | +| clip_fraction | 0.152 | +| clip_range | 0.246 | +| entropy_loss | -8.2 | +| explained_variance | 0.0536 | +| learning_rate | 6.18e-05 | +| loss | 0.72 | +| n_updates | 70 | +| policy_gradient_loss | -0.0143 | +| value_loss | 15.3 
| +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.25e+03 | +| ep_rew_mean | -128 | +| time/ | | +| fps | 360 | +| iterations | 9 | +| time_elapsed | 56 | +| total_timesteps | 20295 | +| train/ | | +| approx_kl | 0.02027614 | +| clip_fraction | 0.184 | +| clip_range | 0.246 | +| entropy_loss | -8.14 | +| explained_variance | 0.0651 | +| learning_rate | 6.18e-05 | +| loss | 0.961 | +| n_updates | 80 | +| policy_gradient_loss | -0.00954 | +| value_loss | 24.2 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.33e+03 | +| ep_rew_mean | -113 | +| time/ | | +| fps | 360 | +| iterations | 10 | +| time_elapsed | 62 | +| total_timesteps | 22550 | +| train/ | | +| approx_kl | 0.016172899 | +| clip_fraction | 0.143 | +| clip_range | 0.246 | +| entropy_loss | -8.12 | +| explained_variance | 0.00202 | +| learning_rate | 6.18e-05 | +| loss | 19.2 | +| n_updates | 90 | +| policy_gradient_loss | -0.00773 | +| value_loss | 8.99 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.28e+03 | +| ep_rew_mean | -104 | +| time/ | | +| fps | 359 | +| iterations | 11 | +| time_elapsed | 68 | +| total_timesteps | 24805 | +| train/ | | +| approx_kl | 0.012545445 | +| clip_fraction | 0.0956 | +| clip_range | 0.246 | +| entropy_loss | -8.15 | +| explained_variance | 0.114 | +| learning_rate | 6.18e-05 | +| loss | 0.305 | +| n_updates | 100 | +| policy_gradient_loss | -0.0125 | +| value_loss | 8.93 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.26e+03 | +| ep_rew_mean | -81.5 | +| time/ | | +| fps | 358 | +| iterations | 12 | +| time_elapsed | 75 | +| total_timesteps | 27060 | +| train/ | | +| approx_kl | 0.01598395 | +| clip_fraction | 0.135 | +| clip_range | 0.246 | +| entropy_loss | -8.1 | +| 
explained_variance | 0.102 | +| learning_rate | 6.18e-05 | +| loss | 0.936 | +| n_updates | 110 | +| policy_gradient_loss | -0.0195 | +| value_loss | 7.65 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.26e+03 | +| ep_rew_mean | -81.5 | +| time/ | | +| fps | 358 | +| iterations | 13 | +| time_elapsed | 81 | +| total_timesteps | 29315 | +| train/ | | +| approx_kl | 0.016748266 | +| clip_fraction | 0.138 | +| clip_range | 0.246 | +| entropy_loss | -8.09 | +| explained_variance | 0.0378 | +| learning_rate | 6.18e-05 | +| loss | 1.89 | +| n_updates | 120 | +| policy_gradient_loss | -0.016 | +| value_loss | 16 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.26e+03 | +| ep_rew_mean | -81.5 | +| time/ | | +| fps | 358 | +| iterations | 14 | +| time_elapsed | 88 | +| total_timesteps | 31570 | +| train/ | | +| approx_kl | 0.02423302 | +| clip_fraction | 0.145 | +| clip_range | 0.246 | +| entropy_loss | -8.1 | +| explained_variance | -0.000177 | +| learning_rate | 6.18e-05 | +| loss | 0.226 | +| n_updates | 130 | +| policy_gradient_loss | -0.0147 | +| value_loss | 47.5 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.58e+03 | +| ep_rew_mean | -80.2 | +| time/ | | +| fps | 358 | +| iterations | 15 | +| time_elapsed | 94 | +| total_timesteps | 33825 | +| train/ | | +| approx_kl | 0.016911915 | +| clip_fraction | 0.133 | +| clip_range | 0.246 | +| entropy_loss | -8.12 | +| explained_variance | 0.0574 | +| learning_rate | 6.18e-05 | +| loss | 0.128 | +| n_updates | 140 | +| policy_gradient_loss | -0.0177 | +| value_loss | 5.19 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.53e+03 | +| ep_rew_mean | -95.7 | +| time/ | | +| fps | 357 | +| iterations | 16 | +| time_elapsed | 100 | +| 
total_timesteps | 36080 | +| train/ | | +| approx_kl | 0.012477045 | +| clip_fraction | 0.0815 | +| clip_range | 0.246 | +| entropy_loss | -8.09 | +| explained_variance | 0.0981 | +| learning_rate | 6.18e-05 | +| loss | 1.54 | +| n_updates | 150 | +| policy_gradient_loss | -0.0104 | +| value_loss | 24.3 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.49e+03 | +| ep_rew_mean | -104 | +| time/ | | +| fps | 357 | +| iterations | 17 | +| time_elapsed | 107 | +| total_timesteps | 38335 | +| train/ | | +| approx_kl | 0.023185179 | +| clip_fraction | 0.145 | +| clip_range | 0.246 | +| entropy_loss | -8.09 | +| explained_variance | 0.0336 | +| learning_rate | 6.18e-05 | +| loss | 0.609 | +| n_updates | 160 | +| policy_gradient_loss | -0.0129 | +| value_loss | 22.4 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.47e+03 | +| ep_rew_mean | -105 | +| time/ | | +| fps | 357 | +| iterations | 18 | +| time_elapsed | 113 | +| total_timesteps | 40590 | +| train/ | | +| approx_kl | 0.015480906 | +| clip_fraction | 0.17 | +| clip_range | 0.246 | +| entropy_loss | -8.08 | +| explained_variance | 0.0682 | +| learning_rate | 6.18e-05 | +| loss | 0.449 | +| n_updates | 170 | +| policy_gradient_loss | -0.0136 | +| value_loss | 18 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.44e+03 | +| ep_rew_mean | -119 | +| time/ | | +| fps | 357 | +| iterations | 19 | +| time_elapsed | 119 | +| total_timesteps | 42845 | +| train/ | | +| approx_kl | 0.01642779 | +| clip_fraction | 0.136 | +| clip_range | 0.246 | +| entropy_loss | -8.03 | +| explained_variance | 0.0203 | +| learning_rate | 6.18e-05 | +| loss | 0.749 | +| n_updates | 180 | +| policy_gradient_loss | -0.00958 | +| value_loss | 13.1 | +---------------------------------------- +----------------------------------------- 
+| rollout/ | | +| ep_len_mean | 2.48e+03 | +| ep_rew_mean | -113 | +| time/ | | +| fps | 356 | +| iterations | 20 | +| time_elapsed | 126 | +| total_timesteps | 45100 | +| train/ | | +| approx_kl | 0.013520324 | +| clip_fraction | 0.13 | +| clip_range | 0.246 | +| entropy_loss | -8.04 | +| explained_variance | 0.017 | +| learning_rate | 6.18e-05 | +| loss | 0.554 | +| n_updates | 190 | +| policy_gradient_loss | -0.0125 | +| value_loss | 33 | +----------------------------------------- +--------------------------------------- +| rollout/ | | +| ep_len_mean | 2.43e+03 | +| ep_rew_mean | -117 | +| time/ | | +| fps | 355 | +| iterations | 21 | +| time_elapsed | 133 | +| total_timesteps | 47355 | +| train/ | | +| approx_kl | 0.0185782 | +| clip_fraction | 0.156 | +| clip_range | 0.246 | +| entropy_loss | -8.02 | +| explained_variance | 0.255 | +| learning_rate | 6.18e-05 | +| loss | 0.618 | +| n_updates | 200 | +| policy_gradient_loss | -0.0175 | +| value_loss | 6.95 | +--------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.43e+03 | +| ep_rew_mean | -117 | +| time/ | | +| fps | 354 | +| iterations | 22 | +| time_elapsed | 139 | +| total_timesteps | 49610 | +| train/ | | +| approx_kl | 0.016632264 | +| clip_fraction | 0.131 | +| clip_range | 0.246 | +| entropy_loss | -8.02 | +| explained_variance | 0.0436 | +| learning_rate | 6.18e-05 | +| loss | 1.05 | +| n_updates | 210 | +| policy_gradient_loss | -0.0174 | +| value_loss | 11 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.43e+03 | +| ep_rew_mean | -117 | +| time/ | | +| fps | 354 | +| iterations | 23 | +| time_elapsed | 146 | +| total_timesteps | 51865 | +| train/ | | +| approx_kl | 0.019239776 | +| clip_fraction | 0.165 | +| clip_range | 0.246 | +| entropy_loss | -7.95 | +| explained_variance | 0.0435 | +| learning_rate | 6.18e-05 | +| loss | 1.27 | +| n_updates | 220 | +| 
policy_gradient_loss | -0.018 | +| value_loss | 6.75 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.68e+03 | +| ep_rew_mean | -116 | +| time/ | | +| fps | 353 | +| iterations | 24 | +| time_elapsed | 153 | +| total_timesteps | 54120 | +| train/ | | +| approx_kl | 0.022035323 | +| clip_fraction | 0.184 | +| clip_range | 0.246 | +| entropy_loss | -7.93 | +| explained_variance | 0.195 | +| learning_rate | 6.18e-05 | +| loss | 0.162 | +| n_updates | 230 | +| policy_gradient_loss | -0.0164 | +| value_loss | 4.23 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.67e+03 | +| ep_rew_mean | -113 | +| time/ | | +| fps | 352 | +| iterations | 25 | +| time_elapsed | 160 | +| total_timesteps | 56375 | +| train/ | | +| approx_kl | 0.016369374 | +| clip_fraction | 0.141 | +| clip_range | 0.246 | +| entropy_loss | -7.98 | +| explained_variance | 0.0345 | +| learning_rate | 6.18e-05 | +| loss | 0.162 | +| n_updates | 240 | +| policy_gradient_loss | -0.0173 | +| value_loss | 5.37 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.62e+03 | +| ep_rew_mean | -98.4 | +| time/ | | +| fps | 352 | +| iterations | 26 | +| time_elapsed | 166 | +| total_timesteps | 58630 | +| train/ | | +| approx_kl | 0.01639726 | +| clip_fraction | 0.13 | +| clip_range | 0.246 | +| entropy_loss | -7.96 | +| explained_variance | 0.136 | +| learning_rate | 6.18e-05 | +| loss | 0.725 | +| n_updates | 250 | +| policy_gradient_loss | -0.0164 | +| value_loss | 8.18 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.59e+03 | +| ep_rew_mean | -98.9 | +| time/ | | +| fps | 352 | +| iterations | 27 | +| time_elapsed | 172 | +| total_timesteps | 60885 | +| train/ | | +| approx_kl | 0.016931452 | +| clip_fraction | 0.124 | +| 
clip_range | 0.246 | +| entropy_loss | -7.97 | +| explained_variance | -0.0917 | +| learning_rate | 6.18e-05 | +| loss | 3.31 | +| n_updates | 260 | +| policy_gradient_loss | -0.00691 | +| value_loss | 22.9 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.55e+03 | +| ep_rew_mean | -108 | +| time/ | | +| fps | 353 | +| iterations | 28 | +| time_elapsed | 178 | +| total_timesteps | 63140 | +| train/ | | +| approx_kl | 0.020913824 | +| clip_fraction | 0.157 | +| clip_range | 0.246 | +| entropy_loss | -7.88 | +| explained_variance | 0.215 | +| learning_rate | 6.18e-05 | +| loss | 0.995 | +| n_updates | 270 | +| policy_gradient_loss | -0.0182 | +| value_loss | 7.35 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.55e+03 | +| ep_rew_mean | -103 | +| time/ | | +| fps | 353 | +| iterations | 29 | +| time_elapsed | 184 | +| total_timesteps | 65395 | +| train/ | | +| approx_kl | 0.021241019 | +| clip_fraction | 0.149 | +| clip_range | 0.246 | +| entropy_loss | -7.81 | +| explained_variance | -0.057 | +| learning_rate | 6.18e-05 | +| loss | 0.312 | +| n_updates | 280 | +| policy_gradient_loss | -0.0152 | +| value_loss | 32.1 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.57e+03 | +| ep_rew_mean | -101 | +| time/ | | +| fps | 353 | +| iterations | 30 | +| time_elapsed | 191 | +| total_timesteps | 67650 | +| train/ | | +| approx_kl | 0.022794545 | +| clip_fraction | 0.174 | +| clip_range | 0.246 | +| entropy_loss | -7.92 | +| explained_variance | 0.0735 | +| learning_rate | 6.18e-05 | +| loss | 1.95 | +| n_updates | 290 | +| policy_gradient_loss | -0.0239 | +| value_loss | 4.61 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.57e+03 | +| ep_rew_mean | -101 | +| time/ | | +| fps | 353 | 
+| iterations | 31 | +| time_elapsed | 197 | +| total_timesteps | 69905 | +| train/ | | +| approx_kl | 0.018762259 | +| clip_fraction | 0.164 | +| clip_range | 0.246 | +| entropy_loss | -7.94 | +| explained_variance | -0.0688 | +| learning_rate | 6.18e-05 | +| loss | 0.732 | +| n_updates | 300 | +| policy_gradient_loss | -0.0192 | +| value_loss | 4.37 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.59e+03 | +| ep_rew_mean | -108 | +| time/ | | +| fps | 353 | +| iterations | 32 | +| time_elapsed | 204 | +| total_timesteps | 72160 | +| train/ | | +| approx_kl | 0.018497027 | +| clip_fraction | 0.166 | +| clip_range | 0.246 | +| entropy_loss | -7.91 | +| explained_variance | 0.197 | +| learning_rate | 6.18e-05 | +| loss | 0.135 | +| n_updates | 310 | +| policy_gradient_loss | -0.0232 | +| value_loss | 3.52 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.56e+03 | +| ep_rew_mean | -113 | +| time/ | | +| fps | 353 | +| iterations | 33 | +| time_elapsed | 210 | +| total_timesteps | 74415 | +| train/ | | +| approx_kl | 0.023638394 | +| clip_fraction | 0.166 | +| clip_range | 0.246 | +| entropy_loss | -7.91 | +| explained_variance | -0.0457 | +| learning_rate | 6.18e-05 | +| loss | 0.139 | +| n_updates | 320 | +| policy_gradient_loss | -0.0173 | +| value_loss | 19.5 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.54e+03 | +| ep_rew_mean | -116 | +| time/ | | +| fps | 354 | +| iterations | 34 | +| time_elapsed | 216 | +| total_timesteps | 76670 | +| train/ | | +| approx_kl | 0.030456556 | +| clip_fraction | 0.245 | +| clip_range | 0.246 | +| entropy_loss | -7.84 | +| explained_variance | 0.0645 | +| learning_rate | 6.18e-05 | +| loss | 1.4 | +| n_updates | 330 | +| policy_gradient_loss | -0.0148 | +| value_loss | 20.3 | 
+----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.53e+03 | +| ep_rew_mean | -120 | +| time/ | | +| fps | 354 | +| iterations | 35 | +| time_elapsed | 222 | +| total_timesteps | 78925 | +| train/ | | +| approx_kl | 0.023147207 | +| clip_fraction | 0.216 | +| clip_range | 0.246 | +| entropy_loss | -7.9 | +| explained_variance | 0.26 | +| learning_rate | 6.18e-05 | +| loss | 0.602 | +| n_updates | 340 | +| policy_gradient_loss | -0.011 | +| value_loss | 18.6 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.52e+03 | +| ep_rew_mean | -125 | +| time/ | | +| fps | 354 | +| iterations | 36 | +| time_elapsed | 228 | +| total_timesteps | 81180 | +| train/ | | +| approx_kl | 0.027237331 | +| clip_fraction | 0.201 | +| clip_range | 0.246 | +| entropy_loss | -7.88 | +| explained_variance | 0.313 | +| learning_rate | 6.18e-05 | +| loss | 0.175 | +| n_updates | 350 | +| policy_gradient_loss | -0.0156 | +| value_loss | 16.3 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.52e+03 | +| ep_rew_mean | -125 | +| time/ | | +| fps | 354 | +| iterations | 37 | +| time_elapsed | 235 | +| total_timesteps | 83435 | +| train/ | | +| approx_kl | 0.025215741 | +| clip_fraction | 0.196 | +| clip_range | 0.246 | +| entropy_loss | -7.85 | +| explained_variance | -0.0349 | +| learning_rate | 6.18e-05 | +| loss | 328 | +| n_updates | 360 | +| policy_gradient_loss | -0.0175 | +| value_loss | 30.2 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.54e+03 | +| ep_rew_mean | -122 | +| time/ | | +| fps | 354 | +| iterations | 38 | +| time_elapsed | 241 | +| total_timesteps | 85690 | +| train/ | | +| approx_kl | 0.023207983 | +| clip_fraction | 0.192 | +| clip_range | 0.246 | +| entropy_loss | -7.88 | +| 
explained_variance | 0.167 | +| learning_rate | 6.18e-05 | +| loss | 0.145 | +| n_updates | 370 | +| policy_gradient_loss | -0.0241 | +| value_loss | 4.89 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.53e+03 | +| ep_rew_mean | -124 | +| time/ | | +| fps | 354 | +| iterations | 39 | +| time_elapsed | 247 | +| total_timesteps | 87945 | +| train/ | | +| approx_kl | 0.023880122 | +| clip_fraction | 0.22 | +| clip_range | 0.246 | +| entropy_loss | -7.9 | +| explained_variance | 0.033 | +| learning_rate | 6.18e-05 | +| loss | 0.34 | +| n_updates | 380 | +| policy_gradient_loss | -0.0178 | +| value_loss | 4.61 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.53e+03 | +| ep_rew_mean | -127 | +| time/ | | +| fps | 354 | +| iterations | 40 | +| time_elapsed | 254 | +| total_timesteps | 90200 | +| train/ | | +| approx_kl | 0.033778906 | +| clip_fraction | 0.203 | +| clip_range | 0.246 | +| entropy_loss | -7.82 | +| explained_variance | 0.0941 | +| learning_rate | 6.18e-05 | +| loss | 1.27 | +| n_updates | 390 | +| policy_gradient_loss | -0.0201 | +| value_loss | 10.1 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.53e+03 | +| ep_rew_mean | -124 | +| time/ | | +| fps | 354 | +| iterations | 41 | +| time_elapsed | 260 | +| total_timesteps | 92455 | +| train/ | | +| approx_kl | 0.024718465 | +| clip_fraction | 0.213 | +| clip_range | 0.246 | +| entropy_loss | -7.89 | +| explained_variance | 0.163 | +| learning_rate | 6.18e-05 | +| loss | 1.01 | +| n_updates | 400 | +| policy_gradient_loss | -0.0143 | +| value_loss | 19 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.49e+03 | +| ep_rew_mean | -119 | +| time/ | | +| fps | 354 | +| iterations | 42 | +| time_elapsed | 267 | +| 
total_timesteps | 94710 | +| train/ | | +| approx_kl | 0.022747982 | +| clip_fraction | 0.191 | +| clip_range | 0.246 | +| entropy_loss | -7.92 | +| explained_variance | 0.308 | +| learning_rate | 6.18e-05 | +| loss | 10.6 | +| n_updates | 410 | +| policy_gradient_loss | -0.0181 | +| value_loss | 7.14 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.49e+03 | +| ep_rew_mean | -119 | +| time/ | | +| fps | 354 | +| iterations | 43 | +| time_elapsed | 273 | +| total_timesteps | 96965 | +| train/ | | +| approx_kl | 0.02743027 | +| clip_fraction | 0.228 | +| clip_range | 0.246 | +| entropy_loss | -7.86 | +| explained_variance | 0.0156 | +| learning_rate | 6.18e-05 | +| loss | 1.72 | +| n_updates | 420 | +| policy_gradient_loss | -0.0158 | +| value_loss | 20.6 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.5e+03 | +| ep_rew_mean | -116 | +| time/ | | +| fps | 353 | +| iterations | 44 | +| time_elapsed | 280 | +| total_timesteps | 99220 | +| train/ | | +| approx_kl | 0.028826194 | +| clip_fraction | 0.233 | +| clip_range | 0.246 | +| entropy_loss | -7.89 | +| explained_variance | 0.253 | +| learning_rate | 6.18e-05 | +| loss | 0.285 | +| n_updates | 430 | +| policy_gradient_loss | -0.0227 | +| value_loss | 5.22 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.47e+03 | +| ep_rew_mean | -117 | +| time/ | | +| fps | 353 | +| iterations | 45 | +| time_elapsed | 286 | +| total_timesteps | 101475 | +| train/ | | +| approx_kl | 0.025009144 | +| clip_fraction | 0.21 | +| clip_range | 0.246 | +| entropy_loss | -7.89 | +| explained_variance | 0.248 | +| learning_rate | 6.18e-05 | +| loss | 7.05 | +| n_updates | 440 | +| policy_gradient_loss | -0.0173 | +| value_loss | 10.1 | +----------------------------------------- +[I 2023-03-30 22:07:30,694] Trial 4 finished 
with value: -332.0 and parameters: {'n_steps': 2255, 'gamma': 0.9347334629907355, 'learning_rate': 6.175910217498569e-05, 'clip_range': 0.2461965528470431, 'gae_lambda': 0.8597964121436965}. Best is trial 1 with value: -205.0. +Using cuda device +Wrapping the env in a DummyVecEnv. +Wrapping the env in a VecTransposeImage. +C:\ProgramData\Anaconda3\envs\StreetFighterAI\lib\site-packages\stable_baselines3\ppo\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 7730`, after every 120 untruncated mini-batches, there will be a truncated mini-batch of size 50 +We recommend using a `batch_size` that is a factor of `n_steps * n_envs`. +Info: (n_steps=7730 and n_envs=1) + warnings.warn( +Logging to logs/PPO_23 +--------------------------------- +| rollout/ | | +| ep_len_mean | 2.49e+03 | +| ep_rew_mean | -20 | +| time/ | | +| fps | 537 | +| iterations | 1 | +| time_elapsed | 14 | +| total_timesteps | 7730 | +--------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.54e+03 | +| ep_rew_mean | -19.2 | +| time/ | | +| fps | 423 | +| iterations | 2 | +| time_elapsed | 36 | +| total_timesteps | 15460 | +| train/ | | +| approx_kl | 0.04281639 | +| clip_fraction | 0.204 | +| clip_range | 0.299 | +| entropy_loss | -8.29 | +| explained_variance | 0.0017 | +| learning_rate | 9.55e-05 | +| loss | 1.29 | +| n_updates | 10 | +| policy_gradient_loss | -0.00636 | +| value_loss | 25.2 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.32e+03 | +| ep_rew_mean | -55.8 | +| time/ | | +| fps | 393 | +| iterations | 3 | +| time_elapsed | 58 | +| total_timesteps | 23190 | +| train/ | | +| approx_kl | 0.029096674 | +| clip_fraction | 0.136 | +| clip_range | 0.299 | +| entropy_loss | -8.28 | +| explained_variance | -0.0552 | +| learning_rate | 9.55e-05 | +| loss | 1.15 | +| n_updates | 20 | +| 
policy_gradient_loss | -0.00801 | +| value_loss | 29.6 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.41e+03 | +| ep_rew_mean | -61 | +| time/ | | +| fps | 370 | +| iterations | 4 | +| time_elapsed | 83 | +| total_timesteps | 30920 | +| train/ | | +| approx_kl | 0.041381396 | +| clip_fraction | 0.168 | +| clip_range | 0.299 | +| entropy_loss | -8.25 | +| explained_variance | 0.206 | +| learning_rate | 9.55e-05 | +| loss | 2.05 | +| n_updates | 30 | +| policy_gradient_loss | -0.0118 | +| value_loss | 28.6 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.38e+03 | +| ep_rew_mean | -101 | +| time/ | | +| fps | 365 | +| iterations | 5 | +| time_elapsed | 105 | +| total_timesteps | 38650 | +| train/ | | +| approx_kl | 0.03648014 | +| clip_fraction | 0.194 | +| clip_range | 0.299 | +| entropy_loss | -8.25 | +| explained_variance | 0.214 | +| learning_rate | 9.55e-05 | +| loss | 4.98 | +| n_updates | 40 | +| policy_gradient_loss | -0.0153 | +| value_loss | 11.3 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.36e+03 | +| ep_rew_mean | -111 | +| time/ | | +| fps | 360 | +| iterations | 6 | +| time_elapsed | 128 | +| total_timesteps | 46380 | +| train/ | | +| approx_kl | 0.048289824 | +| clip_fraction | 0.203 | +| clip_range | 0.299 | +| entropy_loss | -8.22 | +| explained_variance | 0.161 | +| learning_rate | 9.55e-05 | +| loss | 2.33 | +| n_updates | 50 | +| policy_gradient_loss | -0.0181 | +| value_loss | 27.4 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.37e+03 | +| ep_rew_mean | -102 | +| time/ | | +| fps | 357 | +| iterations | 7 | +| time_elapsed | 151 | +| total_timesteps | 54110 | +| train/ | | +| approx_kl | 0.050513566 | +| clip_fraction | 0.256 | +| clip_range | 0.299 | 
+| entropy_loss | -8.18 | +| explained_variance | 0.0793 | +| learning_rate | 9.55e-05 | +| loss | 36.3 | +| n_updates | 60 | +| policy_gradient_loss | -0.0118 | +| value_loss | 18.6 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.37e+03 | +| ep_rew_mean | -95.8 | +| time/ | | +| fps | 353 | +| iterations | 8 | +| time_elapsed | 174 | +| total_timesteps | 61840 | +| train/ | | +| approx_kl | 0.052512296 | +| clip_fraction | 0.277 | +| clip_range | 0.299 | +| entropy_loss | -8.11 | +| explained_variance | 0.219 | +| learning_rate | 9.55e-05 | +| loss | 2.76 | +| n_updates | 70 | +| policy_gradient_loss | -0.0174 | +| value_loss | 11.8 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.39e+03 | +| ep_rew_mean | -112 | +| time/ | | +| fps | 350 | +| iterations | 9 | +| time_elapsed | 198 | +| total_timesteps | 69570 | +| train/ | | +| approx_kl | 0.04943707 | +| clip_fraction | 0.256 | +| clip_range | 0.299 | +| entropy_loss | -8.08 | +| explained_variance | 0.254 | +| learning_rate | 9.55e-05 | +| loss | 8.24 | +| n_updates | 80 | +| policy_gradient_loss | -0.0148 | +| value_loss | 17 | +---------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.4e+03 | +| ep_rew_mean | -115 | +| time/ | | +| fps | 349 | +| iterations | 10 | +| time_elapsed | 220 | +| total_timesteps | 77300 | +| train/ | | +| approx_kl | 0.07023027 | +| clip_fraction | 0.297 | +| clip_range | 0.299 | +| entropy_loss | -7.99 | +| explained_variance | 0.245 | +| learning_rate | 9.55e-05 | +| loss | 1.51 | +| n_updates | 90 | +| policy_gradient_loss | -0.0157 | +| value_loss | 16.3 | +---------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.38e+03 | +| ep_rew_mean | -116 | +| time/ | | +| fps | 348 | +| iterations | 11 | +| time_elapsed | 
243 | +| total_timesteps | 85030 | +| train/ | | +| approx_kl | 0.06494863 | +| clip_fraction | 0.324 | +| clip_range | 0.299 | +| entropy_loss | -7.97 | +| explained_variance | 0.308 | +| learning_rate | 9.55e-05 | +| loss | 9.16 | +| n_updates | 100 | +| policy_gradient_loss | -0.0122 | +| value_loss | 16.3 | +---------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.41e+03 | +| ep_rew_mean | -126 | +| time/ | | +| fps | 347 | +| iterations | 12 | +| time_elapsed | 266 | +| total_timesteps | 92760 | +| train/ | | +| approx_kl | 0.07837609 | +| clip_fraction | 0.344 | +| clip_range | 0.299 | +| entropy_loss | -7.92 | +| explained_variance | 0.267 | +| learning_rate | 9.55e-05 | +| loss | 2.41 | +| n_updates | 110 | +| policy_gradient_loss | -0.0101 | +| value_loss | 15.8 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.33e+03 | +| ep_rew_mean | -118 | +| time/ | | +| fps | 347 | +| iterations | 13 | +| time_elapsed | 289 | +| total_timesteps | 100490 | +| train/ | | +| approx_kl | 0.078203924 | +| clip_fraction | 0.348 | +| clip_range | 0.299 | +| entropy_loss | -7.78 | +| explained_variance | 0.254 | +| learning_rate | 9.55e-05 | +| loss | 3.22 | +| n_updates | 120 | +| policy_gradient_loss | -0.00835 | +| value_loss | 31 | +----------------------------------------- +[I 2023-03-30 22:13:04,197] Trial 5 finished with value: -314.0 and parameters: {'n_steps': 7730, 'gamma': 0.984052776116379, 'learning_rate': 9.545314010671991e-05, 'clip_range': 0.29860092606270394, 'gae_lambda': 0.9241326779349373}. Best is trial 1 with value: -205.0. +Using cuda device +Wrapping the env in a DummyVecEnv. +Wrapping the env in a VecTransposeImage. 
+C:\ProgramData\Anaconda3\envs\StreetFighterAI\lib\site-packages\stable_baselines3\ppo\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 4033`, after every 63 untruncated mini-batches, there will be a truncated mini-batch of size 1 +We recommend using a `batch_size` that is a factor of `n_steps * n_envs`. +Info: (n_steps=4033 and n_envs=1) + warnings.warn( +Logging to logs/PPO_24 +--------------------------------- +| rollout/ | | +| ep_len_mean | 1.84e+03 | +| ep_rew_mean | -128 | +| time/ | | +| fps | 596 | +| iterations | 1 | +| time_elapsed | 6 | +| total_timesteps | 4033 | +--------------------------------- +C:\ProgramData\Anaconda3\envs\StreetFighterAI\lib\site-packages\stable_baselines3\ppo\ppo.py:261: FutureWarning: Non-finite norm encountered in torch.nn.utils.clip_grad_norm_; continuing anyway. Note that the default behavior will change in a future release to error out if a non-finite total norm is encountered. At that point, setting error_if_nonfinite=false will be required to retain the old behavior. + th.nn.utils.clip_grad_norm_(self.policy.parameters(), self.max_grad_norm) +[I 2023-03-30 22:13:11,640] Trial 6 finished with value: -1.0 and parameters: {'n_steps': 4033, 'gamma': 0.9021075358644198, 'learning_rate': 9.226619222512819e-05, 'clip_range': 0.2947845259713344, 'gae_lambda': 0.9634981775867985}. Best is trial 6 with value: -1.0. +Using cuda device +Wrapping the env in a DummyVecEnv. +Wrapping the env in a VecTransposeImage. +C:\ProgramData\Anaconda3\envs\StreetFighterAI\lib\site-packages\stable_baselines3\ppo\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 1654`, after every 25 untruncated mini-batches, there will be a truncated mini-batch of size 54 +We recommend using a `batch_size` that is a factor of `n_steps * n_envs`. 
+Info: (n_steps=1654 and n_envs=1) + warnings.warn( +Logging to logs/PPO_25 +----------------------------- +| time/ | | +| fps | 666 | +| iterations | 1 | +| time_elapsed | 2 | +| total_timesteps | 1654 | +----------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.24e+03 | +| ep_rew_mean | -183 | +| time/ | | +| fps | 465 | +| iterations | 2 | +| time_elapsed | 7 | +| total_timesteps | 3308 | +| train/ | | +| approx_kl | 0.011272669 | +| clip_fraction | 0.18 | +| clip_range | 0.228 | +| entropy_loss | -8.3 | +| explained_variance | 0.00627 | +| learning_rate | 9.79e-05 | +| loss | 0.815 | +| n_updates | 10 | +| policy_gradient_loss | -0.0141 | +| value_loss | 6.8 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.21e+03 | +| ep_rew_mean | -194 | +| time/ | | +| fps | 425 | +| iterations | 3 | +| time_elapsed | 11 | +| total_timesteps | 4962 | +| train/ | | +| approx_kl | 0.013628463 | +| clip_fraction | 0.114 | +| clip_range | 0.228 | +| entropy_loss | -8.29 | +| explained_variance | 0.0365 | +| learning_rate | 9.79e-05 | +| loss | 40.8 | +| n_updates | 20 | +| policy_gradient_loss | -0.0135 | +| value_loss | 20.9 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.14e+03 | +| ep_rew_mean | -218 | +| time/ | | +| fps | 401 | +| iterations | 4 | +| time_elapsed | 16 | +| total_timesteps | 6616 | +| train/ | | +| approx_kl | 0.023692455 | +| clip_fraction | 0.238 | +| clip_range | 0.228 | +| entropy_loss | -8.26 | +| explained_variance | 0.251 | +| learning_rate | 9.79e-05 | +| loss | 0.977 | +| n_updates | 30 | +| policy_gradient_loss | -0.0152 | +| value_loss | 17.7 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.14e+03 | +| ep_rew_mean | -218 | +| time/ | | +| fps | 389 | +| iterations | 5 | +| 
time_elapsed | 21 | +| total_timesteps | 8270 | +| train/ | | +| approx_kl | 0.02734942 | +| clip_fraction | 0.272 | +| clip_range | 0.228 | +| entropy_loss | -8.25 | +| explained_variance | -0.035 | +| learning_rate | 9.79e-05 | +| loss | 0.673 | +| n_updates | 40 | +| policy_gradient_loss | 0.00113 | +| value_loss | 33.6 | +---------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.29e+03 | +| ep_rew_mean | -176 | +| time/ | | +| fps | 382 | +| iterations | 6 | +| time_elapsed | 25 | +| total_timesteps | 9924 | +| train/ | | +| approx_kl | 0.01778004 | +| clip_fraction | 0.247 | +| clip_range | 0.228 | +| entropy_loss | -8.26 | +| explained_variance | 0.369 | +| learning_rate | 9.79e-05 | +| loss | 1.41 | +| n_updates | 50 | +| policy_gradient_loss | -0.0143 | +| value_loss | 4.89 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.29e+03 | +| ep_rew_mean | -176 | +| time/ | | +| fps | 375 | +| iterations | 7 | +| time_elapsed | 30 | +| total_timesteps | 11578 | +| train/ | | +| approx_kl | 0.016047975 | +| clip_fraction | 0.174 | +| clip_range | 0.228 | +| entropy_loss | -8.25 | +| explained_variance | 0.164 | +| learning_rate | 9.79e-05 | +| loss | 1.49 | +| n_updates | 60 | +| policy_gradient_loss | -0.0128 | +| value_loss | 7.98 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.57e+03 | +| ep_rew_mean | -148 | +| time/ | | +| fps | 363 | +| iterations | 8 | +| time_elapsed | 36 | +| total_timesteps | 13232 | +| train/ | | +| approx_kl | 0.016572453 | +| clip_fraction | 0.174 | +| clip_range | 0.228 | +| entropy_loss | -8.25 | +| explained_variance | -0.193 | +| learning_rate | 9.79e-05 | +| loss | 0.18 | +| n_updates | 70 | +| policy_gradient_loss | -0.0197 | +| value_loss | 3.7 | +----------------------------------------- 
+----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.42e+03 | +| ep_rew_mean | -152 | +| time/ | | +| fps | 354 | +| iterations | 9 | +| time_elapsed | 42 | +| total_timesteps | 14886 | +| train/ | | +| approx_kl | 0.018261585 | +| clip_fraction | 0.163 | +| clip_range | 0.228 | +| entropy_loss | -8.23 | +| explained_variance | 0.0612 | +| learning_rate | 9.79e-05 | +| loss | 1.23 | +| n_updates | 80 | +| policy_gradient_loss | -0.017 | +| value_loss | 8.05 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.42e+03 | +| ep_rew_mean | -152 | +| time/ | | +| fps | 347 | +| iterations | 10 | +| time_elapsed | 47 | +| total_timesteps | 16540 | +| train/ | | +| approx_kl | 0.025278179 | +| clip_fraction | 0.233 | +| clip_range | 0.228 | +| entropy_loss | -8.19 | +| explained_variance | 0.00951 | +| learning_rate | 9.79e-05 | +| loss | 1.36 | +| n_updates | 90 | +| policy_gradient_loss | -0.0161 | +| value_loss | 14.8 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.53e+03 | +| ep_rew_mean | -141 | +| time/ | | +| fps | 342 | +| iterations | 11 | +| time_elapsed | 53 | +| total_timesteps | 18194 | +| train/ | | +| approx_kl | 0.019890858 | +| clip_fraction | 0.243 | +| clip_range | 0.228 | +| entropy_loss | -8.16 | +| explained_variance | 0.32 | +| learning_rate | 9.79e-05 | +| loss | 1.63 | +| n_updates | 100 | +| policy_gradient_loss | -0.0133 | +| value_loss | 5.37 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.39e+03 | +| ep_rew_mean | -163 | +| time/ | | +| fps | 339 | +| iterations | 12 | +| time_elapsed | 58 | +| total_timesteps | 19848 | +| train/ | | +| approx_kl | 0.027444609 | +| clip_fraction | 0.273 | +| clip_range | 0.228 | +| entropy_loss | -8.1 | +| explained_variance | 0.173 | +| learning_rate | 9.79e-05 | +| 
loss | 1.03 | +| n_updates | 110 | +| policy_gradient_loss | -0.0161 | +| value_loss | 10.9 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.39e+03 | +| ep_rew_mean | -163 | +| time/ | | +| fps | 336 | +| iterations | 13 | +| time_elapsed | 63 | +| total_timesteps | 21502 | +| train/ | | +| approx_kl | 0.029858373 | +| clip_fraction | 0.226 | +| clip_range | 0.228 | +| entropy_loss | -8.15 | +| explained_variance | 0.124 | +| learning_rate | 9.79e-05 | +| loss | 87.4 | +| n_updates | 120 | +| policy_gradient_loss | -0.0143 | +| value_loss | 37.1 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.4e+03 | +| ep_rew_mean | -168 | +| time/ | | +| fps | 333 | +| iterations | 14 | +| time_elapsed | 69 | +| total_timesteps | 23156 | +| train/ | | +| approx_kl | 0.02500601 | +| clip_fraction | 0.272 | +| clip_range | 0.228 | +| entropy_loss | -8.14 | +| explained_variance | 0.227 | +| learning_rate | 9.79e-05 | +| loss | 1.26 | +| n_updates | 130 | +| policy_gradient_loss | -0.0173 | +| value_loss | 6.66 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.47e+03 | +| ep_rew_mean | -161 | +| time/ | | +| fps | 330 | +| iterations | 15 | +| time_elapsed | 75 | +| total_timesteps | 24810 | +| train/ | | +| approx_kl | 0.025755124 | +| clip_fraction | 0.234 | +| clip_range | 0.228 | +| entropy_loss | -8.14 | +| explained_variance | 0.238 | +| learning_rate | 9.79e-05 | +| loss | 1.24 | +| n_updates | 140 | +| policy_gradient_loss | -0.016 | +| value_loss | 13.3 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.47e+03 | +| ep_rew_mean | -161 | +| time/ | | +| fps | 328 | +| iterations | 16 | +| time_elapsed | 80 | +| total_timesteps | 26464 | +| train/ | | +| approx_kl | 0.021200689 | +| 
clip_fraction | 0.234 | +| clip_range | 0.228 | +| entropy_loss | -8.07 | +| explained_variance | 0.176 | +| learning_rate | 9.79e-05 | +| loss | 0.559 | +| n_updates | 150 | +| policy_gradient_loss | -0.0132 | +| value_loss | 9.71 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.47e+03 | +| ep_rew_mean | -161 | +| time/ | | +| fps | 326 | +| iterations | 17 | +| time_elapsed | 86 | +| total_timesteps | 28118 | +| train/ | | +| approx_kl | 0.02932891 | +| clip_fraction | 0.27 | +| clip_range | 0.228 | +| entropy_loss | -8.08 | +| explained_variance | 0.302 | +| learning_rate | 9.79e-05 | +| loss | 2.16 | +| n_updates | 160 | +| policy_gradient_loss | -0.0225 | +| value_loss | 4.34 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.47e+03 | +| ep_rew_mean | -161 | +| time/ | | +| fps | 324 | +| iterations | 18 | +| time_elapsed | 91 | +| total_timesteps | 29772 | +| train/ | | +| approx_kl | 0.016725304 | +| clip_fraction | 0.212 | +| clip_range | 0.228 | +| entropy_loss | -8.06 | +| explained_variance | 0.0835 | +| learning_rate | 9.79e-05 | +| loss | 0.398 | +| n_updates | 170 | +| policy_gradient_loss | -0.0136 | +| value_loss | 5.97 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.84e+03 | +| ep_rew_mean | -164 | +| time/ | | +| fps | 322 | +| iterations | 19 | +| time_elapsed | 97 | +| total_timesteps | 31426 | +| train/ | | +| approx_kl | 0.020989887 | +| clip_fraction | 0.216 | +| clip_range | 0.228 | +| entropy_loss | -8.08 | +| explained_variance | 0.364 | +| learning_rate | 9.79e-05 | +| loss | 0.103 | +| n_updates | 180 | +| policy_gradient_loss | -0.0259 | +| value_loss | 1.02 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.84e+03 | +| ep_rew_mean | -164 | +| time/ | 
| +| fps | 321 | +| iterations | 20 | +| time_elapsed | 102 | +| total_timesteps | 33080 | +| train/ | | +| approx_kl | 0.02685814 | +| clip_fraction | 0.201 | +| clip_range | 0.228 | +| entropy_loss | -8.03 | +| explained_variance | 0.114 | +| learning_rate | 9.79e-05 | +| loss | 1.95 | +| n_updates | 190 | +| policy_gradient_loss | -0.0192 | +| value_loss | 16.3 | +---------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.84e+03 | +| ep_rew_mean | -164 | +| time/ | | +| fps | 320 | +| iterations | 21 | +| time_elapsed | 108 | +| total_timesteps | 34734 | +| train/ | | +| approx_kl | 0.02518316 | +| clip_fraction | 0.22 | +| clip_range | 0.228 | +| entropy_loss | -8.03 | +| explained_variance | 0.293 | +| learning_rate | 9.79e-05 | +| loss | 1.66 | +| n_updates | 200 | +| policy_gradient_loss | -0.0237 | +| value_loss | 6.07 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.92e+03 | +| ep_rew_mean | -148 | +| time/ | | +| fps | 319 | +| iterations | 22 | +| time_elapsed | 114 | +| total_timesteps | 36388 | +| train/ | | +| approx_kl | 0.020095803 | +| clip_fraction | 0.191 | +| clip_range | 0.228 | +| entropy_loss | -8.04 | +| explained_variance | 0.242 | +| learning_rate | 9.79e-05 | +| loss | 0.859 | +| n_updates | 210 | +| policy_gradient_loss | -0.0166 | +| value_loss | 5.24 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.87e+03 | +| ep_rew_mean | -164 | +| time/ | | +| fps | 317 | +| iterations | 23 | +| time_elapsed | 119 | +| total_timesteps | 38042 | +| train/ | | +| approx_kl | 0.023355601 | +| clip_fraction | 0.246 | +| clip_range | 0.228 | +| entropy_loss | -7.97 | +| explained_variance | 0.0157 | +| learning_rate | 9.79e-05 | +| loss | 0.642 | +| n_updates | 220 | +| policy_gradient_loss | -0.0248 | +| value_loss | 4.15 | 
+----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.87e+03 | +| ep_rew_mean | -164 | +| time/ | | +| fps | 317 | +| iterations | 24 | +| time_elapsed | 125 | +| total_timesteps | 39696 | +| train/ | | +| approx_kl | 0.034935288 | +| clip_fraction | 0.267 | +| clip_range | 0.228 | +| entropy_loss | -7.95 | +| explained_variance | 0.0452 | +| learning_rate | 9.79e-05 | +| loss | 201 | +| n_updates | 230 | +| policy_gradient_loss | -0.0162 | +| value_loss | 46.4 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.92e+03 | +| ep_rew_mean | -152 | +| time/ | | +| fps | 316 | +| iterations | 25 | +| time_elapsed | 130 | +| total_timesteps | 41350 | +| train/ | | +| approx_kl | 0.025743902 | +| clip_fraction | 0.265 | +| clip_range | 0.228 | +| entropy_loss | -7.94 | +| explained_variance | 0.214 | +| learning_rate | 9.79e-05 | +| loss | 1.29 | +| n_updates | 240 | +| policy_gradient_loss | -0.0199 | +| value_loss | 7.38 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.86e+03 | +| ep_rew_mean | -162 | +| time/ | | +| fps | 315 | +| iterations | 26 | +| time_elapsed | 136 | +| total_timesteps | 43004 | +| train/ | | +| approx_kl | 0.03637928 | +| clip_fraction | 0.312 | +| clip_range | 0.228 | +| entropy_loss | -7.93 | +| explained_variance | 0.491 | +| learning_rate | 9.79e-05 | +| loss | 0.508 | +| n_updates | 250 | +| policy_gradient_loss | -0.0222 | +| value_loss | 2.89 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.86e+03 | +| ep_rew_mean | -162 | +| time/ | | +| fps | 315 | +| iterations | 27 | +| time_elapsed | 141 | +| total_timesteps | 44658 | +| train/ | | +| approx_kl | 0.056980833 | +| clip_fraction | 0.394 | +| clip_range | 0.228 | +| entropy_loss | -7.87 | +| 
explained_variance | 0.109 | +| learning_rate | 9.79e-05 | +| loss | 1.26 | +| n_updates | 260 | +| policy_gradient_loss | -0.00769 | +| value_loss | 19.5 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.86e+03 | +| ep_rew_mean | -156 | +| time/ | | +| fps | 316 | +| iterations | 28 | +| time_elapsed | 146 | +| total_timesteps | 46312 | +| train/ | | +| approx_kl | 0.03702618 | +| clip_fraction | 0.365 | +| clip_range | 0.228 | +| entropy_loss | -7.85 | +| explained_variance | 0.291 | +| learning_rate | 9.79e-05 | +| loss | 0.537 | +| n_updates | 270 | +| policy_gradient_loss | -0.0122 | +| value_loss | 4.75 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.86e+03 | +| ep_rew_mean | -156 | +| time/ | | +| fps | 318 | +| iterations | 29 | +| time_elapsed | 150 | +| total_timesteps | 47966 | +| train/ | | +| approx_kl | 0.033755746 | +| clip_fraction | 0.299 | +| clip_range | 0.228 | +| entropy_loss | -7.77 | +| explained_variance | 0.31 | +| learning_rate | 9.79e-05 | +| loss | 7.27 | +| n_updates | 280 | +| policy_gradient_loss | -0.016 | +| value_loss | 7.63 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.83e+03 | +| ep_rew_mean | -159 | +| time/ | | +| fps | 319 | +| iterations | 30 | +| time_elapsed | 155 | +| total_timesteps | 49620 | +| train/ | | +| approx_kl | 0.03693611 | +| clip_fraction | 0.286 | +| clip_range | 0.228 | +| entropy_loss | -7.81 | +| explained_variance | 0.343 | +| learning_rate | 9.79e-05 | +| loss | 0.828 | +| n_updates | 290 | +| policy_gradient_loss | -0.0217 | +| value_loss | 4.62 | +---------------------------------------- +--------------------------------------- +| rollout/ | | +| ep_len_mean | 2.81e+03 | +| ep_rew_mean | -150 | +| time/ | | +| fps | 319 | +| iterations | 31 | +| time_elapsed | 160 | +| total_timesteps 
| 51274 | +| train/ | | +| approx_kl | 0.0407202 | +| clip_fraction | 0.331 | +| clip_range | 0.228 | +| entropy_loss | -7.73 | +| explained_variance | 0.164 | +| learning_rate | 9.79e-05 | +| loss | 19.9 | +| n_updates | 300 | +| policy_gradient_loss | -0.00744 | +| value_loss | 25.4 | +--------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.81e+03 | +| ep_rew_mean | -150 | +| time/ | | +| fps | 321 | +| iterations | 32 | +| time_elapsed | 164 | +| total_timesteps | 52928 | +| train/ | | +| approx_kl | 0.049017448 | +| clip_fraction | 0.356 | +| clip_range | 0.228 | +| entropy_loss | -7.8 | +| explained_variance | -0.0167 | +| learning_rate | 9.79e-05 | +| loss | 0.897 | +| n_updates | 310 | +| policy_gradient_loss | -0.0214 | +| value_loss | 4.18 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.8e+03 | +| ep_rew_mean | -156 | +| time/ | | +| fps | 322 | +| iterations | 33 | +| time_elapsed | 169 | +| total_timesteps | 54582 | +| train/ | | +| approx_kl | 0.035868283 | +| clip_fraction | 0.323 | +| clip_range | 0.228 | +| entropy_loss | -7.79 | +| explained_variance | 0.177 | +| learning_rate | 9.79e-05 | +| loss | 0.298 | +| n_updates | 320 | +| policy_gradient_loss | -0.0128 | +| value_loss | 2.9 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.76e+03 | +| ep_rew_mean | -161 | +| time/ | | +| fps | 322 | +| iterations | 34 | +| time_elapsed | 174 | +| total_timesteps | 56236 | +| train/ | | +| approx_kl | 0.040655132 | +| clip_fraction | 0.321 | +| clip_range | 0.228 | +| entropy_loss | -7.6 | +| explained_variance | 0.0977 | +| learning_rate | 9.79e-05 | +| loss | 3.28 | +| n_updates | 330 | +| policy_gradient_loss | -0.0114 | +| value_loss | 29.4 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| 
ep_len_mean | 2.73e+03 | +| ep_rew_mean | -162 | +| time/ | | +| fps | 323 | +| iterations | 35 | +| time_elapsed | 178 | +| total_timesteps | 57890 | +| train/ | | +| approx_kl | 0.049799267 | +| clip_fraction | 0.364 | +| clip_range | 0.228 | +| entropy_loss | -7.61 | +| explained_variance | 0.0564 | +| learning_rate | 9.79e-05 | +| loss | 0.473 | +| n_updates | 340 | +| policy_gradient_loss | -0.0171 | +| value_loss | 12.4 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.65e+03 | +| ep_rew_mean | -141 | +| time/ | | +| fps | 323 | +| iterations | 36 | +| time_elapsed | 183 | +| total_timesteps | 59544 | +| train/ | | +| approx_kl | 0.051947072 | +| clip_fraction | 0.352 | +| clip_range | 0.228 | +| entropy_loss | -7.51 | +| explained_variance | 0.194 | +| learning_rate | 9.79e-05 | +| loss | 2.48 | +| n_updates | 350 | +| policy_gradient_loss | -0.0138 | +| value_loss | 17.2 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.65e+03 | +| ep_rew_mean | -141 | +| time/ | | +| fps | 324 | +| iterations | 37 | +| time_elapsed | 188 | +| total_timesteps | 61198 | +| train/ | | +| approx_kl | 0.04901576 | +| clip_fraction | 0.4 | +| clip_range | 0.228 | +| entropy_loss | -7.55 | +| explained_variance | -0.0663 | +| learning_rate | 9.79e-05 | +| loss | 7.66 | +| n_updates | 360 | +| policy_gradient_loss | -0.00211 | +| value_loss | 28.4 | +---------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.67e+03 | +| ep_rew_mean | -132 | +| time/ | | +| fps | 326 | +| iterations | 38 | +| time_elapsed | 192 | +| total_timesteps | 62852 | +| train/ | | +| approx_kl | 0.04695523 | +| clip_fraction | 0.388 | +| clip_range | 0.228 | +| entropy_loss | -7.6 | +| explained_variance | 0.29 | +| learning_rate | 9.79e-05 | +| loss | 0.643 | +| n_updates | 370 | +| policy_gradient_loss | 
-0.0192 | +| value_loss | 4.24 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.63e+03 | +| ep_rew_mean | -141 | +| time/ | | +| fps | 326 | +| iterations | 39 | +| time_elapsed | 197 | +| total_timesteps | 64506 | +| train/ | | +| approx_kl | 0.046350323 | +| clip_fraction | 0.355 | +| clip_range | 0.228 | +| entropy_loss | -7.45 | +| explained_variance | 0.121 | +| learning_rate | 9.79e-05 | +| loss | 2.98 | +| n_updates | 380 | +| policy_gradient_loss | -0.0267 | +| value_loss | 5.78 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.63e+03 | +| ep_rew_mean | -145 | +| time/ | | +| fps | 326 | +| iterations | 40 | +| time_elapsed | 202 | +| total_timesteps | 66160 | +| train/ | | +| approx_kl | 0.042303674 | +| clip_fraction | 0.365 | +| clip_range | 0.228 | +| entropy_loss | -7.55 | +| explained_variance | 0.0295 | +| learning_rate | 9.79e-05 | +| loss | 2.42 | +| n_updates | 390 | +| policy_gradient_loss | -0.0111 | +| value_loss | 49.1 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.63e+03 | +| ep_rew_mean | -145 | +| time/ | | +| fps | 327 | +| iterations | 41 | +| time_elapsed | 207 | +| total_timesteps | 67814 | +| train/ | | +| approx_kl | 0.06833778 | +| clip_fraction | 0.442 | +| clip_range | 0.228 | +| entropy_loss | -7.26 | +| explained_variance | 0.0526 | +| learning_rate | 9.79e-05 | +| loss | 1.06 | +| n_updates | 400 | +| policy_gradient_loss | -0.00992 | +| value_loss | 13.1 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.63e+03 | +| ep_rew_mean | -148 | +| time/ | | +| fps | 327 | +| iterations | 42 | +| time_elapsed | 212 | +| total_timesteps | 69468 | +| train/ | | +| approx_kl | 0.051798023 | +| clip_fraction | 0.393 | +| clip_range | 0.228 | +| 
entropy_loss | -7.56 | +| explained_variance | 0.304 | +| learning_rate | 9.79e-05 | +| loss | 0.899 | +| n_updates | 410 | +| policy_gradient_loss | -0.0254 | +| value_loss | 6.58 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.62e+03 | +| ep_rew_mean | -136 | +| time/ | | +| fps | 326 | +| iterations | 43 | +| time_elapsed | 217 | +| total_timesteps | 71122 | +| train/ | | +| approx_kl | 0.04322006 | +| clip_fraction | 0.383 | +| clip_range | 0.228 | +| entropy_loss | -7.52 | +| explained_variance | 0.288 | +| learning_rate | 9.79e-05 | +| loss | 0.502 | +| n_updates | 420 | +| policy_gradient_loss | -0.0132 | +| value_loss | 15.5 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.58e+03 | +| ep_rew_mean | -141 | +| time/ | | +| fps | 326 | +| iterations | 44 | +| time_elapsed | 223 | +| total_timesteps | 72776 | +| train/ | | +| approx_kl | 0.051466085 | +| clip_fraction | 0.397 | +| clip_range | 0.228 | +| entropy_loss | -7.23 | +| explained_variance | 0.217 | +| learning_rate | 9.79e-05 | +| loss | 1.78 | +| n_updates | 430 | +| policy_gradient_loss | -0.0112 | +| value_loss | 15.6 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.58e+03 | +| ep_rew_mean | -141 | +| time/ | | +| fps | 325 | +| iterations | 45 | +| time_elapsed | 228 | +| total_timesteps | 74430 | +| train/ | | +| approx_kl | 0.06645863 | +| clip_fraction | 0.421 | +| clip_range | 0.228 | +| entropy_loss | -7.28 | +| explained_variance | 0.279 | +| learning_rate | 9.79e-05 | +| loss | 1.55 | +| n_updates | 440 | +| policy_gradient_loss | -0.0173 | +| value_loss | 15.6 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.58e+03 | +| ep_rew_mean | -139 | +| time/ | | +| fps | 324 | +| iterations | 46 | +| 
time_elapsed | 234 | +| total_timesteps | 76084 | +| train/ | | +| approx_kl | 0.042081438 | +| clip_fraction | 0.38 | +| clip_range | 0.228 | +| entropy_loss | -7.37 | +| explained_variance | 0.2 | +| learning_rate | 9.79e-05 | +| loss | 5.59 | +| n_updates | 450 | +| policy_gradient_loss | -0.016 | +| value_loss | 5.48 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.56e+03 | +| ep_rew_mean | -137 | +| time/ | | +| fps | 324 | +| iterations | 47 | +| time_elapsed | 239 | +| total_timesteps | 77738 | +| train/ | | +| approx_kl | 0.040408526 | +| clip_fraction | 0.36 | +| clip_range | 0.228 | +| entropy_loss | -7.2 | +| explained_variance | 0.439 | +| learning_rate | 9.79e-05 | +| loss | 1.19 | +| n_updates | 460 | +| policy_gradient_loss | -0.019 | +| value_loss | 8.17 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.56e+03 | +| ep_rew_mean | -132 | +| time/ | | +| fps | 323 | +| iterations | 48 | +| time_elapsed | 245 | +| total_timesteps | 79392 | +| train/ | | +| approx_kl | 0.047893133 | +| clip_fraction | 0.399 | +| clip_range | 0.228 | +| entropy_loss | -7.24 | +| explained_variance | 0.29 | +| learning_rate | 9.79e-05 | +| loss | 9.79 | +| n_updates | 470 | +| policy_gradient_loss | -0.023 | +| value_loss | 9.88 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.56e+03 | +| ep_rew_mean | -132 | +| time/ | | +| fps | 323 | +| iterations | 49 | +| time_elapsed | 250 | +| total_timesteps | 81046 | +| train/ | | +| approx_kl | 0.050825655 | +| clip_fraction | 0.409 | +| clip_range | 0.228 | +| entropy_loss | -7.27 | +| explained_variance | 0.125 | +| learning_rate | 9.79e-05 | +| loss | 0.481 | +| n_updates | 480 | +| policy_gradient_loss | -0.0222 | +| value_loss | 5.85 | +----------------------------------------- 
+----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.53e+03 | +| ep_rew_mean | -137 | +| time/ | | +| fps | 322 | +| iterations | 50 | +| time_elapsed | 256 | +| total_timesteps | 82700 | +| train/ | | +| approx_kl | 0.045231882 | +| clip_fraction | 0.368 | +| clip_range | 0.228 | +| entropy_loss | -7.47 | +| explained_variance | 0.378 | +| learning_rate | 9.79e-05 | +| loss | 0.404 | +| n_updates | 490 | +| policy_gradient_loss | -0.0237 | +| value_loss | 5.36 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.48e+03 | +| ep_rew_mean | -121 | +| time/ | | +| fps | 322 | +| iterations | 51 | +| time_elapsed | 261 | +| total_timesteps | 84354 | +| train/ | | +| approx_kl | 0.08339866 | +| clip_fraction | 0.483 | +| clip_range | 0.228 | +| entropy_loss | -7.08 | +| explained_variance | 0.0398 | +| learning_rate | 9.79e-05 | +| loss | 1.24 | +| n_updates | 500 | +| policy_gradient_loss | -0.0024 | +| value_loss | 26.8 | +---------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.48e+03 | +| ep_rew_mean | -121 | +| time/ | | +| fps | 321 | +| iterations | 52 | +| time_elapsed | 267 | +| total_timesteps | 86008 | +| train/ | | +| approx_kl | 0.07751507 | +| clip_fraction | 0.455 | +| clip_range | 0.228 | +| entropy_loss | -6.97 | +| explained_variance | 0.102 | +| learning_rate | 9.79e-05 | +| loss | 13.2 | +| n_updates | 510 | +| policy_gradient_loss | 9.07e-05 | +| value_loss | 26.6 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.49e+03 | +| ep_rew_mean | -116 | +| time/ | | +| fps | 321 | +| iterations | 53 | +| time_elapsed | 272 | +| total_timesteps | 87662 | +| train/ | | +| approx_kl | 0.050660215 | +| clip_fraction | 0.407 | +| clip_range | 0.228 | +| entropy_loss | -7.09 | +| explained_variance | 0.558 | +| learning_rate | 9.79e-05 | +| 
loss | 1.27 | +| n_updates | 520 | +| policy_gradient_loss | -0.028 | +| value_loss | 5.08 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.45e+03 | +| ep_rew_mean | -118 | +| time/ | | +| fps | 321 | +| iterations | 54 | +| time_elapsed | 278 | +| total_timesteps | 89316 | +| train/ | | +| approx_kl | 0.059903584 | +| clip_fraction | 0.435 | +| clip_range | 0.228 | +| entropy_loss | -7 | +| explained_variance | 0.34 | +| learning_rate | 9.79e-05 | +| loss | 2.36 | +| n_updates | 530 | +| policy_gradient_loss | -0.0178 | +| value_loss | 7.52 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.43e+03 | +| ep_rew_mean | -124 | +| time/ | | +| fps | 320 | +| iterations | 55 | +| time_elapsed | 283 | +| total_timesteps | 90970 | +| train/ | | +| approx_kl | 0.05906586 | +| clip_fraction | 0.397 | +| clip_range | 0.228 | +| entropy_loss | -6.97 | +| explained_variance | 0.411 | +| learning_rate | 9.79e-05 | +| loss | 3.49 | +| n_updates | 540 | +| policy_gradient_loss | -0.0154 | +| value_loss | 15.3 | +---------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.42e+03 | +| ep_rew_mean | -129 | +| time/ | | +| fps | 320 | +| iterations | 56 | +| time_elapsed | 289 | +| total_timesteps | 92624 | +| train/ | | +| approx_kl | 0.06667252 | +| clip_fraction | 0.446 | +| clip_range | 0.228 | +| entropy_loss | -7.01 | +| explained_variance | 0.12 | +| learning_rate | 9.79e-05 | +| loss | 2.8 | +| n_updates | 550 | +| policy_gradient_loss | -0.00396 | +| value_loss | 49.1 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.41e+03 | +| ep_rew_mean | -128 | +| time/ | | +| fps | 319 | +| iterations | 57 | +| time_elapsed | 295 | +| total_timesteps | 94278 | +| train/ | | +| approx_kl | 0.086899824 | +| 
clip_fraction | 0.46 | +| clip_range | 0.228 | +| entropy_loss | -6.97 | +| explained_variance | 0.37 | +| learning_rate | 9.79e-05 | +| loss | 0.64 | +| n_updates | 560 | +| policy_gradient_loss | -0.0243 | +| value_loss | 20.1 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.41e+03 | +| ep_rew_mean | -128 | +| time/ | | +| fps | 318 | +| iterations | 58 | +| time_elapsed | 300 | +| total_timesteps | 95932 | +| train/ | | +| approx_kl | 0.07774362 | +| clip_fraction | 0.49 | +| clip_range | 0.228 | +| entropy_loss | -6.63 | +| explained_variance | 0.221 | +| learning_rate | 9.79e-05 | +| loss | 3.45 | +| n_updates | 570 | +| policy_gradient_loss | -0.00944 | +| value_loss | 8.12 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.42e+03 | +| ep_rew_mean | -130 | +| time/ | | +| fps | 318 | +| iterations | 59 | +| time_elapsed | 306 | +| total_timesteps | 97586 | +| train/ | | +| approx_kl | 0.055749163 | +| clip_fraction | 0.422 | +| clip_range | 0.228 | +| entropy_loss | -6.97 | +| explained_variance | 0.27 | +| learning_rate | 9.79e-05 | +| loss | 0.485 | +| n_updates | 580 | +| policy_gradient_loss | -0.0239 | +| value_loss | 3.6 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.4e+03 | +| ep_rew_mean | -134 | +| time/ | | +| fps | 317 | +| iterations | 60 | +| time_elapsed | 312 | +| total_timesteps | 99240 | +| train/ | | +| approx_kl | 0.05942291 | +| clip_fraction | 0.465 | +| clip_range | 0.228 | +| entropy_loss | -7.15 | +| explained_variance | 0.522 | +| learning_rate | 9.79e-05 | +| loss | 1.21 | +| n_updates | 590 | +| policy_gradient_loss | -0.0177 | +| value_loss | 12.3 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.4e+03 | +| ep_rew_mean | -134 | +| time/ | | +| 
fps | 317 | +| iterations | 61 | +| time_elapsed | 317 | +| total_timesteps | 100894 | +| train/ | | +| approx_kl | 0.090608686 | +| clip_fraction | 0.468 | +| clip_range | 0.228 | +| entropy_loss | -7.05 | +| explained_variance | 0.342 | +| learning_rate | 9.79e-05 | +| loss | 4.04 | +| n_updates | 600 | +| policy_gradient_loss | 0.000938 | +| value_loss | 25.8 | +----------------------------------------- +[I 2023-03-30 22:18:58,250] Trial 7 finished with value: -352.0 and parameters: {'n_steps': 1654, 'gamma': 0.9631671321909901, 'learning_rate': 9.790024836371174e-05, 'clip_range': 0.22794548657535632, 'gae_lambda': 0.8643034328071537}. Best is trial 6 with value: -1.0. +Using cuda device +Wrapping the env in a DummyVecEnv. +Wrapping the env in a VecTransposeImage. +C:\ProgramData\Anaconda3\envs\StreetFighterAI\lib\site-packages\stable_baselines3\ppo\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 1146`, after every 17 untruncated mini-batches, there will be a truncated mini-batch of size 58 +We recommend using a `batch_size` that is a factor of `n_steps * n_envs`. 
+Info: (n_steps=1146 and n_envs=1) + warnings.warn( +Logging to logs/PPO_26 +----------------------------- +| time/ | | +| fps | 624 | +| iterations | 1 | +| time_elapsed | 1 | +| total_timesteps | 1146 | +----------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 1.96e+03 | +| ep_rew_mean | 1 | +| time/ | | +| fps | 453 | +| iterations | 2 | +| time_elapsed | 5 | +| total_timesteps | 2292 | +| train/ | | +| approx_kl | 0.04023962 | +| clip_fraction | 0.18 | +| clip_range | 0.332 | +| entropy_loss | -8.29 | +| explained_variance | 0.00577 | +| learning_rate | 8.88e-05 | +| loss | 2.31 | +| n_updates | 10 | +| policy_gradient_loss | -0.0237 | +| value_loss | 11.7 | +---------------------------------------- +--------------------------------------- +| rollout/ | | +| ep_len_mean | 1.96e+03 | +| ep_rew_mean | 1 | +| time/ | | +| fps | 419 | +| iterations | 3 | +| time_elapsed | 8 | +| total_timesteps | 3438 | +| train/ | | +| approx_kl | 0.0303674 | +| clip_fraction | 0.155 | +| clip_range | 0.332 | +| entropy_loss | -8.25 | +| explained_variance | -0.03 | +| learning_rate | 8.88e-05 | +| loss | 1.87 | +| n_updates | 20 | +| policy_gradient_loss | -0.0214 | +| value_loss | 5.96 | +--------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 1.9e+03 | +| ep_rew_mean | 138 | +| time/ | | +| fps | 406 | +| iterations | 4 | +| time_elapsed | 11 | +| total_timesteps | 4584 | +| train/ | | +| approx_kl | 0.029227091 | +| clip_fraction | 0.13 | +| clip_range | 0.332 | +| entropy_loss | -8.25 | +| explained_variance | 0.0631 | +| learning_rate | 8.88e-05 | +| loss | 0.793 | +| n_updates | 30 | +| policy_gradient_loss | -0.0214 | +| value_loss | 9.91 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 1.9e+03 | +| ep_rew_mean | 138 | +| time/ | | +| fps | 399 | +| iterations | 5 | +| time_elapsed | 14 | +| 
total_timesteps | 5730 | +| train/ | | +| approx_kl | 0.04213173 | +| clip_fraction | 0.147 | +| clip_range | 0.332 | +| entropy_loss | -8.21 | +| explained_variance | 0.0443 | +| learning_rate | 8.88e-05 | +| loss | 2.83 | +| n_updates | 40 | +| policy_gradient_loss | -0.0103 | +| value_loss | 34.6 | +---------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.1e+03 | +| ep_rew_mean | 68 | +| time/ | | +| fps | 393 | +| iterations | 6 | +| time_elapsed | 17 | +| total_timesteps | 6876 | +| train/ | | +| approx_kl | 0.02153559 | +| clip_fraction | 0.143 | +| clip_range | 0.332 | +| entropy_loss | -8.14 | +| explained_variance | -0.0437 | +| learning_rate | 8.88e-05 | +| loss | 0.602 | +| n_updates | 50 | +| policy_gradient_loss | -0.0157 | +| value_loss | 11.8 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 1.92e+03 | +| ep_rew_mean | 22.2 | +| time/ | | +| fps | 391 | +| iterations | 7 | +| time_elapsed | 20 | +| total_timesteps | 8022 | +| train/ | | +| approx_kl | 0.040259663 | +| clip_fraction | 0.2 | +| clip_range | 0.332 | +| entropy_loss | -8.05 | +| explained_variance | -0.161 | +| learning_rate | 8.88e-05 | +| loss | 13.2 | +| n_updates | 60 | +| policy_gradient_loss | -0.025 | +| value_loss | 15.7 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 1.92e+03 | +| ep_rew_mean | 22.2 | +| time/ | | +| fps | 388 | +| iterations | 8 | +| time_elapsed | 23 | +| total_timesteps | 9168 | +| train/ | | +| approx_kl | 0.029950712 | +| clip_fraction | 0.155 | +| clip_range | 0.332 | +| entropy_loss | -8.02 | +| explained_variance | 0.0118 | +| learning_rate | 8.88e-05 | +| loss | 6.02 | +| n_updates | 70 | +| policy_gradient_loss | -0.028 | +| value_loss | 16.9 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| 
ep_len_mean | 1.89e+03 | +| ep_rew_mean | -28.4 | +| time/ | | +| fps | 386 | +| iterations | 9 | +| time_elapsed | 26 | +| total_timesteps | 10314 | +| train/ | | +| approx_kl | 0.034316827 | +| clip_fraction | 0.127 | +| clip_range | 0.332 | +| entropy_loss | -8.03 | +| explained_variance | -0.00279 | +| learning_rate | 8.88e-05 | +| loss | 0.944 | +| n_updates | 80 | +| policy_gradient_loss | -0.0281 | +| value_loss | 5.72 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 1.89e+03 | +| ep_rew_mean | -28.4 | +| time/ | | +| fps | 381 | +| iterations | 10 | +| time_elapsed | 30 | +| total_timesteps | 11460 | +| train/ | | +| approx_kl | 0.047511037 | +| clip_fraction | 0.184 | +| clip_range | 0.332 | +| entropy_loss | -7.96 | +| explained_variance | 0.0175 | +| learning_rate | 8.88e-05 | +| loss | 1.72 | +| n_updates | 90 | +| policy_gradient_loss | -0.0188 | +| value_loss | 24.5 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 1.89e+03 | +| ep_rew_mean | -28.4 | +| time/ | | +| fps | 380 | +| iterations | 11 | +| time_elapsed | 33 | +| total_timesteps | 12606 | +| train/ | | +| approx_kl | 0.03660329 | +| clip_fraction | 0.183 | +| clip_range | 0.332 | +| entropy_loss | -7.83 | +| explained_variance | -0.00303 | +| learning_rate | 8.88e-05 | +| loss | 2.56 | +| n_updates | 100 | +| policy_gradient_loss | -0.017 | +| value_loss | 6.9 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.12e+03 | +| ep_rew_mean | -27.5 | +| time/ | | +| fps | 378 | +| iterations | 12 | +| time_elapsed | 36 | +| total_timesteps | 13752 | +| train/ | | +| approx_kl | 0.030439496 | +| clip_fraction | 0.146 | +| clip_range | 0.332 | +| entropy_loss | -7.91 | +| explained_variance | 0.0077 | +| learning_rate | 8.88e-05 | +| loss | 9.06 | +| n_updates | 110 | +| 
policy_gradient_loss | -0.0198 | +| value_loss | 6.01 | +----------------------------------------- +--------------------------------------- +| rollout/ | | +| ep_len_mean | 2.05e+03 | +| ep_rew_mean | 3 | +| time/ | | +| fps | 376 | +| iterations | 13 | +| time_elapsed | 39 | +| total_timesteps | 14898 | +| train/ | | +| approx_kl | 0.0319912 | +| clip_fraction | 0.138 | +| clip_range | 0.332 | +| entropy_loss | -7.87 | +| explained_variance | -0.236 | +| learning_rate | 8.88e-05 | +| loss | 0.376 | +| n_updates | 120 | +| policy_gradient_loss | -0.0228 | +| value_loss | 5.92 | +--------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.05e+03 | +| ep_rew_mean | 3 | +| time/ | | +| fps | 374 | +| iterations | 14 | +| time_elapsed | 42 | +| total_timesteps | 16044 | +| train/ | | +| approx_kl | 0.029652404 | +| clip_fraction | 0.153 | +| clip_range | 0.332 | +| entropy_loss | -7.91 | +| explained_variance | -0.0174 | +| learning_rate | 8.88e-05 | +| loss | 0.688 | +| n_updates | 130 | +| policy_gradient_loss | -0.0226 | +| value_loss | 23.6 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.09e+03 | +| ep_rew_mean | -8.75 | +| time/ | | +| fps | 372 | +| iterations | 15 | +| time_elapsed | 46 | +| total_timesteps | 17190 | +| train/ | | +| approx_kl | 0.034059085 | +| clip_fraction | 0.172 | +| clip_range | 0.332 | +| entropy_loss | -7.86 | +| explained_variance | -0.0714 | +| learning_rate | 8.88e-05 | +| loss | 0.296 | +| n_updates | 140 | +| policy_gradient_loss | -0.0245 | +| value_loss | 7.12 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.02e+03 | +| ep_rew_mean | -40.6 | +| time/ | | +| fps | 372 | +| iterations | 16 | +| time_elapsed | 49 | +| total_timesteps | 18336 | +| train/ | | +| approx_kl | 0.032943897 | +| clip_fraction | 0.121 | +| clip_range | 
0.332 | +| entropy_loss | -7.86 | +| explained_variance | -0.16 | +| learning_rate | 8.88e-05 | +| loss | 8.37 | +| n_updates | 150 | +| policy_gradient_loss | -0.0113 | +| value_loss | 8.67 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.02e+03 | +| ep_rew_mean | -40.6 | +| time/ | | +| fps | 371 | +| iterations | 17 | +| time_elapsed | 52 | +| total_timesteps | 19482 | +| train/ | | +| approx_kl | 0.03772574 | +| clip_fraction | 0.124 | +| clip_range | 0.332 | +| entropy_loss | -7.87 | +| explained_variance | 0.0207 | +| learning_rate | 8.88e-05 | +| loss | 2.78 | +| n_updates | 160 | +| policy_gradient_loss | -0.0135 | +| value_loss | 53.4 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.02e+03 | +| ep_rew_mean | -40.6 | +| time/ | | +| fps | 370 | +| iterations | 18 | +| time_elapsed | 55 | +| total_timesteps | 20628 | +| train/ | | +| approx_kl | 0.024023427 | +| clip_fraction | 0.16 | +| clip_range | 0.332 | +| entropy_loss | -7.73 | +| explained_variance | 0.0897 | +| learning_rate | 8.88e-05 | +| loss | 0.491 | +| n_updates | 170 | +| policy_gradient_loss | -0.0294 | +| value_loss | 4.74 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.11e+03 | +| ep_rew_mean | -28.9 | +| time/ | | +| fps | 370 | +| iterations | 19 | +| time_elapsed | 58 | +| total_timesteps | 21774 | +| train/ | | +| approx_kl | 0.030223705 | +| clip_fraction | 0.123 | +| clip_range | 0.332 | +| entropy_loss | -7.82 | +| explained_variance | -0.0303 | +| learning_rate | 8.88e-05 | +| loss | 1.69 | +| n_updates | 180 | +| policy_gradient_loss | -0.0151 | +| value_loss | 5.96 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.05e+03 | +| ep_rew_mean | -46 | +| time/ | | +| fps | 371 | +| iterations | 20 | 
+| time_elapsed | 61 | +| total_timesteps | 22920 | +| train/ | | +| approx_kl | 0.03593646 | +| clip_fraction | 0.194 | +| clip_range | 0.332 | +| entropy_loss | -7.64 | +| explained_variance | 0.161 | +| learning_rate | 8.88e-05 | +| loss | 4.26 | +| n_updates | 190 | +| policy_gradient_loss | -0.0292 | +| value_loss | 8.23 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 1.96e+03 | +| ep_rew_mean | -21.8 | +| time/ | | +| fps | 371 | +| iterations | 21 | +| time_elapsed | 64 | +| total_timesteps | 24066 | +| train/ | | +| approx_kl | 0.023679743 | +| clip_fraction | 0.111 | +| clip_range | 0.332 | +| entropy_loss | -7.71 | +| explained_variance | 0.126 | +| learning_rate | 8.88e-05 | +| loss | 4.02 | +| n_updates | 200 | +| policy_gradient_loss | -0.0215 | +| value_loss | 24.6 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 1.96e+03 | +| ep_rew_mean | -21.8 | +| time/ | | +| fps | 371 | +| iterations | 22 | +| time_elapsed | 67 | +| total_timesteps | 25212 | +| train/ | | +| approx_kl | 0.026022209 | +| clip_fraction | 0.178 | +| clip_range | 0.332 | +| entropy_loss | -7.65 | +| explained_variance | -0.2 | +| learning_rate | 8.88e-05 | +| loss | 2.98 | +| n_updates | 210 | +| policy_gradient_loss | -0.0145 | +| value_loss | 44.2 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 1.97e+03 | +| ep_rew_mean | -21.3 | +| time/ | | +| fps | 372 | +| iterations | 23 | +| time_elapsed | 70 | +| total_timesteps | 26358 | +| train/ | | +| approx_kl | 0.02791216 | +| clip_fraction | 0.104 | +| clip_range | 0.332 | +| entropy_loss | -7.58 | +| explained_variance | 0.0539 | +| learning_rate | 8.88e-05 | +| loss | 1.41 | +| n_updates | 220 | +| policy_gradient_loss | -0.0185 | +| value_loss | 6.16 | +---------------------------------------- 
+---------------------------------------- +| rollout/ | | +| ep_len_mean | 1.95e+03 | +| ep_rew_mean | -21.6 | +| time/ | | +| fps | 372 | +| iterations | 24 | +| time_elapsed | 73 | +| total_timesteps | 27504 | +| train/ | | +| approx_kl | 0.03433499 | +| clip_fraction | 0.138 | +| clip_range | 0.332 | +| entropy_loss | -7.69 | +| explained_variance | -0.0641 | +| learning_rate | 8.88e-05 | +| loss | 4.67 | +| n_updates | 230 | +| policy_gradient_loss | -0.0315 | +| value_loss | 4.38 | +---------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 1.88e+03 | +| ep_rew_mean | -42.7 | +| time/ | | +| fps | 372 | +| iterations | 25 | +| time_elapsed | 76 | +| total_timesteps | 28650 | +| train/ | | +| approx_kl | 0.03307491 | +| clip_fraction | 0.157 | +| clip_range | 0.332 | +| entropy_loss | -7.54 | +| explained_variance | 0.074 | +| learning_rate | 8.88e-05 | +| loss | 2.61 | +| n_updates | 240 | +| policy_gradient_loss | -0.0279 | +| value_loss | 9.81 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 1.84e+03 | +| ep_rew_mean | -59.4 | +| time/ | | +| fps | 372 | +| iterations | 26 | +| time_elapsed | 79 | +| total_timesteps | 29796 | +| train/ | | +| approx_kl | 0.041843403 | +| clip_fraction | 0.136 | +| clip_range | 0.332 | +| entropy_loss | -7.47 | +| explained_variance | -0.00451 | +| learning_rate | 8.88e-05 | +| loss | 4.38 | +| n_updates | 250 | +| policy_gradient_loss | -0.0115 | +| value_loss | 71.8 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 1.84e+03 | +| ep_rew_mean | -59.4 | +| time/ | | +| fps | 372 | +| iterations | 27 | +| time_elapsed | 83 | +| total_timesteps | 30942 | +| train/ | | +| approx_kl | 0.036084294 | +| clip_fraction | 0.154 | +| clip_range | 0.332 | +| entropy_loss | -7.33 | +| explained_variance | 0.0532 | +| learning_rate | 8.88e-05 | 
+| loss | 3.64 | +| n_updates | 260 | +| policy_gradient_loss | -0.0072 | +| value_loss | 58.6 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 1.86e+03 | +| ep_rew_mean | -42.5 | +| time/ | | +| fps | 372 | +| iterations | 28 | +| time_elapsed | 86 | +| total_timesteps | 32088 | +| train/ | | +| approx_kl | 0.036504086 | +| clip_fraction | 0.141 | +| clip_range | 0.332 | +| entropy_loss | -7.55 | +| explained_variance | 0.148 | +| learning_rate | 8.88e-05 | +| loss | 1.76 | +| n_updates | 270 | +| policy_gradient_loss | -0.0167 | +| value_loss | 12.2 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 1.86e+03 | +| ep_rew_mean | -42.5 | +| time/ | | +| fps | 370 | +| iterations | 29 | +| time_elapsed | 89 | +| total_timesteps | 33234 | +| train/ | | +| approx_kl | 0.034296088 | +| clip_fraction | 0.187 | +| clip_range | 0.332 | +| entropy_loss | -7.61 | +| explained_variance | 0.0593 | +| learning_rate | 8.88e-05 | +| loss | 12.4 | +| n_updates | 280 | +| policy_gradient_loss | -0.0195 | +| value_loss | 31.1 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 1.88e+03 | +| ep_rew_mean | -54.7 | +| time/ | | +| fps | 368 | +| iterations | 30 | +| time_elapsed | 93 | +| total_timesteps | 34380 | +| train/ | | +| approx_kl | 0.029918602 | +| clip_fraction | 0.173 | +| clip_range | 0.332 | +| entropy_loss | -7.44 | +| explained_variance | 0.178 | +| learning_rate | 8.88e-05 | +| loss | 2.16 | +| n_updates | 290 | +| policy_gradient_loss | -0.0263 | +| value_loss | 5.47 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 1.88e+03 | +| ep_rew_mean | -54.7 | +| time/ | | +| fps | 367 | +| iterations | 31 | +| time_elapsed | 96 | +| total_timesteps | 35526 | +| train/ | | +| approx_kl | 0.04137721 | 
+| clip_fraction | 0.132 | +| clip_range | 0.332 | +| entropy_loss | -7.45 | +| explained_variance | 0.00431 | +| learning_rate | 8.88e-05 | +| loss | 1.78 | +| n_updates | 300 | +| policy_gradient_loss | -0.0145 | +| value_loss | 39.8 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 1.92e+03 | +| ep_rew_mean | -53.7 | +| time/ | | +| fps | 367 | +| iterations | 32 | +| time_elapsed | 99 | +| total_timesteps | 36672 | +| train/ | | +| approx_kl | 0.030837413 | +| clip_fraction | 0.134 | +| clip_range | 0.332 | +| entropy_loss | -7.38 | +| explained_variance | 0.0959 | +| learning_rate | 8.88e-05 | +| loss | 0.065 | +| n_updates | 310 | +| policy_gradient_loss | -0.031 | +| value_loss | 3.25 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 1.92e+03 | +| ep_rew_mean | -53.7 | +| time/ | | +| fps | 365 | +| iterations | 33 | +| time_elapsed | 103 | +| total_timesteps | 37818 | +| train/ | | +| approx_kl | 0.029948711 | +| clip_fraction | 0.139 | +| clip_range | 0.332 | +| entropy_loss | -7.3 | +| explained_variance | 0.16 | +| learning_rate | 8.88e-05 | +| loss | 2.53 | +| n_updates | 320 | +| policy_gradient_loss | -0.021 | +| value_loss | 7.3 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 1.92e+03 | +| ep_rew_mean | -62.9 | +| time/ | | +| fps | 364 | +| iterations | 34 | +| time_elapsed | 106 | +| total_timesteps | 38964 | +| train/ | | +| approx_kl | 0.03208603 | +| clip_fraction | 0.163 | +| clip_range | 0.332 | +| entropy_loss | -7.35 | +| explained_variance | 0.152 | +| learning_rate | 8.88e-05 | +| loss | 0.512 | +| n_updates | 330 | +| policy_gradient_loss | -0.0206 | +| value_loss | 6.39 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 1.91e+03 | +| ep_rew_mean | -62.5 | +| 
time/ | | +| fps | 363 | +| iterations | 35 | +| time_elapsed | 110 | +| total_timesteps | 40110 | +| train/ | | +| approx_kl | 0.028910978 | +| clip_fraction | 0.148 | +| clip_range | 0.332 | +| entropy_loss | -7.23 | +| explained_variance | -0.049 | +| learning_rate | 8.88e-05 | +| loss | 1.4 | +| n_updates | 340 | +| policy_gradient_loss | -0.0218 | +| value_loss | 28.6 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 1.91e+03 | +| ep_rew_mean | -62.5 | +| time/ | | +| fps | 362 | +| iterations | 36 | +| time_elapsed | 113 | +| total_timesteps | 41256 | +| train/ | | +| approx_kl | 0.034623235 | +| clip_fraction | 0.122 | +| clip_range | 0.332 | +| entropy_loss | -7.21 | +| explained_variance | 0.00912 | +| learning_rate | 8.88e-05 | +| loss | 4.85 | +| n_updates | 350 | +| policy_gradient_loss | -0.0166 | +| value_loss | 17.1 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 1.88e+03 | +| ep_rew_mean | -53.5 | +| time/ | | +| fps | 362 | +| iterations | 37 | +| time_elapsed | 117 | +| total_timesteps | 42402 | +| train/ | | +| approx_kl | 0.025375202 | +| clip_fraction | 0.146 | +| clip_range | 0.332 | +| entropy_loss | -7.14 | +| explained_variance | 0.27 | +| learning_rate | 8.88e-05 | +| loss | 4.6 | +| n_updates | 360 | +| policy_gradient_loss | -0.0222 | +| value_loss | 7.8 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 1.87e+03 | +| ep_rew_mean | -66.1 | +| time/ | | +| fps | 361 | +| iterations | 38 | +| time_elapsed | 120 | +| total_timesteps | 43548 | +| train/ | | +| approx_kl | 0.042737268 | +| clip_fraction | 0.15 | +| clip_range | 0.332 | +| entropy_loss | -7.07 | +| explained_variance | 0.0585 | +| learning_rate | 8.88e-05 | +| loss | 3.26 | +| n_updates | 370 | +| policy_gradient_loss | -0.0223 | +| value_loss | 15.6 | 
+----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 1.85e+03 | +| ep_rew_mean | -74.1 | +| time/ | | +| fps | 361 | +| iterations | 39 | +| time_elapsed | 123 | +| total_timesteps | 44694 | +| train/ | | +| approx_kl | 0.027932568 | +| clip_fraction | 0.129 | +| clip_range | 0.332 | +| entropy_loss | -6.9 | +| explained_variance | 0.0755 | +| learning_rate | 8.88e-05 | +| loss | 2.24 | +| n_updates | 380 | +| policy_gradient_loss | -0.0163 | +| value_loss | 49.9 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 1.85e+03 | +| ep_rew_mean | -74.1 | +| time/ | | +| fps | 362 | +| iterations | 40 | +| time_elapsed | 126 | +| total_timesteps | 45840 | +| train/ | | +| approx_kl | 0.030873783 | +| clip_fraction | 0.0904 | +| clip_range | 0.332 | +| entropy_loss | -6.99 | +| explained_variance | 0.152 | +| learning_rate | 8.88e-05 | +| loss | 2.15 | +| n_updates | 390 | +| policy_gradient_loss | -0.0199 | +| value_loss | 42.7 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 1.85e+03 | +| ep_rew_mean | -82.7 | +| time/ | | +| fps | 362 | +| iterations | 41 | +| time_elapsed | 129 | +| total_timesteps | 46986 | +| train/ | | +| approx_kl | 0.026348379 | +| clip_fraction | 0.126 | +| clip_range | 0.332 | +| entropy_loss | -7.23 | +| explained_variance | 0.0323 | +| learning_rate | 8.88e-05 | +| loss | 0.438 | +| n_updates | 400 | +| policy_gradient_loss | -0.0228 | +| value_loss | 6.85 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 1.85e+03 | +| ep_rew_mean | -82.7 | +| time/ | | +| fps | 362 | +| iterations | 42 | +| time_elapsed | 132 | +| total_timesteps | 48132 | +| train/ | | +| approx_kl | 0.04224583 | +| clip_fraction | 0.206 | +| clip_range | 0.332 | +| entropy_loss | -7.16 | +| 
explained_variance | 0.173 | +| learning_rate | 8.88e-05 | +| loss | 1.07 | +| n_updates | 410 | +| policy_gradient_loss | -0.0242 | +| value_loss | 38 | +---------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 1.86e+03 | +| ep_rew_mean | -75.8 | +| time/ | | +| fps | 362 | +| iterations | 43 | +| time_elapsed | 136 | +| total_timesteps | 49278 | +| train/ | | +| approx_kl | 0.03634002 | +| clip_fraction | 0.164 | +| clip_range | 0.332 | +| entropy_loss | -7.21 | +| explained_variance | 0.28 | +| learning_rate | 8.88e-05 | +| loss | 3.7 | +| n_updates | 420 | +| policy_gradient_loss | -0.032 | +| value_loss | 9.29 | +---------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 1.86e+03 | +| ep_rew_mean | -73.5 | +| time/ | | +| fps | 362 | +| iterations | 44 | +| time_elapsed | 139 | +| total_timesteps | 50424 | +| train/ | | +| approx_kl | 0.03677476 | +| clip_fraction | 0.153 | +| clip_range | 0.332 | +| entropy_loss | -7.17 | +| explained_variance | 0.11 | +| learning_rate | 8.88e-05 | +| loss | 0.533 | +| n_updates | 430 | +| policy_gradient_loss | -0.0277 | +| value_loss | 9.42 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 1.86e+03 | +| ep_rew_mean | -73.5 | +| time/ | | +| fps | 362 | +| iterations | 45 | +| time_elapsed | 142 | +| total_timesteps | 51570 | +| train/ | | +| approx_kl | 0.034621768 | +| clip_fraction | 0.154 | +| clip_range | 0.332 | +| entropy_loss | -7.17 | +| explained_variance | 0.368 | +| learning_rate | 8.88e-05 | +| loss | 5.39 | +| n_updates | 440 | +| policy_gradient_loss | -0.0276 | +| value_loss | 8.99 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 1.85e+03 | +| ep_rew_mean | -64.9 | +| time/ | | +| fps | 362 | +| iterations | 46 | +| time_elapsed | 145 | +| total_timesteps 
| 52716 | +| train/ | | +| approx_kl | 0.036459163 | +| clip_fraction | 0.165 | +| clip_range | 0.332 | +| entropy_loss | -7.21 | +| explained_variance | 0.374 | +| learning_rate | 8.88e-05 | +| loss | 12.4 | +| n_updates | 450 | +| policy_gradient_loss | -0.037 | +| value_loss | 9.29 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 1.85e+03 | +| ep_rew_mean | -60.5 | +| time/ | | +| fps | 361 | +| iterations | 47 | +| time_elapsed | 148 | +| total_timesteps | 53862 | +| train/ | | +| approx_kl | 0.03863145 | +| clip_fraction | 0.144 | +| clip_range | 0.332 | +| entropy_loss | -7.03 | +| explained_variance | 0.36 | +| learning_rate | 8.88e-05 | +| loss | 0.534 | +| n_updates | 460 | +| policy_gradient_loss | -0.0239 | +| value_loss | 10.8 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 1.85e+03 | +| ep_rew_mean | -60.5 | +| time/ | | +| fps | 361 | +| iterations | 48 | +| time_elapsed | 152 | +| total_timesteps | 55008 | +| train/ | | +| approx_kl | 0.049923413 | +| clip_fraction | 0.157 | +| clip_range | 0.332 | +| entropy_loss | -7.04 | +| explained_variance | 0.0693 | +| learning_rate | 8.88e-05 | +| loss | 5.03 | +| n_updates | 470 | +| policy_gradient_loss | -0.0285 | +| value_loss | 11.7 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 1.86e+03 | +| ep_rew_mean | -64.5 | +| time/ | | +| fps | 361 | +| iterations | 49 | +| time_elapsed | 155 | +| total_timesteps | 56154 | +| train/ | | +| approx_kl | 0.03346165 | +| clip_fraction | 0.145 | +| clip_range | 0.332 | +| entropy_loss | -7 | +| explained_variance | -0.0154 | +| learning_rate | 8.88e-05 | +| loss | 2.6 | +| n_updates | 480 | +| policy_gradient_loss | -0.0256 | +| value_loss | 2.62 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| 
ep_len_mean | 1.86e+03 | +| ep_rew_mean | -64.5 | +| time/ | | +| fps | 361 | +| iterations | 50 | +| time_elapsed | 158 | +| total_timesteps | 57300 | +| train/ | | +| approx_kl | 0.041344777 | +| clip_fraction | 0.162 | +| clip_range | 0.332 | +| entropy_loss | -7.07 | +| explained_variance | 0.133 | +| learning_rate | 8.88e-05 | +| loss | 5.4 | +| n_updates | 490 | +| policy_gradient_loss | -0.0249 | +| value_loss | 20.1 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 1.88e+03 | +| ep_rew_mean | -64.6 | +| time/ | | +| fps | 360 | +| iterations | 51 | +| time_elapsed | 161 | +| total_timesteps | 58446 | +| train/ | | +| approx_kl | 0.03568868 | +| clip_fraction | 0.159 | +| clip_range | 0.332 | +| entropy_loss | -6.96 | +| explained_variance | 0.364 | +| learning_rate | 8.88e-05 | +| loss | 1.36 | +| n_updates | 500 | +| policy_gradient_loss | -0.0308 | +| value_loss | 4.16 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 1.88e+03 | +| ep_rew_mean | -64.6 | +| time/ | | +| fps | 360 | +| iterations | 52 | +| time_elapsed | 165 | +| total_timesteps | 59592 | +| train/ | | +| approx_kl | 0.032623842 | +| clip_fraction | 0.145 | +| clip_range | 0.332 | +| entropy_loss | -6.74 | +| explained_variance | 0.407 | +| learning_rate | 8.88e-05 | +| loss | 13.7 | +| n_updates | 510 | +| policy_gradient_loss | -0.0268 | +| value_loss | 7.94 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 1.87e+03 | +| ep_rew_mean | -68.9 | +| time/ | | +| fps | 359 | +| iterations | 53 | +| time_elapsed | 168 | +| total_timesteps | 60738 | +| train/ | | +| approx_kl | 0.027725061 | +| clip_fraction | 0.126 | +| clip_range | 0.332 | +| entropy_loss | -6.87 | +| explained_variance | 0.437 | +| learning_rate | 8.88e-05 | +| loss | 0.632 | +| n_updates | 520 | +| 
policy_gradient_loss | -0.0337 | +| value_loss | 4.87 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 1.87e+03 | +| ep_rew_mean | -68.9 | +| time/ | | +| fps | 359 | +| iterations | 54 | +| time_elapsed | 172 | +| total_timesteps | 61884 | +| train/ | | +| approx_kl | 0.06401909 | +| clip_fraction | 0.228 | +| clip_range | 0.332 | +| entropy_loss | -6.97 | +| explained_variance | 0.0456 | +| learning_rate | 8.88e-05 | +| loss | 1.16 | +| n_updates | 530 | +| policy_gradient_loss | -0.0282 | +| value_loss | 24.5 | +---------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 1.88e+03 | +| ep_rew_mean | -75.2 | +| time/ | | +| fps | 359 | +| iterations | 55 | +| time_elapsed | 175 | +| total_timesteps | 63030 | +| train/ | | +| approx_kl | 0.06221285 | +| clip_fraction | 0.215 | +| clip_range | 0.332 | +| entropy_loss | -7.02 | +| explained_variance | 0.025 | +| learning_rate | 8.88e-05 | +| loss | 4.39 | +| n_updates | 540 | +| policy_gradient_loss | -0.0343 | +| value_loss | 5.32 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 1.89e+03 | +| ep_rew_mean | -80.6 | +| time/ | | +| fps | 359 | +| iterations | 56 | +| time_elapsed | 178 | +| total_timesteps | 64176 | +| train/ | | +| approx_kl | 0.042404637 | +| clip_fraction | 0.201 | +| clip_range | 0.332 | +| entropy_loss | -6.95 | +| explained_variance | -0.0445 | +| learning_rate | 8.88e-05 | +| loss | 9.84 | +| n_updates | 550 | +| policy_gradient_loss | -0.0255 | +| value_loss | 32.7 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 1.89e+03 | +| ep_rew_mean | -80.6 | +| time/ | | +| fps | 359 | +| iterations | 57 | +| time_elapsed | 181 | +| total_timesteps | 65322 | +| train/ | | +| approx_kl | 0.04397238 | +| clip_fraction | 0.151 | +| clip_range 
| 0.332 | +| entropy_loss | -6.98 | +| explained_variance | 0.0972 | +| learning_rate | 8.88e-05 | +| loss | 26.8 | +| n_updates | 560 | +| policy_gradient_loss | -0.0251 | +| value_loss | 38.3 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 1.88e+03 | +| ep_rew_mean | -83 | +| time/ | | +| fps | 360 | +| iterations | 58 | +| time_elapsed | 184 | +| total_timesteps | 66468 | +| train/ | | +| approx_kl | 0.044086635 | +| clip_fraction | 0.197 | +| clip_range | 0.332 | +| entropy_loss | -7.2 | +| explained_variance | 0.118 | +| learning_rate | 8.88e-05 | +| loss | 1.07 | +| n_updates | 570 | +| policy_gradient_loss | -0.0239 | +| value_loss | 5.21 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 1.88e+03 | +| ep_rew_mean | -83 | +| time/ | | +| fps | 360 | +| iterations | 59 | +| time_elapsed | 187 | +| total_timesteps | 67614 | +| train/ | | +| approx_kl | 0.052704122 | +| clip_fraction | 0.203 | +| clip_range | 0.332 | +| entropy_loss | -6.97 | +| explained_variance | 0.0489 | +| learning_rate | 8.88e-05 | +| loss | 1.06 | +| n_updates | 580 | +| policy_gradient_loss | -0.0281 | +| value_loss | 17.1 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 1.9e+03 | +| ep_rew_mean | -89.1 | +| time/ | | +| fps | 360 | +| iterations | 60 | +| time_elapsed | 190 | +| total_timesteps | 68760 | +| train/ | | +| approx_kl | 0.046419837 | +| clip_fraction | 0.196 | +| clip_range | 0.332 | +| entropy_loss | -7.05 | +| explained_variance | -0.0384 | +| learning_rate | 8.88e-05 | +| loss | 2.11 | +| n_updates | 590 | +| policy_gradient_loss | -0.027 | +| value_loss | 2.89 | +----------------------------------------- +--------------------------------------- +| rollout/ | | +| ep_len_mean | 1.86e+03 | +| ep_rew_mean | -79.1 | +| time/ | | +| fps | 360 | +| iterations | 61 
| +| time_elapsed | 193 | +| total_timesteps | 69906 | +| train/ | | +| approx_kl | 0.0403891 | +| clip_fraction | 0.169 | +| clip_range | 0.332 | +| entropy_loss | -7.04 | +| explained_variance | 0.142 | +| learning_rate | 8.88e-05 | +| loss | 18.5 | +| n_updates | 600 | +| policy_gradient_loss | -0.026 | +| value_loss | 43.4 | +--------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 1.85e+03 | +| ep_rew_mean | -85.6 | +| time/ | | +| fps | 360 | +| iterations | 62 | +| time_elapsed | 196 | +| total_timesteps | 71052 | +| train/ | | +| approx_kl | 0.04120052 | +| clip_fraction | 0.175 | +| clip_range | 0.332 | +| entropy_loss | -7.16 | +| explained_variance | 0.0448 | +| learning_rate | 8.88e-05 | +| loss | 1.63 | +| n_updates | 610 | +| policy_gradient_loss | -0.0246 | +| value_loss | 42 | +---------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 1.85e+03 | +| ep_rew_mean | -85.6 | +| time/ | | +| fps | 361 | +| iterations | 63 | +| time_elapsed | 199 | +| total_timesteps | 72198 | +| train/ | | +| approx_kl | 0.04098662 | +| clip_fraction | 0.184 | +| clip_range | 0.332 | +| entropy_loss | -6.94 | +| explained_variance | 0.137 | +| learning_rate | 8.88e-05 | +| loss | 5.39 | +| n_updates | 620 | +| policy_gradient_loss | -0.0287 | +| value_loss | 44 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 1.87e+03 | +| ep_rew_mean | -88 | +| time/ | | +| fps | 361 | +| iterations | 64 | +| time_elapsed | 202 | +| total_timesteps | 73344 | +| train/ | | +| approx_kl | 0.056530125 | +| clip_fraction | 0.226 | +| clip_range | 0.332 | +| entropy_loss | -7.3 | +| explained_variance | 0.307 | +| learning_rate | 8.88e-05 | +| loss | 1.07 | +| n_updates | 630 | +| policy_gradient_loss | -0.0322 | +| value_loss | 8.53 | +----------------------------------------- 
+---------------------------------------- +| rollout/ | | +| ep_len_mean | 1.86e+03 | +| ep_rew_mean | -93.7 | +| time/ | | +| fps | 361 | +| iterations | 65 | +| time_elapsed | 205 | +| total_timesteps | 74490 | +| train/ | | +| approx_kl | 0.05633619 | +| clip_fraction | 0.226 | +| clip_range | 0.332 | +| entropy_loss | -7.17 | +| explained_variance | 0.406 | +| learning_rate | 8.88e-05 | +| loss | 10.5 | +| n_updates | 640 | +| policy_gradient_loss | -0.0258 | +| value_loss | 15 | +---------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 1.86e+03 | +| ep_rew_mean | -93.7 | +| time/ | | +| fps | 361 | +| iterations | 66 | +| time_elapsed | 209 | +| total_timesteps | 75636 | +| train/ | | +| approx_kl | 0.08828102 | +| clip_fraction | 0.324 | +| clip_range | 0.332 | +| entropy_loss | -6.98 | +| explained_variance | 0.132 | +| learning_rate | 8.88e-05 | +| loss | 3 | +| n_updates | 650 | +| policy_gradient_loss | -0.0222 | +| value_loss | 37.7 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 1.85e+03 | +| ep_rew_mean | -95.9 | +| time/ | | +| fps | 360 | +| iterations | 67 | +| time_elapsed | 213 | +| total_timesteps | 76782 | +| train/ | | +| approx_kl | 0.042576507 | +| clip_fraction | 0.216 | +| clip_range | 0.332 | +| entropy_loss | -6.86 | +| explained_variance | 0.0496 | +| learning_rate | 8.88e-05 | +| loss | 6.04 | +| n_updates | 660 | +| policy_gradient_loss | -0.0288 | +| value_loss | 8.43 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 1.85e+03 | +| ep_rew_mean | -91.6 | +| time/ | | +| fps | 359 | +| iterations | 68 | +| time_elapsed | 216 | +| total_timesteps | 77928 | +| train/ | | +| approx_kl | 0.04540308 | +| clip_fraction | 0.229 | +| clip_range | 0.332 | +| entropy_loss | -7.07 | +| explained_variance | 0.249 | +| learning_rate | 8.88e-05 | +| loss 
| 5.47 | +| n_updates | 670 | +| policy_gradient_loss | -0.0288 | +| value_loss | 16 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 1.85e+03 | +| ep_rew_mean | -91.6 | +| time/ | | +| fps | 359 | +| iterations | 69 | +| time_elapsed | 219 | +| total_timesteps | 79074 | +| train/ | | +| approx_kl | 0.040512584 | +| clip_fraction | 0.211 | +| clip_range | 0.332 | +| entropy_loss | -7.02 | +| explained_variance | -0.00908 | +| learning_rate | 8.88e-05 | +| loss | 1.86 | +| n_updates | 680 | +| policy_gradient_loss | -0.0131 | +| value_loss | 13.4 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 1.85e+03 | +| ep_rew_mean | -91.6 | +| time/ | | +| fps | 359 | +| iterations | 70 | +| time_elapsed | 223 | +| total_timesteps | 80220 | +| train/ | | +| approx_kl | 0.05254833 | +| clip_fraction | 0.188 | +| clip_range | 0.332 | +| entropy_loss | -7.14 | +| explained_variance | 0.295 | +| learning_rate | 8.88e-05 | +| loss | 0.321 | +| n_updates | 690 | +| policy_gradient_loss | -0.0257 | +| value_loss | 8.67 | +---------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 1.88e+03 | +| ep_rew_mean | -94 | +| time/ | | +| fps | 358 | +| iterations | 71 | +| time_elapsed | 226 | +| total_timesteps | 81366 | +| train/ | | +| approx_kl | 0.04479891 | +| clip_fraction | 0.224 | +| clip_range | 0.332 | +| entropy_loss | -7.18 | +| explained_variance | -0.0191 | +| learning_rate | 8.88e-05 | +| loss | 0.529 | +| n_updates | 700 | +| policy_gradient_loss | -0.0383 | +| value_loss | 1.7 | +---------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 1.88e+03 | +| ep_rew_mean | -94 | +| time/ | | +| fps | 358 | +| iterations | 72 | +| time_elapsed | 230 | +| total_timesteps | 82512 | +| train/ | | +| approx_kl | 0.05337418 | +| 
clip_fraction | 0.207 | +| clip_range | 0.332 | +| entropy_loss | -7.21 | +| explained_variance | 0.168 | +| learning_rate | 8.88e-05 | +| loss | 0.286 | +| n_updates | 710 | +| policy_gradient_loss | -0.034 | +| value_loss | 10.8 | +---------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 1.9e+03 | +| ep_rew_mean | -95.4 | +| time/ | | +| fps | 358 | +| iterations | 73 | +| time_elapsed | 233 | +| total_timesteps | 83658 | +| train/ | | +| approx_kl | 0.03758472 | +| clip_fraction | 0.184 | +| clip_range | 0.332 | +| entropy_loss | -7.42 | +| explained_variance | -0.0312 | +| learning_rate | 8.88e-05 | +| loss | 0.699 | +| n_updates | 720 | +| policy_gradient_loss | -0.0247 | +| value_loss | 6.12 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 1.9e+03 | +| ep_rew_mean | -95.4 | +| time/ | | +| fps | 358 | +| iterations | 74 | +| time_elapsed | 236 | +| total_timesteps | 84804 | +| train/ | | +| approx_kl | 0.058067992 | +| clip_fraction | 0.221 | +| clip_range | 0.332 | +| entropy_loss | -7.14 | +| explained_variance | 0.0152 | +| learning_rate | 8.88e-05 | +| loss | 0.904 | +| n_updates | 730 | +| policy_gradient_loss | -0.0133 | +| value_loss | 13.2 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 1.91e+03 | +| ep_rew_mean | -97.7 | +| time/ | | +| fps | 358 | +| iterations | 75 | +| time_elapsed | 239 | +| total_timesteps | 85950 | +| train/ | | +| approx_kl | 0.035781853 | +| clip_fraction | 0.175 | +| clip_range | 0.332 | +| entropy_loss | -7.28 | +| explained_variance | 0.0129 | +| learning_rate | 8.88e-05 | +| loss | 0.581 | +| n_updates | 740 | +| policy_gradient_loss | -0.019 | +| value_loss | 4.61 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 1.91e+03 | +| ep_rew_mean | -97.7 | +| 
time/ | | +| fps | 358 | +| iterations | 76 | +| time_elapsed | 242 | +| total_timesteps | 87096 | +| train/ | | +| approx_kl | 0.062611975 | +| clip_fraction | 0.322 | +| clip_range | 0.332 | +| entropy_loss | -7.08 | +| explained_variance | 0.06 | +| learning_rate | 8.88e-05 | +| loss | 1.4 | +| n_updates | 750 | +| policy_gradient_loss | -0.0256 | +| value_loss | 22.8 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 1.91e+03 | +| ep_rew_mean | -97.7 | +| time/ | | +| fps | 358 | +| iterations | 77 | +| time_elapsed | 246 | +| total_timesteps | 88242 | +| train/ | | +| approx_kl | 0.05377618 | +| clip_fraction | 0.227 | +| clip_range | 0.332 | +| entropy_loss | -7.17 | +| explained_variance | 0.19 | +| learning_rate | 8.88e-05 | +| loss | 0.718 | +| n_updates | 760 | +| policy_gradient_loss | -0.0249 | +| value_loss | 8.35 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 1.92e+03 | +| ep_rew_mean | -95.3 | +| time/ | | +| fps | 358 | +| iterations | 78 | +| time_elapsed | 249 | +| total_timesteps | 89388 | +| train/ | | +| approx_kl | 0.050721783 | +| clip_fraction | 0.225 | +| clip_range | 0.332 | +| entropy_loss | -6.94 | +| explained_variance | -0.0205 | +| learning_rate | 8.88e-05 | +| loss | 0.33 | +| n_updates | 770 | +| policy_gradient_loss | -0.0391 | +| value_loss | 2.79 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 1.92e+03 | +| ep_rew_mean | -97.8 | +| time/ | | +| fps | 358 | +| iterations | 79 | +| time_elapsed | 252 | +| total_timesteps | 90534 | +| train/ | | +| approx_kl | 0.046041932 | +| clip_fraction | 0.215 | +| clip_range | 0.332 | +| entropy_loss | -7.21 | +| explained_variance | -0.076 | +| learning_rate | 8.88e-05 | +| loss | 2.6 | +| n_updates | 780 | +| policy_gradient_loss | -0.0325 | +| value_loss | 5.97 | 
+----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 1.9e+03 | +| ep_rew_mean | -90.9 | +| time/ | | +| fps | 359 | +| iterations | 80 | +| time_elapsed | 255 | +| total_timesteps | 91680 | +| train/ | | +| approx_kl | 0.07306535 | +| clip_fraction | 0.309 | +| clip_range | 0.332 | +| entropy_loss | -7.15 | +| explained_variance | 0.0302 | +| learning_rate | 8.88e-05 | +| loss | 1.4 | +| n_updates | 790 | +| policy_gradient_loss | -0.0324 | +| value_loss | 18.2 | +---------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 1.9e+03 | +| ep_rew_mean | -90.9 | +| time/ | | +| fps | 359 | +| iterations | 81 | +| time_elapsed | 258 | +| total_timesteps | 92826 | +| train/ | | +| approx_kl | 0.06419113 | +| clip_fraction | 0.28 | +| clip_range | 0.332 | +| entropy_loss | -6.92 | +| explained_variance | -0.326 | +| learning_rate | 8.88e-05 | +| loss | 1.45 | +| n_updates | 800 | +| policy_gradient_loss | 0.00325 | +| value_loss | 24.5 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 1.9e+03 | +| ep_rew_mean | -90.9 | +| time/ | | +| fps | 359 | +| iterations | 82 | +| time_elapsed | 261 | +| total_timesteps | 93972 | +| train/ | | +| approx_kl | 0.047866795 | +| clip_fraction | 0.237 | +| clip_range | 0.332 | +| entropy_loss | -7.02 | +| explained_variance | 0.135 | +| learning_rate | 8.88e-05 | +| loss | 0.433 | +| n_updates | 810 | +| policy_gradient_loss | -0.0285 | +| value_loss | 5.26 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 1.92e+03 | +| ep_rew_mean | -88.8 | +| time/ | | +| fps | 359 | +| iterations | 83 | +| time_elapsed | 264 | +| total_timesteps | 95118 | +| train/ | | +| approx_kl | 0.066993006 | +| clip_fraction | 0.28 | +| clip_range | 0.332 | +| entropy_loss | -7.12 | +| explained_variance | 
-0.127 | +| learning_rate | 8.88e-05 | +| loss | 4.38 | +| n_updates | 820 | +| policy_gradient_loss | -0.0312 | +| value_loss | 9.15 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 1.91e+03 | +| ep_rew_mean | -91.8 | +| time/ | | +| fps | 359 | +| iterations | 84 | +| time_elapsed | 267 | +| total_timesteps | 96264 | +| train/ | | +| approx_kl | 0.05563952 | +| clip_fraction | 0.247 | +| clip_range | 0.332 | +| entropy_loss | -7.08 | +| explained_variance | 0.154 | +| learning_rate | 8.88e-05 | +| loss | 0.568 | +| n_updates | 830 | +| policy_gradient_loss | -0.0269 | +| value_loss | 8.24 | +---------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 1.91e+03 | +| ep_rew_mean | -91.8 | +| time/ | | +| fps | 359 | +| iterations | 85 | +| time_elapsed | 271 | +| total_timesteps | 97410 | +| train/ | | +| approx_kl | 0.05921689 | +| clip_fraction | 0.293 | +| clip_range | 0.332 | +| entropy_loss | -6.99 | +| explained_variance | 0.0197 | +| learning_rate | 8.88e-05 | +| loss | 1.75 | +| n_updates | 840 | +| policy_gradient_loss | -0.0203 | +| value_loss | 33 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 1.92e+03 | +| ep_rew_mean | -93.6 | +| time/ | | +| fps | 359 | +| iterations | 86 | +| time_elapsed | 274 | +| total_timesteps | 98556 | +| train/ | | +| approx_kl | 0.048403326 | +| clip_fraction | 0.219 | +| clip_range | 0.332 | +| entropy_loss | -6.95 | +| explained_variance | 0.00879 | +| learning_rate | 8.88e-05 | +| loss | 0.85 | +| n_updates | 850 | +| policy_gradient_loss | -0.0288 | +| value_loss | 6.54 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 1.92e+03 | +| ep_rew_mean | -93.6 | +| time/ | | +| fps | 358 | +| iterations | 87 | +| time_elapsed | 277 | +| total_timesteps | 99702 | +| 
train/ | | +| approx_kl | 0.04475287 | +| clip_fraction | 0.178 | +| clip_range | 0.332 | +| entropy_loss | -7.01 | +| explained_variance | 0.204 | +| learning_rate | 8.88e-05 | +| loss | 0.859 | +| n_updates | 860 | +| policy_gradient_loss | -0.0151 | +| value_loss | 17.5 | +---------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 1.93e+03 | +| ep_rew_mean | -93.8 | +| time/ | | +| fps | 358 | +| iterations | 88 | +| time_elapsed | 280 | +| total_timesteps | 100848 | +| train/ | | +| approx_kl | 0.06419406 | +| clip_fraction | 0.244 | +| clip_range | 0.332 | +| entropy_loss | -7 | +| explained_variance | -0.22 | +| learning_rate | 8.88e-05 | +| loss | 0.243 | +| n_updates | 870 | +| policy_gradient_loss | -0.025 | +| value_loss | 1.64 | +---------------------------------------- +[I 2023-03-30 22:24:11,258] Trial 8 finished with value: -343.0 and parameters: {'n_steps': 1146, 'gamma': 0.9192032939378013, 'learning_rate': 8.879233904874816e-05, 'clip_range': 0.3323467236435492, 'gae_lambda': 0.8683502580240515}. Best is trial 6 with value: -1.0. +Using cuda device +Wrapping the env in a DummyVecEnv. +Wrapping the env in a VecTransposeImage. +C:\ProgramData\Anaconda3\envs\StreetFighterAI\lib\site-packages\stable_baselines3\ppo\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 3539`, after every 55 untruncated mini-batches, there will be a truncated mini-batch of size 19 +We recommend using a `batch_size` that is a factor of `n_steps * n_envs`. 
+Info: (n_steps=3539 and n_envs=1) + warnings.warn( +Logging to logs/PPO_27 +--------------------------------- +| rollout/ | | +| ep_len_mean | 2.15e+03 | +| ep_rew_mean | -50 | +| time/ | | +| fps | 587 | +| iterations | 1 | +| time_elapsed | 6 | +| total_timesteps | 3539 | +--------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.52e+03 | +| ep_rew_mean | -45 | +| time/ | | +| fps | 444 | +| iterations | 2 | +| time_elapsed | 15 | +| total_timesteps | 7078 | +| train/ | | +| approx_kl | 0.006164868 | +| clip_fraction | 0.12 | +| clip_range | 0.155 | +| entropy_loss | -8.31 | +| explained_variance | -0.00174 | +| learning_rate | 5.95e-05 | +| loss | 0.546 | +| n_updates | 10 | +| policy_gradient_loss | -0.00714 | +| value_loss | 8.14 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.56e+03 | +| ep_rew_mean | -109 | +| time/ | | +| fps | 404 | +| iterations | 3 | +| time_elapsed | 26 | +| total_timesteps | 10617 | +| train/ | | +| approx_kl | 0.00478289 | +| clip_fraction | 0.0976 | +| clip_range | 0.155 | +| entropy_loss | -8.31 | +| explained_variance | 0.014 | +| learning_rate | 5.95e-05 | +| loss | 0.361 | +| n_updates | 20 | +| policy_gradient_loss | -0.00716 | +| value_loss | 6.41 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.6e+03 | +| ep_rew_mean | -129 | +| time/ | | +| fps | 385 | +| iterations | 4 | +| time_elapsed | 36 | +| total_timesteps | 14156 | +| train/ | | +| approx_kl | 0.007813611 | +| clip_fraction | 0.146 | +| clip_range | 0.155 | +| entropy_loss | -8.3 | +| explained_variance | 0.0131 | +| learning_rate | 5.95e-05 | +| loss | 2.84 | +| n_updates | 30 | +| policy_gradient_loss | -0.00689 | +| value_loss | 14 | +----------------------------------------- +------------------------------------------ +| rollout/ | | +| ep_len_mean | 2.52e+03 | +| 
ep_rew_mean | -111 | +| time/ | | +| fps | 377 | +| iterations | 5 | +| time_elapsed | 46 | +| total_timesteps | 17695 | +| train/ | | +| approx_kl | 0.0056522703 | +| clip_fraction | 0.0913 | +| clip_range | 0.155 | +| entropy_loss | -8.3 | +| explained_variance | 0.0146 | +| learning_rate | 5.95e-05 | +| loss | 0.276 | +| n_updates | 40 | +| policy_gradient_loss | -0.00467 | +| value_loss | 13.4 | +------------------------------------------ +------------------------------------------ +| rollout/ | | +| ep_len_mean | 2.47e+03 | +| ep_rew_mean | -126 | +| time/ | | +| fps | 373 | +| iterations | 6 | +| time_elapsed | 56 | +| total_timesteps | 21234 | +| train/ | | +| approx_kl | 0.0062621506 | +| clip_fraction | 0.123 | +| clip_range | 0.155 | +| entropy_loss | -8.29 | +| explained_variance | 0.0869 | +| learning_rate | 5.95e-05 | +| loss | 18.6 | +| n_updates | 50 | +| policy_gradient_loss | -0.00766 | +| value_loss | 7.72 | +------------------------------------------ +------------------------------------------ +| rollout/ | | +| ep_len_mean | 2.34e+03 | +| ep_rew_mean | -129 | +| time/ | | +| fps | 371 | +| iterations | 7 | +| time_elapsed | 66 | +| total_timesteps | 24773 | +| train/ | | +| approx_kl | 0.0071724947 | +| clip_fraction | 0.159 | +| clip_range | 0.155 | +| entropy_loss | -8.28 | +| explained_variance | 0.176 | +| learning_rate | 5.95e-05 | +| loss | 0.872 | +| n_updates | 60 | +| policy_gradient_loss | -0.00702 | +| value_loss | 11.1 | +------------------------------------------ +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.35e+03 | +| ep_rew_mean | -106 | +| time/ | | +| fps | 369 | +| iterations | 8 | +| time_elapsed | 76 | +| total_timesteps | 28312 | +| train/ | | +| approx_kl | 0.007235888 | +| clip_fraction | 0.147 | +| clip_range | 0.155 | +| entropy_loss | -8.28 | +| explained_variance | 0.0603 | +| learning_rate | 5.95e-05 | +| loss | 0.535 | +| n_updates | 70 | +| policy_gradient_loss | -0.00766 | +| 
value_loss | 20.2 | +----------------------------------------- +------------------------------------------ +| rollout/ | | +| ep_len_mean | 2.41e+03 | +| ep_rew_mean | -113 | +| time/ | | +| fps | 365 | +| iterations | 9 | +| time_elapsed | 87 | +| total_timesteps | 31851 | +| train/ | | +| approx_kl | 0.0057272953 | +| clip_fraction | 0.133 | +| clip_range | 0.155 | +| entropy_loss | -8.27 | +| explained_variance | 0.0634 | +| learning_rate | 5.95e-05 | +| loss | 0.31 | +| n_updates | 80 | +| policy_gradient_loss | -0.00732 | +| value_loss | 7.16 | +------------------------------------------ +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.72e+03 | +| ep_rew_mean | 198 | +| time/ | | +| fps | 362 | +| iterations | 10 | +| time_elapsed | 97 | +| total_timesteps | 35390 | +| train/ | | +| approx_kl | 0.006537366 | +| clip_fraction | 0.154 | +| clip_range | 0.155 | +| entropy_loss | -8.26 | +| explained_variance | -0.00102 | +| learning_rate | 5.95e-05 | +| loss | 9.6 | +| n_updates | 90 | +| policy_gradient_loss | -0.0035 | +| value_loss | 5.98e+03 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.66e+03 | +| ep_rew_mean | 176 | +| time/ | | +| fps | 361 | +| iterations | 11 | +| time_elapsed | 107 | +| total_timesteps | 38929 | +| train/ | | +| approx_kl | 0.007428738 | +| clip_fraction | 0.147 | +| clip_range | 0.155 | +| entropy_loss | -8.25 | +| explained_variance | 0.275 | +| learning_rate | 5.95e-05 | +| loss | 0.198 | +| n_updates | 100 | +| policy_gradient_loss | -0.0124 | +| value_loss | 4.22 | +----------------------------------------- +------------------------------------------ +| rollout/ | | +| ep_len_mean | 2.59e+03 | +| ep_rew_mean | 148 | +| time/ | | +| fps | 361 | +| iterations | 12 | +| time_elapsed | 117 | +| total_timesteps | 42468 | +| train/ | | +| approx_kl | 0.0071307733 | +| clip_fraction | 0.139 | +| clip_range | 0.155 | +| entropy_loss | 
-8.25 | +| explained_variance | 0.0965 | +| learning_rate | 5.95e-05 | +| loss | 0.68 | +| n_updates | 110 | +| policy_gradient_loss | -0.0106 | +| value_loss | 8.22 | +------------------------------------------ +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.55e+03 | +| ep_rew_mean | 114 | +| time/ | | +| fps | 360 | +| iterations | 13 | +| time_elapsed | 127 | +| total_timesteps | 46007 | +| train/ | | +| approx_kl | 0.007813596 | +| clip_fraction | 0.141 | +| clip_range | 0.155 | +| entropy_loss | -8.24 | +| explained_variance | 0.0396 | +| learning_rate | 5.95e-05 | +| loss | 0.861 | +| n_updates | 120 | +| policy_gradient_loss | -0.00828 | +| value_loss | 26.4 | +----------------------------------------- +------------------------------------------ +| rollout/ | | +| ep_len_mean | 2.49e+03 | +| ep_rew_mean | 89.8 | +| time/ | | +| fps | 358 | +| iterations | 14 | +| time_elapsed | 138 | +| total_timesteps | 49546 | +| train/ | | +| approx_kl | 0.0077910186 | +| clip_fraction | 0.148 | +| clip_range | 0.155 | +| entropy_loss | -8.23 | +| explained_variance | 0.0748 | +| learning_rate | 5.95e-05 | +| loss | 1.18 | +| n_updates | 130 | +| policy_gradient_loss | -0.00939 | +| value_loss | 16.7 | +------------------------------------------ +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.5e+03 | +| ep_rew_mean | 74 | +| time/ | | +| fps | 356 | +| iterations | 15 | +| time_elapsed | 149 | +| total_timesteps | 53085 | +| train/ | | +| approx_kl | 0.009532078 | +| clip_fraction | 0.184 | +| clip_range | 0.155 | +| entropy_loss | -8.23 | +| explained_variance | 0.191 | +| learning_rate | 5.95e-05 | +| loss | 1.42 | +| n_updates | 140 | +| policy_gradient_loss | -0.011 | +| value_loss | 20.1 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.52e+03 | +| ep_rew_mean | 61.8 | +| time/ | | +| fps | 353 | +| iterations | 16 | +| time_elapsed | 
160 | +| total_timesteps | 56624 | +| train/ | | +| approx_kl | 0.008085081 | +| clip_fraction | 0.178 | +| clip_range | 0.155 | +| entropy_loss | -8.23 | +| explained_variance | 0.185 | +| learning_rate | 5.95e-05 | +| loss | 0.266 | +| n_updates | 150 | +| policy_gradient_loss | -0.0106 | +| value_loss | 10.2 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.54e+03 | +| ep_rew_mean | 61.8 | +| time/ | | +| fps | 354 | +| iterations | 17 | +| time_elapsed | 169 | +| total_timesteps | 60163 | +| train/ | | +| approx_kl | 0.008233994 | +| clip_fraction | 0.181 | +| clip_range | 0.155 | +| entropy_loss | -8.22 | +| explained_variance | 0.143 | +| learning_rate | 5.95e-05 | +| loss | 0.381 | +| n_updates | 160 | +| policy_gradient_loss | -0.0085 | +| value_loss | 6.92 | +----------------------------------------- +---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.53e+03 | +| ep_rew_mean | 58.3 | +| time/ | | +| fps | 354 | +| iterations | 18 | +| time_elapsed | 179 | +| total_timesteps | 63702 | +| train/ | | +| approx_kl | 0.00866387 | +| clip_fraction | 0.173 | +| clip_range | 0.155 | +| entropy_loss | -8.21 | +| explained_variance | 0.21 | +| learning_rate | 5.95e-05 | +| loss | 0.428 | +| n_updates | 170 | +| policy_gradient_loss | -0.0136 | +| value_loss | 4.9 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.58e+03 | +| ep_rew_mean | 54.2 | +| time/ | | +| fps | 354 | +| iterations | 19 | +| time_elapsed | 189 | +| total_timesteps | 67241 | +| train/ | | +| approx_kl | 0.008494033 | +| clip_fraction | 0.195 | +| clip_range | 0.155 | +| entropy_loss | -8.2 | +| explained_variance | 0.0444 | +| learning_rate | 5.95e-05 | +| loss | 24.3 | +| n_updates | 180 | +| policy_gradient_loss | -0.00716 | +| value_loss | 16.3 | +----------------------------------------- 
+---------------------------------------- +| rollout/ | | +| ep_len_mean | 2.6e+03 | +| ep_rew_mean | 44.4 | +| time/ | | +| fps | 353 | +| iterations | 20 | +| time_elapsed | 200 | +| total_timesteps | 70780 | +| train/ | | +| approx_kl | 0.00970717 | +| clip_fraction | 0.195 | +| clip_range | 0.155 | +| entropy_loss | -8.17 | +| explained_variance | 0.282 | +| learning_rate | 5.95e-05 | +| loss | 0.367 | +| n_updates | 190 | +| policy_gradient_loss | -0.0132 | +| value_loss | 4.43 | +---------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.58e+03 | +| ep_rew_mean | 39.4 | +| time/ | | +| fps | 352 | +| iterations | 21 | +| time_elapsed | 211 | +| total_timesteps | 74319 | +| train/ | | +| approx_kl | 0.008659723 | +| clip_fraction | 0.206 | +| clip_range | 0.155 | +| entropy_loss | -8.18 | +| explained_variance | 0.214 | +| learning_rate | 5.95e-05 | +| loss | 0.268 | +| n_updates | 200 | +| policy_gradient_loss | -0.014 | +| value_loss | 6.89 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.59e+03 | +| ep_rew_mean | 39.8 | +| time/ | | +| fps | 352 | +| iterations | 22 | +| time_elapsed | 220 | +| total_timesteps | 77858 | +| train/ | | +| approx_kl | 0.011562935 | +| clip_fraction | 0.223 | +| clip_range | 0.155 | +| entropy_loss | -8.21 | +| explained_variance | 0.16 | +| learning_rate | 5.95e-05 | +| loss | 5.91 | +| n_updates | 210 | +| policy_gradient_loss | -0.00776 | +| value_loss | 8.23 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.58e+03 | +| ep_rew_mean | 25.9 | +| time/ | | +| fps | 352 | +| iterations | 23 | +| time_elapsed | 230 | +| total_timesteps | 81397 | +| train/ | | +| approx_kl | 0.011082681 | +| clip_fraction | 0.22 | +| clip_range | 0.155 | +| entropy_loss | -8.15 | +| explained_variance | 0.438 | +| learning_rate | 5.95e-05 | +| 
loss | 0.259 | +| n_updates | 220 | +| policy_gradient_loss | -0.017 | +| value_loss | 3.91 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.58e+03 | +| ep_rew_mean | 23.9 | +| time/ | | +| fps | 353 | +| iterations | 24 | +| time_elapsed | 240 | +| total_timesteps | 84936 | +| train/ | | +| approx_kl | 0.010984284 | +| clip_fraction | 0.237 | +| clip_range | 0.155 | +| entropy_loss | -8.15 | +| explained_variance | 0.0983 | +| learning_rate | 5.95e-05 | +| loss | 4.34 | +| n_updates | 230 | +| policy_gradient_loss | -0.0112 | +| value_loss | 14.1 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.59e+03 | +| ep_rew_mean | 18.8 | +| time/ | | +| fps | 353 | +| iterations | 25 | +| time_elapsed | 250 | +| total_timesteps | 88475 | +| train/ | | +| approx_kl | 0.012365894 | +| clip_fraction | 0.254 | +| clip_range | 0.155 | +| entropy_loss | -8.14 | +| explained_variance | 0.179 | +| learning_rate | 5.95e-05 | +| loss | 3.71 | +| n_updates | 240 | +| policy_gradient_loss | -0.0117 | +| value_loss | 7.45 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.56e+03 | +| ep_rew_mean | 9.77 | +| time/ | | +| fps | 352 | +| iterations | 26 | +| time_elapsed | 260 | +| total_timesteps | 92014 | +| train/ | | +| approx_kl | 0.010001008 | +| clip_fraction | 0.231 | +| clip_range | 0.155 | +| entropy_loss | -8.2 | +| explained_variance | 0.146 | +| learning_rate | 5.95e-05 | +| loss | 19 | +| n_updates | 250 | +| policy_gradient_loss | -0.011 | +| value_loss | 5.73 | +----------------------------------------- +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.55e+03 | +| ep_rew_mean | -3.51 | +| time/ | | +| fps | 351 | +| iterations | 27 | +| time_elapsed | 271 | +| total_timesteps | 95553 | +| train/ | | +| approx_kl | 0.011729387 | +| 
clip_fraction | 0.264 | +| clip_range | 0.155 | +| entropy_loss | -8.15 | +| explained_variance | 0.0695 | +| learning_rate | 5.95e-05 | +| loss | 0.242 | +| n_updates | 260 | +| policy_gradient_loss | -0.0102 | +| value_loss | 16.5 | +----------------------------------------- +------------------------------------------ +| rollout/ | | +| ep_len_mean | 2.5e+03 | +| ep_rew_mean | -3.13 | +| time/ | | +| fps | 350 | +| iterations | 28 | +| time_elapsed | 282 | +| total_timesteps | 99092 | +| train/ | | +| approx_kl | 0.0153510645 | +| clip_fraction | 0.298 | +| clip_range | 0.155 | +| entropy_loss | -8.12 | +| explained_variance | 0.0587 | +| learning_rate | 5.95e-05 | +| loss | 3.32 | +| n_updates | 270 | +| policy_gradient_loss | -0.00956 | +| value_loss | 15.8 | +------------------------------------------ +----------------------------------------- +| rollout/ | | +| ep_len_mean | 2.49e+03 | +| ep_rew_mean | -3.24 | +| time/ | | +| fps | 350 | +| iterations | 29 | +| time_elapsed | 292 | +| total_timesteps | 102631 | +| train/ | | +| approx_kl | 0.018420441 | +| clip_fraction | 0.324 | +| clip_range | 0.155 | +| entropy_loss | -8.1 | +| explained_variance | -0.00146 | +| learning_rate | 5.95e-05 | +| loss | 0.384 | +| n_updates | 280 | +| policy_gradient_loss | -0.00777 | +| value_loss | 13.8 | +----------------------------------------- +[I 2023-03-30 22:29:37,908] Trial 9 finished with value: -345.0 and parameters: {'n_steps': 3539, 'gamma': 0.9483166689072441, 'learning_rate': 5.947863028406936e-05, 'clip_range': 0.15487331840468324, 'gae_lambda': 0.8132195074364921}. Best is trial 6 with value: -1.0. 
import torch
from torch.optim import Optimizer


class RMSpropTF(Optimizer):
    """RMSprop optimizer that follows TensorFlow's order of operations.

    Compared with the stock PyTorch update (kept as reference in the
    comments of the original code), this implementation:

    * initialises the running squared-gradient average to ones instead
      of zeros;
    * adds ``eps`` inside the square root rather than after it;
    * optionally folds the learning rate into the momentum buffer
      (``lr_in_momentum=True``) instead of scaling the final update.

    Args:
        params: iterable of parameters or dicts defining parameter groups.
        lr: learning rate.
        alpha: smoothing constant of the running averages.
        eps: term added inside the square root of the denominator.
        weight_decay: weight decay coefficient.
        momentum: momentum factor; ``0`` disables the momentum buffer.
        centered: if ``True``, subtract the squared running mean of the
            gradient from the running squared average before the root.
        decoupled_decay: if ``True``, shrink the parameter directly by
            ``lr * weight_decay`` instead of adding the decay to the gradient.
        lr_in_momentum: if ``True``, accumulate ``lr``-scaled steps in the
            momentum buffer; otherwise scale the buffer by ``lr`` when it
            is applied to the parameter.

    Raises:
        ValueError: if ``lr``, ``eps``, ``momentum``, ``weight_decay`` or
            ``alpha`` is negative.
    """

    def __init__(self, params, lr=1e-2, alpha=0.9, eps=1e-10,
                 weight_decay=0, momentum=0., centered=False,
                 decoupled_decay=False, lr_in_momentum=True):
        if not 0.0 <= lr:
            raise ValueError("Invalid learning rate: {}".format(lr))
        if not 0.0 <= eps:
            raise ValueError("Invalid epsilon value: {}".format(eps))
        if not 0.0 <= momentum:
            raise ValueError("Invalid momentum value: {}".format(momentum))
        if not 0.0 <= weight_decay:
            raise ValueError("Invalid weight_decay value: {}".format(weight_decay))
        if not 0.0 <= alpha:
            raise ValueError("Invalid alpha value: {}".format(alpha))
        defaults = dict(
            lr=lr,
            momentum=momentum,
            alpha=alpha,
            eps=eps,
            centered=centered,
            weight_decay=weight_decay,
            decoupled_decay=decoupled_decay,
            lr_in_momentum=lr_in_momentum,
        )
        super().__init__(params, defaults)

    def __setstate__(self, state):
        """Restore optimizer state and back-fill missing per-group options.

        ``step`` indexes every option below unconditionally, so a param
        group restored without one of them would raise ``KeyError``. The
        fall-back values mirror the constructor defaults.
        """
        super().__setstate__(state)
        for group in self.param_groups:
            group.setdefault('momentum', 0)
            group.setdefault('centered', False)
            group.setdefault('decoupled_decay', False)
            group.setdefault('lr_in_momentum', True)

    @torch.no_grad()
    def step(self, closure=None):
        """Perform a single optimization step.

        Args:
            closure (callable, optional): re-evaluates the model and
                returns the loss; it is run with gradients enabled.

        Returns:
            The value returned by ``closure``, or ``None`` when no closure
            is given.

        Raises:
            RuntimeError: if a parameter has a sparse gradient.
        """
        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad
                if grad.is_sparse:
                    raise RuntimeError('RMSprop does not support sparse gradients')

                state = self.state[p]
                # Lazy per-parameter state initialization.
                if len(state) == 0:
                    state['step'] = 0
                    # TensorFlow starts the squared average at one
                    # (PyTorch starts it at zero).
                    state['square_avg'] = torch.ones_like(p)
                    if group['momentum'] > 0:
                        state['momentum_buffer'] = torch.zeros_like(p)
                    if group['centered']:
                        state['grad_avg'] = torch.zeros_like(p)

                square_avg = state['square_avg']
                one_minus_alpha = 1. - group['alpha']
                state['step'] += 1

                if group['weight_decay'] != 0:
                    if group['decoupled_decay']:
                        # Decay the weights directly; the gradient is untouched.
                        p.mul_(1. - group['lr'] * group['weight_decay'])
                    else:
                        # Classic L2 penalty folded into the gradient.
                        grad = grad.add(p, alpha=group['weight_decay'])

                # TensorFlow order of ops for the running squared average:
                # avg += (1 - alpha) * (grad^2 - avg)
                square_avg.add_(grad.pow(2) - square_avg, alpha=one_minus_alpha)

                if group['centered']:
                    grad_avg = state['grad_avg']
                    grad_avg.add_(grad - grad_avg, alpha=one_minus_alpha)
                    # eps is added inside the square root.
                    avg = square_avg.addcmul(grad_avg, grad_avg, value=-1).add(group['eps']).sqrt_()
                else:
                    # eps is added inside the square root.
                    avg = square_avg.add(group['eps']).sqrt_()

                if group['momentum'] > 0:
                    buf = state['momentum_buffer']
                    if group['lr_in_momentum']:
                        # TensorFlow accumulates the LR scaling in the buffer.
                        buf.mul_(group['momentum']).addcdiv_(grad, avg, value=group['lr'])
                        p.add_(-buf)
                    else:
                        # PyTorch scales the parameter update by the LR.
                        buf.mul_(group['momentum']).addcdiv_(grad, avg)
                        p.add_(buf, alpha=-group['lr'])
                else:
                    p.addcdiv_(grad, avg, value=-group['lr'])

        return loss
import collections

import gym
import cv2
import numpy as np


# Custom environment wrapper
class StreetFighterCustomWrapper(gym.Wrapper):
    """Gym wrapper that stacks grayscale frames and reshapes the reward.

    Observations are the last ``num_frames`` frames, each converted to
    grayscale and resized to 84x84, stacked along the last axis. The
    reward is computed from the ``health`` and ``enemy_health`` entries of
    the ``info`` dict returned by the wrapped environment.

    Args:
        env: the environment to wrap.
        testing: when ``True``, ``step`` always reports ``done=False``.
    """

    def __init__(self, env, testing=False):
        # gym.Wrapper.__init__ already stores ``env`` on the instance.
        super().__init__(env)

        # Use a deque to store the last ``num_frames`` frames.
        self.num_frames = 3
        self.frame_stack = collections.deque(maxlen=self.num_frames)

        self.full_hp = 176
        self.prev_player_health = self.full_hp
        self.prev_oppont_health = self.full_hp

        # Observation space: stacked grayscale images, one channel per frame.
        self.observation_space = gym.spaces.Box(
            low=0, high=255, shape=(84, 84, self.num_frames), dtype=np.uint8
        )

        self.testing = testing

    @staticmethod
    def _to_gray_frame(observation):
        """Return ``observation`` as an 84x84 single-channel image."""
        # NOTE(review): the conversion code assumes BGR channel order;
        # confirm against the frame format the wrapped env produces.
        gray = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY)
        return cv2.resize(gray, (84, 84), interpolation=cv2.INTER_AREA)

    def _preprocess_observation(self, observation):
        """Push the new frame onto the stack and return the stacked image."""
        frame = self._to_gray_frame(observation)
        self.frame_stack.append(frame)

        # If the stack is not full yet (no reset() beforehand), repeat the
        # frame so the result always matches ``observation_space``.
        while len(self.frame_stack) < self.num_frames:
            self.frame_stack.append(frame)

        return np.stack(self.frame_stack, axis=-1)

    def reset(self):
        """Reset the wrapped env, the health trackers and the frame stack.

        Returns:
            The first observation repeated ``num_frames`` times along the
            last axis.
        """
        observation = self.env.reset()
        self.prev_player_health = self.full_hp
        self.prev_oppont_health = self.full_hp

        frame = self._to_gray_frame(observation)

        # Clear the frame stack and add the first frame ``num_frames`` times.
        self.frame_stack.clear()
        self.frame_stack.extend([frame] * self.num_frames)

        return np.stack(self.frame_stack, axis=-1)

    def step(self, action):
        """Advance the wrapped env one step and apply the custom reward.

        Reward rules, applied while at least one fighter has positive health:

        * player health below zero with the opponent still positive:
          reward is minus the opponent's remaining health, episode ends;
        * opponent health below zero with the player still positive:
          reward is the player's remaining health, episode ends;
        * otherwise: damage dealt minus damage taken since the previous step.

        Rewards outside the expected range are replaced by zero. When
        neither fighter has positive health, the wrapped env's reward is
        passed through unchanged.

        Returns:
            ``(stacked_observation, reward, done, info)``.
        """
        obs, reward, done, info = self.env.step(action)
        player_hp = info['health']
        oppont_hp = info['enemy_health']

        # During fighting, either player or opponent has positive health points.
        if player_hp > 0 or oppont_hp > 0:

            # Player loses
            if player_hp < 0 and oppont_hp > 0:
                # Use the opponent's left-over health points as penalty.
                reward = -oppont_hp

                # Prevent data overflow
                if reward < -self.full_hp:
                    reward = 0

                done = True

            # Player wins
            elif oppont_hp < 0 and player_hp > 0:
                reward = player_hp

                # Prevent data overflow
                if reward > self.full_hp:
                    reward = 0

                done = True

            # During fighting
            else:
                reward = (self.prev_oppont_health - oppont_hp) - (self.prev_player_health - player_hp)

                # Prevent data overflow
                if reward > 99:
                    reward = 0

        self.prev_player_health = player_hp
        self.prev_oppont_health = oppont_hp

        if self.testing:
            done = False

        return self._preprocess_observation(obs), reward, done, info
"execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "env.action_space.sample()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "e068cb0a", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(200, 256, 3)" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "env.observation_space.sample().shape" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "1cb0297f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(200, 256, 3)\n", + "{'enemy_matches_won': 0, 'score': 0, 'matches_won': 0, 'continuetimer': 0, 'enemy_health': 176, 'health': 176}\n" + ] + } + ], + "source": [ + "observation = env.reset()\n", + "print(observation.shape)\n", + "\n", + "action = env.action_space.sample()\n", + "obs, rewards, done, info = env.step(action)\n", + "print(info)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "0eaa5cc8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MultiBinary(12)\n" + ] + } + ], + "source": [ + "from gym.spaces import Box, MultiBinary\n", + "\n", + "print(MultiBinary(12))" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "49f6cf5c", + "metadata": {}, + "outputs": [], + "source": [ + "import cv2\n", + "\n", + "import gym\n", + "import numpy as np\n", + "from gym.spaces import Box, MultiBinary\n", + "\n", + "class StreetFighter(gym.Env):\n", + " def __init__(self):\n", + " super().__init__()\n", + " self.observation_space = Box(low=0, high=255, shape=(84, 84), dtype=np.uint8)\n", + " self.action_space = MultiBinary(12)\n", + " self.game = retro.make(game=\"StreetFighterIISpecialChampionEdition-Genesis\", use_restricted_actions=retro.Actions.FILTERED)\n", + " \n", + " self.full_hp = 176\n", + " self.player_health = self.full_hp\n", + " 
self.oppont_health = self.full_hp\n", + " \n", + " self.score = 0\n", + " \n", + " def __preprocess(self, observation):\n", + " gray = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY)\n", + " resize = cv2.resize(gray, (84,84), interpolation=cv2.INTER_CUBIC)\n", + " return resize\n", + "\n", + " def step(self, action):\n", + "\n", + " obs, reward, done, info = self.game.step(action)\n", + " custom_obs = self.__preprocess(obs) # It's just frame, not frame_delta\n", + "\n", + " # During fighting, either player or opponent has positive health points.\n", + " if info['health'] > 0 or info['enemy_health'] > 0:\n", + "\n", + " # Player Loses\n", + " if info['health'] < 0 and info['health'] != self.player_health and info['enemy_health'] != 0:\n", + " reward = (-self.full_hp) * info['enemy_health']\n", + "\n", + " # Player Wins\n", + " elif info['enemy_health'] < 0 and info['enemy_health'] != self.oppont_health and info['health'] != 0:\n", + " reward = self.full_hp * info['health']\n", + "\n", + " # During Fighting\n", + " else:\n", + " reward = (self.oppont_health - info['enemy_health']) - (self.player_health - info['health'])\n", + " \n", + " self.player_health = info['health']\n", + " self.oppont_health = info['enemy_health']\n", + " \n", + " return custom_obs, reward, done, info\n", + " \n", + " def render(self, *args, **kwargs):\n", + " self.game.render()\n", + " \n", + " def reset(self):\n", + " obs = self.game.reset()\n", + " custom_obs = self.__preprocess(obs)\n", + " self.previous_frame = obs\n", + " \n", + " self.player_health = self.full_hp\n", + " self.oppont_health = self.full_hp\n", + " return custom_obs\n", + "\n", + " def close(self):\n", + " self.game.close()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "6ec30177", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(84, 84)\n" + ] + } + ], + "source": [ + "env.close()\n", + "env = StreetFighter()\n", + 
## Checking Rewards functionality
# Plays five matches with random actions and prints every non-zero reward
# together with both health values, so the reward shaping can be eyeballed.
import time

env = StreetFighter()
try:
    for game in range(5):
        # Reset the env AND the done flag for every match; otherwise the
        # while-loop is skipped for all matches after the first one.
        obs = env.reset()
        done = False
        while not done:
            env.render()
            obs, reward, done, info = env.step(env.action_space.sample())
            if reward != 0:
                print(reward, info['health'], info['enemy_health'])
            time.sleep(0.01)
finally:
    # Free the emulator even on interruption, so the cell can be re-run
    # without a leftover emulator instance from the previous run.
    env.close()
def make_env(game, state):
    """Return a zero-argument factory that builds the wrapped retro environment.

    The emulator is not created here; it is created each time the returned
    callable is invoked, using the captured ``game`` and ``state``.
    """
    def _init():
        # Raw emulator env: filtered button set, image observations.
        retro_env = retro.make(
            game=game,
            state=state,
            use_restricted_actions=retro.Actions.FILTERED,
            obs_type=retro.Observations.IMAGE,
        )
        # Apply the project's custom wrapper on top of the raw env.
        return StreetFighterCustomWrapper(retro_env)

    return _init
class RandomOpponentChangeCallback(BaseCallback):
    """Callback that periodically switches all training envs to a random stage.

    Args:
        stages: sequence of state names to choose from.
        opponent_interval: number of callback calls between two switches.
        verbose: forwarded to ``BaseCallback``.
    """

    def __init__(self, stages, opponent_interval, verbose=0):
        super().__init__(verbose)
        self.opponent_interval = opponent_interval
        self.stages = stages

    def _on_step(self) -> bool:
        """Pick a new random stage every ``opponent_interval`` calls; always continue training."""
        switch_due = self.n_calls % self.opponent_interval == 0
        if not switch_due:
            return True

        chosen_state = random.choice(self.stages)
        print("\nCurrent state:", chosen_state)
        # indices=None addresses every sub-environment of the vectorized env.
        self.training_env.env_method("load_state", chosen_state, indices=None)
        return True
"ChampionX.Level5.ChunLiVsRyu", + "ChampionX.Level6.ChunLiVsEHonda", + "ChampionX.Level7.ChunLiVsBlanka", + "ChampionX.Level8.ChunLiVsGuile", + "ChampionX.Level9.ChunLiVsBalrog", + "ChampionX.Level10.ChunLiVsVega", + "ChampionX.Level11.ChunLiVsSagat", + "ChampionX.Level12.ChunLiVsBison" + # Add other stages as necessary + ] + # Champion is at difficulty level 4, ChampionX is at difficulty level 8. + + num_envs = 8 + + env = SubprocVecEnv([make_env(game, state_stages[0], seed=i) for i in range(num_envs)]) + + # Using CustomCNN as the feature extractor + policy_kwargs = { + 'features_extractor_class': CustomCNN + } + + model = PPO( + "CnnPolicy", + env, + device="cuda", + policy_kwargs=policy_kwargs, + verbose=1, + n_steps=5400, + batch_size=64, + learning_rate=0.0001, + ent_coef=0.01, + clip_range=0.2, + gamma=0.99, + gae_lambda=0.95, + tensorboard_log="logs/" + ) + + # Set the save directory + save_dir = "trained_models" + os.makedirs(save_dir, exist_ok=True) + + # Load the model from file + # model_path = "trained_models/ppo_chunli_1296000_steps.zip" + + # Load model and modify the learning rate and entropy coefficient + # custom_objects = { + # "learning_rate": 0.0002 + # } + # model = PPO.load(model_path, env=env, device="cuda")#, custom_objects=custom_objects) + + # Set up callbacks + opponent_interval = 5400 # stage_interval * num_envs = total_steps_per_stage + checkpoint_interval = 54000 # checkpoint_interval * num_envs = total_steps_per_checkpoint (Every 80 rounds) + checkpoint_callback = CheckpointCallback(save_freq=checkpoint_interval, save_path=save_dir, name_prefix="ppo_chunli") + stage_increase_callback = RandomOpponentChangeCallback(state_stages, opponent_interval, save_dir) + + # model_params = { + # 'n_steps': 5, + # 'gamma': 0.99, + # 'gae_lambda':1, + # 'learning_rate': 7e-4, + # 'vf_coef': 0.5, + # 'ent_coef': 0.0, + # 'max_grad_norm':0.5, + # 'rms_prop_eps':1e-05 + # } + # model = A2C('CnnPolicy', env, tensorboard_log='logs/', verbose=1, 
def objective(trial):
    """Optuna objective: train PPO with sampled hyperparameters, return mean reward.

    Runs ten rounds of 100000 training timesteps. After each round the policy
    is evaluated for 10 episodes and the mean reward is reported to Optuna so
    that unpromising trials can be pruned.

    Args:
        trial: the ``optuna`` trial used to sample hyperparameters and to
            report intermediate values.

    Returns:
        Mean evaluation reward after the final round.

    Raises:
        optuna.TrialPruned: when the pruner asks to stop this trial.
    """
    game = "StreetFighterIISpecialChampionEdition-Genesis"
    env = make_env(game, state="ChampionX.Level1.ChunLiVsKen")()

    # gym-retro permits a single emulator instance per process, so the env
    # has to be closed on every exit path (normal return, pruning, errors);
    # otherwise the next trial cannot create its environment.
    try:
        # Suggest hyperparameters
        learning_rate = trial.suggest_float("learning_rate", 5e-5, 1e-3, log=True)
        n_steps = trial.suggest_int("n_steps", 256, 8192, log=True)
        batch_size = trial.suggest_int("batch_size", 16, 128, log=True)
        gamma = trial.suggest_float("gamma", 0.9, 0.9999)
        gae_lambda = trial.suggest_float("gae_lambda", 0.9, 1.0)
        clip_range = trial.suggest_float("clip_range", 0.1, 0.4)
        ent_coef = trial.suggest_float("ent_coef", 1e-4, 1e-2, log=True)
        vf_coef = trial.suggest_float("vf_coef", 0.1, 1.0)

        # Using CustomCNN as the feature extractor
        policy_kwargs = {
            'features_extractor_class': CustomCNN
        }

        # Train the model
        model = PPO(
            "CnnPolicy",
            env,
            device="cuda",
            policy_kwargs=policy_kwargs,
            verbose=1,
            n_steps=n_steps,
            batch_size=batch_size,
            learning_rate=learning_rate,
            ent_coef=ent_coef,
            clip_range=clip_range,
            vf_coef=vf_coef,
            gamma=gamma,
            gae_lambda=gae_lambda
        )

        for iteration in range(10):
            model.learn(total_timesteps=100000)
            mean_reward, _std_reward = evaluate_policy(model, env, n_eval_episodes=10)

            # Intermediate value lets the pruner compare this trial to others.
            trial.report(mean_reward, iteration)

            if trial.should_prune():
                raise optuna.TrialPruned()

        return mean_reward
    finally:
        env.close()
def optimize_agent(trial):
    """Optuna objective: train and evaluate one PPO model, return its mean reward.

    The model is trained for 100000 timesteps with the hyperparameters from
    ``optimize_ppo(trial)``, evaluated for 30 episodes and saved under
    ``OPT_DIR`` as ``trial_<number>_best_model``.

    Args:
        trial: the ``optuna`` trial to sample hyperparameters from.

    Returns:
        The mean evaluation reward, or ``float('nan')`` when the trial raised.
        Optuna records a NaN result as a failed trial and carries on with the
        study. A numeric sentinel such as -1 would be compared against real
        results, and since the rewards measured for this task are negative it
        would make a crashed trial look like the best one.
    """
    game = "StreetFighterIISpecialChampionEdition-Genesis"
    state = "Champion.Level1.ChunLiVsGuile"#"ChampionX.Level1.ChunLiVsKen"

    env = None
    try:
        model_params = optimize_ppo(trial)

        # Create environment
        env = make_env(game, state)()
        env = Monitor(env, LOG_DIR)

        # Create algo
        model = PPO('CnnPolicy', env, tensorboard_log=LOG_DIR, verbose=1, **model_params)
        model.learn(total_timesteps=100000)

        # Evaluate model
        mean_reward, _ = evaluate_policy(model, env, n_eval_episodes=30)

        save_path = os.path.join(OPT_DIR, 'trial_{}_best_model'.format(trial.number))
        model.save(save_path)

        return mean_reward

    except Exception as e:
        # Best-effort: keep the study running, but make the failure visible
        # instead of swallowing it.
        print("Trial {} failed: {!r}".format(trial.number, e))
        return float('nan')

    finally:
        # gym-retro allows one emulator per process; release it on every
        # path so the next trial can create its own environment.
        if env is not None:
            env.close()
class CustomCNN(BaseFeaturesExtractor):
    """Convolutional feature extractor for stacked image observations.

    Three convolution layers followed by a fully connected layer that
    produces ``features_dim`` features.

    Args:
        observation_space: channels-first image space ``(C, H, W)``. The
            number of input channels and the size of the flattened
            convolution output are derived from it instead of being
            hard-coded.
        features_dim: size of the extracted feature vector (default 512).
    """

    def __init__(self, observation_space: gym.Space, features_dim: int = 512):
        super(CustomCNN, self).__init__(observation_space, features_dim=features_dim)
        n_input_channels = observation_space.shape[0]

        conv_layers = [
            nn.Conv2d(n_input_channels, 32, kernel_size=5, stride=2, padding=0),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=5, stride=2, padding=0),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=0),
            nn.ReLU(),
            nn.Flatten(),
        ]

        # Run one all-zero observation through the convolution stack to learn
        # the flattened size (16384 for a 4x84x84 input). The temporary
        # Sequential is not stored on self, so it registers no parameters.
        with torch.no_grad():
            dummy_obs = torch.zeros(1, *observation_space.shape)
            n_flatten = nn.Sequential(*conv_layers)(dummy_obs).shape[1]

        # Single Sequential with the same layer order as before, so the
        # parameter names (cnn.0, cnn.2, cnn.4, cnn.7) of saved models match.
        self.cnn = nn.Sequential(
            *conv_layers,
            nn.Linear(n_flatten, self.features_dim),
            nn.ReLU()
        )

    def forward(self, observations: torch.Tensor) -> torch.Tensor:
        """Map a batch of observations to a ``(batch, features_dim)`` tensor."""
        return self.cnn(observations)
reward: {mean_reward:.2f} +/- {std_reward:.2f}") \ No newline at end of file diff --git a/001_image_stack_vision_based_reward/logs/monitor.csv b/001_image_stack_vision_based_reward/logs/monitor.csv new file mode 100644 index 0000000..671bb3b --- /dev/null +++ b/001_image_stack_vision_based_reward/logs/monitor.csv @@ -0,0 +1,12 @@ +#{"t_start": 1680163278.6497958, "env_id": null} +r,l,t +-1115.766667,2842,13.829476 +-1115.766667,2842,22.367655 +-1115.766667,2842,32.010939 +-1115.766667,2842,41.401216 +-1115.766667,2842,50.451062 +-1115.766667,2842,59.522487 +-1115.766667,2842,68.723222 +-1115.766667,2842,78.205462 +-1115.766667,2842,88.455592 +-1115.766667,2842,97.656297 diff --git a/001_image_stack/street_fighter_custom_wrapper.py b/001_image_stack_vision_based_reward/street_fighter_custom_wrapper.py similarity index 96% rename from 001_image_stack/street_fighter_custom_wrapper.py rename to 001_image_stack_vision_based_reward/street_fighter_custom_wrapper.py index 5fd4d35..e2e4c53 100644 --- a/001_image_stack/street_fighter_custom_wrapper.py +++ b/001_image_stack_vision_based_reward/street_fighter_custom_wrapper.py @@ -12,8 +12,6 @@ class StreetFighterCustomWrapper(gym.Wrapper): def __init__(self, env, testing=False, threshold=0.65): super(StreetFighterCustomWrapper, self).__init__(env) - self.action_space = MultiBinary(12) - # Use a deque to store the last 4 frames self.frame_stack = collections.deque(maxlen=4) @@ -89,7 +87,7 @@ class StreetFighterCustomWrapper(gym.Wrapper): def step(self, action): # observation, _, _, info = self.env.step(action) - observation, _reward, _done, info = self.env.step(self.env.action_space.sample()) + observation, _reward, _done, info = self.env.step(action) custom_reward = self._get_reward() custom_reward -= 1.0 / 60.0 # penalty for each step (-1 points per second) diff --git a/001_image_stack/test.py b/001_image_stack_vision_based_reward/test.py similarity index 96% rename from 001_image_stack/test.py rename to 
001_image_stack_vision_based_reward/test.py index 614b247..db08ae4 100644 --- a/001_image_stack/test.py +++ b/001_image_stack_vision_based_reward/test.py @@ -53,7 +53,7 @@ model = PPO( policy_kwargs=policy_kwargs, verbose=1 ) -model.load(r"trained_models_continued/ppo_chunli_432000_steps") +model.load(r"trained_models/ppo_chunli_1296000_steps") obs = env.reset() done = False diff --git a/001_image_stack/train.py b/001_image_stack_vision_based_reward/train.py similarity index 91% rename from 001_image_stack/train.py rename to 001_image_stack_vision_based_reward/train.py index 4e2195f..9861457 100644 --- a/001_image_stack/train.py +++ b/001_image_stack_vision_based_reward/train.py @@ -1,13 +1,9 @@ import os import random -import gym -import cv2 import retro -import numpy as np from stable_baselines3 import PPO from stable_baselines3.common.vec_env import SubprocVecEnv -from stable_baselines3.common.preprocessing import is_image_space, is_image_space_channels_first from stable_baselines3.common.callbacks import BaseCallback, CheckpointCallback from custom_cnn import CustomCNN @@ -77,20 +73,16 @@ def main(): verbose=1, n_steps=5400, batch_size=64, - n_epochs=10, learning_rate=0.0003, ent_coef=0.01, clip_range=0.2, - clip_range_vf=None, gamma=0.99, gae_lambda=0.95, - max_grad_norm=0.5, - use_sde=False, - sde_sample_freq=-1 + tensorboard_log="logs/" ) # Set the save directory - save_dir = "trained_models_continued" + save_dir = "trained_models_continued_new" os.makedirs(save_dir, exist_ok=True) # Load the model from file @@ -99,8 +91,7 @@ def main(): # Load model and modify the learning rate and entropy coefficient custom_objects = { - "learning_rate": 0.00005, - "ent_coef": 0.2 + "learning_rate": 0.0001 } model = PPO.load(model_path, env=env, device="cuda", custom_objects=custom_objects) @@ -110,7 +101,6 @@ def main(): checkpoint_callback = CheckpointCallback(save_freq=checkpoint_interval, save_path=save_dir, name_prefix="ppo_chunli") stage_increase_callback = 
RandomOpponentChangeCallback(state_stages, opponent_interval, save_dir) - model.learn( total_timesteps=int(6048000), # total_timesteps = stage_interval * num_envs * num_stages (1120 rounds) callback=[checkpoint_callback, stage_increase_callback] diff --git a/001_image_stack_vision_based_reward/trainging_log_continued.txt b/001_image_stack_vision_based_reward/trainging_log_continued.txt new file mode 100644 index 0000000..b299f62 --- /dev/null +++ b/001_image_stack_vision_based_reward/trainging_log_continued.txt @@ -0,0 +1,2791 @@ +(StreetFighterAI) PS C:\Users\unitec\Documents\AIProjects\street-fighter-ai\001_image_stack> python .\train.py +Using cuda device + +Current state: ChampionX.Level7.ChunLiVsBlanka +------------------------------ +| time/ | | +| fps | 1534 | +| iterations | 1 | +| time_elapsed | 28 | +| total_timesteps | 43200 | +------------------------------ + +Current state: ChampionX.Level8.ChunLiVsGuile +----------------------------------------- +| time/ | | +| fps | 696 | +| iterations | 2 | +| time_elapsed | 123 | +| total_timesteps | 86400 | +| train/ | | +| approx_kl | 0.019640451 | +| clip_fraction | 0.222 | +| clip_range | 0.2 | +| entropy_loss | -8.12 | +| explained_variance | 0.731 | +| learning_rate | 0.0002 | +| loss | 0.529 | +| n_updates | 300 | +| policy_gradient_loss | 0.0037 | +| value_loss | 17.4 | +----------------------------------------- + +Current state: ChampionX.Level1.ChunLiVsKen +---------------------------------------- +| time/ | | +| fps | 587 | +| iterations | 3 | +| time_elapsed | 220 | +| total_timesteps | 129600 | +| train/ | | +| approx_kl | 0.01716586 | +| clip_fraction | 0.184 | +| clip_range | 0.2 | +| entropy_loss | -8.15 | +| explained_variance | 0.681 | +| learning_rate | 0.0002 | +| loss | 0.305 | +| n_updates | 310 | +| policy_gradient_loss | -0.00363 | +| value_loss | 12.6 | +---------------------------------------- + +Current state: ChampionX.Level12.ChunLiVsBison +----------------------------------------- +| 
time/ | | +| fps | 545 | +| iterations | 4 | +| time_elapsed | 316 | +| total_timesteps | 172800 | +| train/ | | +| approx_kl | 0.017642297 | +| clip_fraction | 0.18 | +| clip_range | 0.2 | +| entropy_loss | -8.14 | +| explained_variance | 0.752 | +| learning_rate | 0.0002 | +| loss | 0.693 | +| n_updates | 320 | +| policy_gradient_loss | -0.0013 | +| value_loss | 15.3 | +----------------------------------------- + +Current state: ChampionX.Level5.ChunLiVsRyu +----------------------------------------- +| time/ | | +| fps | 523 | +| iterations | 5 | +| time_elapsed | 412 | +| total_timesteps | 216000 | +| train/ | | +| approx_kl | 0.016423995 | +| clip_fraction | 0.159 | +| clip_range | 0.2 | +| entropy_loss | -8.14 | +| explained_variance | 0.769 | +| learning_rate | 0.0002 | +| loss | 0.238 | +| n_updates | 330 | +| policy_gradient_loss | -0.00348 | +| value_loss | 17.4 | +----------------------------------------- + +Current state: ChampionX.Level4.ChunLiVsDhalsim +---------------------------------------- +| time/ | | +| fps | 508 | +| iterations | 6 | +| time_elapsed | 509 | +| total_timesteps | 259200 | +| train/ | | +| approx_kl | 0.01582943 | +| clip_fraction | 0.155 | +| clip_range | 0.2 | +| entropy_loss | -8.15 | +| explained_variance | 0.734 | +| learning_rate | 0.0002 | +| loss | 0.688 | +| n_updates | 340 | +| policy_gradient_loss | -0.00491 | +| value_loss | 14.8 | +---------------------------------------- + +Current state: ChampionX.Level11.ChunLiVsSagat +----------------------------------------- +| time/ | | +| fps | 498 | +| iterations | 7 | +| time_elapsed | 606 | +| total_timesteps | 302400 | +| train/ | | +| approx_kl | 0.019045277 | +| clip_fraction | 0.176 | +| clip_range | 0.2 | +| entropy_loss | -8.15 | +| explained_variance | 0.778 | +| learning_rate | 0.0002 | +| loss | 0.729 | +| n_updates | 350 | +| policy_gradient_loss | -0.00323 | +| value_loss | 15.8 | +----------------------------------------- + +Current state: 
ChampionX.Level3.ChunLiVsZangief +----------------------------------------- +| time/ | | +| fps | 490 | +| iterations | 8 | +| time_elapsed | 705 | +| total_timesteps | 345600 | +| train/ | | +| approx_kl | 0.018350422 | +| clip_fraction | 0.177 | +| clip_range | 0.2 | +| entropy_loss | -8.16 | +| explained_variance | 0.789 | +| learning_rate | 0.0002 | +| loss | 1.17 | +| n_updates | 360 | +| policy_gradient_loss | -0.0043 | +| value_loss | 12.4 | +----------------------------------------- + +Current state: ChampionX.Level5.ChunLiVsRyu +----------------------------------------- +| time/ | | +| fps | 484 | +| iterations | 9 | +| time_elapsed | 802 | +| total_timesteps | 388800 | +| train/ | | +| approx_kl | 0.018348452 | +| clip_fraction | 0.183 | +| clip_range | 0.2 | +| entropy_loss | -8.15 | +| explained_variance | 0.797 | +| learning_rate | 0.0002 | +| loss | 1.45 | +| n_updates | 370 | +| policy_gradient_loss | -0.000873 | +| value_loss | 16 | +----------------------------------------- + +Current state: ChampionX.Level3.ChunLiVsZangief +----------------------------------------- +| time/ | | +| fps | 480 | +| iterations | 10 | +| time_elapsed | 899 | +| total_timesteps | 432000 | +| train/ | | +| approx_kl | 0.017740099 | +| clip_fraction | 0.175 | +| clip_range | 0.2 | +| entropy_loss | -8.14 | +| explained_variance | 0.81 | +| learning_rate | 0.0002 | +| loss | 0.596 | +| n_updates | 380 | +| policy_gradient_loss | -0.00329 | +| value_loss | 20.7 | +----------------------------------------- + +Current state: ChampionX.Level9.ChunLiVsBalrog +----------------------------------------- +| time/ | | +| fps | 475 | +| iterations | 11 | +| time_elapsed | 998 | +| total_timesteps | 475200 | +| train/ | | +| approx_kl | 0.020382024 | +| clip_fraction | 0.204 | +| clip_range | 0.2 | +| entropy_loss | -8.14 | +| explained_variance | 0.783 | +| learning_rate | 0.0002 | +| loss | 0.51 | +| n_updates | 390 | +| policy_gradient_loss | -0.0046 | +| value_loss | 17.3 | 
+----------------------------------------- + +Current state: ChampionX.Level12.ChunLiVsBison +---------------------------------------- +| time/ | | +| fps | 473 | +| iterations | 12 | +| time_elapsed | 1095 | +| total_timesteps | 518400 | +| train/ | | +| approx_kl | 0.01975372 | +| clip_fraction | 0.192 | +| clip_range | 0.2 | +| entropy_loss | -8.12 | +| explained_variance | 0.78 | +| learning_rate | 0.0002 | +| loss | 0.59 | +| n_updates | 400 | +| policy_gradient_loss | -0.00151 | +| value_loss | 22.9 | +---------------------------------------- + +Current state: ChampionX.Level11.ChunLiVsSagat +----------------------------------------- +| time/ | | +| fps | 470 | +| iterations | 13 | +| time_elapsed | 1192 | +| total_timesteps | 561600 | +| train/ | | +| approx_kl | 0.019312538 | +| clip_fraction | 0.199 | +| clip_range | 0.2 | +| entropy_loss | -8.13 | +| explained_variance | 0.697 | +| learning_rate | 0.0002 | +| loss | 1.05 | +| n_updates | 410 | +| policy_gradient_loss | -0.000962 | +| value_loss | 21.6 | +----------------------------------------- + +Current state: ChampionX.Level4.ChunLiVsDhalsim +----------------------------------------- +| time/ | | +| fps | 468 | +| iterations | 14 | +| time_elapsed | 1290 | +| total_timesteps | 604800 | +| train/ | | +| approx_kl | 0.018606355 | +| clip_fraction | 0.189 | +| clip_range | 0.2 | +| entropy_loss | -8.12 | +| explained_variance | 0.742 | +| learning_rate | 0.0002 | +| loss | 0.385 | +| n_updates | 420 | +| policy_gradient_loss | -0.00191 | +| value_loss | 18.1 | +----------------------------------------- + +Current state: ChampionX.Level3.ChunLiVsZangief +----------------------------------------- +| time/ | | +| fps | 467 | +| iterations | 15 | +| time_elapsed | 1387 | +| total_timesteps | 648000 | +| train/ | | +| approx_kl | 0.017203132 | +| clip_fraction | 0.179 | +| clip_range | 0.2 | +| entropy_loss | -8.12 | +| explained_variance | 0.787 | +| learning_rate | 0.0002 | +| loss | 0.26 | +| n_updates | 
430 | +| policy_gradient_loss | -0.0021 | +| value_loss | 15.2 | +----------------------------------------- + +Current state: ChampionX.Level7.ChunLiVsBlanka +----------------------------------------- +| time/ | | +| fps | 465 | +| iterations | 16 | +| time_elapsed | 1484 | +| total_timesteps | 691200 | +| train/ | | +| approx_kl | 0.018841917 | +| clip_fraction | 0.184 | +| clip_range | 0.2 | +| entropy_loss | -8.09 | +| explained_variance | 0.791 | +| learning_rate | 0.0002 | +| loss | 0.811 | +| n_updates | 440 | +| policy_gradient_loss | -0.00263 | +| value_loss | 12.1 | +----------------------------------------- + +Current state: ChampionX.Level4.ChunLiVsDhalsim +----------------------------------------- +| time/ | | +| fps | 464 | +| iterations | 17 | +| time_elapsed | 1581 | +| total_timesteps | 734400 | +| train/ | | +| approx_kl | 0.016460957 | +| clip_fraction | 0.161 | +| clip_range | 0.2 | +| entropy_loss | -8.11 | +| explained_variance | 0.809 | +| learning_rate | 0.0002 | +| loss | 1.47 | +| n_updates | 450 | +| policy_gradient_loss | -0.00405 | +| value_loss | 17.5 | +----------------------------------------- + +Current state: ChampionX.Level7.ChunLiVsBlanka +----------------------------------------- +| time/ | | +| fps | 463 | +| iterations | 18 | +| time_elapsed | 1678 | +| total_timesteps | 777600 | +| train/ | | +| approx_kl | 0.018824814 | +| clip_fraction | 0.187 | +| clip_range | 0.2 | +| entropy_loss | -8.1 | +| explained_variance | 0.766 | +| learning_rate | 0.0002 | +| loss | 0.312 | +| n_updates | 460 | +| policy_gradient_loss | -0.00269 | +| value_loss | 15.2 | +----------------------------------------- + +Current state: ChampionX.Level4.ChunLiVsDhalsim +----------------------------------------- +| time/ | | +| fps | 462 | +| iterations | 19 | +| time_elapsed | 1776 | +| total_timesteps | 820800 | +| train/ | | +| approx_kl | 0.017789861 | +| clip_fraction | 0.168 | +| clip_range | 0.2 | +| entropy_loss | -8.09 | +| explained_variance | 
0.762 | +| learning_rate | 0.0002 | +| loss | 1.01 | +| n_updates | 470 | +| policy_gradient_loss | -0.00204 | +| value_loss | 16.2 | +----------------------------------------- + +Current state: ChampionX.Level9.ChunLiVsBalrog +----------------------------------------- +| time/ | | +| fps | 461 | +| iterations | 20 | +| time_elapsed | 1872 | +| total_timesteps | 864000 | +| train/ | | +| approx_kl | 0.018345973 | +| clip_fraction | 0.173 | +| clip_range | 0.2 | +| entropy_loss | -8.1 | +| explained_variance | 0.79 | +| learning_rate | 0.0002 | +| loss | 0.736 | +| n_updates | 480 | +| policy_gradient_loss | -0.00369 | +| value_loss | 12.8 | +----------------------------------------- + +Current state: ChampionX.Level12.ChunLiVsBison +---------------------------------------- +| time/ | | +| fps | 460 | +| iterations | 21 | +| time_elapsed | 1969 | +| total_timesteps | 907200 | +| train/ | | +| approx_kl | 0.02151764 | +| clip_fraction | 0.192 | +| clip_range | 0.2 | +| entropy_loss | -8.09 | +| explained_variance | 0.782 | +| learning_rate | 0.0002 | +| loss | 0.267 | +| n_updates | 490 | +| policy_gradient_loss | -0.00102 | +| value_loss | 13.8 | +---------------------------------------- + +Current state: ChampionX.Level4.ChunLiVsDhalsim +----------------------------------------- +| time/ | | +| fps | 459 | +| iterations | 22 | +| time_elapsed | 2066 | +| total_timesteps | 950400 | +| train/ | | +| approx_kl | 0.021028183 | +| clip_fraction | 0.19 | +| clip_range | 0.2 | +| entropy_loss | -8.13 | +| explained_variance | 0.676 | +| learning_rate | 0.0002 | +| loss | 0.253 | +| n_updates | 500 | +| policy_gradient_loss | -0.00186 | +| value_loss | 20 | +----------------------------------------- + +Current state: ChampionX.Level3.ChunLiVsZangief +----------------------------------------- +| time/ | | +| fps | 459 | +| iterations | 23 | +| time_elapsed | 2163 | +| total_timesteps | 993600 | +| train/ | | +| approx_kl | 0.019285567 | +| clip_fraction | 0.18 | +| 
clip_range | 0.2 | +| entropy_loss | -8.12 | +| explained_variance | 0.729 | +| learning_rate | 0.0002 | +| loss | 0.329 | +| n_updates | 510 | +| policy_gradient_loss | -0.00156 | +| value_loss | 20.8 | +----------------------------------------- + +Current state: ChampionX.Level10.ChunLiVsVega +----------------------------------------- +| time/ | | +| fps | 458 | +| iterations | 24 | +| time_elapsed | 2260 | +| total_timesteps | 1036800 | +| train/ | | +| approx_kl | 0.019038767 | +| clip_fraction | 0.195 | +| clip_range | 0.2 | +| entropy_loss | -8.09 | +| explained_variance | 0.665 | +| learning_rate | 0.0002 | +| loss | 0.685 | +| n_updates | 520 | +| policy_gradient_loss | -0.000273 | +| value_loss | 15.8 | +----------------------------------------- + +Current state: ChampionX.Level7.ChunLiVsBlanka +----------------------------------------- +| time/ | | +| fps | 458 | +| iterations | 25 | +| time_elapsed | 2357 | +| total_timesteps | 1080000 | +| train/ | | +| approx_kl | 0.020219645 | +| clip_fraction | 0.192 | +| clip_range | 0.2 | +| entropy_loss | -8.09 | +| explained_variance | 0.776 | +| learning_rate | 0.0002 | +| loss | 1.49 | +| n_updates | 530 | +| policy_gradient_loss | -0.00111 | +| value_loss | 21.8 | +----------------------------------------- + +Current state: ChampionX.Level11.ChunLiVsSagat +----------------------------------------- +| time/ | | +| fps | 457 | +| iterations | 26 | +| time_elapsed | 2455 | +| total_timesteps | 1123200 | +| train/ | | +| approx_kl | 0.018398428 | +| clip_fraction | 0.179 | +| clip_range | 0.2 | +| entropy_loss | -8.12 | +| explained_variance | 0.784 | +| learning_rate | 0.0002 | +| loss | 0.225 | +| n_updates | 540 | +| policy_gradient_loss | -0.00625 | +| value_loss | 12.3 | +----------------------------------------- + +Current state: ChampionX.Level1.ChunLiVsKen +---------------------------------------- +| time/ | | +| fps | 456 | +| iterations | 27 | +| time_elapsed | 2552 | +| total_timesteps | 1166400 | +| 
train/ | | +| approx_kl | 0.02056862 | +| clip_fraction | 0.178 | +| clip_range | 0.2 | +| entropy_loss | -8.1 | +| explained_variance | 0.718 | +| learning_rate | 0.0002 | +| loss | 0.265 | +| n_updates | 550 | +| policy_gradient_loss | -0.00118 | +| value_loss | 21.3 | +---------------------------------------- + +Current state: ChampionX.Level10.ChunLiVsVega +----------------------------------------- +| time/ | | +| fps | 456 | +| iterations | 28 | +| time_elapsed | 2649 | +| total_timesteps | 1209600 | +| train/ | | +| approx_kl | 0.018739836 | +| clip_fraction | 0.182 | +| clip_range | 0.2 | +| entropy_loss | -8.09 | +| explained_variance | 0.786 | +| learning_rate | 0.0002 | +| loss | 0.562 | +| n_updates | 560 | +| policy_gradient_loss | -0.00141 | +| value_loss | 16.7 | +----------------------------------------- + +Current state: ChampionX.Level7.ChunLiVsBlanka +----------------------------------------- +| time/ | | +| fps | 456 | +| iterations | 29 | +| time_elapsed | 2747 | +| total_timesteps | 1252800 | +| train/ | | +| approx_kl | 0.019046063 | +| clip_fraction | 0.178 | +| clip_range | 0.2 | +| entropy_loss | -8.1 | +| explained_variance | 0.77 | +| learning_rate | 0.0002 | +| loss | 0.655 | +| n_updates | 570 | +| policy_gradient_loss | -0.00238 | +| value_loss | 19 | +----------------------------------------- + +Current state: ChampionX.Level11.ChunLiVsSagat +----------------------------------------- +| time/ | | +| fps | 455 | +| iterations | 30 | +| time_elapsed | 2845 | +| total_timesteps | 1296000 | +| train/ | | +| approx_kl | 0.017575732 | +| clip_fraction | 0.181 | +| clip_range | 0.2 | +| entropy_loss | -8.12 | +| explained_variance | 0.763 | +| learning_rate | 0.0002 | +| loss | 0.461 | +| n_updates | 580 | +| policy_gradient_loss | -0.00471 | +| value_loss | 12.1 | +----------------------------------------- + +Current state: ChampionX.Level6.ChunLiVsEHonda +----------------------------------------- +| time/ | | +| fps | 455 | +| iterations | 
31 | +| time_elapsed | 2942 | +| total_timesteps | 1339200 | +| train/ | | +| approx_kl | 0.020356499 | +| clip_fraction | 0.179 | +| clip_range | 0.2 | +| entropy_loss | -8.12 | +| explained_variance | 0.772 | +| learning_rate | 0.0002 | +| loss | 1.84 | +| n_updates | 590 | +| policy_gradient_loss | -0.00473 | +| value_loss | 11.5 | +----------------------------------------- + +Current state: ChampionX.Level6.ChunLiVsEHonda +---------------------------------------- +| time/ | | +| fps | 454 | +| iterations | 32 | +| time_elapsed | 3039 | +| total_timesteps | 1382400 | +| train/ | | +| approx_kl | 0.02154484 | +| clip_fraction | 0.186 | +| clip_range | 0.2 | +| entropy_loss | -8.09 | +| explained_variance | 0.82 | +| learning_rate | 0.0002 | +| loss | 2.06 | +| n_updates | 600 | +| policy_gradient_loss | 0.00338 | +| value_loss | 23.1 | +---------------------------------------- + +Current state: ChampionX.Level7.ChunLiVsBlanka +----------------------------------------- +| time/ | | +| fps | 454 | +| iterations | 33 | +| time_elapsed | 3137 | +| total_timesteps | 1425600 | +| train/ | | +| approx_kl | 0.022631256 | +| clip_fraction | 0.196 | +| clip_range | 0.2 | +| entropy_loss | -8.03 | +| explained_variance | 0.81 | +| learning_rate | 0.0002 | +| loss | 0.664 | +| n_updates | 610 | +| policy_gradient_loss | 0.0058 | +| value_loss | 21.7 | +----------------------------------------- + +Current state: ChampionX.Level8.ChunLiVsGuile +----------------------------------------- +| time/ | | +| fps | 454 | +| iterations | 34 | +| time_elapsed | 3234 | +| total_timesteps | 1468800 | +| train/ | | +| approx_kl | 0.019701418 | +| clip_fraction | 0.172 | +| clip_range | 0.2 | +| entropy_loss | -8.07 | +| explained_variance | 0.833 | +| learning_rate | 0.0002 | +| loss | 2.15 | +| n_updates | 620 | +| policy_gradient_loss | 0.00112 | +| value_loss | 22.3 | +----------------------------------------- + +Current state: ChampionX.Level6.ChunLiVsEHonda 
+----------------------------------------- +| time/ | | +| fps | 453 | +| iterations | 35 | +| time_elapsed | 3332 | +| total_timesteps | 1512000 | +| train/ | | +| approx_kl | 0.020245243 | +| clip_fraction | 0.183 | +| clip_range | 0.2 | +| entropy_loss | -8.05 | +| explained_variance | 0.815 | +| learning_rate | 0.0002 | +| loss | 0.494 | +| n_updates | 630 | +| policy_gradient_loss | -0.00146 | +| value_loss | 13.8 | +----------------------------------------- + +Current state: ChampionX.Level10.ChunLiVsVega +----------------------------------------- +| time/ | | +| fps | 453 | +| iterations | 36 | +| time_elapsed | 3430 | +| total_timesteps | 1555200 | +| train/ | | +| approx_kl | 0.022184841 | +| clip_fraction | 0.187 | +| clip_range | 0.2 | +| entropy_loss | -8.02 | +| explained_variance | 0.761 | +| learning_rate | 0.0002 | +| loss | 0.232 | +| n_updates | 640 | +| policy_gradient_loss | 0.00242 | +| value_loss | 18.8 | +----------------------------------------- + +Current state: ChampionX.Level4.ChunLiVsDhalsim +---------------------------------------- +| time/ | | +| fps | 453 | +| iterations | 37 | +| time_elapsed | 3526 | +| total_timesteps | 1598400 | +| train/ | | +| approx_kl | 0.01909801 | +| clip_fraction | 0.172 | +| clip_range | 0.2 | +| entropy_loss | -8.07 | +| explained_variance | 0.82 | +| learning_rate | 0.0002 | +| loss | 1.39 | +| n_updates | 650 | +| policy_gradient_loss | 0.00125 | +| value_loss | 18.5 | +---------------------------------------- + +Current state: ChampionX.Level11.ChunLiVsSagat +----------------------------------------- +| time/ | | +| fps | 453 | +| iterations | 38 | +| time_elapsed | 3623 | +| total_timesteps | 1641600 | +| train/ | | +| approx_kl | 0.019127825 | +| clip_fraction | 0.175 | +| clip_range | 0.2 | +| entropy_loss | -8.1 | +| explained_variance | 0.796 | +| learning_rate | 0.0002 | +| loss | 0.646 | +| n_updates | 660 | +| policy_gradient_loss | -0.0033 | +| value_loss | 15 | 
+----------------------------------------- + +Current state: ChampionX.Level2.ChunLiVsChunLi +----------------------------------------- +| time/ | | +| fps | 452 | +| iterations | 39 | +| time_elapsed | 3720 | +| total_timesteps | 1684800 | +| train/ | | +| approx_kl | 0.018327592 | +| clip_fraction | 0.179 | +| clip_range | 0.2 | +| entropy_loss | -8.1 | +| explained_variance | 0.718 | +| learning_rate | 0.0002 | +| loss | 0.905 | +| n_updates | 670 | +| policy_gradient_loss | 0.000898 | +| value_loss | 14.6 | +----------------------------------------- + +Current state: ChampionX.Level1.ChunLiVsKen +----------------------------------------- +| time/ | | +| fps | 452 | +| iterations | 40 | +| time_elapsed | 3818 | +| total_timesteps | 1728000 | +| train/ | | +| approx_kl | 0.019133803 | +| clip_fraction | 0.173 | +| clip_range | 0.2 | +| entropy_loss | -8.03 | +| explained_variance | 0.839 | +| learning_rate | 0.0002 | +| loss | 0.95 | +| n_updates | 680 | +| policy_gradient_loss | -0.00206 | +| value_loss | 17.9 | +----------------------------------------- + +Current state: ChampionX.Level1.ChunLiVsKen +----------------------------------------- +| time/ | | +| fps | 452 | +| iterations | 41 | +| time_elapsed | 3916 | +| total_timesteps | 1771200 | +| train/ | | +| approx_kl | 0.021123584 | +| clip_fraction | 0.202 | +| clip_range | 0.2 | +| entropy_loss | -8.06 | +| explained_variance | 0.775 | +| learning_rate | 0.0002 | +| loss | 0.615 | +| n_updates | 690 | +| policy_gradient_loss | 0.000314 | +| value_loss | 14 | +----------------------------------------- + +Current state: ChampionX.Level2.ChunLiVsChunLi +----------------------------------------- +| time/ | | +| fps | 452 | +| iterations | 42 | +| time_elapsed | 4013 | +| total_timesteps | 1814400 | +| train/ | | +| approx_kl | 0.018802634 | +| clip_fraction | 0.164 | +| clip_range | 0.2 | +| entropy_loss | -8.04 | +| explained_variance | 0.811 | +| learning_rate | 0.0002 | +| loss | 0.295 | +| n_updates | 700 
| +| policy_gradient_loss | 0.00141 | +| value_loss | 19.3 | +----------------------------------------- + +Current state: ChampionX.Level9.ChunLiVsBalrog +---------------------------------------- +| time/ | | +| fps | 451 | +| iterations | 43 | +| time_elapsed | 4109 | +| total_timesteps | 1857600 | +| train/ | | +| approx_kl | 0.01865595 | +| clip_fraction | 0.169 | +| clip_range | 0.2 | +| entropy_loss | -8.04 | +| explained_variance | 0.818 | +| learning_rate | 0.0002 | +| loss | 0.357 | +| n_updates | 710 | +| policy_gradient_loss | 0.000324 | +| value_loss | 19.1 | +---------------------------------------- + +Current state: ChampionX.Level11.ChunLiVsSagat +----------------------------------------- +| time/ | | +| fps | 451 | +| iterations | 44 | +| time_elapsed | 4205 | +| total_timesteps | 1900800 | +| train/ | | +| approx_kl | 0.022585243 | +| clip_fraction | 0.195 | +| clip_range | 0.2 | +| entropy_loss | -8.04 | +| explained_variance | 0.768 | +| learning_rate | 0.0002 | +| loss | 0.515 | +| n_updates | 720 | +| policy_gradient_loss | 0.00268 | +| value_loss | 18.3 | +----------------------------------------- + +Current state: ChampionX.Level9.ChunLiVsBalrog +----------------------------------------- +| time/ | | +| fps | 451 | +| iterations | 45 | +| time_elapsed | 4301 | +| total_timesteps | 1944000 | +| train/ | | +| approx_kl | 0.020417377 | +| clip_fraction | 0.173 | +| clip_range | 0.2 | +| entropy_loss | -8.07 | +| explained_variance | 0.683 | +| learning_rate | 0.0002 | +| loss | 0.654 | +| n_updates | 730 | +| policy_gradient_loss | 0.00203 | +| value_loss | 20.7 | +----------------------------------------- + +Current state: ChampionX.Level8.ChunLiVsGuile +---------------------------------------- +| time/ | | +| fps | 451 | +| iterations | 46 | +| time_elapsed | 4397 | +| total_timesteps | 1987200 | +| train/ | | +| approx_kl | 0.01640241 | +| clip_fraction | 0.136 | +| clip_range | 0.2 | +| entropy_loss | -8.09 | +| explained_variance | 0.855 | 
+| learning_rate | 0.0002 | +| loss | 0.681 | +| n_updates | 740 | +| policy_gradient_loss | -0.00244 | +| value_loss | 18.1 | +---------------------------------------- + +Current state: ChampionX.Level9.ChunLiVsBalrog +----------------------------------------- +| time/ | | +| fps | 451 | +| iterations | 47 | +| time_elapsed | 4492 | +| total_timesteps | 2030400 | +| train/ | | +| approx_kl | 0.020942345 | +| clip_fraction | 0.155 | +| clip_range | 0.2 | +| entropy_loss | -8.09 | +| explained_variance | 0.799 | +| learning_rate | 0.0002 | +| loss | 0.847 | +| n_updates | 750 | +| policy_gradient_loss | 0.00232 | +| value_loss | 20.4 | +----------------------------------------- + +Current state: ChampionX.Level6.ChunLiVsEHonda +---------------------------------------- +| time/ | | +| fps | 451 | +| iterations | 48 | +| time_elapsed | 4588 | +| total_timesteps | 2073600 | +| train/ | | +| approx_kl | 0.02003836 | +| clip_fraction | 0.168 | +| clip_range | 0.2 | +| entropy_loss | -8.07 | +| explained_variance | 0.815 | +| learning_rate | 0.0002 | +| loss | 0.501 | +| n_updates | 760 | +| policy_gradient_loss | -0.0017 | +| value_loss | 13.2 | +---------------------------------------- + +Current state: ChampionX.Level8.ChunLiVsGuile +----------------------------------------- +| time/ | | +| fps | 451 | +| iterations | 49 | +| time_elapsed | 4684 | +| total_timesteps | 2116800 | +| train/ | | +| approx_kl | 0.022403738 | +| clip_fraction | 0.173 | +| clip_range | 0.2 | +| entropy_loss | -8.1 | +| explained_variance | 0.735 | +| learning_rate | 0.0002 | +| loss | 4.1 | +| n_updates | 770 | +| policy_gradient_loss | 0.00325 | +| value_loss | 35.2 | +----------------------------------------- + +Current state: ChampionX.Level1.ChunLiVsKen +----------------------------------------- +| time/ | | +| fps | 451 | +| iterations | 50 | +| time_elapsed | 4780 | +| total_timesteps | 2160000 | +| train/ | | +| approx_kl | 0.020465719 | +| clip_fraction | 0.171 | +| clip_range | 0.2 | 
+| entropy_loss | -8.09 | +| explained_variance | 0.814 | +| learning_rate | 0.0002 | +| loss | 0.346 | +| n_updates | 780 | +| policy_gradient_loss | 0.00119 | +| value_loss | 18.8 | +----------------------------------------- + +Current state: ChampionX.Level6.ChunLiVsEHonda +----------------------------------------- +| time/ | | +| fps | 451 | +| iterations | 51 | +| time_elapsed | 4877 | +| total_timesteps | 2203200 | +| train/ | | +| approx_kl | 0.019918704 | +| clip_fraction | 0.163 | +| clip_range | 0.2 | +| entropy_loss | -8.07 | +| explained_variance | 0.823 | +| learning_rate | 0.0002 | +| loss | 0.223 | +| n_updates | 790 | +| policy_gradient_loss | -0.0011 | +| value_loss | 16 | +----------------------------------------- + +Current state: ChampionX.Level8.ChunLiVsGuile +----------------------------------------- +| time/ | | +| fps | 451 | +| iterations | 52 | +| time_elapsed | 4973 | +| total_timesteps | 2246400 | +| train/ | | +| approx_kl | 0.026293177 | +| clip_fraction | 0.189 | +| clip_range | 0.2 | +| entropy_loss | -8.01 | +| explained_variance | 0.786 | +| learning_rate | 0.0002 | +| loss | 1.37 | +| n_updates | 800 | +| policy_gradient_loss | 0.00725 | +| value_loss | 21.2 | +----------------------------------------- + +Current state: ChampionX.Level8.ChunLiVsGuile +----------------------------------------- +| time/ | | +| fps | 451 | +| iterations | 53 | +| time_elapsed | 5068 | +| total_timesteps | 2289600 | +| train/ | | +| approx_kl | 0.018323697 | +| clip_fraction | 0.159 | +| clip_range | 0.2 | +| entropy_loss | -8.06 | +| explained_variance | 0.822 | +| learning_rate | 0.0002 | +| loss | 1.02 | +| n_updates | 810 | +| policy_gradient_loss | 0.000499 | +| value_loss | 17.8 | +----------------------------------------- + +Current state: ChampionX.Level12.ChunLiVsBison +----------------------------------------- +| time/ | | +| fps | 451 | +| iterations | 54 | +| time_elapsed | 5164 | +| total_timesteps | 2332800 | +| train/ | | +| approx_kl | 
0.022256708 | +| clip_fraction | 0.186 | +| clip_range | 0.2 | +| entropy_loss | -8.04 | +| explained_variance | 0.781 | +| learning_rate | 0.0002 | +| loss | 0.717 | +| n_updates | 820 | +| policy_gradient_loss | 0.00159 | +| value_loss | 16.5 | +----------------------------------------- + +Current state: ChampionX.Level7.ChunLiVsBlanka +----------------------------------------- +| time/ | | +| fps | 451 | +| iterations | 55 | +| time_elapsed | 5260 | +| total_timesteps | 2376000 | +| train/ | | +| approx_kl | 0.020457426 | +| clip_fraction | 0.177 | +| clip_range | 0.2 | +| entropy_loss | -7.99 | +| explained_variance | 0.791 | +| learning_rate | 0.0002 | +| loss | 2.93 | +| n_updates | 830 | +| policy_gradient_loss | -0.00147 | +| value_loss | 17.3 | +----------------------------------------- + +Current state: ChampionX.Level7.ChunLiVsBlanka +--------------------------------------- +| time/ | | +| fps | 451 | +| iterations | 56 | +| time_elapsed | 5356 | +| total_timesteps | 2419200 | +| train/ | | +| approx_kl | 0.0213855 | +| clip_fraction | 0.178 | +| clip_range | 0.2 | +| entropy_loss | -8.05 | +| explained_variance | 0.728 | +| learning_rate | 0.0002 | +| loss | 0.302 | +| n_updates | 840 | +| policy_gradient_loss | 0.00053 | +| value_loss | 17.1 | +--------------------------------------- + +Current state: ChampionX.Level10.ChunLiVsVega +----------------------------------------- +| time/ | | +| fps | 451 | +| iterations | 57 | +| time_elapsed | 5451 | +| total_timesteps | 2462400 | +| train/ | | +| approx_kl | 0.021137744 | +| clip_fraction | 0.173 | +| clip_range | 0.2 | +| entropy_loss | -8.08 | +| explained_variance | 0.788 | +| learning_rate | 0.0002 | +| loss | 0.303 | +| n_updates | 850 | +| policy_gradient_loss | -0.00111 | +| value_loss | 14.6 | +----------------------------------------- + +Current state: ChampionX.Level9.ChunLiVsBalrog +---------------------------------------- +| time/ | | +| fps | 451 | +| iterations | 58 | +| time_elapsed | 5547 
| +| total_timesteps | 2505600 | +| train/ | | +| approx_kl | 0.02023245 | +| clip_fraction | 0.169 | +| clip_range | 0.2 | +| entropy_loss | -8.05 | +| explained_variance | 0.816 | +| learning_rate | 0.0002 | +| loss | 0.361 | +| n_updates | 860 | +| policy_gradient_loss | 0.000275 | +| value_loss | 16.8 | +---------------------------------------- + +Current state: ChampionX.Level12.ChunLiVsBison +----------------------------------------- +| time/ | | +| fps | 451 | +| iterations | 59 | +| time_elapsed | 5643 | +| total_timesteps | 2548800 | +| train/ | | +| approx_kl | 0.019979084 | +| clip_fraction | 0.175 | +| clip_range | 0.2 | +| entropy_loss | -8.04 | +| explained_variance | 0.791 | +| learning_rate | 0.0002 | +| loss | 0.204 | +| n_updates | 870 | +| policy_gradient_loss | -0.00152 | +| value_loss | 12.4 | +----------------------------------------- + +Current state: ChampionX.Level7.ChunLiVsBlanka +---------------------------------------- +| time/ | | +| fps | 451 | +| iterations | 60 | +| time_elapsed | 5740 | +| total_timesteps | 2592000 | +| train/ | | +| approx_kl | 0.02290177 | +| clip_fraction | 0.189 | +| clip_range | 0.2 | +| entropy_loss | -8.06 | +| explained_variance | 0.744 | +| learning_rate | 0.0002 | +| loss | 0.599 | +| n_updates | 880 | +| policy_gradient_loss | 0.00403 | +| value_loss | 22.4 | +---------------------------------------- + +Current state: ChampionX.Level8.ChunLiVsGuile +----------------------------------------- +| time/ | | +| fps | 451 | +| iterations | 61 | +| time_elapsed | 5837 | +| total_timesteps | 2635200 | +| train/ | | +| approx_kl | 0.019065047 | +| clip_fraction | 0.172 | +| clip_range | 0.2 | +| entropy_loss | -8.06 | +| explained_variance | 0.736 | +| learning_rate | 0.0002 | +| loss | 0.933 | +| n_updates | 890 | +| policy_gradient_loss | -0.000417 | +| value_loss | 20.4 | +----------------------------------------- + +Current state: ChampionX.Level6.ChunLiVsEHonda +----------------------------------------- +| 
time/ | | +| fps | 451 | +| iterations | 62 | +| time_elapsed | 5935 | +| total_timesteps | 2678400 | +| train/ | | +| approx_kl | 0.018739864 | +| clip_fraction | 0.173 | +| clip_range | 0.2 | +| entropy_loss | -8.08 | +| explained_variance | 0.818 | +| learning_rate | 0.0002 | +| loss | 1.44 | +| n_updates | 900 | +| policy_gradient_loss | -0.002 | +| value_loss | 15.5 | +----------------------------------------- + +Current state: ChampionX.Level5.ChunLiVsRyu +---------------------------------------- +| time/ | | +| fps | 451 | +| iterations | 63 | +| time_elapsed | 6032 | +| total_timesteps | 2721600 | +| train/ | | +| approx_kl | 0.02123648 | +| clip_fraction | 0.172 | +| clip_range | 0.2 | +| entropy_loss | -8.03 | +| explained_variance | 0.82 | +| learning_rate | 0.0002 | +| loss | 0.792 | +| n_updates | 910 | +| policy_gradient_loss | 8.58e-05 | +| value_loss | 16 | +---------------------------------------- + +Current state: ChampionX.Level9.ChunLiVsBalrog +----------------------------------------- +| time/ | | +| fps | 450 | +| iterations | 64 | +| time_elapsed | 6130 | +| total_timesteps | 2764800 | +| train/ | | +| approx_kl | 0.024432074 | +| clip_fraction | 0.209 | +| clip_range | 0.2 | +| entropy_loss | -7.99 | +| explained_variance | 0.829 | +| learning_rate | 0.0002 | +| loss | 0.864 | +| n_updates | 920 | +| policy_gradient_loss | 0.00649 | +| value_loss | 20 | +----------------------------------------- + +Current state: ChampionX.Level12.ChunLiVsBison +----------------------------------------- +| time/ | | +| fps | 450 | +| iterations | 65 | +| time_elapsed | 6228 | +| total_timesteps | 2808000 | +| train/ | | +| approx_kl | 0.022781633 | +| clip_fraction | 0.184 | +| clip_range | 0.2 | +| entropy_loss | -8.07 | +| explained_variance | 0.78 | +| learning_rate | 0.0002 | +| loss | 2.75 | +| n_updates | 930 | +| policy_gradient_loss | 0.00143 | +| value_loss | 16.6 | +----------------------------------------- + +Current state: 
ChampionX.Level7.ChunLiVsBlanka +----------------------------------------- +| time/ | | +| fps | 450 | +| iterations | 66 | +| time_elapsed | 6327 | +| total_timesteps | 2851200 | +| train/ | | +| approx_kl | 0.020004842 | +| clip_fraction | 0.165 | +| clip_range | 0.2 | +| entropy_loss | -8.07 | +| explained_variance | 0.784 | +| learning_rate | 0.0002 | +| loss | 0.68 | +| n_updates | 940 | +| policy_gradient_loss | -0.000158 | +| value_loss | 24.8 | +----------------------------------------- + +Current state: ChampionX.Level4.ChunLiVsDhalsim +----------------------------------------- +| time/ | | +| fps | 450 | +| iterations | 67 | +| time_elapsed | 6425 | +| total_timesteps | 2894400 | +| train/ | | +| approx_kl | 0.019052736 | +| clip_fraction | 0.177 | +| clip_range | 0.2 | +| entropy_loss | -8.07 | +| explained_variance | 0.801 | +| learning_rate | 0.0002 | +| loss | 0.805 | +| n_updates | 950 | +| policy_gradient_loss | -0.00147 | +| value_loss | 16.7 | +----------------------------------------- + +Current state: ChampionX.Level10.ChunLiVsVega +----------------------------------------- +| time/ | | +| fps | 450 | +| iterations | 68 | +| time_elapsed | 6522 | +| total_timesteps | 2937600 | +| train/ | | +| approx_kl | 0.018338915 | +| clip_fraction | 0.166 | +| clip_range | 0.2 | +| entropy_loss | -8.07 | +| explained_variance | 0.824 | +| learning_rate | 0.0002 | +| loss | 0.278 | +| n_updates | 960 | +| policy_gradient_loss | -0.00394 | +| value_loss | 14.7 | +----------------------------------------- + +Current state: ChampionX.Level12.ChunLiVsBison +----------------------------------------- +| time/ | | +| fps | 450 | +| iterations | 69 | +| time_elapsed | 6619 | +| total_timesteps | 2980800 | +| train/ | | +| approx_kl | 0.022207119 | +| clip_fraction | 0.203 | +| clip_range | 0.2 | +| entropy_loss | -8.03 | +| explained_variance | 0.777 | +| learning_rate | 0.0002 | +| loss | 1.76 | +| n_updates | 970 | +| policy_gradient_loss | 0.00349 | +| value_loss 
| 21.1 | +----------------------------------------- + +Current state: ChampionX.Level4.ChunLiVsDhalsim +----------------------------------------- +| time/ | | +| fps | 450 | +| iterations | 70 | +| time_elapsed | 6717 | +| total_timesteps | 3024000 | +| train/ | | +| approx_kl | 0.023251278 | +| clip_fraction | 0.207 | +| clip_range | 0.2 | +| entropy_loss | -8 | +| explained_variance | 0.769 | +| learning_rate | 0.0002 | +| loss | 0.308 | +| n_updates | 980 | +| policy_gradient_loss | 0.00178 | +| value_loss | 16.2 | +----------------------------------------- + +Current state: ChampionX.Level11.ChunLiVsSagat +----------------------------------------- +| time/ | | +| fps | 450 | +| iterations | 71 | +| time_elapsed | 6815 | +| total_timesteps | 3067200 | +| train/ | | +| approx_kl | 0.018753793 | +| clip_fraction | 0.166 | +| clip_range | 0.2 | +| entropy_loss | -8.09 | +| explained_variance | 0.772 | +| learning_rate | 0.0002 | +| loss | 1.91 | +| n_updates | 990 | +| policy_gradient_loss | 0.000509 | +| value_loss | 20 | +----------------------------------------- + +Current state: ChampionX.Level4.ChunLiVsDhalsim +----------------------------------------- +| time/ | | +| fps | 449 | +| iterations | 72 | +| time_elapsed | 6913 | +| total_timesteps | 3110400 | +| train/ | | +| approx_kl | 0.018791752 | +| clip_fraction | 0.185 | +| clip_range | 0.2 | +| entropy_loss | -8.01 | +| explained_variance | 0.716 | +| learning_rate | 0.0002 | +| loss | 0.526 | +| n_updates | 1000 | +| policy_gradient_loss | 0.00248 | +| value_loss | 17.2 | +----------------------------------------- + +Current state: ChampionX.Level3.ChunLiVsZangief +---------------------------------------- +| time/ | | +| fps | 449 | +| iterations | 73 | +| time_elapsed | 7011 | +| total_timesteps | 3153600 | +| train/ | | +| approx_kl | 0.02178302 | +| clip_fraction | 0.18 | +| clip_range | 0.2 | +| entropy_loss | -8.03 | +| explained_variance | 0.675 | +| learning_rate | 0.0002 | +| loss | 2.77 | +| 
n_updates | 1010 | +| policy_gradient_loss | 0.00759 | +| value_loss | 30.3 | +---------------------------------------- + +Current state: ChampionX.Level11.ChunLiVsSagat +----------------------------------------- +| time/ | | +| fps | 449 | +| iterations | 74 | +| time_elapsed | 7108 | +| total_timesteps | 3196800 | +| train/ | | +| approx_kl | 0.019278381 | +| clip_fraction | 0.171 | +| clip_range | 0.2 | +| entropy_loss | -8.05 | +| explained_variance | 0.748 | +| learning_rate | 0.0002 | +| loss | 0.566 | +| n_updates | 1020 | +| policy_gradient_loss | 0.000132 | +| value_loss | 15.9 | +----------------------------------------- + +Current state: ChampionX.Level6.ChunLiVsEHonda +----------------------------------------- +| time/ | | +| fps | 449 | +| iterations | 75 | +| time_elapsed | 7206 | +| total_timesteps | 3240000 | +| train/ | | +| approx_kl | 0.018280571 | +| clip_fraction | 0.153 | +| clip_range | 0.2 | +| entropy_loss | -8.04 | +| explained_variance | 0.716 | +| learning_rate | 0.0002 | +| loss | 2.45 | +| n_updates | 1030 | +| policy_gradient_loss | 0.000711 | +| value_loss | 23.1 | +----------------------------------------- + +Current state: ChampionX.Level1.ChunLiVsKen +----------------------------------------- +| time/ | | +| fps | 449 | +| iterations | 76 | +| time_elapsed | 7303 | +| total_timesteps | 3283200 | +| train/ | | +| approx_kl | 0.017658442 | +| clip_fraction | 0.154 | +| clip_range | 0.2 | +| entropy_loss | -8.07 | +| explained_variance | 0.838 | +| learning_rate | 0.0002 | +| loss | 1.92 | +| n_updates | 1040 | +| policy_gradient_loss | 0.000735 | +| value_loss | 20 | +----------------------------------------- + +Current state: ChampionX.Level8.ChunLiVsGuile +----------------------------------------- +| time/ | | +| fps | 449 | +| iterations | 77 | +| time_elapsed | 7401 | +| total_timesteps | 3326400 | +| train/ | | +| approx_kl | 0.019725492 | +| clip_fraction | 0.176 | +| clip_range | 0.2 | +| entropy_loss | -8.05 | +| 
explained_variance | 0.791 | +| learning_rate | 0.0002 | +| loss | 2.38 | +| n_updates | 1050 | +| policy_gradient_loss | 0.00148 | +| value_loss | 28.7 | +----------------------------------------- + +Current state: ChampionX.Level4.ChunLiVsDhalsim +----------------------------------------- +| time/ | | +| fps | 449 | +| iterations | 78 | +| time_elapsed | 7498 | +| total_timesteps | 3369600 | +| train/ | | +| approx_kl | 0.016949095 | +| clip_fraction | 0.152 | +| clip_range | 0.2 | +| entropy_loss | -8.09 | +| explained_variance | 0.784 | +| learning_rate | 0.0002 | +| loss | 0.878 | +| n_updates | 1060 | +| policy_gradient_loss | -0.00178 | +| value_loss | 19.1 | +----------------------------------------- + +Current state: ChampionX.Level11.ChunLiVsSagat +---------------------------------------- +| time/ | | +| fps | 449 | +| iterations | 79 | +| time_elapsed | 7596 | +| total_timesteps | 3412800 | +| train/ | | +| approx_kl | 0.02026636 | +| clip_fraction | 0.181 | +| clip_range | 0.2 | +| entropy_loss | -8.05 | +| explained_variance | 0.775 | +| learning_rate | 0.0002 | +| loss | 3.35 | +| n_updates | 1070 | +| policy_gradient_loss | 0.000907 | +| value_loss | 15 | +---------------------------------------- + +Current state: ChampionX.Level7.ChunLiVsBlanka +----------------------------------------- +| time/ | | +| fps | 449 | +| iterations | 80 | +| time_elapsed | 7693 | +| total_timesteps | 3456000 | +| train/ | | +| approx_kl | 0.020292694 | +| clip_fraction | 0.172 | +| clip_range | 0.2 | +| entropy_loss | -8.08 | +| explained_variance | 0.729 | +| learning_rate | 0.0002 | +| loss | 0.937 | +| n_updates | 1080 | +| policy_gradient_loss | -0.000479 | +| value_loss | 14.2 | +----------------------------------------- + +Current state: ChampionX.Level11.ChunLiVsSagat +----------------------------------------- +| time/ | | +| fps | 449 | +| iterations | 81 | +| time_elapsed | 7790 | +| total_timesteps | 3499200 | +| train/ | | +| approx_kl | 0.021046823 | +| 
clip_fraction | 0.17 | +| clip_range | 0.2 | +| entropy_loss | -8.04 | +| explained_variance | 0.814 | +| learning_rate | 0.0002 | +| loss | 1.19 | +| n_updates | 1090 | +| policy_gradient_loss | 0.00343 | +| value_loss | 21.8 | +----------------------------------------- + +Current state: ChampionX.Level5.ChunLiVsRyu +----------------------------------------- +| time/ | | +| fps | 449 | +| iterations | 82 | +| time_elapsed | 7888 | +| total_timesteps | 3542400 | +| train/ | | +| approx_kl | 0.018265078 | +| clip_fraction | 0.16 | +| clip_range | 0.2 | +| entropy_loss | -8.1 | +| explained_variance | 0.771 | +| learning_rate | 0.0002 | +| loss | 1.41 | +| n_updates | 1100 | +| policy_gradient_loss | -0.00154 | +| value_loss | 17.9 | +----------------------------------------- + +Current state: ChampionX.Level3.ChunLiVsZangief +---------------------------------------- +| time/ | | +| fps | 448 | +| iterations | 83 | +| time_elapsed | 7986 | +| total_timesteps | 3585600 | +| train/ | | +| approx_kl | 0.01761453 | +| clip_fraction | 0.156 | +| clip_range | 0.2 | +| entropy_loss | -8.08 | +| explained_variance | 0.848 | +| learning_rate | 0.0002 | +| loss | 1.5 | +| n_updates | 1110 | +| policy_gradient_loss | 4e-05 | +| value_loss | 22.4 | +---------------------------------------- + +Current state: ChampionX.Level6.ChunLiVsEHonda +----------------------------------------- +| time/ | | +| fps | 448 | +| iterations | 84 | +| time_elapsed | 8083 | +| total_timesteps | 3628800 | +| train/ | | +| approx_kl | 0.019479048 | +| clip_fraction | 0.167 | +| clip_range | 0.2 | +| entropy_loss | -8.13 | +| explained_variance | 0.782 | +| learning_rate | 0.0002 | +| loss | 2.93 | +| n_updates | 1120 | +| policy_gradient_loss | -0.00179 | +| value_loss | 16.4 | +----------------------------------------- + +Current state: ChampionX.Level4.ChunLiVsDhalsim +----------------------------------------- +| time/ | | +| fps | 448 | +| iterations | 85 | +| time_elapsed | 8180 | +| 
total_timesteps | 3672000 | +| train/ | | +| approx_kl | 0.017283197 | +| clip_fraction | 0.149 | +| clip_range | 0.2 | +| entropy_loss | -8.12 | +| explained_variance | 0.791 | +| learning_rate | 0.0002 | +| loss | 1.54 | +| n_updates | 1130 | +| policy_gradient_loss | -0.00178 | +| value_loss | 20.1 | +----------------------------------------- + +Current state: ChampionX.Level3.ChunLiVsZangief +----------------------------------------- +| time/ | | +| fps | 448 | +| iterations | 86 | +| time_elapsed | 8278 | +| total_timesteps | 3715200 | +| train/ | | +| approx_kl | 0.019106768 | +| clip_fraction | 0.178 | +| clip_range | 0.2 | +| entropy_loss | -8.09 | +| explained_variance | 0.787 | +| learning_rate | 0.0002 | +| loss | 4.53 | +| n_updates | 1140 | +| policy_gradient_loss | 0.00461 | +| value_loss | 25.3 | +----------------------------------------- + +Current state: ChampionX.Level2.ChunLiVsChunLi +----------------------------------------- +| time/ | | +| fps | 448 | +| iterations | 87 | +| time_elapsed | 8376 | +| total_timesteps | 3758400 | +| train/ | | +| approx_kl | 0.019611303 | +| clip_fraction | 0.182 | +| clip_range | 0.2 | +| entropy_loss | -8.09 | +| explained_variance | 0.782 | +| learning_rate | 0.0002 | +| loss | 1.7 | +| n_updates | 1150 | +| policy_gradient_loss | 0.000516 | +| value_loss | 16.3 | +----------------------------------------- + +Current state: ChampionX.Level1.ChunLiVsKen +----------------------------------------- +| time/ | | +| fps | 448 | +| iterations | 88 | +| time_elapsed | 8473 | +| total_timesteps | 3801600 | +| train/ | | +| approx_kl | 0.017416934 | +| clip_fraction | 0.168 | +| clip_range | 0.2 | +| entropy_loss | -8.05 | +| explained_variance | 0.773 | +| learning_rate | 0.0002 | +| loss | 2.32 | +| n_updates | 1160 | +| policy_gradient_loss | 0.000683 | +| value_loss | 26.5 | +----------------------------------------- + +Current state: ChampionX.Level10.ChunLiVsVega +----------------------------------------- +| time/ 
| | +| fps | 448 | +| iterations | 89 | +| time_elapsed | 8570 | +| total_timesteps | 3844800 | +| train/ | | +| approx_kl | 0.020442067 | +| clip_fraction | 0.18 | +| clip_range | 0.2 | +| entropy_loss | -8.11 | +| explained_variance | 0.799 | +| learning_rate | 0.0002 | +| loss | 0.598 | +| n_updates | 1170 | +| policy_gradient_loss | 0.00176 | +| value_loss | 15 | +----------------------------------------- + +Current state: ChampionX.Level12.ChunLiVsBison +----------------------------------------- +| time/ | | +| fps | 448 | +| iterations | 90 | +| time_elapsed | 8668 | +| total_timesteps | 3888000 | +| train/ | | +| approx_kl | 0.017660897 | +| clip_fraction | 0.159 | +| clip_range | 0.2 | +| entropy_loss | -8.04 | +| explained_variance | 0.795 | +| learning_rate | 0.0002 | +| loss | 0.928 | +| n_updates | 1180 | +| policy_gradient_loss | 0.00123 | +| value_loss | 17.6 | +----------------------------------------- + +Current state: ChampionX.Level1.ChunLiVsKen +----------------------------------------- +| time/ | | +| fps | 448 | +| iterations | 91 | +| time_elapsed | 8766 | +| total_timesteps | 3931200 | +| train/ | | +| approx_kl | 0.016381918 | +| clip_fraction | 0.163 | +| clip_range | 0.2 | +| entropy_loss | -8.06 | +| explained_variance | 0.811 | +| learning_rate | 0.0002 | +| loss | 0.488 | +| n_updates | 1190 | +| policy_gradient_loss | 0.000215 | +| value_loss | 13.9 | +----------------------------------------- + +Current state: ChampionX.Level11.ChunLiVsSagat +----------------------------------------- +| time/ | | +| fps | 448 | +| iterations | 92 | +| time_elapsed | 8863 | +| total_timesteps | 3974400 | +| train/ | | +| approx_kl | 0.017840233 | +| clip_fraction | 0.167 | +| clip_range | 0.2 | +| entropy_loss | -8.03 | +| explained_variance | 0.762 | +| learning_rate | 0.0002 | +| loss | 0.54 | +| n_updates | 1200 | +| policy_gradient_loss | 0.00261 | +| value_loss | 20.3 | +----------------------------------------- + +Current state: 
ChampionX.Level10.ChunLiVsVega +----------------------------------------- +| time/ | | +| fps | 448 | +| iterations | 93 | +| time_elapsed | 8961 | +| total_timesteps | 4017600 | +| train/ | | +| approx_kl | 0.020303266 | +| clip_fraction | 0.16 | +| clip_range | 0.2 | +| entropy_loss | -8.05 | +| explained_variance | 0.782 | +| learning_rate | 0.0002 | +| loss | 0.876 | +| n_updates | 1210 | +| policy_gradient_loss | 0.00217 | +| value_loss | 19.1 | +----------------------------------------- + +Current state: ChampionX.Level1.ChunLiVsKen +----------------------------------------- +| time/ | | +| fps | 448 | +| iterations | 94 | +| time_elapsed | 9058 | +| total_timesteps | 4060800 | +| train/ | | +| approx_kl | 0.018209128 | +| clip_fraction | 0.158 | +| clip_range | 0.2 | +| entropy_loss | -8.09 | +| explained_variance | 0.827 | +| learning_rate | 0.0002 | +| loss | 0.47 | +| n_updates | 1220 | +| policy_gradient_loss | 0.00344 | +| value_loss | 22.1 | +----------------------------------------- + +Current state: ChampionX.Level9.ChunLiVsBalrog +----------------------------------------- +| time/ | | +| fps | 448 | +| iterations | 95 | +| time_elapsed | 9155 | +| total_timesteps | 4104000 | +| train/ | | +| approx_kl | 0.016349936 | +| clip_fraction | 0.16 | +| clip_range | 0.2 | +| entropy_loss | -8.07 | +| explained_variance | 0.816 | +| learning_rate | 0.0002 | +| loss | 0.436 | +| n_updates | 1230 | +| policy_gradient_loss | -0.00384 | +| value_loss | 12.7 | +----------------------------------------- + +Current state: ChampionX.Level7.ChunLiVsBlanka +----------------------------------------- +| time/ | | +| fps | 448 | +| iterations | 96 | +| time_elapsed | 9253 | +| total_timesteps | 4147200 | +| train/ | | +| approx_kl | 0.016977612 | +| clip_fraction | 0.148 | +| clip_range | 0.2 | +| entropy_loss | -8.09 | +| explained_variance | 0.807 | +| learning_rate | 0.0002 | +| loss | 0.708 | +| n_updates | 1240 | +| policy_gradient_loss | 0.000471 | +| value_loss | 
20.7 | +----------------------------------------- + +Current state: ChampionX.Level6.ChunLiVsEHonda +----------------------------------------- +| time/ | | +| fps | 448 | +| iterations | 97 | +| time_elapsed | 9349 | +| total_timesteps | 4190400 | +| train/ | | +| approx_kl | 0.020063082 | +| clip_fraction | 0.177 | +| clip_range | 0.2 | +| entropy_loss | -8.11 | +| explained_variance | 0.751 | +| learning_rate | 0.0002 | +| loss | 0.891 | +| n_updates | 1250 | +| policy_gradient_loss | 0.00348 | +| value_loss | 21.2 | +----------------------------------------- + +Current state: ChampionX.Level6.ChunLiVsEHonda +----------------------------------------- +| time/ | | +| fps | 448 | +| iterations | 98 | +| time_elapsed | 9445 | +| total_timesteps | 4233600 | +| train/ | | +| approx_kl | 0.019297507 | +| clip_fraction | 0.163 | +| clip_range | 0.2 | +| entropy_loss | -8.12 | +| explained_variance | 0.773 | +| learning_rate | 0.0002 | +| loss | 0.771 | +| n_updates | 1260 | +| policy_gradient_loss | 0.0029 | +| value_loss | 15.1 | +----------------------------------------- + +Current state: ChampionX.Level11.ChunLiVsSagat +----------------------------------------- +| time/ | | +| fps | 448 | +| iterations | 99 | +| time_elapsed | 9540 | +| total_timesteps | 4276800 | +| train/ | | +| approx_kl | 0.017202292 | +| clip_fraction | 0.154 | +| clip_range | 0.2 | +| entropy_loss | -8.08 | +| explained_variance | 0.818 | +| learning_rate | 0.0002 | +| loss | 1.97 | +| n_updates | 1270 | +| policy_gradient_loss | 0.00314 | +| value_loss | 22.2 | +----------------------------------------- + +Current state: ChampionX.Level8.ChunLiVsGuile +----------------------------------------- +| time/ | | +| fps | 448 | +| iterations | 100 | +| time_elapsed | 9635 | +| total_timesteps | 4320000 | +| train/ | | +| approx_kl | 0.019228933 | +| clip_fraction | 0.172 | +| clip_range | 0.2 | +| entropy_loss | -8.04 | +| explained_variance | 0.803 | +| learning_rate | 0.0002 | +| loss | 1.84 | +| 
n_updates | 1280 | +| policy_gradient_loss | 0.00495 | +| value_loss | 27.9 | +----------------------------------------- + +Current state: ChampionX.Level7.ChunLiVsBlanka +---------------------------------------- +| time/ | | +| fps | 448 | +| iterations | 101 | +| time_elapsed | 9732 | +| total_timesteps | 4363200 | +| train/ | | +| approx_kl | 0.01626399 | +| clip_fraction | 0.148 | +| clip_range | 0.2 | +| entropy_loss | -8.1 | +| explained_variance | 0.863 | +| learning_rate | 0.0002 | +| loss | 1.23 | +| n_updates | 1290 | +| policy_gradient_loss | -0.000295 | +| value_loss | 18.7 | +---------------------------------------- + +Current state: ChampionX.Level10.ChunLiVsVega +---------------------------------------- +| time/ | | +| fps | 448 | +| iterations | 102 | +| time_elapsed | 9827 | +| total_timesteps | 4406400 | +| train/ | | +| approx_kl | 0.01741675 | +| clip_fraction | 0.167 | +| clip_range | 0.2 | +| entropy_loss | -8.06 | +| explained_variance | 0.81 | +| learning_rate | 0.0002 | +| loss | 0.693 | +| n_updates | 1300 | +| policy_gradient_loss | 0.00085 | +| value_loss | 16.9 | +---------------------------------------- + +Current state: ChampionX.Level3.ChunLiVsZangief +----------------------------------------- +| time/ | | +| fps | 448 | +| iterations | 103 | +| time_elapsed | 9922 | +| total_timesteps | 4449600 | +| train/ | | +| approx_kl | 0.017767375 | +| clip_fraction | 0.146 | +| clip_range | 0.2 | +| entropy_loss | -8.12 | +| explained_variance | 0.782 | +| learning_rate | 0.0002 | +| loss | 0.44 | +| n_updates | 1310 | +| policy_gradient_loss | 0.000446 | +| value_loss | 16.6 | +----------------------------------------- + +Current state: ChampionX.Level7.ChunLiVsBlanka +----------------------------------------- +| time/ | | +| fps | 448 | +| iterations | 104 | +| time_elapsed | 10018 | +| total_timesteps | 4492800 | +| train/ | | +| approx_kl | 0.018537082 | +| clip_fraction | 0.177 | +| clip_range | 0.2 | +| entropy_loss | -8.06 | +| 
explained_variance | 0.782 | +| learning_rate | 0.0002 | +| loss | 0.594 | +| n_updates | 1320 | +| policy_gradient_loss | 0.00192 | +| value_loss | 16.5 | +----------------------------------------- + +Current state: ChampionX.Level10.ChunLiVsVega +----------------------------------------- +| time/ | | +| fps | 448 | +| iterations | 105 | +| time_elapsed | 10113 | +| total_timesteps | 4536000 | +| train/ | | +| approx_kl | 0.016387263 | +| clip_fraction | 0.151 | +| clip_range | 0.2 | +| entropy_loss | -8.08 | +| explained_variance | 0.779 | +| learning_rate | 0.0002 | +| loss | 0.897 | +| n_updates | 1330 | +| policy_gradient_loss | 0.00349 | +| value_loss | 24 | +----------------------------------------- + +Current state: ChampionX.Level3.ChunLiVsZangief +----------------------------------------- +| time/ | | +| fps | 448 | +| iterations | 106 | +| time_elapsed | 10208 | +| total_timesteps | 4579200 | +| train/ | | +| approx_kl | 0.016566757 | +| clip_fraction | 0.168 | +| clip_range | 0.2 | +| entropy_loss | -8.1 | +| explained_variance | 0.826 | +| learning_rate | 0.0002 | +| loss | 0.545 | +| n_updates | 1340 | +| policy_gradient_loss | 0.00131 | +| value_loss | 16.9 | +----------------------------------------- + +Current state: ChampionX.Level7.ChunLiVsBlanka +----------------------------------------- +| time/ | | +| fps | 448 | +| iterations | 107 | +| time_elapsed | 10304 | +| total_timesteps | 4622400 | +| train/ | | +| approx_kl | 0.015347375 | +| clip_fraction | 0.159 | +| clip_range | 0.2 | +| entropy_loss | -8.08 | +| explained_variance | 0.81 | +| learning_rate | 0.0002 | +| loss | 0.311 | +| n_updates | 1350 | +| policy_gradient_loss | -0.00268 | +| value_loss | 12.9 | +----------------------------------------- + +Current state: ChampionX.Level1.ChunLiVsKen +----------------------------------------- +| time/ | | +| fps | 448 | +| iterations | 108 | +| time_elapsed | 10400 | +| total_timesteps | 4665600 | +| train/ | | +| approx_kl | 0.016015483 | +| 
clip_fraction | 0.155 | +| clip_range | 0.2 | +| entropy_loss | -8.11 | +| explained_variance | 0.797 | +| learning_rate | 0.0002 | +| loss | 2.26 | +| n_updates | 1360 | +| policy_gradient_loss | -0.00208 | +| value_loss | 20.3 | +----------------------------------------- + +Current state: ChampionX.Level6.ChunLiVsEHonda +----------------------------------------- +| time/ | | +| fps | 448 | +| iterations | 109 | +| time_elapsed | 10495 | +| total_timesteps | 4708800 | +| train/ | | +| approx_kl | 0.016567804 | +| clip_fraction | 0.155 | +| clip_range | 0.2 | +| entropy_loss | -8.15 | +| explained_variance | 0.829 | +| learning_rate | 0.0002 | +| loss | 1.34 | +| n_updates | 1370 | +| policy_gradient_loss | 0.0028 | +| value_loss | 17.5 | +----------------------------------------- + +Current state: ChampionX.Level3.ChunLiVsZangief +----------------------------------------- +| time/ | | +| fps | 448 | +| iterations | 110 | +| time_elapsed | 10591 | +| total_timesteps | 4752000 | +| train/ | | +| approx_kl | 0.018200098 | +| clip_fraction | 0.168 | +| clip_range | 0.2 | +| entropy_loss | -8.13 | +| explained_variance | 0.831 | +| learning_rate | 0.0002 | +| loss | 0.665 | +| n_updates | 1380 | +| policy_gradient_loss | 0.00141 | +| value_loss | 19.2 | +----------------------------------------- + +Current state: ChampionX.Level7.ChunLiVsBlanka +----------------------------------------- +| time/ | | +| fps | 448 | +| iterations | 111 | +| time_elapsed | 10686 | +| total_timesteps | 4795200 | +| train/ | | +| approx_kl | 0.018930672 | +| clip_fraction | 0.185 | +| clip_range | 0.2 | +| entropy_loss | -8.15 | +| explained_variance | 0.833 | +| learning_rate | 0.0002 | +| loss | 1.09 | +| n_updates | 1390 | +| policy_gradient_loss | 0.00529 | +| value_loss | 19.5 | +----------------------------------------- + +Current state: ChampionX.Level9.ChunLiVsBalrog +----------------------------------------- +| time/ | | +| fps | 448 | +| iterations | 112 | +| time_elapsed | 10782 
| +| total_timesteps | 4838400 | +| train/ | | +| approx_kl | 0.015160192 | +| clip_fraction | 0.158 | +| clip_range | 0.2 | +| entropy_loss | -8.12 | +| explained_variance | 0.833 | +| learning_rate | 0.0002 | +| loss | 2.37 | +| n_updates | 1400 | +| policy_gradient_loss | -0.000663 | +| value_loss | 21 | +----------------------------------------- + +Current state: ChampionX.Level7.ChunLiVsBlanka +----------------------------------------- +| time/ | | +| fps | 448 | +| iterations | 113 | +| time_elapsed | 10878 | +| total_timesteps | 4881600 | +| train/ | | +| approx_kl | 0.017860955 | +| clip_fraction | 0.171 | +| clip_range | 0.2 | +| entropy_loss | -8.12 | +| explained_variance | 0.82 | +| learning_rate | 0.0002 | +| loss | 0.924 | +| n_updates | 1410 | +| policy_gradient_loss | -0.000111 | +| value_loss | 16.9 | +----------------------------------------- + +Current state: ChampionX.Level7.ChunLiVsBlanka +---------------------------------------- +| time/ | | +| fps | 448 | +| iterations | 114 | +| time_elapsed | 10974 | +| total_timesteps | 4924800 | +| train/ | | +| approx_kl | 0.02072464 | +| clip_fraction | 0.183 | +| clip_range | 0.2 | +| entropy_loss | -8.15 | +| explained_variance | 0.788 | +| learning_rate | 0.0002 | +| loss | 1.77 | +| n_updates | 1420 | +| policy_gradient_loss | 0.00299 | +| value_loss | 23.1 | +---------------------------------------- + +Current state: ChampionX.Level8.ChunLiVsGuile +----------------------------------------- +| time/ | | +| fps | 448 | +| iterations | 115 | +| time_elapsed | 11069 | +| total_timesteps | 4968000 | +| train/ | | +| approx_kl | 0.016052378 | +| clip_fraction | 0.158 | +| clip_range | 0.2 | +| entropy_loss | -8.16 | +| explained_variance | 0.845 | +| learning_rate | 0.0002 | +| loss | 0.692 | +| n_updates | 1430 | +| policy_gradient_loss | -0.00267 | +| value_loss | 16.4 | +----------------------------------------- + +Current state: ChampionX.Level12.ChunLiVsBison 
+----------------------------------------- +| time/ | | +| fps | 448 | +| iterations | 116 | +| time_elapsed | 11165 | +| total_timesteps | 5011200 | +| train/ | | +| approx_kl | 0.019034935 | +| clip_fraction | 0.177 | +| clip_range | 0.2 | +| entropy_loss | -8.15 | +| explained_variance | 0.814 | +| learning_rate | 0.0002 | +| loss | 1.25 | +| n_updates | 1440 | +| policy_gradient_loss | -0.000176 | +| value_loss | 20.7 | +----------------------------------------- + +Current state: ChampionX.Level7.ChunLiVsBlanka +----------------------------------------- +| time/ | | +| fps | 448 | +| iterations | 117 | +| time_elapsed | 11260 | +| total_timesteps | 5054400 | +| train/ | | +| approx_kl | 0.017005827 | +| clip_fraction | 0.179 | +| clip_range | 0.2 | +| entropy_loss | -8.15 | +| explained_variance | 0.811 | +| learning_rate | 0.0002 | +| loss | 2.66 | +| n_updates | 1450 | +| policy_gradient_loss | 0.000235 | +| value_loss | 14 | +----------------------------------------- + +Current state: ChampionX.Level1.ChunLiVsKen +----------------------------------------- +| time/ | | +| fps | 448 | +| iterations | 118 | +| time_elapsed | 11356 | +| total_timesteps | 5097600 | +| train/ | | +| approx_kl | 0.016972119 | +| clip_fraction | 0.169 | +| clip_range | 0.2 | +| entropy_loss | -8.16 | +| explained_variance | 0.785 | +| learning_rate | 0.0002 | +| loss | 0.495 | +| n_updates | 1460 | +| policy_gradient_loss | 0.00187 | +| value_loss | 19.5 | +----------------------------------------- + +Current state: ChampionX.Level2.ChunLiVsChunLi +----------------------------------------- +| time/ | | +| fps | 448 | +| iterations | 119 | +| time_elapsed | 11451 | +| total_timesteps | 5140800 | +| train/ | | +| approx_kl | 0.015783915 | +| clip_fraction | 0.159 | +| clip_range | 0.2 | +| entropy_loss | -8.15 | +| explained_variance | 0.812 | +| learning_rate | 0.0002 | +| loss | 0.603 | +| n_updates | 1470 | +| policy_gradient_loss | -0.000571 | +| value_loss | 21.2 | 
+----------------------------------------- + +Current state: ChampionX.Level7.ChunLiVsBlanka +----------------------------------------- +| time/ | | +| fps | 448 | +| iterations | 120 | +| time_elapsed | 11547 | +| total_timesteps | 5184000 | +| train/ | | +| approx_kl | 0.017954912 | +| clip_fraction | 0.186 | +| clip_range | 0.2 | +| entropy_loss | -8.14 | +| explained_variance | 0.781 | +| learning_rate | 0.0002 | +| loss | 0.7 | +| n_updates | 1480 | +| policy_gradient_loss | 0.00359 | +| value_loss | 24.6 | +----------------------------------------- + +Current state: ChampionX.Level5.ChunLiVsRyu +----------------------------------------- +| time/ | | +| fps | 448 | +| iterations | 121 | +| time_elapsed | 11642 | +| total_timesteps | 5227200 | +| train/ | | +| approx_kl | 0.017439196 | +| clip_fraction | 0.182 | +| clip_range | 0.2 | +| entropy_loss | -8.15 | +| explained_variance | 0.782 | +| learning_rate | 0.0002 | +| loss | 0.972 | +| n_updates | 1490 | +| policy_gradient_loss | 0.0017 | +| value_loss | 21.6 | +----------------------------------------- + +Current state: ChampionX.Level3.ChunLiVsZangief +----------------------------------------- +| time/ | | +| fps | 448 | +| iterations | 122 | +| time_elapsed | 11738 | +| total_timesteps | 5270400 | +| train/ | | +| approx_kl | 0.016962286 | +| clip_fraction | 0.173 | +| clip_range | 0.2 | +| entropy_loss | -8.16 | +| explained_variance | 0.807 | +| learning_rate | 0.0002 | +| loss | 0.875 | +| n_updates | 1500 | +| policy_gradient_loss | 0.000824 | +| value_loss | 18.2 | +----------------------------------------- + +Current state: ChampionX.Level12.ChunLiVsBison +----------------------------------------- +| time/ | | +| fps | 449 | +| iterations | 123 | +| time_elapsed | 11833 | +| total_timesteps | 5313600 | +| train/ | | +| approx_kl | 0.017236924 | +| clip_fraction | 0.162 | +| clip_range | 0.2 | +| entropy_loss | -8.16 | +| explained_variance | 0.779 | +| learning_rate | 0.0002 | +| loss | 0.853 | +| 
n_updates | 1510 | +| policy_gradient_loss | 0.000141 | +| value_loss | 18.9 | +----------------------------------------- + +Current state: ChampionX.Level5.ChunLiVsRyu +----------------------------------------- +| time/ | | +| fps | 449 | +| iterations | 124 | +| time_elapsed | 11928 | +| total_timesteps | 5356800 | +| train/ | | +| approx_kl | 0.016021965 | +| clip_fraction | 0.157 | +| clip_range | 0.2 | +| entropy_loss | -8.16 | +| explained_variance | 0.83 | +| learning_rate | 0.0002 | +| loss | 1 | +| n_updates | 1520 | +| policy_gradient_loss | 0.00109 | +| value_loss | 20 | +----------------------------------------- + +Current state: ChampionX.Level10.ChunLiVsVega +----------------------------------------- +| time/ | | +| fps | 449 | +| iterations | 125 | +| time_elapsed | 12024 | +| total_timesteps | 5400000 | +| train/ | | +| approx_kl | 0.015824681 | +| clip_fraction | 0.166 | +| clip_range | 0.2 | +| entropy_loss | -8.18 | +| explained_variance | 0.803 | +| learning_rate | 0.0002 | +| loss | 0.51 | +| n_updates | 1530 | +| policy_gradient_loss | 0.00165 | +| value_loss | 17.8 | +----------------------------------------- + +Current state: ChampionX.Level9.ChunLiVsBalrog +----------------------------------------- +| time/ | | +| fps | 449 | +| iterations | 126 | +| time_elapsed | 12119 | +| total_timesteps | 5443200 | +| train/ | | +| approx_kl | 0.014095656 | +| clip_fraction | 0.14 | +| clip_range | 0.2 | +| entropy_loss | -8.16 | +| explained_variance | 0.809 | +| learning_rate | 0.0002 | +| loss | 0.666 | +| n_updates | 1540 | +| policy_gradient_loss | -0.00077 | +| value_loss | 20.5 | +----------------------------------------- + +Current state: ChampionX.Level9.ChunLiVsBalrog +---------------------------------------- +| time/ | | +| fps | 449 | +| iterations | 127 | +| time_elapsed | 12216 | +| total_timesteps | 5486400 | +| train/ | | +| approx_kl | 0.01563808 | +| clip_fraction | 0.154 | +| clip_range | 0.2 | +| entropy_loss | -8.15 | +| 
explained_variance | 0.798 | +| learning_rate | 0.0002 | +| loss | 0.739 | +| n_updates | 1550 | +| policy_gradient_loss | -0.000601 | +| value_loss | 17.6 | +---------------------------------------- + +Current state: ChampionX.Level5.ChunLiVsRyu +----------------------------------------- +| time/ | | +| fps | 449 | +| iterations | 128 | +| time_elapsed | 12311 | +| total_timesteps | 5529600 | +| train/ | | +| approx_kl | 0.016478073 | +| clip_fraction | 0.159 | +| clip_range | 0.2 | +| entropy_loss | -8.15 | +| explained_variance | 0.763 | +| learning_rate | 0.0002 | +| loss | 1.21 | +| n_updates | 1560 | +| policy_gradient_loss | 0.000911 | +| value_loss | 21.1 | +----------------------------------------- + +Current state: ChampionX.Level6.ChunLiVsEHonda +----------------------------------------- +| time/ | | +| fps | 449 | +| iterations | 129 | +| time_elapsed | 12407 | +| total_timesteps | 5572800 | +| train/ | | +| approx_kl | 0.016799105 | +| clip_fraction | 0.155 | +| clip_range | 0.2 | +| entropy_loss | -8.14 | +| explained_variance | 0.795 | +| learning_rate | 0.0002 | +| loss | 1.04 | +| n_updates | 1570 | +| policy_gradient_loss | 0.00415 | +| value_loss | 33.2 | +----------------------------------------- + +Current state: ChampionX.Level8.ChunLiVsGuile +----------------------------------------- +| time/ | | +| fps | 449 | +| iterations | 130 | +| time_elapsed | 12503 | +| total_timesteps | 5616000 | +| train/ | | +| approx_kl | 0.013092292 | +| clip_fraction | 0.136 | +| clip_range | 0.2 | +| entropy_loss | -8.15 | +| explained_variance | 0.801 | +| learning_rate | 0.0002 | +| loss | 1.22 | +| n_updates | 1580 | +| policy_gradient_loss | -0.00466 | +| value_loss | 16.9 | +----------------------------------------- + +Current state: ChampionX.Level3.ChunLiVsZangief +----------------------------------------- +| time/ | | +| fps | 449 | +| iterations | 131 | +| time_elapsed | 12598 | +| total_timesteps | 5659200 | +| train/ | | +| approx_kl | 0.022095175 | 
+| clip_fraction | 0.218 | +| clip_range | 0.2 | +| entropy_loss | -8.11 | +| explained_variance | 0.767 | +| learning_rate | 0.0002 | +| loss | 1.75 | +| n_updates | 1590 | +| policy_gradient_loss | 0.00969 | +| value_loss | 28.1 | +----------------------------------------- + +Current state: ChampionX.Level2.ChunLiVsChunLi +----------------------------------------- +| time/ | | +| fps | 449 | +| iterations | 132 | +| time_elapsed | 12693 | +| total_timesteps | 5702400 | +| train/ | | +| approx_kl | 0.015401343 | +| clip_fraction | 0.155 | +| clip_range | 0.2 | +| entropy_loss | -8.13 | +| explained_variance | 0.783 | +| learning_rate | 0.0002 | +| loss | 0.389 | +| n_updates | 1600 | +| policy_gradient_loss | 0.00122 | +| value_loss | 16.4 | +----------------------------------------- + +Current state: ChampionX.Level3.ChunLiVsZangief +----------------------------------------- +| time/ | | +| fps | 449 | +| iterations | 133 | +| time_elapsed | 12789 | +| total_timesteps | 5745600 | +| train/ | | +| approx_kl | 0.013617316 | +| clip_fraction | 0.135 | +| clip_range | 0.2 | +| entropy_loss | -8.11 | +| explained_variance | 0.82 | +| learning_rate | 0.0002 | +| loss | 1.51 | +| n_updates | 1610 | +| policy_gradient_loss | -0.0011 | +| value_loss | 18.3 | +----------------------------------------- + +Current state: ChampionX.Level1.ChunLiVsKen +----------------------------------------- +| time/ | | +| fps | 449 | +| iterations | 134 | +| time_elapsed | 12886 | +| total_timesteps | 5788800 | +| train/ | | +| approx_kl | 0.018610569 | +| clip_fraction | 0.2 | +| clip_range | 0.2 | +| entropy_loss | -8.11 | +| explained_variance | 0.72 | +| learning_rate | 0.0002 | +| loss | 0.652 | +| n_updates | 1620 | +| policy_gradient_loss | 0.00408 | +| value_loss | 24.8 | +----------------------------------------- + +Current state: ChampionX.Level5.ChunLiVsRyu +----------------------------------------- +| time/ | | +| fps | 449 | +| iterations | 135 | +| time_elapsed | 12984 | +| 
total_timesteps | 5832000 | +| train/ | | +| approx_kl | 0.013793538 | +| clip_fraction | 0.135 | +| clip_range | 0.2 | +| entropy_loss | -8.12 | +| explained_variance | 0.811 | +| learning_rate | 0.0002 | +| loss | 1.18 | +| n_updates | 1630 | +| policy_gradient_loss | 3.8e-05 | +| value_loss | 19.7 | +----------------------------------------- + +Current state: ChampionX.Level3.ChunLiVsZangief +----------------------------------------- +| time/ | | +| fps | 449 | +| iterations | 136 | +| time_elapsed | 13081 | +| total_timesteps | 5875200 | +| train/ | | +| approx_kl | 0.015575893 | +| clip_fraction | 0.164 | +| clip_range | 0.2 | +| entropy_loss | -8.13 | +| explained_variance | 0.803 | +| learning_rate | 0.0002 | +| loss | 0.503 | +| n_updates | 1640 | +| policy_gradient_loss | 0.000462 | +| value_loss | 17.1 | +----------------------------------------- + +Current state: ChampionX.Level7.ChunLiVsBlanka +----------------------------------------- +| time/ | | +| fps | 449 | +| iterations | 137 | +| time_elapsed | 13178 | +| total_timesteps | 5918400 | +| train/ | | +| approx_kl | 0.016451944 | +| clip_fraction | 0.165 | +| clip_range | 0.2 | +| entropy_loss | -8.12 | +| explained_variance | 0.802 | +| learning_rate | 0.0002 | +| loss | 0.83 | +| n_updates | 1650 | +| policy_gradient_loss | 0.000427 | +| value_loss | 19.6 | +----------------------------------------- + +Current state: ChampionX.Level2.ChunLiVsChunLi +----------------------------------------- +| time/ | | +| fps | 449 | +| iterations | 138 | +| time_elapsed | 13275 | +| total_timesteps | 5961600 | +| train/ | | +| approx_kl | 0.013083423 | +| clip_fraction | 0.132 | +| clip_range | 0.2 | +| entropy_loss | -8.13 | +| explained_variance | 0.816 | +| learning_rate | 0.0002 | +| loss | 1.46 | +| n_updates | 1660 | +| policy_gradient_loss | -0.000823 | +| value_loss | 23.6 | +----------------------------------------- + +Current state: ChampionX.Level5.ChunLiVsRyu +----------------------------------------- 
def make_env(game, state, seed=0):
    """Return a zero-argument factory that builds one wrapped retro environment.

    Args:
        game: Name of the retro game to load.
        state: Name of the saved state the emulator starts from.
        seed: Seed passed to ``env.seed`` after wrapping.

    Returns:
        A callable that, when invoked, creates the ``retro.RetroEnv``, wraps it
        in ``StreetFighterCustomWrapper`` and ``Monitor``, seeds it and returns it.
    """
    def _init():
        env = retro.RetroEnv(
            game=game,
            state=state,
            use_restricted_actions=retro.Actions.FILTERED,
            obs_type=retro.Observations.IMAGE
        )
        env = StreetFighterCustomWrapper(env)
        env = Monitor(env)
        env.seed(seed)
        return env
    return _init


def objective(trial):
    """Optuna objective: train PPO with sampled hyperparameters and score it.

    The trial trains in ten rounds of 100000 timesteps. After each round the
    policy is evaluated over 10 episodes, the mean reward is reported to the
    trial, and the trial is pruned if Optuna asks for it.

    Args:
        trial: The ``optuna`` trial used to sample hyperparameters and to
            report intermediate values.

    Returns:
        The mean evaluation reward measured after the last training round.

    Raises:
        optuna.TrialPruned: When ``trial.should_prune()`` is true after a round.
    """
    game = "StreetFighterIISpecialChampionEdition-Genesis"
    env = make_env(game, state="ChampionX.Level1.ChunLiVsKen")()

    # The environment is created once per trial, so it must also be released
    # once per trial -- on success, on pruning and on any training error.
    # NOTE(review): gym-retro is understood to allow a single emulator instance
    # per process; without an explicit close the next trial would depend on
    # garbage-collection timing. Confirm against the installed retro version.
    try:
        # Suggest hyperparameters
        learning_rate = trial.suggest_float("learning_rate", 5e-5, 1e-3, log=True)
        n_steps = trial.suggest_int("n_steps", 256, 8192, log=True)
        batch_size = trial.suggest_int("batch_size", 16, 128, log=True)
        gamma = trial.suggest_float("gamma", 0.9, 0.9999)
        gae_lambda = trial.suggest_float("gae_lambda", 0.9, 1.0)
        clip_range = trial.suggest_float("clip_range", 0.1, 0.4)
        ent_coef = trial.suggest_float("ent_coef", 1e-4, 1e-2, log=True)
        vf_coef = trial.suggest_float("vf_coef", 0.1, 1.0)

        # Using CustomCNN as the feature extractor
        policy_kwargs = {
            'features_extractor_class': CustomCNN
        }

        # Train the model
        model = PPO(
            "CnnPolicy",
            env,
            device="cuda",
            policy_kwargs=policy_kwargs,
            verbose=1,
            n_steps=n_steps,
            batch_size=batch_size,
            learning_rate=learning_rate,
            ent_coef=ent_coef,
            clip_range=clip_range,
            vf_coef=vf_coef,
            gamma=gamma,
            gae_lambda=gae_lambda
        )

        for iteration in range(10):
            model.learn(total_timesteps=100000)
            mean_reward, _std_reward = evaluate_policy(model, env, n_eval_episodes=10)

            # Report the intermediate score so the pruner can stop weak trials early.
            trial.report(mean_reward, iteration)

            if trial.should_prune():
                raise optuna.TrialPruned()

        return mean_reward
    finally:
        env.close()
# Custom feature extractor (CNN)
class CustomCNN(BaseFeaturesExtractor):
    """Convolutional feature extractor producing a 512-dimensional feature vector.

    The network is three convolution + ReLU stages followed by a flatten and a
    linear + ReLU projection to ``features_dim``.
    """

    def __init__(self, observation_space: gym.Space):
        """Build the network.

        Args:
            observation_space: Observation space forwarded to the base class.
        """
        super().__init__(observation_space, features_dim=512)

        # Single input channel: forward() inserts the channel axis itself.
        conv_stages = [
            nn.Conv2d(1, 32, kernel_size=5, stride=2, padding=0),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=5, stride=2, padding=0),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=0),
            nn.ReLU(),
        ]
        # 16384 equals 64 channels x 16 x 16, which is what the stages above
        # yield for 84x84 inputs -- assumes 84x84 frames, TODO confirm.
        projection = [
            nn.Flatten(),
            nn.Linear(16384, self.features_dim),
            nn.ReLU(),
        ]
        # Attribute name and layer order are kept so parameter names are unchanged.
        self.cnn = nn.Sequential(*conv_stages, *projection)

    def forward(self, observations: torch.Tensor) -> torch.Tensor:
        """Insert a channel axis at dimension 1 and run the network.

        Args:
            observations: Input tensor; presumably (batch, height, width) --
                verify against the environment wrapper.

        Returns:
            The output of ``self.cnn`` for the reshaped input.
        """
        with_channel = observations.unsqueeze(1)
        return self.cnn(with_channel)
# Custom environment wrapper
class StreetFighterCustomWrapper(gym.Wrapper):
    """Wrapper that emits grayscale frame differences and a health-based reward.

    Observations are the difference between the current and the previous
    preprocessed frame. The reward is derived from the ``health`` and
    ``enemy_health`` entries of the wrapped environment's ``info`` dict.
    """

    def __init__(self, env, testing=False):
        """Wrap ``env``.

        Args:
            env: Environment to wrap.
            testing: When true, ``step`` always reports ``done=False``.
        """
        super(StreetFighterCustomWrapper, self).__init__(env)
        self.env = env
        self.testing = testing

        # Store the previous frame
        self.prev_frame = None

        self.full_hp = 176
        self.prev_player_health = self.full_hp
        self.prev_oppont_health = self.full_hp

        # Update observation space to include one grayscale frame difference image
        # NOTE(review): the frames returned by reset()/step() come from a
        # (84, 84) resize divided by 255.0, so they do not look like
        # (84, 84, 1) uint8 arrays -- confirm the declared space is intended.
        self.observation_space = gym.spaces.Box(low=0, high=255, shape=(84, 84, 1), dtype=np.uint8)

    def _preprocess_observation(self, observation):
        """Convert a frame to grayscale, resize it to 84x84 and divide by 255.0."""
        obs_gray = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY)
        obs_gray_resized = cv2.resize(obs_gray, (84, 84), interpolation=cv2.INTER_AREA) / 255.0
        return obs_gray_resized

    def reset(self):
        """Reset the wrapped environment and the tracked health values.

        Returns:
            An all-zero array shaped like the first preprocessed frame, since
            there is no earlier frame to difference against.
        """
        self.prev_player_health = self.full_hp
        self.prev_oppont_health = self.full_hp

        observation = self.env.reset()
        # Reset the previous frame
        self.prev_frame = self._preprocess_observation(observation)
        return np.zeros_like(self.prev_frame)

    def step(self, action):
        """Advance the wrapped environment by one step.

        Args:
            action: Action forwarded unchanged to the wrapped environment.

        Returns:
            ``(frame_delta, reward, done, info)``. ``frame_delta`` is the current
            preprocessed frame minus the previous one. ``reward`` and ``done``
            are computed here; the wrapped environment's own reward and done
            flag are discarded.
        """
        observation, _reward, _done, info = self.env.step(action)

        obs_gray_resized = self._preprocess_observation(observation)

        if self.prev_frame is not None:
            frame_delta = obs_gray_resized - self.prev_frame
        else:
            # step() called before reset(): nothing to difference against yet.
            frame_delta = np.zeros_like(obs_gray_resized)

        self.prev_frame = obs_gray_resized

        # Defaults for every path that does not set them explicitly: the
        # ordinary in-fight step (which never ends the episode) and the case
        # where neither health value is positive. Without them the return
        # statement raised UnboundLocalError on those paths.
        reward = 0
        done = False

        # During fighting, either player or opponent has positive health points.
        if info['health'] > 0 or info['enemy_health'] > 0:

            # Player Loses
            if info['health'] < 0 and info['enemy_health'] > 0:
                reward = (-self.full_hp) * info['enemy_health']
                done = True

            # Player Wins
            elif info['enemy_health'] < 0 and info['health'] > 0:
                reward = self.full_hp * info['health']
                done = True

            # During Fighting
            else:
                # Damage dealt to the opponent minus damage taken since the last step.
                reward = (self.prev_oppont_health - info['enemy_health']) - (self.prev_player_health - info['health'])

            # NOTE(review): health tracking is updated only while fighting;
            # confirm this nesting matches the intended behaviour.
            self.prev_player_health = info['health']
            self.prev_oppont_health = info['enemy_health']

        if self.testing:
            done = False

        return frame_delta, reward, done, info
# 003_frame_delta_ram_based/train.py
import os
import random

import retro
from stable_baselines3 import PPO, A2C
from stable_baselines3.common.vec_env import SubprocVecEnv
from stable_baselines3.common.callbacks import BaseCallback, CheckpointCallback

from custom_cnn import CustomCNN
from street_fighter_custom_wrapper import StreetFighterCustomWrapper


class RandomOpponentChangeCallback(BaseCallback):
    """Callback that periodically loads a random game state into every env.

    Args:
        stages: Sequence of state names to choose from.
        opponent_interval: Number of callback calls between state changes.
        verbose: Verbosity level forwarded to ``BaseCallback``.
    """

    def __init__(self, stages, opponent_interval, verbose=0):
        super().__init__(verbose)
        self.stages = stages
        self.opponent_interval = opponent_interval

    def _on_step(self) -> bool:
        """Switch all envs to a random stage every ``opponent_interval`` calls."""
        if self.n_calls % self.opponent_interval == 0:
            new_state = random.choice(self.stages)
            print("\nCurrent state:", new_state)
            # indices=None applies the call to every sub-environment.
            self.training_env.env_method("load_state", new_state, indices=None)
        # Returning True keeps training running.
        return True


def make_env(game, state, seed=0):
    """Return a factory that builds one seeded, wrapped retro environment."""
    def _init():
        env = retro.make(
            game=game,
            state=state,
            use_restricted_actions=retro.Actions.FILTERED,
            obs_type=retro.Observations.IMAGE
        )
        env = StreetFighterCustomWrapper(env)
        env.seed(seed)
        return env
    return _init


def main():
    """Train a PPO agent with random opponent changes and periodic checkpoints."""
    # Set up the environment and model
    game = "StreetFighterIISpecialChampionEdition-Genesis"
    state_stages = [
        "ChampionX.Level1.ChunLiVsKen",
        "ChampionX.Level2.ChunLiVsChunLi",
        "ChampionX.Level3.ChunLiVsZangief",
        "ChampionX.Level4.ChunLiVsDhalsim",
        "ChampionX.Level5.ChunLiVsRyu",
        "ChampionX.Level6.ChunLiVsEHonda",
        "ChampionX.Level7.ChunLiVsBlanka",
        "ChampionX.Level8.ChunLiVsGuile",
        "ChampionX.Level9.ChunLiVsBalrog",
        "ChampionX.Level10.ChunLiVsVega",
        "ChampionX.Level11.ChunLiVsSagat",
        "ChampionX.Level12.ChunLiVsBison"
        # Add other stages as necessary
    ]
    # Champion is at difficulty level 4, ChampionX is at difficulty level 8.

    num_envs = 8

    env = SubprocVecEnv([make_env(game, state_stages[0], seed=i) for i in range(num_envs)])

    # Using CustomCNN as the feature extractor
    policy_kwargs = {
        'features_extractor_class': CustomCNN
    }

    model = PPO(
        "CnnPolicy",
        env,
        device="cuda",
        policy_kwargs=policy_kwargs,
        verbose=1,
        n_steps=5400,
        batch_size=64,
        learning_rate=0.0001,
        ent_coef=0.01,
        clip_range=0.2,
        gamma=0.99,
        gae_lambda=0.95,
        tensorboard_log="logs/"
    )

    # Set the save directory
    save_dir = "trained_models"
    os.makedirs(save_dir, exist_ok=True)

    # To resume from a checkpoint instead of training from scratch:
    # model_path = "trained_models/ppo_chunli_1296000_steps.zip"
    # custom_objects = {"learning_rate": 0.0002}
    # model = PPO.load(model_path, env=env, device="cuda")  # , custom_objects=custom_objects)

    # Set up callbacks
    opponent_interval = 5400  # stage_interval * num_envs = total_steps_per_stage
    checkpoint_interval = 54000  # checkpoint_interval * num_envs = total_steps_per_checkpoint (Every 80 rounds)
    checkpoint_callback = CheckpointCallback(save_freq=checkpoint_interval, save_path=save_dir, name_prefix="ppo_chunli")
    # The third positional parameter of the callback is `verbose`; the save
    # directory was previously passed there by mistake.
    stage_increase_callback = RandomOpponentChangeCallback(state_stages, opponent_interval)

    try:
        model.learn(
            total_timesteps=int(6048000),  # total_timesteps = stage_interval * num_envs * num_stages (1120 rounds)
            callback=[checkpoint_callback, stage_increase_callback]
        )
    finally:
        # Shut the worker processes down even when training is interrupted.
        env.close()

    # Save the final model
    model.save(os.path.join(save_dir, "ppo_sf2_chunli_final.zip"))


if __name__ == "__main__":
    main()
optimize_ppo(trial) + + # Create environment + env = make_env(game, state)() + env = Monitor(env, LOG_DIR) + env = DummyVecEnv([lambda: env]) + env = VecFrameStack(env, 4, channels_order='last') + + # Create algo + model = PPO('CnnPolicy', env, tensorboard_log=LOG_DIR, verbose=0, **model_params) + model.learn(total_timesteps=100000) + + # Evaluate model + mean_reward, _ = evaluate_policy(model, env, n_eval_episodes=5) + env.close() + + SAVE_PATH = os.path.join(OPT_DIR, 'trial_{}_best_model'.format(trial.number)) + model.save(SAVE_PATH) + + return mean_reward + + # except Exception as e: + # return -1 + +# Creating the experiment +study = optuna.create_study(direction='maximize') +study.optimize(optimize_agent, n_trials=10, n_jobs=1) + +print(study.best_params) +print(study.best_trial)