diff --git a/001_image_stack/__pycache__/custom_cnn.cpython-38.pyc b/001_image_stack/__pycache__/custom_cnn.cpython-38.pyc new file mode 100644 index 0000000..efaaa79 Binary files /dev/null and b/001_image_stack/__pycache__/custom_cnn.cpython-38.pyc differ diff --git a/001_image_stack/__pycache__/street_fighter_custom_wrapper.cpython-38.pyc b/001_image_stack/__pycache__/street_fighter_custom_wrapper.cpython-38.pyc new file mode 100644 index 0000000..5ab9e2f Binary files /dev/null and b/001_image_stack/__pycache__/street_fighter_custom_wrapper.cpython-38.pyc differ diff --git a/001_image_stack/custom_cnn.py b/001_image_stack/custom_cnn.py new file mode 100644 index 0000000..25c50ea --- /dev/null +++ b/001_image_stack/custom_cnn.py @@ -0,0 +1,24 @@ +import gym +import torch +import torch.nn as nn +from stable_baselines3.common.torch_layers import BaseFeaturesExtractor + +# Custom feature extractor (CNN) +class CustomCNN(BaseFeaturesExtractor): + def __init__(self, observation_space: gym.Space): + super(CustomCNN, self).__init__(observation_space, features_dim=512) + self.cnn = nn.Sequential( + nn.Conv2d(4, 32, kernel_size=5, stride=2, padding=0), + nn.ReLU(), + nn.Conv2d(32, 64, kernel_size=5, stride=2, padding=0), + nn.ReLU(), + nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=0), + nn.ReLU(), + nn.Flatten(), + nn.Linear(16384, self.features_dim), + nn.ReLU() + ) + + def forward(self, observations: torch.Tensor) -> torch.Tensor: + return self.cnn(observations) + \ No newline at end of file diff --git a/001_image_stack/street_fighter_custom_wrapper.py b/001_image_stack/street_fighter_custom_wrapper.py new file mode 100644 index 0000000..5fd4d35 --- /dev/null +++ b/001_image_stack/street_fighter_custom_wrapper.py @@ -0,0 +1,106 @@ +import collections + +import gym +import cv2 +import numpy as np +import torch +from torchvision.transforms import Normalize +from gym.spaces import MultiBinary + +# Custom environment wrapper +class StreetFighterCustomWrapper(gym.Wrapper): + def __init__(self, env, testing=False, threshold=0.65): + super(StreetFighterCustomWrapper, self).__init__(env) + + self.action_space = MultiBinary(12) + + # Use a deque to store the last 4 frames + self.frame_stack = collections.deque(maxlen=4) + + self.threshold = threshold + self.game_screen_gray = None + + self.prev_player_health = 1.0 + self.prev_opponent_health = 1.0 + + # Update observation space to include 4 stacked grayscale images + self.observation_space = gym.spaces.Box( + low=0.0, high=1.0, shape=(4, 84, 84), dtype=np.float32 + ) + + self.testing = testing + + # Normalize the image for MobileNetV3Small. + self.normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + + def _preprocess_observation(self, observation): + self.game_screen_gray = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY) + resized_image = cv2.resize(self.game_screen_gray, (84, 84), interpolation=cv2.INTER_AREA) / 255.0 + # Add the resized image to the frame stack + self.frame_stack.append(resized_image) + + # Stack the last 4 frames and return the stacked frames + stacked_frames = np.stack(self.frame_stack, axis=0) + return stacked_frames + + def _get_win_or_lose_bonus(self): + if self.prev_player_health > self.prev_opponent_health: + # print('You win!') + return 300 + else: + # print('You lose!') + return -300 + + def _get_reward(self): + player_health_area = self.game_screen_gray[15:20, 32:120] + oppoent_health_area = self.game_screen_gray[15:20, 136:224] + + # Get health points using the number of pixels above 129. 
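        # These slices index the full-resolution grayscale frame cached by
        # _preprocess_observation (before the 84x84 resize). Each health-bar region is
        # 5 rows x 88 columns = 440 pixels, so the fraction of pixels brighter than 129
        # approximates remaining health in [0.0, 1.0]: a full bar reads ~1.0, an empty
        # bar ~0.0, and e.g. 220 bright pixels out of 440 reads as roughly 0.5.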
+ player_health = np.sum(player_health_area > 129) / player_health_area.size + opponent_health = np.sum(oppoent_health_area > 129) / oppoent_health_area.size + + player_health_diff = self.prev_player_health - player_health + opponent_health_diff = self.prev_opponent_health - opponent_health + + reward = (opponent_health_diff - player_health_diff) * 200 # max would be 200 + + # Penalty for each step without any change in health + if opponent_health_diff <= 0.0000001: + reward -= 12.0 / 60.0 # -12 points per second if no damage to opponent + + self.prev_player_health = player_health + self.prev_opponent_health = opponent_health + + # Print the health values of the player and the opponent + # print("Player health: %f Opponent health:%f" % (player_health, opponent_health)) + return reward + + def reset(self): + observation = self.env.reset() + self.prev_player_health = 1.0 + self.prev_opponent_health = 1.0 + + # Clear the frame stack and add the first observation 4 times + self.frame_stack.clear() + for _ in range(4): + self.frame_stack.append(self._preprocess_observation(observation)[0]) + + return self._preprocess_observation(observation) + + def step(self, action): + # observation, _, _, info = self.env.step(action) + observation, _reward, _done, info = self.env.step(self.env.action_space.sample()) + custom_reward = self._get_reward() + custom_reward -= 1.0 / 60.0 # penalty for each step (-1 points per second) + + custom_done = False + if self.prev_player_health <= 0.00001 or self.prev_opponent_health <= 0.00001: + custom_reward += self._get_win_or_lose_bonus() + if not self.testing: + custom_done = True + else: + self.prev_player_health = 1.0 + self.prev_opponent_health = 1.0 + + return self._preprocess_observation(observation), custom_reward, custom_done, info + \ No newline at end of file diff --git a/001_image_stack/test.py b/001_image_stack/test.py new file mode 100644 index 0000000..614b247 --- /dev/null +++ b/001_image_stack/test.py @@ -0,0 +1,70 @@ +import time + +import cv2 +import retro +from stable_baselines3 import PPO +from stable_baselines3.common.vec_env import DummyVecEnv + +from custom_cnn import CustomCNN +from street_fighter_custom_wrapper import StreetFighterCustomWrapper + +def make_env(game, state): + def _init(): + env = retro.RetroEnv( + game=game, + state=state, + use_restricted_actions=retro.Actions.FILTERED, + obs_type=retro.Observations.IMAGE + ) + env = StreetFighterCustomWrapper(env, testing=True) + return env + return _init + +game = "StreetFighterIISpecialChampionEdition-Genesis" +state_stages = [ + "Champion.Level1.ChunLiVsGuile", + "Champion.Level2.ChunLiVsKen", + "Champion.Level3.ChunLiVsChunLi", + "Champion.Level4.ChunLiVsZangief", + "Champion.Level5.ChunLiVsDhalsim", + "Champion.Level6.ChunLiVsRyu", + "Champion.Level7.ChunLiVsEHonda", + "Champion.Level8.ChunLiVsBlanka", + "Champion.Level9.ChunLiVsBalrog", + "Champion.Level10.ChunLiVsVega", + "Champion.Level11.ChunLiVsSagat", + "Champion.Level12.ChunLiVsBison" + # Add other stages as necessary +] + +env = make_env(game, state_stages[0])() + +# Wrap the environment +env = DummyVecEnv([lambda: env]) + +policy_kwargs = { + 'features_extractor_class': CustomCNN +} + +model = PPO( + "CnnPolicy", + env, + device="cuda", + policy_kwargs=policy_kwargs, + verbose=1 +) +model.load(r"trained_models_continued/ppo_chunli_432000_steps") + +obs = env.reset() +done = False + +while True: + timestamp = time.time() + action, _ = model.predict(obs) + obs, rewards, done, info = env.step(action) + env.render() + render_time = 
time.time() - timestamp + if render_time < 0.0111: + time.sleep(0.0111 - render_time) # Add a delay for 90 FPS + +# env.close() diff --git a/001_image_stack/train.py b/001_image_stack/train.py new file mode 100644 index 0000000..4e2195f --- /dev/null +++ b/001_image_stack/train.py @@ -0,0 +1,123 @@ +import os +import random + +import gym +import cv2 +import retro +import numpy as np +from stable_baselines3 import PPO +from stable_baselines3.common.vec_env import SubprocVecEnv +from stable_baselines3.common.preprocessing import is_image_space, is_image_space_channels_first +from stable_baselines3.common.callbacks import BaseCallback, CheckpointCallback + +from custom_cnn import CustomCNN +from street_fighter_custom_wrapper import StreetFighterCustomWrapper + +class RandomOpponentChangeCallback(BaseCallback): + def __init__(self, stages, opponent_interval, verbose=0): + super(RandomOpponentChangeCallback, self).__init__(verbose) + self.stages = stages + self.opponent_interval = opponent_interval + + def _on_step(self) -> bool: + if self.n_calls % self.opponent_interval == 0: + new_state = random.choice(self.stages) + print("\nCurrent state:", new_state) + self.training_env.env_method("load_state", new_state, indices=None) + return True + +def make_env(game, state, seed=0): + def _init(): + env = retro.RetroEnv( + game=game, + state=state, + use_restricted_actions=retro.Actions.FILTERED, + obs_type=retro.Observations.IMAGE + ) + env = StreetFighterCustomWrapper(env) + env.seed(seed) + return env + return _init + +def main(): + # Set up the environment and model + game = "StreetFighterIISpecialChampionEdition-Genesis" + state_stages = [ + "ChampionX.Level1.ChunLiVsKen", + "ChampionX.Level2.ChunLiVsChunLi", + "ChampionX.Level3.ChunLiVsZangief", + "ChampionX.Level4.ChunLiVsDhalsim", + "ChampionX.Level5.ChunLiVsRyu", + "ChampionX.Level6.ChunLiVsEHonda", + "ChampionX.Level7.ChunLiVsBlanka", + "ChampionX.Level8.ChunLiVsGuile", + "ChampionX.Level9.ChunLiVsBalrog", + "ChampionX.Level10.ChunLiVsVega", + "ChampionX.Level11.ChunLiVsSagat", + "ChampionX.Level12.ChunLiVsBison" + # Add other stages as necessary + ] + # Champion is at difficulty level 4, ChampionX is at difficulty level 8. 
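    # Step-count arithmetic behind the settings below (cross-checked against training_log.txt):
    #   - each PPO iteration collects n_steps * num_envs = 5400 * 8 = 43,200 transitions,
    #     matching the 43,200-step increments of total_timesteps in the log;
    #   - RandomOpponentChangeCallback fires every 5400 _on_step calls, i.e. once per rollout,
    #     so a new random stage is loaded every 43,200 steps;
    #   - CheckpointCallback with save_freq=54000 saves every 54,000 * 8 = 432,000 steps
    #     (hence checkpoints such as ppo_chunli_432000_steps, the one loaded by test.py);
    #   - total_timesteps = 6,048,000 corresponds to 6,048,000 / 43,200 = 140 rollouts.
    #
    # Shape sanity check for CustomCNN on the wrapper's 4x84x84 observations (a sketch,
    # not executed during training): the three conv layers shrink 84x84 to 40x40 -> 18x18
    # -> 16x16, so the flattened input to the final Linear layer is 64 * 16 * 16 = 16,384,
    # matching nn.Linear(16384, 512) in custom_cnn.py.
    #   import torch
    #   extractor = CustomCNN(gym.spaces.Box(low=0.0, high=1.0, shape=(4, 84, 84), dtype=np.float32))
    #   assert extractor(torch.zeros(1, 4, 84, 84)).shape == (1, 512)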
+ + num_envs = 8 + + # env = SubprocVecEnv([make_env(game, state_stages[0], seed=i) for i in range(num_envs)]) + env = SubprocVecEnv([make_env(game, state_stages[0], seed=i) for i in range(num_envs)]) + + # Using CustomCNN as the feature extractor + policy_kwargs = { + 'features_extractor_class': CustomCNN + } + + model = PPO( + "CnnPolicy", + env, + device="cuda", + policy_kwargs=policy_kwargs, + verbose=1, + n_steps=5400, + batch_size=64, + n_epochs=10, + learning_rate=0.0003, + ent_coef=0.01, + clip_range=0.2, + clip_range_vf=None, + gamma=0.99, + gae_lambda=0.95, + max_grad_norm=0.5, + use_sde=False, + sde_sample_freq=-1 + ) + + # Set the save directory + save_dir = "trained_models_continued" + os.makedirs(save_dir, exist_ok=True) + + # Load the model from file + # Change the path to the actual path of the model file + model_path = "trained_models/ppo_chunli_1296000_steps.zip" + + # Load model and modify the learning rate and entropy coefficient + custom_objects = { + "learning_rate": 0.00005, + "ent_coef": 0.2 + } + model = PPO.load(model_path, env=env, device="cuda", custom_objects=custom_objects) + + # Set up callbacks + opponent_interval = 5400 # stage_interval * num_envs = total_steps_per_stage + checkpoint_interval = 54000 # checkpoint_interval * num_envs = total_steps_per_checkpoint (Every 80 rounds) + checkpoint_callback = CheckpointCallback(save_freq=checkpoint_interval, save_path=save_dir, name_prefix="ppo_chunli") + stage_increase_callback = RandomOpponentChangeCallback(state_stages, opponent_interval, save_dir) + + + model.learn( + total_timesteps=int(6048000), # total_timesteps = stage_interval * num_envs * num_stages (1120 rounds) + callback=[checkpoint_callback, stage_increase_callback] + ) + + # Save the final model + model.save(os.path.join(save_dir, "ppo_sf2_chunli_final.zip")) + +if __name__ == "__main__": + main() diff --git a/001_image_stack/training_log.txt b/001_image_stack/training_log.txt new file mode 100644 index 0000000..a5db296 --- /dev/null +++ b/001_image_stack/training_log.txt @@ -0,0 +1,631 @@ +(StreetFighterAI) PS C:\Users\unitec\Documents\AIProjects\street-fighter-ai\001_image_stack> python .\train.py +Using cuda device + +Current state: ChampionX.Level4.ChunLiVsDhalsim +------------------------------ +| time/ | | +| fps | 1489 | +| iterations | 1 | +| time_elapsed | 28 | +| total_timesteps | 43200 | +------------------------------ + +Current state: ChampionX.Level6.ChunLiVsEHonda +----------------------------------------- +| time/ | | +| fps | 693 | +| iterations | 2 | +| time_elapsed | 124 | +| total_timesteps | 86400 | +| train/ | | +| approx_kl | 0.008018286 | +| clip_fraction | 0.0528 | +| clip_range | 0.2 | +| entropy_loss | -8.31 | +| explained_variance | -0.000782 | +| learning_rate | 0.0003 | +| loss | 189 | +| n_updates | 10 | +| policy_gradient_loss | -0.00354 | +| value_loss | 398 | +----------------------------------------- + +Current state: ChampionX.Level1.ChunLiVsKen +----------------------------------------- +| time/ | | +| fps | 476 | +| iterations | 3 | +| time_elapsed | 271 | +| total_timesteps | 129600 | +| train/ | | +| approx_kl | 0.010610209 | +| clip_fraction | 0.119 | +| clip_range | 0.2 | +| entropy_loss | -8.3 | +| explained_variance | 0.0323 | +| learning_rate | 0.0003 | +| loss | 0.228 | +| n_updates | 20 | +| policy_gradient_loss | -0.00663 | +| value_loss | 103 | +----------------------------------------- + +Current state: ChampionX.Level10.ChunLiVsVega +----------------------------------------- +| time/ | | +| fps | 
467 | +| iterations | 4 | +| time_elapsed | 369 | +| total_timesteps | 172800 | +| train/ | | +| approx_kl | 0.011115557 | +| clip_fraction | 0.122 | +| clip_range | 0.2 | +| entropy_loss | -8.3 | +| explained_variance | 0.125 | +| learning_rate | 0.0003 | +| loss | 5.37 | +| n_updates | 30 | +| policy_gradient_loss | -0.00485 | +| value_loss | 83.8 | +----------------------------------------- + +Current state: ChampionX.Level11.ChunLiVsSagat +----------------------------------------- +| time/ | | +| fps | 462 | +| iterations | 5 | +| time_elapsed | 466 | +| total_timesteps | 216000 | +| train/ | | +| approx_kl | 0.012769428 | +| clip_fraction | 0.133 | +| clip_range | 0.2 | +| entropy_loss | -8.29 | +| explained_variance | 0.409 | +| learning_rate | 0.0003 | +| loss | 18.4 | +| n_updates | 40 | +| policy_gradient_loss | -0.00746 | +| value_loss | 31.6 | +----------------------------------------- + +Current state: ChampionX.Level7.ChunLiVsBlanka +----------------------------------------- +| time/ | | +| fps | 460 | +| iterations | 6 | +| time_elapsed | 563 | +| total_timesteps | 259200 | +| train/ | | +| approx_kl | 0.014561476 | +| clip_fraction | 0.184 | +| clip_range | 0.2 | +| entropy_loss | -8.29 | +| explained_variance | 0.15 | +| learning_rate | 0.0003 | +| loss | 0.66 | +| n_updates | 50 | +| policy_gradient_loss | -0.00799 | +| value_loss | 23.2 | +----------------------------------------- + +Current state: ChampionX.Level12.ChunLiVsBison +----------------------------------------- +| time/ | | +| fps | 447 | +| iterations | 7 | +| time_elapsed | 675 | +| total_timesteps | 302400 | +| train/ | | +| approx_kl | 0.013581872 | +| clip_fraction | 0.147 | +| clip_range | 0.2 | +| entropy_loss | -8.28 | +| explained_variance | 0.447 | +| learning_rate | 0.0003 | +| loss | 0.794 | +| n_updates | 60 | +| policy_gradient_loss | -0.00405 | +| value_loss | 33.4 | +----------------------------------------- + +Current state: ChampionX.Level4.ChunLiVsDhalsim +----------------------------------------- +| time/ | | +| fps | 440 | +| iterations | 8 | +| time_elapsed | 784 | +| total_timesteps | 345600 | +| train/ | | +| approx_kl | 0.015053411 | +| clip_fraction | 0.186 | +| clip_range | 0.2 | +| entropy_loss | -8.28 | +| explained_variance | 0.39 | +| learning_rate | 0.0003 | +| loss | 0.313 | +| n_updates | 70 | +| policy_gradient_loss | -0.00594 | +| value_loss | 22.3 | +----------------------------------------- + +Current state: ChampionX.Level5.ChunLiVsRyu +---------------------------------------- +| time/ | | +| fps | 433 | +| iterations | 9 | +| time_elapsed | 896 | +| total_timesteps | 388800 | +| train/ | | +| approx_kl | 0.01407744 | +| clip_fraction | 0.152 | +| clip_range | 0.2 | +| entropy_loss | -8.27 | +| explained_variance | 0.326 | +| learning_rate | 0.0003 | +| loss | 0.396 | +| n_updates | 80 | +| policy_gradient_loss | -0.00862 | +| value_loss | 15.7 | +---------------------------------------- + +Current state: ChampionX.Level9.ChunLiVsBalrog +----------------------------------------- +| time/ | | +| fps | 428 | +| iterations | 10 | +| time_elapsed | 1007 | +| total_timesteps | 432000 | +| train/ | | +| approx_kl | 0.013460734 | +| clip_fraction | 0.148 | +| clip_range | 0.2 | +| entropy_loss | -8.27 | +| explained_variance | 0.384 | +| learning_rate | 0.0003 | +| loss | 0.227 | +| n_updates | 90 | +| policy_gradient_loss | -0.00498 | +| value_loss | 16.7 | +----------------------------------------- + +Current state: ChampionX.Level9.ChunLiVsBalrog 
+------------------------------------------ +| time/ | | +| fps | 424 | +| iterations | 11 | +| time_elapsed | 1118 | +| total_timesteps | 475200 | +| train/ | | +| approx_kl | 0.0145851895 | +| clip_fraction | 0.165 | +| clip_range | 0.2 | +| entropy_loss | -8.26 | +| explained_variance | 0.352 | +| learning_rate | 0.0003 | +| loss | 0.147 | +| n_updates | 100 | +| policy_gradient_loss | -0.00597 | +| value_loss | 19.8 | +------------------------------------------ + +Current state: ChampionX.Level4.ChunLiVsDhalsim +----------------------------------------- +| time/ | | +| fps | 424 | +| iterations | 12 | +| time_elapsed | 1219 | +| total_timesteps | 518400 | +| train/ | | +| approx_kl | 0.015144574 | +| clip_fraction | 0.161 | +| clip_range | 0.2 | +| entropy_loss | -8.25 | +| explained_variance | 0.383 | +| learning_rate | 0.0003 | +| loss | 1.52 | +| n_updates | 110 | +| policy_gradient_loss | -0.00749 | +| value_loss | 24.1 | +----------------------------------------- + +Current state: ChampionX.Level6.ChunLiVsEHonda +----------------------------------------- +| time/ | | +| fps | 425 | +| iterations | 13 | +| time_elapsed | 1318 | +| total_timesteps | 561600 | +| train/ | | +| approx_kl | 0.015790801 | +| clip_fraction | 0.158 | +| clip_range | 0.2 | +| entropy_loss | -8.25 | +| explained_variance | 0.555 | +| learning_rate | 0.0003 | +| loss | 0.665 | +| n_updates | 120 | +| policy_gradient_loss | -0.00889 | +| value_loss | 20.7 | +----------------------------------------- + +Current state: ChampionX.Level6.ChunLiVsEHonda +----------------------------------------- +| time/ | | +| fps | 426 | +| iterations | 14 | +| time_elapsed | 1417 | +| total_timesteps | 604800 | +| train/ | | +| approx_kl | 0.016785465 | +| clip_fraction | 0.173 | +| clip_range | 0.2 | +| entropy_loss | -8.24 | +| explained_variance | 0.609 | +| learning_rate | 0.0003 | +| loss | 0.313 | +| n_updates | 130 | +| policy_gradient_loss | -0.00758 | +| value_loss | 14.9 | +----------------------------------------- + +Current state: ChampionX.Level10.ChunLiVsVega +----------------------------------------- +| time/ | | +| fps | 428 | +| iterations | 15 | +| time_elapsed | 1513 | +| total_timesteps | 648000 | +| train/ | | +| approx_kl | 0.017042443 | +| clip_fraction | 0.176 | +| clip_range | 0.2 | +| entropy_loss | -8.24 | +| explained_variance | 0.759 | +| learning_rate | 0.0003 | +| loss | 0.634 | +| n_updates | 140 | +| policy_gradient_loss | -0.00617 | +| value_loss | 15.4 | +----------------------------------------- + +Current state: ChampionX.Level3.ChunLiVsZangief +----------------------------------------- +| time/ | | +| fps | 427 | +| iterations | 16 | +| time_elapsed | 1618 | +| total_timesteps | 691200 | +| train/ | | +| approx_kl | 0.017714709 | +| clip_fraction | 0.176 | +| clip_range | 0.2 | +| entropy_loss | -8.23 | +| explained_variance | 0.79 | +| learning_rate | 0.0003 | +| loss | 0.941 | +| n_updates | 150 | +| policy_gradient_loss | -0.00703 | +| value_loss | 17.5 | +----------------------------------------- + +Current state: ChampionX.Level7.ChunLiVsBlanka +----------------------------------------- +| time/ | | +| fps | 424 | +| iterations | 17 | +| time_elapsed | 1728 | +| total_timesteps | 734400 | +| train/ | | +| approx_kl | 0.018709755 | +| clip_fraction | 0.196 | +| clip_range | 0.2 | +| entropy_loss | -8.22 | +| explained_variance | 0.746 | +| learning_rate | 0.0003 | +| loss | 0.505 | +| n_updates | 160 | +| policy_gradient_loss | -0.00795 | +| value_loss | 11.9 | 
+----------------------------------------- + +Current state: ChampionX.Level11.ChunLiVsSagat +----------------------------------------- +| time/ | | +| fps | 423 | +| iterations | 18 | +| time_elapsed | 1837 | +| total_timesteps | 777600 | +| train/ | | +| approx_kl | 0.017850244 | +| clip_fraction | 0.182 | +| clip_range | 0.2 | +| entropy_loss | -8.21 | +| explained_variance | 0.687 | +| learning_rate | 0.0003 | +| loss | 0.379 | +| n_updates | 170 | +| policy_gradient_loss | -0.00568 | +| value_loss | 15.2 | +----------------------------------------- + +Current state: ChampionX.Level3.ChunLiVsZangief +---------------------------------------- +| time/ | | +| fps | 420 | +| iterations | 19 | +| time_elapsed | 1950 | +| total_timesteps | 820800 | +| train/ | | +| approx_kl | 0.02048213 | +| clip_fraction | 0.221 | +| clip_range | 0.2 | +| entropy_loss | -8.21 | +| explained_variance | 0.707 | +| learning_rate | 0.0003 | +| loss | 0.391 | +| n_updates | 180 | +| policy_gradient_loss | -0.00419 | +| value_loss | 13 | +---------------------------------------- + +Current state: ChampionX.Level11.ChunLiVsSagat +----------------------------------------- +| time/ | | +| fps | 418 | +| iterations | 20 | +| time_elapsed | 2062 | +| total_timesteps | 864000 | +| train/ | | +| approx_kl | 0.016617421 | +| clip_fraction | 0.172 | +| clip_range | 0.2 | +| entropy_loss | -8.2 | +| explained_variance | 0.744 | +| learning_rate | 0.0003 | +| loss | 1.66 | +| n_updates | 190 | +| policy_gradient_loss | -0.00437 | +| value_loss | 15.8 | +----------------------------------------- + +Current state: ChampionX.Level6.ChunLiVsEHonda +----------------------------------------- +| time/ | | +| fps | 417 | +| iterations | 21 | +| time_elapsed | 2174 | +| total_timesteps | 907200 | +| train/ | | +| approx_kl | 0.017259926 | +| clip_fraction | 0.171 | +| clip_range | 0.2 | +| entropy_loss | -8.19 | +| explained_variance | 0.756 | +| learning_rate | 0.0003 | +| loss | 0.457 | +| n_updates | 200 | +| policy_gradient_loss | -0.00897 | +| value_loss | 14.9 | +----------------------------------------- + +Current state: ChampionX.Level10.ChunLiVsVega +----------------------------------------- +| time/ | | +| fps | 417 | +| iterations | 22 | +| time_elapsed | 2276 | +| total_timesteps | 950400 | +| train/ | | +| approx_kl | 0.018794816 | +| clip_fraction | 0.19 | +| clip_range | 0.2 | +| entropy_loss | -8.18 | +| explained_variance | 0.834 | +| learning_rate | 0.0003 | +| loss | 3.33 | +| n_updates | 210 | +| policy_gradient_loss | -0.00535 | +| value_loss | 15.5 | +----------------------------------------- + +Current state: ChampionX.Level5.ChunLiVsRyu +----------------------------------------- +| time/ | | +| fps | 418 | +| iterations | 23 | +| time_elapsed | 2374 | +| total_timesteps | 993600 | +| train/ | | +| approx_kl | 0.019361915 | +| clip_fraction | 0.188 | +| clip_range | 0.2 | +| entropy_loss | -8.18 | +| explained_variance | 0.825 | +| learning_rate | 0.0003 | +| loss | 0.235 | +| n_updates | 220 | +| policy_gradient_loss | -0.00762 | +| value_loss | 13.4 | +----------------------------------------- + +Current state: ChampionX.Level2.ChunLiVsChunLi +----------------------------------------- +| time/ | | +| fps | 419 | +| iterations | 24 | +| time_elapsed | 2471 | +| total_timesteps | 1036800 | +| train/ | | +| approx_kl | 0.022115083 | +| clip_fraction | 0.233 | +| clip_range | 0.2 | +| entropy_loss | -8.18 | +| explained_variance | 0.8 | +| learning_rate | 0.0003 | +| loss | 0.211 | +| n_updates | 230 | +| 
policy_gradient_loss | -0.00771 | +| value_loss | 11.7 | +----------------------------------------- + +Current state: ChampionX.Level3.ChunLiVsZangief +----------------------------------------- +| time/ | | +| fps | 419 | +| iterations | 25 | +| time_elapsed | 2574 | +| total_timesteps | 1080000 | +| train/ | | +| approx_kl | 0.023090197 | +| clip_fraction | 0.233 | +| clip_range | 0.2 | +| entropy_loss | -8.17 | +| explained_variance | 0.759 | +| learning_rate | 0.0003 | +| loss | 0.445 | +| n_updates | 240 | +| policy_gradient_loss | -0.00523 | +| value_loss | 13.7 | +----------------------------------------- + +Current state: ChampionX.Level11.ChunLiVsSagat +----------------------------------------- +| time/ | | +| fps | 418 | +| iterations | 26 | +| time_elapsed | 2683 | +| total_timesteps | 1123200 | +| train/ | | +| approx_kl | 0.024867734 | +| clip_fraction | 0.281 | +| clip_range | 0.2 | +| entropy_loss | -8.16 | +| explained_variance | 0.688 | +| learning_rate | 0.0003 | +| loss | 0.557 | +| n_updates | 250 | +| policy_gradient_loss | 0.00215 | +| value_loss | 13.9 | +----------------------------------------- + +Current state: ChampionX.Level10.ChunLiVsVega +----------------------------------------- +| time/ | | +| fps | 417 | +| iterations | 27 | +| time_elapsed | 2793 | +| total_timesteps | 1166400 | +| train/ | | +| approx_kl | 0.020454599 | +| clip_fraction | 0.203 | +| clip_range | 0.2 | +| entropy_loss | -8.19 | +| explained_variance | 0.766 | +| learning_rate | 0.0003 | +| loss | 0.314 | +| n_updates | 260 | +| policy_gradient_loss | -0.0058 | +| value_loss | 17.1 | +----------------------------------------- + +Current state: ChampionX.Level10.ChunLiVsVega +----------------------------------------- +| time/ | | +| fps | 416 | +| iterations | 28 | +| time_elapsed | 2904 | +| total_timesteps | 1209600 | +| train/ | | +| approx_kl | 0.020690009 | +| clip_fraction | 0.208 | +| clip_range | 0.2 | +| entropy_loss | -8.17 | +| explained_variance | 0.827 | +| learning_rate | 0.0003 | +| loss | 1.38 | +| n_updates | 270 | +| policy_gradient_loss | 2.12e-05 | +| value_loss | 20.4 | +----------------------------------------- + +Current state: ChampionX.Level10.ChunLiVsVega +----------------------------------------- +| time/ | | +| fps | 415 | +| iterations | 29 | +| time_elapsed | 3015 | +| total_timesteps | 1252800 | +| train/ | | +| approx_kl | 0.020646438 | +| clip_fraction | 0.208 | +| clip_range | 0.2 | +| entropy_loss | -8.15 | +| explained_variance | 0.81 | +| learning_rate | 0.0003 | +| loss | 0.235 | +| n_updates | 280 | +| policy_gradient_loss | -0.00852 | +| value_loss | 12.5 | +----------------------------------------- + +Current state: ChampionX.Level9.ChunLiVsBalrog +----------------------------------------- +| time/ | | +| fps | 414 | +| iterations | 30 | +| time_elapsed | 3128 | +| total_timesteps | 1296000 | +| train/ | | +| approx_kl | 0.021910097 | +| clip_fraction | 0.212 | +| clip_range | 0.2 | +| entropy_loss | -8.15 | +| explained_variance | 0.79 | +| learning_rate | 0.0003 | +| loss | 0.0264 | +| n_updates | 290 | +| policy_gradient_loss | -0.00872 | +| value_loss | 12.5 | +----------------------------------------- + +Current state: ChampionX.Level12.ChunLiVsBison +----------------------------------------- +| time/ | | +| fps | 412 | +| iterations | 31 | +| time_elapsed | 3243 | +| total_timesteps | 1339200 | +| train/ | | +| approx_kl | 0.025281599 | +| clip_fraction | 0.254 | +| clip_range | 0.2 | +| entropy_loss | -8.13 | +| explained_variance | 0.773 | +| 
learning_rate | 0.0003 | +| loss | 1.18 | +| n_updates | 300 | +| policy_gradient_loss | -0.00679 | +| value_loss | 12.6 | +----------------------------------------- + +Current state: ChampionX.Level2.ChunLiVsChunLi +----------------------------------------- +| time/ | | +| fps | 412 | +| iterations | 32 | +| time_elapsed | 3349 | +| total_timesteps | 1382400 | +| train/ | | +| approx_kl | 0.026466375 | +| clip_fraction | 0.259 | +| clip_range | 0.2 | +| entropy_loss | -8.12 | +| explained_variance | 0.647 | +| learning_rate | 0.0003 | +| loss | 0.518 | +| n_updates | 310 | +| policy_gradient_loss | 0.000522 | +| value_loss | 18.8 | +----------------------------------------- \ No newline at end of file diff --git a/002_lstm/__pycache__/cnn_lstm.cpython-38.pyc b/002_lstm/__pycache__/cnn_lstm.cpython-38.pyc new file mode 100644 index 0000000..942f36c Binary files /dev/null and b/002_lstm/__pycache__/cnn_lstm.cpython-38.pyc differ diff --git a/002_lstm/__pycache__/street_fighter_custom_wrapper.cpython-38.pyc b/002_lstm/__pycache__/street_fighter_custom_wrapper.cpython-38.pyc new file mode 100644 index 0000000..f44c664 Binary files /dev/null and b/002_lstm/__pycache__/street_fighter_custom_wrapper.cpython-38.pyc differ diff --git a/002_lstm/cnn_lstm.py b/002_lstm/cnn_lstm.py new file mode 100644 index 0000000..fb4c20d --- /dev/null +++ b/002_lstm/cnn_lstm.py @@ -0,0 +1,35 @@ +import torch +import torch.nn as nn + +class CNNEncoder(nn.Module): + def __init__(self, features_dim=512): + super(CNNEncoder, self).__init__() + self.conv1 = nn.Conv2d(16, 32, kernel_size=5, stride=2) + self.relu1 = nn.ReLU() + self.conv2 = nn.Conv2d(32, 64, kernel_size=5, stride=2) + self.relu2 = nn.ReLU() + self.conv3 = nn.Conv2d(64, 64, kernel_size=3, stride=1) + self.relu3 = nn.ReLU() + self.fc = nn.Linear(16384, 512) + + def forward(self, x): + x = self.relu1(self.conv1(x)) + x = self.relu2(self.conv2(x)) + x = self.relu3(self.conv3(x)) + x = x.view(x.size(0), -1) + x = self.fc(x) + return x + +class CNNLSTM(nn.Module): + def __init__(self, features_dim=512): + super(CNNLSTM, self).__init__() + self.encoder = CNNEncoder(512) + self.lstm = nn.LSTM(512, 512) + + def forward(self, x, hidden): + x = self.encoder(x) + x, hidden = self.lstm(x.unsqueeze(0), hidden) + return x.squeeze(0), hidden + + def init_hidden(self, batch_size): + return (torch.zeros(1, batch_size, 512), torch.zeros(1, batch_size, 512)) \ No newline at end of file diff --git a/002_lstm/street_fighter_custom_wrapper.py b/002_lstm/street_fighter_custom_wrapper.py new file mode 100644 index 0000000..8a336dc --- /dev/null +++ b/002_lstm/street_fighter_custom_wrapper.py @@ -0,0 +1,102 @@ +import collections + +import gym +import cv2 +import numpy as np +from torchvision.transforms import Normalize +from gym.spaces import MultiBinary + +# Custom environment wrapper +class StreetFighterCustomWrapper(gym.Wrapper): + def __init__(self, env, testing=False, threshold=0.65): + super(StreetFighterCustomWrapper, self).__init__(env) + + self.action_space = MultiBinary(12) + + # Use a deque to store the last 16 frames (0.267 seconds) + self.frame_stack = collections.deque(maxlen=16) + + self.threshold = threshold + self.game_screen_gray = None + + self.prev_player_health = 1.0 + self.prev_opponent_health = 1.0 + + # Update observation space to include 16 stacked grayscale images + self.observation_space = gym.spaces.Box( + low=0.0, high=1.0, shape=(16, 84, 84), dtype=np.float32 + ) + + self.testing = testing + + def _preprocess_observation(self, observation): + 
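        # Converts the raw frame to grayscale, resizes it to 84x84, appends it to the
        # 16-frame deque (16 frames at 60 fps = ~0.267 s of gameplay), and returns the
        # stacked frames with a leading batch dimension, i.e. shape (1, 16, 84, 84).
        # This 16-channel stack is what cnn_lstm.CNNEncoder consumes (Conv2d(16, 32, ...)):
        # its conv layers shrink 84x84 to 40x40 -> 18x18 -> 16x16, so the flattened size is
        # 64 * 16 * 16 = 16,384, matching nn.Linear(16384, 512) ahead of the LSTM(512, 512)
        # in CNNLSTM.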
self.game_screen_gray = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY) + resized_image = cv2.resize(self.game_screen_gray, (84, 84), interpolation=cv2.INTER_AREA) / 255.0 + # Add the resized image to the frame stack + self.frame_stack.append(resized_image) + + # Stack the last 16 frames and return the stacked frames + stacked_frames = np.stack(self.frame_stack, axis=0)[np.newaxis, ...] # Shape: (1, 16, 84, 84) + return stacked_frames + + def _get_win_or_lose_bonus(self): + if self.prev_player_health > self.prev_opponent_health: + # print('You win!') + return 300 + else: + # print('You lose!') + return -300 + + def _get_reward(self): + player_health_area = self.game_screen_gray[15:20, 32:120] + oppoent_health_area = self.game_screen_gray[15:20, 136:224] + + # Get health points using the number of pixels above 129. + player_health = np.sum(player_health_area > 129) / player_health_area.size + opponent_health = np.sum(oppoent_health_area > 129) / oppoent_health_area.size + + player_health_diff = self.prev_player_health - player_health + opponent_health_diff = self.prev_opponent_health - opponent_health + + reward = (opponent_health_diff - player_health_diff) * 200 # max would be 200 + + # Penalty for each step without any change in health + if opponent_health_diff <= 0.0000001: + reward -= 12.0 / 60.0 # -12 points per second if no damage to opponent + + self.prev_player_health = player_health + self.prev_opponent_health = opponent_health + + # Print the health values of the player and the opponent + # print("Player health: %f Opponent health:%f" % (player_health, opponent_health)) + return reward + + def reset(self): + observation = self.env.reset() + self.prev_player_health = 1.0 + self.prev_opponent_health = 1.0 + + # Clear the frame stack and add the first observation 16 times + self.frame_stack.clear() + for _ in range(16): + self.frame_stack.append(self._preprocess_observation(observation)[0]) + + return self._preprocess_observation(observation) + + def step(self, action): + # observation, _, _, info = self.env.step(action) + observation, _reward, _done, info = self.env.step(self.env.action_space.sample()) + custom_reward = self._get_reward() + custom_reward -= 1.0 / 60.0 # penalty for each step (-1 points per second) + + custom_done = False + if self.prev_player_health <= 0.00001 or self.prev_opponent_health <= 0.00001: + custom_reward += self._get_win_or_lose_bonus() + if not self.testing: + custom_done = True + else: + self.prev_player_health = 1.0 + self.prev_opponent_health = 1.0 + + return self._preprocess_observation(observation), custom_reward, custom_done, info + \ No newline at end of file diff --git a/002_lstm/test.py b/002_lstm/test.py new file mode 100644 index 0000000..9ecee77 --- /dev/null +++ b/002_lstm/test.py @@ -0,0 +1,73 @@ +import time + +import cv2 +import retro +from stable_baselines3 import PPO +from stable_baselines3.common.vec_env import DummyVecEnv + +from cnn_lstm import CNNLSTM, CNNEncoder +from street_fighter_custom_wrapper import StreetFighterCustomWrapper + +def make_env(game, state): + def _init(): + win_template = cv2.imread('images/pattern_win_gray.png', cv2.IMREAD_GRAYSCALE) + lose_template = cv2.imread('images/pattern_lose_gray.png', cv2.IMREAD_GRAYSCALE) + env = retro.RetroEnv( + game=game, + state=state, + use_restricted_actions=retro.Actions.FILTERED, + obs_type=retro.Observations.IMAGE + ) + env = StreetFighterCustomWrapper(env, win_template, lose_template, testing=True) + return env + return _init + +game = 
"StreetFighterIISpecialChampionEdition-Genesis" +state_stages = [ + "Champion.Level1.ChunLiVsGuile", + "Champion.Level2.ChunLiVsKen", + "Champion.Level3.ChunLiVsChunLi", + "Champion.Level4.ChunLiVsZangief", + "Champion.Level5.ChunLiVsDhalsim", + "Champion.Level6.ChunLiVsRyu", + "Champion.Level7.ChunLiVsEHonda", + "Champion.Level8.ChunLiVsBlanka", + "Champion.Level9.ChunLiVsBalrog", + "Champion.Level10.ChunLiVsVega", + "Champion.Level11.ChunLiVsSagat", + "Champion.Level12.ChunLiVsBison" + # Add other stages as necessary +] + +env = make_env(game, state_stages[0])() + +# Wrap the environment +env = DummyVecEnv([lambda: env]) + +policy_kwargs = { + 'features_extractor_class': CNNEncoder, + 'net_arch': [512, 'lstm'] +} + +model = PPO( + "CnnPolicy", + env, + device="cuda", + policy_kwargs=policy_kwargs, + verbose=1 +) +model.load(r"trained_models/ppo_chunli_1296000_steps") + +obs = env.reset() +done = False + +while True: + timestamp = time.time() + action, _ = model.predict(obs) + obs, rewards, done, info = env.step(action) + env.render() + render_time = time.time() - timestamp + if render_time < 0.0111: + time.sleep(0.0111 - render_time) # Add a delay for 90 FPS + +# env.close() diff --git a/002_lstm/train.py b/002_lstm/train.py new file mode 100644 index 0000000..47c425b --- /dev/null +++ b/002_lstm/train.py @@ -0,0 +1,112 @@ +import os +import random + +import gym +import cv2 +import retro +import numpy as np +from stable_baselines3 import PPO +from stable_baselines3.common.policies import ActorCriticPolicy +from stable_baselines3.common.vec_env import SubprocVecEnv +from stable_baselines3.common.callbacks import BaseCallback, CheckpointCallback + +from cnn_lstm import CNNLSTM, CNNEncoder +from street_fighter_custom_wrapper import StreetFighterCustomWrapper + +class RandomOpponentChangeCallback(BaseCallback): + def __init__(self, stages, opponent_interval, verbose=0): + super(RandomOpponentChangeCallback, self).__init__(verbose) + self.stages = stages + self.opponent_interval = opponent_interval + + def _on_step(self) -> bool: + if self.n_calls % self.opponent_interval == 0: + new_state = random.choice(self.stages) + print("\nCurrent state:", new_state) + self.training_env.env_method("load_state", new_state, indices=None) + return True + +def make_env(game, state, seed=0): + def _init(): + env = retro.RetroEnv( + game=game, + state=state, + use_restricted_actions=retro.Actions.FILTERED, + obs_type=retro.Observations.IMAGE + ) + env = StreetFighterCustomWrapper(env) + env.seed(seed) + return env + return _init + +def main(): + # Set up the environment and model + game = "StreetFighterIISpecialChampionEdition-Genesis" + state_stages = [ + "ChampionX.Level1.ChunLiVsKen", + "ChampionX.Level2.ChunLiVsChunLi", + "ChampionX.Level3.ChunLiVsZangief", + "ChampionX.Level4.ChunLiVsDhalsim", + "ChampionX.Level5.ChunLiVsRyu", + "ChampionX.Level6.ChunLiVsEHonda", + "ChampionX.Level7.ChunLiVsBlanka", + "ChampionX.Level8.ChunLiVsGuile", + "ChampionX.Level9.ChunLiVsBalrog", + "ChampionX.Level10.ChunLiVsVega", + "ChampionX.Level11.ChunLiVsSagat", + "ChampionX.Level12.ChunLiVsBison" + # Add other stages as necessary + ] + # Champion is at difficulty level 4, ChampionX is at difficulty level 8. 
+ + num_envs = 8 + + # env = SubprocVecEnv([make_env(game, state_stages[0], seed=i) for i in range(num_envs)]) + env = SubprocVecEnv([make_env(game, state_stages[0], seed=i) for i in range(num_envs)]) + + class CustomPolicy(ActorCriticPolicy): + def __init__(self, *args, **kwargs): + super(CustomPolicy, self).__init__(*args, **kwargs) + + self.features_extractor = CNNLSTM() + + model = PPO( + CustomPolicy, + env, + device="cuda", + verbose=1, + n_steps=5400, + batch_size=64, + n_epochs=10, + learning_rate=0.0003, + ent_coef=0.01, + clip_range=0.2, + clip_range_vf=None, + gamma=0.99, + gae_lambda=0.95, + max_grad_norm=0.5, + use_sde=False, + sde_sample_freq=-1 + ) + + # Set the save directory + save_dir = "trained_models" + os.makedirs(save_dir, exist_ok=True) + + # Set up callbacks + opponent_interval = 5400 # stage_interval * num_envs = total_steps_per_stage + checkpoint_interval = 54000 # checkpoint_interval * num_envs = total_steps_per_checkpoint (Every 80 rounds) + checkpoint_callback = CheckpointCallback(save_freq=checkpoint_interval, save_path=save_dir, name_prefix="ppo_chunli") + stage_increase_callback = RandomOpponentChangeCallback(state_stages, opponent_interval, save_dir) + + + model.learn( + total_timesteps=int(6048000), # total_timesteps = stage_interval * num_envs * num_stages (1120 rounds) + callback=[checkpoint_callback, stage_increase_callback] + ) + + # Save the final model + model.save(os.path.join(save_dir, "ppo_sf2_chunli_final.zip")) + +if __name__ == "__main__": + main() diff --git a/__pycache__/custom_cnn.cpython-38.pyc b/__pycache__/custom_cnn.cpython-38.pyc index db6b9b0..cf6425e 100644 Binary files a/__pycache__/custom_cnn.cpython-38.pyc and b/__pycache__/custom_cnn.cpython-38.pyc differ diff --git a/__pycache__/custom_sf2_cv_env.cpython-38.pyc b/__pycache__/custom_sf2_cv_env.cpython-38.pyc index f084501..8085902 100644 Binary files a/__pycache__/custom_sf2_cv_env.cpython-38.pyc and b/__pycache__/custom_sf2_cv_env.cpython-38.pyc differ diff --git a/__pycache__/mobilenet_extractor.cpython-38.pyc b/__pycache__/mobilenet_extractor.cpython-38.pyc new file mode 100644 index 0000000..da42d5b Binary files /dev/null and b/__pycache__/mobilenet_extractor.cpython-38.pyc differ diff --git a/custom_cnn.py b/custom_cnn.py index 5de99a7..affade8 100644 --- a/custom_cnn.py +++ b/custom_cnn.py @@ -3,7 +3,6 @@ import torch import torch.nn as nn from stable_baselines3.common.torch_layers import BaseFeaturesExtractor from torchvision.models import mobilenet_v3_small - # Custom feature extractor (CNN) class CustomCNN(BaseFeaturesExtractor): def __init__(self, observation_space: gym.Space): diff --git a/custom_sf2_cv_env.py b/custom_sf2_cv_env.py index 8538440..d885607 100644 --- a/custom_sf2_cv_env.py +++ b/custom_sf2_cv_env.py @@ -1,13 +1,18 @@ import gym import cv2 import numpy as np +import torch +from torchvision.transforms import Normalize +from gym.spaces import MultiBinary # Custom environment wrapper class StreetFighterCustomWrapper(gym.Wrapper): def __init__(self, env, win_template, lose_template, testing=False, threshold=0.65): super(StreetFighterCustomWrapper, self).__init__(env) - self.win_template = win_template - self.lose_template = lose_template + self.action_space = MultiBinary(12) + + # self.win_template = win_template + # self.lose_template = lose_template self.threshold = threshold self.game_screen_gray = None @@ -15,24 +20,46 @@ class StreetFighterCustomWrapper(gym.Wrapper): self.prev_opponent_health = 1.0 # Update observation space to single-channel grayscale 
image + # self.observation_space = gym.spaces.Box( + # low=0.0, high=1.0, shape=(84, 84, 1), dtype=np.float32 + # ) + + # observation_space for mobilenet self.observation_space = gym.spaces.Box( - low=0.0, high=1.0, shape=(84, 84, 1), dtype=np.float32 + low=0.0, high=1.0, shape=(3, 96, 96), dtype=np.float32 ) self.testing = testing + + # Normalize the image for MobileNetV3Small. + self.normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) def _preprocess_observation(self, observation): + # self.game_screen_gray = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY) + # resized_image = cv2.resize(self.game_screen_gray, (84, 84), interpolation=cv2.INTER_AREA) / 255.0 + # return np.expand_dims(resized_image, axis=-1) + + # # Using MobileNetV3Small. self.game_screen_gray = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY) - resized_image = cv2.resize(self.game_screen_gray, (84, 84), interpolation=cv2.INTER_AREA) / 255.0 - return np.expand_dims(resized_image, axis=-1) + resized_image = cv2.resize(observation, (96, 96), interpolation=cv2.INTER_AREA).astype(np.float32) / 255.0 + + # Convert the NumPy array to a PyTorch tensor + resized_image = torch.from_numpy(resized_image).permute(2, 0, 1) + + # Apply normalization + resized_image = self.normalize(resized_image) + + # # Add a batch dimension to match the model input shape + # # resized_image = resized_image.unsqueeze(0) + return resized_image def _get_win_or_lose_bonus(self): if self.prev_player_health > self.prev_opponent_health: # print('You win!') - return 200 + return 300 else: # print('You lose!') - return -200 + return -300 def _get_reward(self): player_health_area = self.game_screen_gray[15:20, 32:120] @@ -45,7 +72,11 @@ class StreetFighterCustomWrapper(gym.Wrapper): player_health_diff = self.prev_player_health - player_health opponent_health_diff = self.prev_opponent_health - opponent_health - reward = (opponent_health_diff - player_health_diff) * 100 # max would be 100 + reward = (opponent_health_diff - player_health_diff) * 200 # max would be 200 + + # Penalty for each step without any change in health + if opponent_health_diff <= 0.0000001: + reward -= 12.0 / 60.0 # -12 points per second if no damage to opponent self.prev_player_health = player_health self.prev_opponent_health = opponent_health @@ -62,8 +93,9 @@ class StreetFighterCustomWrapper(gym.Wrapper): def step(self, action): # observation, _, _, info = self.env.step(action) - observation, _reward, _done, info = self.env.step(action) + observation, _reward, _done, info = self.env.step(self.env.action_space.sample()) custom_reward = self._get_reward() + custom_reward -= 1.0 / 60.0 # penalty for each step (-1 points per second) custom_done = False if self.prev_player_health <= 0.00001 or self.prev_opponent_health <= 0.00001: @@ -74,4 +106,5 @@ class StreetFighterCustomWrapper(gym.Wrapper): self.prev_player_health = 1.0 self.prev_opponent_health = 1.0 - return self._preprocess_observation(observation), custom_reward, custom_done, info \ No newline at end of file + return self._preprocess_observation(observation), custom_reward, custom_done, info + \ No newline at end of file diff --git a/mobilenet_extractor.py b/mobilenet_extractor.py new file mode 100644 index 0000000..acd0e24 --- /dev/null +++ b/mobilenet_extractor.py @@ -0,0 +1,21 @@ +import gym +import torch +import torch.nn as nn +from stable_baselines3.common.torch_layers import BaseFeaturesExtractor +from torchvision.models import mobilenet_v3_small + +# Custom MobileNetV3 Feature Extractor +class 
MobileNetV3Extractor(BaseFeaturesExtractor): + def __init__(self, observation_space: gym.Space): + super(MobileNetV3Extractor, self).__init__(observation_space, features_dim=256) + self.mobilenet = mobilenet_v3_small(pretrained=True) + self.adaptive_pool = nn.AdaptiveAvgPool2d((1, 1)) + self.fc = nn.Linear(576, self.features_dim) + + def forward(self, observations: torch.Tensor) -> torch.Tensor: + # x = observations.permute(0, 2, 3, 1) # Swap the channel dimension + x = self.mobilenet.features(observations) + x = self.adaptive_pool(x) + x = torch.flatten(x, 1) + x = self.fc(x) + return x diff --git a/mobilenet_extractor_no_condensing.py b/mobilenet_extractor_no_condensing.py new file mode 100644 index 0000000..73a8c66 --- /dev/null +++ b/mobilenet_extractor_no_condensing.py @@ -0,0 +1,18 @@ +import gym +import torch +import torchvision +from stable_baselines3.common.torch_layers import BaseFeaturesExtractor + +# Custom MobileNetV3 Feature Extractor +class MobileNetV3Extractor(BaseFeaturesExtractor): + def __init__(self, observation_space: gym.Space): + super(MobileNetV3Extractor, self).__init__(observation_space, features_dim=576) + self.mobilenet = torchvision.models.mobilenet_v3_small(pretrained=True) + self.mobilenet = torch.nn.Sequential(*list(self.mobilenet.children())[:-1]) + self.adaptive_pool = torch.nn.AdaptiveAvgPool2d((1, 1)) + + def forward(self, observations: torch.Tensor) -> torch.Tensor: + x = self.mobilenet(observations) + x = self.adaptive_pool(x) + x = torch.flatten(x, 1) + return x diff --git a/test_cv_sf2_ai.py b/test_cv_sf2_ai.py index 75cefb7..6dae85c 100644 --- a/test_cv_sf2_ai.py +++ b/test_cv_sf2_ai.py @@ -9,13 +9,19 @@ from stable_baselines3 import PPO from stable_baselines3.common.vec_env import DummyVecEnv from custom_cnn import CustomCNN +from mobilenet_extractor import MobileNetV3Extractor from custom_sf2_cv_env import StreetFighterCustomWrapper def make_env(game, state, seed=0): def _init(): win_template = cv2.imread('images/pattern_win_gray.png', cv2.IMREAD_GRAYSCALE) lose_template = cv2.imread('images/pattern_lose_gray.png', cv2.IMREAD_GRAYSCALE) - env = retro.RetroEnv(game=game, state=state, obs_type=retro.Observations.IMAGE) + env = retro.RetroEnv( + game=game, + state=state, + use_restricted_actions=retro.Actions.FILTERED, + obs_type=retro.Observations.IMAGE + ) env = StreetFighterCustomWrapper(env, win_template, lose_template, testing=True) # env.seed(seed) return env @@ -43,9 +49,15 @@ env = make_env(game, state_stages[0])() # Wrap the environment env = DummyVecEnv([lambda: env]) +# policy_kwargs = { +# 'features_extractor_class': CustomCNN +# } + +# Using MobileNetV3 as the feature extractor policy_kwargs = { - 'features_extractor_class': CustomCNN + 'features_extractor_class': MobileNetV3Extractor } + model = PPO( "CnnPolicy", env, @@ -53,7 +65,7 @@ model = PPO( policy_kwargs=policy_kwargs, verbose=1 ) -model.load(r"trained_models_cv_test/ppo_sf2_chunli_final") +model.load(r"trained_models_cv_mobilenet_time_penalty/ppo_chunli_1296000_steps") obs = env.reset() done = False diff --git a/train_cv_sf2_ai.py b/train_cv_sf2_ai.py index 404515a..4f2e7eb 100644 --- a/train_cv_sf2_ai.py +++ b/train_cv_sf2_ai.py @@ -6,13 +6,12 @@ import cv2 import retro import numpy as np from stable_baselines3 import PPO -from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv +from stable_baselines3.common.vec_env import SubprocVecEnv from stable_baselines3.common.preprocessing import is_image_space, is_image_space_channels_first from 
stable_baselines3.common.callbacks import BaseCallback, CheckpointCallback -import torch -import torch.nn as nn from custom_cnn import CustomCNN +from mobilenet_extractor import MobileNetV3Extractor from custom_sf2_cv_env import StreetFighterCustomWrapper class RandomOpponentChangeCallback(BaseCallback): @@ -32,7 +31,12 @@ def make_env(game, state, seed=0): def _init(): win_template = cv2.imread('images/pattern_win_gray.png', cv2.IMREAD_GRAYSCALE) lose_template = cv2.imread('images/pattern_lose_gray.png', cv2.IMREAD_GRAYSCALE) - env = retro.RetroEnv(game=game, state=state, obs_type=retro.Observations.IMAGE) + env = retro.RetroEnv( + game=game, + state=state, + use_restricted_actions=retro.Actions.FILTERED, + obs_type=retro.Observations.IMAGE + ) env = StreetFighterCustomWrapper(env, win_template, lose_template) # env.seed(seed) return env @@ -42,28 +46,35 @@ def main(): # Set up the environment and model game = "StreetFighterIISpecialChampionEdition-Genesis" state_stages = [ - "Champion.Level1.ChunLiVsGuile", - "Champion.Level2.ChunLiVsKen", - "Champion.Level3.ChunLiVsChunLi", - "Champion.Level4.ChunLiVsZangief", - "Champion.Level5.ChunLiVsDhalsim", - "Champion.Level6.ChunLiVsRyu", - "Champion.Level7.ChunLiVsEHonda", - "Champion.Level8.ChunLiVsBlanka", - "Champion.Level9.ChunLiVsBalrog", - "Champion.Level10.ChunLiVsVega", - "Champion.Level11.ChunLiVsSagat", - "Champion.Level12.ChunLiVsBison" + "ChampionX.Level1.ChunLiVsKen", + "ChampionX.Level2.ChunLiVsChunLi", + "ChampionX.Level3.ChunLiVsZangief", + "ChampionX.Level4.ChunLiVsDhalsim", + "ChampionX.Level5.ChunLiVsRyu", + "ChampionX.Level6.ChunLiVsEHonda", + "ChampionX.Level7.ChunLiVsBlanka", + "ChampionX.Level8.ChunLiVsGuile", + "ChampionX.Level9.ChunLiVsBalrog", + "ChampionX.Level10.ChunLiVsVega", + "ChampionX.Level11.ChunLiVsSagat", + "ChampionX.Level12.ChunLiVsBison" # Add other stages as necessary ] + # Champion is at difficulty level 4, ChampionX is at difficulty level 8. 
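    # Observation/feature pipeline used by this script (see custom_sf2_cv_env.py and
    # mobilenet_extractor.py): the wrapper now emits 3x96x96 RGB tensors normalized with the
    # ImageNet mean/std, and MobileNetV3Extractor passes them through
    # mobilenet_v3_small(pretrained=True).features (a 576-channel map, roughly 3x3 spatial
    # for 96x96 inputs), adaptive-average-pools to 1x1, and projects 576 -> 256 with a
    # Linear layer. A minimal standalone check (a sketch; needs the pretrained weights):
    #   import gym, torch, numpy as np
    #   from mobilenet_extractor import MobileNetV3Extractor
    #   extractor = MobileNetV3Extractor(gym.spaces.Box(low=0.0, high=1.0, shape=(3, 96, 96), dtype=np.float32))
    #   assert extractor(torch.zeros(1, 3, 96, 96)).shape == (1, 256)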
num_envs = 8 # env = SubprocVecEnv([make_env(game, state_stages[0], seed=i) for i in range(num_envs)]) env = SubprocVecEnv([make_env(game, state_stages[0], seed=i) for i in range(num_envs)]) + # Using CustomCNN as the feature extractor + # policy_kwargs = { + # 'features_extractor_class': CustomCNN + # } + + # Using MobileNetV3 as the feature extractor policy_kwargs = { - 'features_extractor_class': CustomCNN + 'features_extractor_class': MobileNetV3Extractor } model = PPO( @@ -87,7 +98,7 @@ def main(): ) # Set the save directory - save_dir = "trained_models_cv_level6up" + save_dir = "trained_models_cv_customcnn_time_penalty" os.makedirs(save_dir, exist_ok=True) # Set up callbacks diff --git a/trained_models_cv_mobilenet_random/train_log.txt b/trained_models_cv_mobilenet_random/train_log.txt new file mode 100644 index 0000000..9d3df7a --- /dev/null +++ b/trained_models_cv_mobilenet_random/train_log.txt @@ -0,0 +1,291 @@ +(StreetFighterAI) PS C:\Users\unitec\Documents\AIProjects\street-fighter-ai> python .\train_cv_sf2_ai.py +Using cuda device + +Current state: Champion.Level8.ChunLiVsBlanka +------------------------------ +| time/ | | +| fps | 186 | +| iterations | 1 | +| time_elapsed | 115 | +| total_timesteps | 21600 | +------------------------------ + +Current state: Champion.Level12.ChunLiVsBison +------------------------------------------ +| time/ | | +| fps | 65 | +| iterations | 2 | +| time_elapsed | 655 | +| total_timesteps | 43200 | +| train/ | | +| approx_kl | 0.0053780936 | +| clip_fraction | 0.0431 | +| clip_range | 0.2 | +| entropy_loss | -8.31 | +| explained_variance | 2.93e-05 | +| learning_rate | 0.0003 | +| loss | 166 | +| n_updates | 10 | +| policy_gradient_loss | -0.0016 | +| value_loss | 92 | +------------------------------------------ + +Current state: Champion.Level6.ChunLiVsRyu +----------------------------------------- +| time/ | | +| fps | 54 | +| iterations | 3 | +| time_elapsed | 1196 | +| total_timesteps | 64800 | +| train/ | | +| approx_kl | 0.005385526 | +| clip_fraction | 0.0472 | +| clip_range | 0.2 | +| entropy_loss | -8.31 | +| explained_variance | -0.00129 | +| learning_rate | 0.0003 | +| loss | 0.0349 | +| n_updates | 20 | +| policy_gradient_loss | -0.000679 | +| value_loss | 58.4 | +----------------------------------------- + +Current state: Champion.Level10.ChunLiVsVega +----------------------------------------- +| time/ | | +| fps | 49 | +| iterations | 4 | +| time_elapsed | 1736 | +| total_timesteps | 86400 | +| train/ | | +| approx_kl | 0.005108807 | +| clip_fraction | 0.0236 | +| clip_range | 0.2 | +| entropy_loss | -8.3 | +| explained_variance | 0.000588 | +| learning_rate | 0.0003 | +| loss | 0.236 | +| n_updates | 30 | +| policy_gradient_loss | -0.00157 | +| value_loss | 82.7 | +----------------------------------------- + +Current state: Champion.Level12.ChunLiVsBison +------------------------------------------ +| time/ | | +| fps | 47 | +| iterations | 5 | +| time_elapsed | 2274 | +| total_timesteps | 108000 | +| train/ | | +| approx_kl | 0.0060475296 | +| clip_fraction | 0.054 | +| clip_range | 0.2 | +| entropy_loss | -8.3 | +| explained_variance | 0.00555 | +| learning_rate | 0.0003 | +| loss | 0.536 | +| n_updates | 40 | +| policy_gradient_loss | -0.000733 | +| value_loss | 40.5 | +------------------------------------------ + +Current state: Champion.Level4.ChunLiVsZangief +------------------------------------------ +| time/ | | +| fps | 46 | +| iterations | 6 | +| time_elapsed | 2811 | +| total_timesteps | 129600 | +| train/ | | +| 
approx_kl | 0.0059517785 |
+| clip_fraction | 0.0546 |
+| clip_range | 0.2 |
+| entropy_loss | -8.29 |
+| explained_variance | 0.0188 |
+| learning_rate | 0.0003 |
+| loss | 7.26 |
+| n_updates | 50 |
+| policy_gradient_loss | -0.00102 |
+| value_loss | 21.8 |
+------------------------------------------

The remaining iterations of this log (7 through 15), condensed to one row per rollout; clip_range 0.2, learning_rate 0.0003 and 21,600 timesteps per iteration throughout; "Current state" is the stage name printed before each table:

| iter | Current state                  | total_timesteps | fps | time_elapsed | approx_kl    | clip_fraction | entropy_loss | explained_variance | loss      | policy_gradient_loss | value_loss | n_updates |
| 7    | Champion.Level7.ChunLiVsEHonda | 151200          | 45  | 3348         | 0.0057667145 | 0.0337        | -8.29        | 0.0069             | 0.832     | -0.00133             | 36.7       | 60        |
| 8    | Champion.Level8.ChunLiVsBlanka | 172800          | 44  | 3883         | 0.0047547054 | 0.0291        | -8.29        | 0.0128             | 0.424     | -0.00186             | 21.2       | 70        |
| 9    | Champion.Level8.ChunLiVsBlanka | 194400          | 43  | 4422         | 0.0063244957 | 0.049         | -8.28        | 0.00417            | 0.146     | -0.00167             | 17         | 80        |
| 10   | Champion.Level8.ChunLiVsBlanka | 216000          | 43  | 4960         | 0.008022586  | 0.0632        | -8.29        | 0.0241             | 0.902     | -0.00295             | 13.5       | 90        |
| 11   | Champion.Level10.ChunLiVsVega  | 237600          | 43  | 5499         | 0.006470734  | 0.0596        | -8.3         | 0.0611             | 5.72      | -0.000976            | 8.5        | 100       |
| 12   | Champion.Level7.ChunLiVsEHonda | 259200          | 42  | 6040         | 0.00540813   | 0.0547        | -8.29        | -0.0271            | 0.75      | -0.00104             | 9.12       | 110       |
| 13   | Champion.Level8.ChunLiVsBlanka | 280800          | 42  | 6582         | 0.0058568    | 0.0608        | -8.29        | 0.0277             | 0.256     | -0.00284             | 6.99       | 120       |
| 14   | Champion.Level2.ChunLiVsKen    | 302400          | 42  | 7126         | 0.0066813217 | 0.0554        | -8.3         | 0.0689             | 0.504     | -0.00243             | 9.48       | 130       |
| 15   | Champion.Level8.ChunLiVsBlanka | 324000          | 42  | 7670         | 0.0069995625 | 0.0637        | -8.3         | -0.0336            | -0.000532 | -0.00198             | 9.39       | 140       |
\ No newline at end of file
diff --git a/trained_models_cv_random/trainin_logs_CustomCNN_random.txt b/trained_models_cv_random/trainin_logs_CustomCNN_random.txt
new file mode 100644
index 0000000..5465dfa
--- /dev/null
+++ b/trained_models_cv_random/trainin_logs_CustomCNN_random.txt
@@ -0,0 +1,2011 @@
+(StreetFighterAI) PS C:\Users\unitec\Documents\AIProjects\street-fighter-ai> python .\train_cv_sf2_ai.py
+Using cuda device
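The console capture above comes from train_cv_sf2_ai.py, which is not included in this diff, so the exact training setup is not visible; what the log itself shows is Stable-Baselines3's PPO logger running on CUDA, rollouts of 43,200 timesteps per logged iteration, and (per the file name) the CustomCNN feature extractor. The snippet below is only a sketch of that kind of setup: the helper name chunli_env, the 8-env x 5,400-step split, and the use of retro.make are assumptions, not the author's actual script.

```python
import retro
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import SubprocVecEnv

from custom_cnn import CustomCNN
from street_fighter_custom_wrapper import StreetFighterCustomWrapper

GAME = "StreetFighterIISpecialChampionEdition-Genesis"

def chunli_env(state):
    # Hypothetical helper: one emulator wrapped with the repo's custom
    # observation/reward wrapper. gym-retro allows only one emulator per
    # process, hence SubprocVecEnv below.
    return StreetFighterCustomWrapper(retro.make(game=GAME, state=state))

if __name__ == "__main__":
    # 43,200 timesteps per logged iteration = n_steps * n_envs; the actual
    # split is not visible in the log, 8 x 5,400 is just one plausible choice.
    env = SubprocVecEnv([lambda: chunli_env("Champion.Level1.ChunLiVsGuile")
                         for _ in range(8)])

    model = PPO(
        "CnnPolicy",
        env,
        device="cuda",               # -> "Using cuda device"
        policy_kwargs={"features_extractor_class": CustomCNN},
        n_steps=5400,                # per-env rollout length (assumed)
        verbose=1,                   # prints the time/ and train/ tables captured here
    )
```

How the stage gets re-randomized between rollouts, and the learn() call itself, are sketched further below.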
The captured run falls into two phases. First phase, iterations 1-33: total_timesteps grows from 43,200 to 1,425,600 in 2,047 seconds, with throughput settling around 700 fps. One row roughly every five iterations below; clip_range 0.2, learning_rate 0.0003 and 43,200 timesteps per iteration throughout; "Current state" is the stage name printed before each table:

| iter | Current state                   | total_timesteps | fps  | time_elapsed | approx_kl   | clip_fraction | entropy_loss | explained_variance | loss    | policy_gradient_loss | value_loss | n_updates |
| 1    | Champion.Level10.ChunLiVsVega   | 43200           | 3021 | 14           |             |               |              |                    |         |                      |            |           |
| 2    | Champion.Level9.ChunLiVsBalrog  | 86400           | 1119 | 77           | 0.008596159 | 0.0861        | -8.31        | 0.000249           | 0.0448  | -0.00293             | 63.7       | 10        |
| 5    | Champion.Level9.ChunLiVsBalrog  | 216000          | 816  | 264          | 0.017938577 | 0.26          | -8.29        | 0.0705             | 23.4    | -0.00308             | 17.5       | 40        |
| 10   | Champion.Level1.ChunLiVsGuile   | 432000          | 746  | 578          | 0.018104717 | 0.227         | -8.27        | 0.538              | 0.117   | -0.00673             | 7.65       | 90        |
| 15   | Champion.Level9.ChunLiVsBalrog  | 648000          | 723  | 895          | 0.01700595  | 0.216         | -8.24        | 0.531              | 0.00925 | -0.00818             | 7.14       | 140       |
| 20   | Champion.Level12.ChunLiVsBison  | 864000          | 711  | 1214         | 0.019934466 | 0.261         | -8.18        | 0.722              | 0.595   | -0.00507             | 7.64       | 190       |
| 25   | Champion.Level5.ChunLiVsDhalsim | 1080000         | 704  | 1533         | 0.01695526  | 0.198         | -8.22        | 0.63               | 0.0666  | -0.00626             | 8.95       | 240       |
| 30   | Champion.Level12.ChunLiVsBison  | 1296000         | 698  | 1854         | 0.016693246 | 0.199         | -8.19        | 0.86               | 0.0669  | -0.00799             | 6.74       | 290       |
| 33   | Champion.Level4.ChunLiVsZangief | 1425600         | 696  | 2047         | 0.020212276 | 0.23          | -8.2         | 0.845              | 0.273   | -0.00755             | 5.8        | 320       |

Over these 33 iterations explained_variance climbs from roughly 0 to 0.85 and value_loss falls from 63.7 to about 6, while approx_kl settles around 0.015-0.021 and entropy_loss creeps up from -8.31 to about -8.2.
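The "Current state:" lines cycle through the Champion.Level stages in no fixed order, one new stage per rollout, which fits the _random suffix in this log's file name. The actual mechanism lives in train_cv_sf2_ai.py and is not shown here; one way to reproduce the pattern, sketched under that assumption, is a callback that loads a randomly chosen save state at the start of every rollout. RandomStateCallback and state_stages below are hypothetical names, and model is the PPO instance from the earlier sketch.

```python
import random

from stable_baselines3.common.callbacks import BaseCallback

# Stage names as they appear in the log.
OPPONENTS = ["Guile", "Ken", "ChunLi", "Zangief", "Dhalsim", "Ryu",
             "EHonda", "Blanka", "Balrog", "Vega", "Sagat", "Bison"]
state_stages = [f"Champion.Level{i}.ChunLiVs{name}"
                for i, name in enumerate(OPPONENTS, start=1)]

class RandomStateCallback(BaseCallback):
    """Pick a random stage before each rollout and load its save state."""

    def __init__(self, states, verbose=0):
        super().__init__(verbose)
        self.states = states

    def _on_rollout_start(self) -> None:
        state = random.choice(self.states)
        print(f"Current state: {state}")
        # RetroEnv.load_state() swaps the save state; gym-retro applies it on
        # the next reset(), so the new stage takes over at the next episode.
        self.training_env.env_method("load_state", state)

    def _on_step(self) -> bool:
        return True

model.learn(
    total_timesteps=101 * 43200,   # the 101 iterations captured in this log
    callback=RandomStateCallback(state_stages),
)
```

Because the stage only changes between rollouts, each logged table then corresponds to one 43,200-step rollout collected (mostly) on the stage printed above it.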
Second phase, iterations 34-101: time_elapsed jumps by roughly 24,300 seconds between iterations 33 and 34, so the reported fps (total_timesteps / time_elapsed) drops to 55 and only climbs back to 141 by iteration 101, while the training metrics themselves continue smoothly. Same constants as above; one row roughly every ten iterations:

| iter | Current state                  | total_timesteps | fps | time_elapsed | approx_kl   | clip_fraction | entropy_loss | explained_variance | loss   | policy_gradient_loss | value_loss | n_updates |
| 34   | Champion.Level7.ChunLiVsEHonda | 1468800         | 55  | 26348        | 0.020391963 | 0.234         | -8.21        | 0.822              | 0.195  | -0.00638             | 6.76       | 330       |
| 40   | Champion.Level12.ChunLiVsBison | 1728000         | 64  | 26725        | 0.020300686 | 0.213         | -8.18        | 0.742              | 0.0943 | -0.00417             | 9.39       | 390       |
| 50   | Champion.Level11.ChunLiVsSagat | 2160000         | 78  | 27375        | 0.019520277 | 0.222         | -8.13        | 0.758              | 3.22   | -0.00285             | 7.24       | 490       |
| 60   | Champion.Level10.ChunLiVsVega  | 2592000         | 92  | 28029        | 0.019610304 | 0.21          | -8.09        | 0.719              | 0.315  | 0.00148              | 13.5       | 590       |
| 70   | Champion.Level3.ChunLiVsChunLi | 3024000         | 105 | 28697        | 0.019060554 | 0.201         | -8.13        | 0.742              | 0.905  | 0.00212              | 8.7        | 690       |
| 80   | Champion.Level10.ChunLiVsVega  | 3456000         | 117 | 29355        | 0.020250982 | 0.217         | -8.03        | 0.787              | 0.852  | 8.91e-05             | 7.14       | 790       |
| 90   | Champion.Level9.ChunLiVsBalrog | 3888000         | 129 | 30044        | 0.024717111 | 0.243         | -8.09        | 0.686              | 0.18   | 0.00627              | 11.7       | 890       |
| 101  | Champion.Level7.ChunLiVsEHonda | 4363200         | 141 | 30808        | 0.022431884 | 0.23          | -8.05        | 0.77               | 0.272  | 0.00136              | 8.66       | 1000      |

Through this stretch explained_variance holds between roughly 0.6 and 0.9, value_loss mostly sits between 5 and 13 (one spike to 23.4 at iteration 49), approx_kl drifts up toward 0.02-0.03 with clip_fraction around 0.2-0.3, and entropy_loss rises slowly from about -8.2 to -8.0. The log ends after iteration 101.
\ No newline at end of file
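Both log files in this diff are verbatim captures of the Stable-Baselines3 console tables, so the per-iteration numbers summarized above can be recovered programmatically instead of read by hand. A small parser along the following lines (a hypothetical helper, assuming the "| key | value |" row format shown above) turns a log into one dict per iteration, ready for plotting explained_variance or approx_kl against total_timesteps:

```python
import re

# Matches metric rows such as "| approx_kl | 0.022431884 |"; the section
# headers "| time/ | |" and "| train/ | |" have an empty value cell, so the
# regex simply skips them.
ROW = re.compile(r"^\|\s*([\w/]+)\s*\|\s*([-+.\w]+)\s*\|$")

def parse_sb3_log(path):
    """Parse a captured Stable-Baselines3 console log into a list of dicts,
    one per logged iteration."""
    iterations, current = [], {}
    with open(path) as f:
        for raw in f:
            line = raw.strip()
            if line.startswith("Current state:"):
                current["state"] = line.split(":", 1)[1].strip()
                continue
            match = ROW.match(line)
            if match:
                key, value = match.groups()
                try:
                    current[key] = float(value)
                except ValueError:
                    current[key] = value
            elif line.startswith("----") and "total_timesteps" in current:
                # Closing rule of a table: this iteration's block is complete.
                iterations.append(current)
                current = {}
    return iterations

logs = parse_sb3_log("trained_models_cv_random/trainin_logs_CustomCNN_random.txt")
steps = [it["total_timesteps"] for it in logs]
ev = [it.get("explained_variance") for it in logs]   # iteration 1 has no train/ block yet
```

The same helper works unchanged on the other log file in this diff, since both use the same table format.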