From d4fb6dbc59f894fe1a95da04096844639fb28f53 Mon Sep 17 00:00:00 2001 From: linyiLYi <48440925+linyiLYi@users.noreply.github.com> Date: Thu, 30 Mar 2023 01:14:39 +0800 Subject: [PATCH] image stack and lstm --- .../__pycache__/custom_cnn.cpython-38.pyc | Bin 0 -> 1169 bytes ...reet_fighter_custom_wrapper.cpython-38.pyc | Bin 0 -> 3107 bytes 001_image_stack/custom_cnn.py | 24 + .../street_fighter_custom_wrapper.py | 106 + 001_image_stack/test.py | 70 + 001_image_stack/train.py | 123 + 001_image_stack/training_log.txt | 631 ++++++ 002_lstm/__pycache__/cnn_lstm.cpython-38.pyc | Bin 0 -> 1851 bytes ...reet_fighter_custom_wrapper.cpython-38.pyc | Bin 0 -> 2992 bytes 002_lstm/cnn_lstm.py | 35 + 002_lstm/street_fighter_custom_wrapper.py | 102 + 002_lstm/test.py | 73 + 002_lstm/train.py | 112 + __pycache__/custom_cnn.cpython-38.pyc | Bin 1250 -> 1250 bytes __pycache__/custom_sf2_cv_env.cpython-38.pyc | Bin 2577 -> 2958 bytes .../mobilenet_extractor.cpython-38.pyc | Bin 0 -> 1231 bytes custom_cnn.py | 1 - custom_sf2_cv_env.py | 53 +- mobilenet_extractor.py | 21 + mobilenet_extractor_no_condensing.py | 18 + test_cv_sf2_ai.py | 18 +- train_cv_sf2_ai.py | 47 +- .../train_log.txt | 291 +++ .../trainin_logs_CustomCNN_random.txt | 2011 +++++++++++++++++ 24 files changed, 3704 insertions(+), 32 deletions(-) create mode 100644 001_image_stack/__pycache__/custom_cnn.cpython-38.pyc create mode 100644 001_image_stack/__pycache__/street_fighter_custom_wrapper.cpython-38.pyc create mode 100644 001_image_stack/custom_cnn.py create mode 100644 001_image_stack/street_fighter_custom_wrapper.py create mode 100644 001_image_stack/test.py create mode 100644 001_image_stack/train.py create mode 100644 001_image_stack/training_log.txt create mode 100644 002_lstm/__pycache__/cnn_lstm.cpython-38.pyc create mode 100644 002_lstm/__pycache__/street_fighter_custom_wrapper.cpython-38.pyc create mode 100644 002_lstm/cnn_lstm.py create mode 100644 002_lstm/street_fighter_custom_wrapper.py create mode 100644 002_lstm/test.py create mode 100644 002_lstm/train.py create mode 100644 __pycache__/mobilenet_extractor.cpython-38.pyc create mode 100644 mobilenet_extractor.py create mode 100644 mobilenet_extractor_no_condensing.py create mode 100644 trained_models_cv_mobilenet_random/train_log.txt create mode 100644 trained_models_cv_random/trainin_logs_CustomCNN_random.txt diff --git a/001_image_stack/__pycache__/custom_cnn.cpython-38.pyc b/001_image_stack/__pycache__/custom_cnn.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..efaaa797367f79f698d991e027448b98f6094fdc GIT binary patch literal 1169 zcmZuwO=}b}7*6JUx4S}16%`c^D(s<4)r&|Gb^Sn;BGz7pg^*6N+u6=!+9X?T*^?G} z>Ob(%9{ZPa^|Zf0@Z|eWYhBSm-g!SVd7k$p>+N=vz&d?jW@irGC zSHtO3JdPPo@pw+a?##K+jJKuM=ywTYxymhL&Qr=Y*W}JvDx34dMZHSxJ`gVWAm6RK z_*KEJmC8l$awWOe$i(@XQXS6)Miv8&EKjLxFqReE7{*S?&yT&w{dH)A>5l2-R(U+4@e1Y za}@-ki!`7MG`iRkZPDn*{=&sHbbluaTw60^VO*po%%=KhGO#v1M8G#QA;)AwPjIv; zOqNE}j;F7|N8dtm<3ij7&BcRCf8ttP0mERk2;HCmL*_i%X=3peFC`Se$yiy5ae;b^ zv7Iq5&LjSCuXN!W&TpN!3-JYQy6B@+rqcR0T<_ZFHP2*GFgC;IIz%;+s=FP7Vz5mK zUA$C#I->O{s+Eu*kPs4m;VhmiA@Rf;;+ab68&btesFhkN@*D7kKqCDAv**UPN|@99 z=X2-&AHVti=hbG@BT&S5o)bTqBjiII94#gcu0T~k1R;o^Depl@yMEl-7=GO&%g|zGgIGc1@w5)Y>H#@1rz4|n$LtK?E3_?a)b-5C2GQh))sZq zfYuRBF$1mp1~G|W13l)-bFUOKii)))?iG<-9jPK8{744FVI?kggygYr+KSX<08C{bqj# zbT@@v%=YJmvxlpv#s$qwH^O*^=IkBfB&T&f5sQzeE;UV?Rfj(`AhC^AGG7jNe|jBqcjImkd;goqV*{_ zas*D4s95Q8o@7bExkRs(jUX(NJmYE@gi%=!^E8e0i~y3%TrN-Luc=6G1xGY#q&8Mw{=UrnV6@s$8h|f!D+Im} zBpVkmUgF6hh^O{Xckt;B9%_*CI~7WohTHNq{5hgPeHH|vbB4!e44c|mnN1CXKl71m z+HB|i{|5ZhVU$9&_po39EvTzd)k`1>GNEHykiM~xIAqWRy!HY0(6fZG#{_$t?7~{# zo;YJe-wC&0!!feKp0Ufu3|XMlGBg$|K+>|AWN-G}A$s7jU>^gd1A@kaW3kdrQ_dm13Ii&V1M4JTg5AlV z|MTb(xcV&YmFGaTs5bbd5eqzy3VtWacrJOGtBAjrXCt))UnpceLK}w3b}l_h|4%&O znEEapgqYIm{{lLwzM#T*9hg#>uNz(CEZGId{CE~veL8Ct6j@d{o8~@Xu#Y%ndd`^6 z8F~)!XpY$;*|nf&jZNs;V|#2Z65-C1U1w~I+9s9v$Byv6j$`CzbBw%ejU8}Zy>l=N zx*g1;cm~B?6mO#V0L4#H{0T(-((nH8)<53cxg5XsD=hD{Teikv1u>>Z17Hil;t~J5 z+W-FPW3r!ku^9~XSs?nY7%c>X~9K~ zipS2cXut~7D3GuiWN{=f;+HRifHZj?`^)$x#B zOS`k>RZcWj0rYkT^n?r=42EeWufX7lSHFV8XHjTBRtPWcD${F!`VOyf;agllW-iSn z>*juQn$M6aDICfq}7T}t=b}~4ZH$hY7|J+rEE^qeHf)FJZ>H*h0o)@3n-B4 z>QEIXXqV0`x}NL0i*7@&v>Fl_Mm~+=iy%rX8x0_(SLvz9W!NhhbhDKxcrz^(fLPUq xmx!ufB_6zLma0ch^|*4T&sTl^5tX!Zs8Z>Et1|X7 torch.Tensor: + return self.cnn(observations) + \ No newline at end of file diff --git a/001_image_stack/street_fighter_custom_wrapper.py b/001_image_stack/street_fighter_custom_wrapper.py new file mode 100644 index 0000000..5fd4d35 --- /dev/null +++ b/001_image_stack/street_fighter_custom_wrapper.py @@ -0,0 +1,106 @@ +import collections + +import gym +import cv2 +import numpy as np +import torch +from torchvision.transforms import Normalize +from gym.spaces import MultiBinary + +# Custom environment wrapper +class StreetFighterCustomWrapper(gym.Wrapper): + def __init__(self, env, testing=False, threshold=0.65): + super(StreetFighterCustomWrapper, self).__init__(env) + + self.action_space = MultiBinary(12) + + # Use a deque to store the last 4 frames + self.frame_stack = collections.deque(maxlen=4) + + self.threshold = threshold + self.game_screen_gray = None + + self.prev_player_health = 1.0 + self.prev_opponent_health = 1.0 + + # Update observation space to include 4 stacked grayscale images + self.observation_space = gym.spaces.Box( + low=0.0, high=1.0, shape=(4, 84, 84), dtype=np.float32 + ) + + self.testing = testing + + # Normalize the image for MobileNetV3Small. + self.normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + + def _preprocess_observation(self, observation): + self.game_screen_gray = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY) + resized_image = cv2.resize(self.game_screen_gray, (84, 84), interpolation=cv2.INTER_AREA) / 255.0 + # Add the resized image to the frame stack + self.frame_stack.append(resized_image) + + # Stack the last 4 frames and return the stacked frames + stacked_frames = np.stack(self.frame_stack, axis=0) + return stacked_frames + + def _get_win_or_lose_bonus(self): + if self.prev_player_health > self.prev_opponent_health: + # print('You win!') + return 300 + else: + # print('You lose!') + return -300 + + def _get_reward(self): + player_health_area = self.game_screen_gray[15:20, 32:120] + oppoent_health_area = self.game_screen_gray[15:20, 136:224] + + # Get health points using the number of pixels above 129. + player_health = np.sum(player_health_area > 129) / player_health_area.size + opponent_health = np.sum(oppoent_health_area > 129) / oppoent_health_area.size + + player_health_diff = self.prev_player_health - player_health + opponent_health_diff = self.prev_opponent_health - opponent_health + + reward = (opponent_health_diff - player_health_diff) * 200 # max would be 200 + + # Penalty for each step without any change in health + if opponent_health_diff <= 0.0000001: + reward -= 12.0 / 60.0 # -12 points per second if no damage to opponent + + self.prev_player_health = player_health + self.prev_opponent_health = opponent_health + + # Print the health values of the player and the opponent + # print("Player health: %f Opponent health:%f" % (player_health, opponent_health)) + return reward + + def reset(self): + observation = self.env.reset() + self.prev_player_health = 1.0 + self.prev_opponent_health = 1.0 + + # Clear the frame stack and add the first observation 4 times + self.frame_stack.clear() + for _ in range(4): + self.frame_stack.append(self._preprocess_observation(observation)[0]) + + return self._preprocess_observation(observation) + + def step(self, action): + # observation, _, _, info = self.env.step(action) + observation, _reward, _done, info = self.env.step(self.env.action_space.sample()) + custom_reward = self._get_reward() + custom_reward -= 1.0 / 60.0 # penalty for each step (-1 points per second) + + custom_done = False + if self.prev_player_health <= 0.00001 or self.prev_opponent_health <= 0.00001: + custom_reward += self._get_win_or_lose_bonus() + if not self.testing: + custom_done = True + else: + self.prev_player_health = 1.0 + self.prev_opponent_health = 1.0 + + return self._preprocess_observation(observation), custom_reward, custom_done, info + \ No newline at end of file diff --git a/001_image_stack/test.py b/001_image_stack/test.py new file mode 100644 index 0000000..614b247 --- /dev/null +++ b/001_image_stack/test.py @@ -0,0 +1,70 @@ +import time + +import cv2 +import retro +from stable_baselines3 import PPO +from stable_baselines3.common.vec_env import DummyVecEnv + +from custom_cnn import CustomCNN +from street_fighter_custom_wrapper import StreetFighterCustomWrapper + +def make_env(game, state): + def _init(): + env = retro.RetroEnv( + game=game, + state=state, + use_restricted_actions=retro.Actions.FILTERED, + obs_type=retro.Observations.IMAGE + ) + env = StreetFighterCustomWrapper(env, testing=True) + return env + return _init + +game = "StreetFighterIISpecialChampionEdition-Genesis" +state_stages = [ + "Champion.Level1.ChunLiVsGuile", + "Champion.Level2.ChunLiVsKen", + "Champion.Level3.ChunLiVsChunLi", + "Champion.Level4.ChunLiVsZangief", + "Champion.Level5.ChunLiVsDhalsim", + "Champion.Level6.ChunLiVsRyu", + "Champion.Level7.ChunLiVsEHonda", + "Champion.Level8.ChunLiVsBlanka", + "Champion.Level9.ChunLiVsBalrog", + "Champion.Level10.ChunLiVsVega", + "Champion.Level11.ChunLiVsSagat", + "Champion.Level12.ChunLiVsBison" + # Add other stages as necessary +] + +env = make_env(game, state_stages[0])() + +# Wrap the environment +env = DummyVecEnv([lambda: env]) + +policy_kwargs = { + 'features_extractor_class': CustomCNN +} + +model = PPO( + "CnnPolicy", + env, + device="cuda", + policy_kwargs=policy_kwargs, + verbose=1 +) +model.load(r"trained_models_continued/ppo_chunli_432000_steps") + +obs = env.reset() +done = False + +while True: + timestamp = time.time() + action, _ = model.predict(obs) + obs, rewards, done, info = env.step(action) + env.render() + render_time = time.time() - timestamp + if render_time < 0.0111: + time.sleep(0.0111 - render_time) # Add a delay for 90 FPS + +# env.close() diff --git a/001_image_stack/train.py b/001_image_stack/train.py new file mode 100644 index 0000000..4e2195f --- /dev/null +++ b/001_image_stack/train.py @@ -0,0 +1,123 @@ +import os +import random + +import gym +import cv2 +import retro +import numpy as np +from stable_baselines3 import PPO +from stable_baselines3.common.vec_env import SubprocVecEnv +from stable_baselines3.common.preprocessing import is_image_space, is_image_space_channels_first +from stable_baselines3.common.callbacks import BaseCallback, CheckpointCallback + +from custom_cnn import CustomCNN +from street_fighter_custom_wrapper import StreetFighterCustomWrapper + +class RandomOpponentChangeCallback(BaseCallback): + def __init__(self, stages, opponent_interval, verbose=0): + super(RandomOpponentChangeCallback, self).__init__(verbose) + self.stages = stages + self.opponent_interval = opponent_interval + + def _on_step(self) -> bool: + if self.n_calls % self.opponent_interval == 0: + new_state = random.choice(self.stages) + print("\nCurrent state:", new_state) + self.training_env.env_method("load_state", new_state, indices=None) + return True + +def make_env(game, state, seed=0): + def _init(): + env = retro.RetroEnv( + game=game, + state=state, + use_restricted_actions=retro.Actions.FILTERED, + obs_type=retro.Observations.IMAGE + ) + env = StreetFighterCustomWrapper(env) + env.seed(seed) + return env + return _init + +def main(): + # Set up the environment and model + game = "StreetFighterIISpecialChampionEdition-Genesis" + state_stages = [ + "ChampionX.Level1.ChunLiVsKen", + "ChampionX.Level2.ChunLiVsChunLi", + "ChampionX.Level3.ChunLiVsZangief", + "ChampionX.Level4.ChunLiVsDhalsim", + "ChampionX.Level5.ChunLiVsRyu", + "ChampionX.Level6.ChunLiVsEHonda", + "ChampionX.Level7.ChunLiVsBlanka", + "ChampionX.Level8.ChunLiVsGuile", + "ChampionX.Level9.ChunLiVsBalrog", + "ChampionX.Level10.ChunLiVsVega", + "ChampionX.Level11.ChunLiVsSagat", + "ChampionX.Level12.ChunLiVsBison" + # Add other stages as necessary + ] + # Champion is at difficulty level 4, ChampionX is at difficulty level 8. + + num_envs = 8 + + # env = SubprocVecEnv([make_env(game, state_stages[0], seed=i) for i in range(num_envs)]) + env = SubprocVecEnv([make_env(game, state_stages[0], seed=i) for i in range(num_envs)]) + + # Using CustomCNN as the feature extractor + policy_kwargs = { + 'features_extractor_class': CustomCNN + } + + model = PPO( + "CnnPolicy", + env, + device="cuda", + policy_kwargs=policy_kwargs, + verbose=1, + n_steps=5400, + batch_size=64, + n_epochs=10, + learning_rate=0.0003, + ent_coef=0.01, + clip_range=0.2, + clip_range_vf=None, + gamma=0.99, + gae_lambda=0.95, + max_grad_norm=0.5, + use_sde=False, + sde_sample_freq=-1 + ) + + # Set the save directory + save_dir = "trained_models_continued" + os.makedirs(save_dir, exist_ok=True) + + # Load the model from file + # Change the path to the actual path of the model file + model_path = "trained_models/ppo_chunli_1296000_steps.zip" + + # Load model and modify the learning rate and entropy coefficient + custom_objects = { + "learning_rate": 0.00005, + "ent_coef": 0.2 + } + model = PPO.load(model_path, env=env, device="cuda", custom_objects=custom_objects) + + # Set up callbacks + opponent_interval = 5400 # stage_interval * num_envs = total_steps_per_stage + checkpoint_interval = 54000 # checkpoint_interval * num_envs = total_steps_per_checkpoint (Every 80 rounds) + checkpoint_callback = CheckpointCallback(save_freq=checkpoint_interval, save_path=save_dir, name_prefix="ppo_chunli") + stage_increase_callback = RandomOpponentChangeCallback(state_stages, opponent_interval, save_dir) + + + model.learn( + total_timesteps=int(6048000), # total_timesteps = stage_interval * num_envs * num_stages (1120 rounds) + callback=[checkpoint_callback, stage_increase_callback] + ) + + # Save the final model + model.save(os.path.join(save_dir, "ppo_sf2_chunli_final.zip")) + +if __name__ == "__main__": + main() diff --git a/001_image_stack/training_log.txt b/001_image_stack/training_log.txt new file mode 100644 index 0000000..a5db296 --- /dev/null +++ b/001_image_stack/training_log.txt @@ -0,0 +1,631 @@ +(StreetFighterAI) PS C:\Users\unitec\Documents\AIProjects\street-fighter-ai\001_image_stack> python .\train.py +Using cuda device + +Current state: ChampionX.Level4.ChunLiVsDhalsim +------------------------------ +| time/ | | +| fps | 1489 | +| iterations | 1 | +| time_elapsed | 28 | +| total_timesteps | 43200 | +------------------------------ + +Current state: ChampionX.Level6.ChunLiVsEHonda +----------------------------------------- +| time/ | | +| fps | 693 | +| iterations | 2 | +| time_elapsed | 124 | +| total_timesteps | 86400 | +| train/ | | +| approx_kl | 0.008018286 | +| clip_fraction | 0.0528 | +| clip_range | 0.2 | +| entropy_loss | -8.31 | +| explained_variance | -0.000782 | +| learning_rate | 0.0003 | +| loss | 189 | +| n_updates | 10 | +| policy_gradient_loss | -0.00354 | +| value_loss | 398 | +----------------------------------------- + +Current state: ChampionX.Level1.ChunLiVsKen +----------------------------------------- +| time/ | | +| fps | 476 | +| iterations | 3 | +| time_elapsed | 271 | +| total_timesteps | 129600 | +| train/ | | +| approx_kl | 0.010610209 | +| clip_fraction | 0.119 | +| clip_range | 0.2 | +| entropy_loss | -8.3 | +| explained_variance | 0.0323 | +| learning_rate | 0.0003 | +| loss | 0.228 | +| n_updates | 20 | +| policy_gradient_loss | -0.00663 | +| value_loss | 103 | +----------------------------------------- + +Current state: ChampionX.Level10.ChunLiVsVega +----------------------------------------- +| time/ | | +| fps | 467 | +| iterations | 4 | +| time_elapsed | 369 | +| total_timesteps | 172800 | +| train/ | | +| approx_kl | 0.011115557 | +| clip_fraction | 0.122 | +| clip_range | 0.2 | +| entropy_loss | -8.3 | +| explained_variance | 0.125 | +| learning_rate | 0.0003 | +| loss | 5.37 | +| n_updates | 30 | +| policy_gradient_loss | -0.00485 | +| value_loss | 83.8 | +----------------------------------------- + +Current state: ChampionX.Level11.ChunLiVsSagat +----------------------------------------- +| time/ | | +| fps | 462 | +| iterations | 5 | +| time_elapsed | 466 | +| total_timesteps | 216000 | +| train/ | | +| approx_kl | 0.012769428 | +| clip_fraction | 0.133 | +| clip_range | 0.2 | +| entropy_loss | -8.29 | +| explained_variance | 0.409 | +| learning_rate | 0.0003 | +| loss | 18.4 | +| n_updates | 40 | +| policy_gradient_loss | -0.00746 | +| value_loss | 31.6 | +----------------------------------------- + +Current state: ChampionX.Level7.ChunLiVsBlanka +----------------------------------------- +| time/ | | +| fps | 460 | +| iterations | 6 | +| time_elapsed | 563 | +| total_timesteps | 259200 | +| train/ | | +| approx_kl | 0.014561476 | +| clip_fraction | 0.184 | +| clip_range | 0.2 | +| entropy_loss | -8.29 | +| explained_variance | 0.15 | +| learning_rate | 0.0003 | +| loss | 0.66 | +| n_updates | 50 | +| policy_gradient_loss | -0.00799 | +| value_loss | 23.2 | +----------------------------------------- + +Current state: ChampionX.Level12.ChunLiVsBison +----------------------------------------- +| time/ | | +| fps | 447 | +| iterations | 7 | +| time_elapsed | 675 | +| total_timesteps | 302400 | +| train/ | | +| approx_kl | 0.013581872 | +| clip_fraction | 0.147 | +| clip_range | 0.2 | +| entropy_loss | -8.28 | +| explained_variance | 0.447 | +| learning_rate | 0.0003 | +| loss | 0.794 | +| n_updates | 60 | +| policy_gradient_loss | -0.00405 | +| value_loss | 33.4 | +----------------------------------------- + +Current state: ChampionX.Level4.ChunLiVsDhalsim +----------------------------------------- +| time/ | | +| fps | 440 | +| iterations | 8 | +| time_elapsed | 784 | +| total_timesteps | 345600 | +| train/ | | +| approx_kl | 0.015053411 | +| clip_fraction | 0.186 | +| clip_range | 0.2 | +| entropy_loss | -8.28 | +| explained_variance | 0.39 | +| learning_rate | 0.0003 | +| loss | 0.313 | +| n_updates | 70 | +| policy_gradient_loss | -0.00594 | +| value_loss | 22.3 | +----------------------------------------- + +Current state: ChampionX.Level5.ChunLiVsRyu +---------------------------------------- +| time/ | | +| fps | 433 | +| iterations | 9 | +| time_elapsed | 896 | +| total_timesteps | 388800 | +| train/ | | +| approx_kl | 0.01407744 | +| clip_fraction | 0.152 | +| clip_range | 0.2 | +| entropy_loss | -8.27 | +| explained_variance | 0.326 | +| learning_rate | 0.0003 | +| loss | 0.396 | +| n_updates | 80 | +| policy_gradient_loss | -0.00862 | +| value_loss | 15.7 | +---------------------------------------- + +Current state: ChampionX.Level9.ChunLiVsBalrog +----------------------------------------- +| time/ | | +| fps | 428 | +| iterations | 10 | +| time_elapsed | 1007 | +| total_timesteps | 432000 | +| train/ | | +| approx_kl | 0.013460734 | +| clip_fraction | 0.148 | +| clip_range | 0.2 | +| entropy_loss | -8.27 | +| explained_variance | 0.384 | +| learning_rate | 0.0003 | +| loss | 0.227 | +| n_updates | 90 | +| policy_gradient_loss | -0.00498 | +| value_loss | 16.7 | +----------------------------------------- + +Current state: ChampionX.Level9.ChunLiVsBalrog +------------------------------------------ +| time/ | | +| fps | 424 | +| iterations | 11 | +| time_elapsed | 1118 | +| total_timesteps | 475200 | +| train/ | | +| approx_kl | 0.0145851895 | +| clip_fraction | 0.165 | +| clip_range | 0.2 | +| entropy_loss | -8.26 | +| explained_variance | 0.352 | +| learning_rate | 0.0003 | +| loss | 0.147 | +| n_updates | 100 | +| policy_gradient_loss | -0.00597 | +| value_loss | 19.8 | +------------------------------------------ + +Current state: ChampionX.Level4.ChunLiVsDhalsim +----------------------------------------- +| time/ | | +| fps | 424 | +| iterations | 12 | +| time_elapsed | 1219 | +| total_timesteps | 518400 | +| train/ | | +| approx_kl | 0.015144574 | +| clip_fraction | 0.161 | +| clip_range | 0.2 | +| entropy_loss | -8.25 | +| explained_variance | 0.383 | +| learning_rate | 0.0003 | +| loss | 1.52 | +| n_updates | 110 | +| policy_gradient_loss | -0.00749 | +| value_loss | 24.1 | +----------------------------------------- + +Current state: ChampionX.Level6.ChunLiVsEHonda +----------------------------------------- +| time/ | | +| fps | 425 | +| iterations | 13 | +| time_elapsed | 1318 | +| total_timesteps | 561600 | +| train/ | | +| approx_kl | 0.015790801 | +| clip_fraction | 0.158 | +| clip_range | 0.2 | +| entropy_loss | -8.25 | +| explained_variance | 0.555 | +| learning_rate | 0.0003 | +| loss | 0.665 | +| n_updates | 120 | +| policy_gradient_loss | -0.00889 | +| value_loss | 20.7 | +----------------------------------------- + +Current state: ChampionX.Level6.ChunLiVsEHonda +----------------------------------------- +| time/ | | +| fps | 426 | +| iterations | 14 | +| time_elapsed | 1417 | +| total_timesteps | 604800 | +| train/ | | +| approx_kl | 0.016785465 | +| clip_fraction | 0.173 | +| clip_range | 0.2 | +| entropy_loss | -8.24 | +| explained_variance | 0.609 | +| learning_rate | 0.0003 | +| loss | 0.313 | +| n_updates | 130 | +| policy_gradient_loss | -0.00758 | +| value_loss | 14.9 | +----------------------------------------- + +Current state: ChampionX.Level10.ChunLiVsVega +----------------------------------------- +| time/ | | +| fps | 428 | +| iterations | 15 | +| time_elapsed | 1513 | +| total_timesteps | 648000 | +| train/ | | +| approx_kl | 0.017042443 | +| clip_fraction | 0.176 | +| clip_range | 0.2 | +| entropy_loss | -8.24 | +| explained_variance | 0.759 | +| learning_rate | 0.0003 | +| loss | 0.634 | +| n_updates | 140 | +| policy_gradient_loss | -0.00617 | +| value_loss | 15.4 | +----------------------------------------- + +Current state: ChampionX.Level3.ChunLiVsZangief +----------------------------------------- +| time/ | | +| fps | 427 | +| iterations | 16 | +| time_elapsed | 1618 | +| total_timesteps | 691200 | +| train/ | | +| approx_kl | 0.017714709 | +| clip_fraction | 0.176 | +| clip_range | 0.2 | +| entropy_loss | -8.23 | +| explained_variance | 0.79 | +| learning_rate | 0.0003 | +| loss | 0.941 | +| n_updates | 150 | +| policy_gradient_loss | -0.00703 | +| value_loss | 17.5 | +----------------------------------------- + +Current state: ChampionX.Level7.ChunLiVsBlanka +----------------------------------------- +| time/ | | +| fps | 424 | +| iterations | 17 | +| time_elapsed | 1728 | +| total_timesteps | 734400 | +| train/ | | +| approx_kl | 0.018709755 | +| clip_fraction | 0.196 | +| clip_range | 0.2 | +| entropy_loss | -8.22 | +| explained_variance | 0.746 | +| learning_rate | 0.0003 | +| loss | 0.505 | +| n_updates | 160 | +| policy_gradient_loss | -0.00795 | +| value_loss | 11.9 | +----------------------------------------- + +Current state: ChampionX.Level11.ChunLiVsSagat +----------------------------------------- +| time/ | | +| fps | 423 | +| iterations | 18 | +| time_elapsed | 1837 | +| total_timesteps | 777600 | +| train/ | | +| approx_kl | 0.017850244 | +| clip_fraction | 0.182 | +| clip_range | 0.2 | +| entropy_loss | -8.21 | +| explained_variance | 0.687 | +| learning_rate | 0.0003 | +| loss | 0.379 | +| n_updates | 170 | +| policy_gradient_loss | -0.00568 | +| value_loss | 15.2 | +----------------------------------------- + +Current state: ChampionX.Level3.ChunLiVsZangief +---------------------------------------- +| time/ | | +| fps | 420 | +| iterations | 19 | +| time_elapsed | 1950 | +| total_timesteps | 820800 | +| train/ | | +| approx_kl | 0.02048213 | +| clip_fraction | 0.221 | +| clip_range | 0.2 | +| entropy_loss | -8.21 | +| explained_variance | 0.707 | +| learning_rate | 0.0003 | +| loss | 0.391 | +| n_updates | 180 | +| policy_gradient_loss | -0.00419 | +| value_loss | 13 | +---------------------------------------- + +Current state: ChampionX.Level11.ChunLiVsSagat +----------------------------------------- +| time/ | | +| fps | 418 | +| iterations | 20 | +| time_elapsed | 2062 | +| total_timesteps | 864000 | +| train/ | | +| approx_kl | 0.016617421 | +| clip_fraction | 0.172 | +| clip_range | 0.2 | +| entropy_loss | -8.2 | +| explained_variance | 0.744 | +| learning_rate | 0.0003 | +| loss | 1.66 | +| n_updates | 190 | +| policy_gradient_loss | -0.00437 | +| value_loss | 15.8 | +----------------------------------------- + +Current state: ChampionX.Level6.ChunLiVsEHonda +----------------------------------------- +| time/ | | +| fps | 417 | +| iterations | 21 | +| time_elapsed | 2174 | +| total_timesteps | 907200 | +| train/ | | +| approx_kl | 0.017259926 | +| clip_fraction | 0.171 | +| clip_range | 0.2 | +| entropy_loss | -8.19 | +| explained_variance | 0.756 | +| learning_rate | 0.0003 | +| loss | 0.457 | +| n_updates | 200 | +| policy_gradient_loss | -0.00897 | +| value_loss | 14.9 | +----------------------------------------- + +Current state: ChampionX.Level10.ChunLiVsVega +----------------------------------------- +| time/ | | +| fps | 417 | +| iterations | 22 | +| time_elapsed | 2276 | +| total_timesteps | 950400 | +| train/ | | +| approx_kl | 0.018794816 | +| clip_fraction | 0.19 | +| clip_range | 0.2 | +| entropy_loss | -8.18 | +| explained_variance | 0.834 | +| learning_rate | 0.0003 | +| loss | 3.33 | +| n_updates | 210 | +| policy_gradient_loss | -0.00535 | +| value_loss | 15.5 | +----------------------------------------- + +Current state: ChampionX.Level5.ChunLiVsRyu +----------------------------------------- +| time/ | | +| fps | 418 | +| iterations | 23 | +| time_elapsed | 2374 | +| total_timesteps | 993600 | +| train/ | | +| approx_kl | 0.019361915 | +| clip_fraction | 0.188 | +| clip_range | 0.2 | +| entropy_loss | -8.18 | +| explained_variance | 0.825 | +| learning_rate | 0.0003 | +| loss | 0.235 | +| n_updates | 220 | +| policy_gradient_loss | -0.00762 | +| value_loss | 13.4 | +----------------------------------------- + +Current state: ChampionX.Level2.ChunLiVsChunLi +----------------------------------------- +| time/ | | +| fps | 419 | +| iterations | 24 | +| time_elapsed | 2471 | +| total_timesteps | 1036800 | +| train/ | | +| approx_kl | 0.022115083 | +| clip_fraction | 0.233 | +| clip_range | 0.2 | +| entropy_loss | -8.18 | +| explained_variance | 0.8 | +| learning_rate | 0.0003 | +| loss | 0.211 | +| n_updates | 230 | +| policy_gradient_loss | -0.00771 | +| value_loss | 11.7 | +----------------------------------------- + +Current state: ChampionX.Level3.ChunLiVsZangief +----------------------------------------- +| time/ | | +| fps | 419 | +| iterations | 25 | +| time_elapsed | 2574 | +| total_timesteps | 1080000 | +| train/ | | +| approx_kl | 0.023090197 | +| clip_fraction | 0.233 | +| clip_range | 0.2 | +| entropy_loss | -8.17 | +| explained_variance | 0.759 | +| learning_rate | 0.0003 | +| loss | 0.445 | +| n_updates | 240 | +| policy_gradient_loss | -0.00523 | +| value_loss | 13.7 | +----------------------------------------- + +Current state: ChampionX.Level11.ChunLiVsSagat +----------------------------------------- +| time/ | | +| fps | 418 | +| iterations | 26 | +| time_elapsed | 2683 | +| total_timesteps | 1123200 | +| train/ | | +| approx_kl | 0.024867734 | +| clip_fraction | 0.281 | +| clip_range | 0.2 | +| entropy_loss | -8.16 | +| explained_variance | 0.688 | +| learning_rate | 0.0003 | +| loss | 0.557 | +| n_updates | 250 | +| policy_gradient_loss | 0.00215 | +| value_loss | 13.9 | +----------------------------------------- + +Current state: ChampionX.Level10.ChunLiVsVega +----------------------------------------- +| time/ | | +| fps | 417 | +| iterations | 27 | +| time_elapsed | 2793 | +| total_timesteps | 1166400 | +| train/ | | +| approx_kl | 0.020454599 | +| clip_fraction | 0.203 | +| clip_range | 0.2 | +| entropy_loss | -8.19 | +| explained_variance | 0.766 | +| learning_rate | 0.0003 | +| loss | 0.314 | +| n_updates | 260 | +| policy_gradient_loss | -0.0058 | +| value_loss | 17.1 | +----------------------------------------- + +Current state: ChampionX.Level10.ChunLiVsVega +----------------------------------------- +| time/ | | +| fps | 416 | +| iterations | 28 | +| time_elapsed | 2904 | +| total_timesteps | 1209600 | +| train/ | | +| approx_kl | 0.020690009 | +| clip_fraction | 0.208 | +| clip_range | 0.2 | +| entropy_loss | -8.17 | +| explained_variance | 0.827 | +| learning_rate | 0.0003 | +| loss | 1.38 | +| n_updates | 270 | +| policy_gradient_loss | 2.12e-05 | +| value_loss | 20.4 | +----------------------------------------- + +Current state: ChampionX.Level10.ChunLiVsVega +----------------------------------------- +| time/ | | +| fps | 415 | +| iterations | 29 | +| time_elapsed | 3015 | +| total_timesteps | 1252800 | +| train/ | | +| approx_kl | 0.020646438 | +| clip_fraction | 0.208 | +| clip_range | 0.2 | +| entropy_loss | -8.15 | +| explained_variance | 0.81 | +| learning_rate | 0.0003 | +| loss | 0.235 | +| n_updates | 280 | +| policy_gradient_loss | -0.00852 | +| value_loss | 12.5 | +----------------------------------------- + +Current state: ChampionX.Level9.ChunLiVsBalrog +----------------------------------------- +| time/ | | +| fps | 414 | +| iterations | 30 | +| time_elapsed | 3128 | +| total_timesteps | 1296000 | +| train/ | | +| approx_kl | 0.021910097 | +| clip_fraction | 0.212 | +| clip_range | 0.2 | +| entropy_loss | -8.15 | +| explained_variance | 0.79 | +| learning_rate | 0.0003 | +| loss | 0.0264 | +| n_updates | 290 | +| policy_gradient_loss | -0.00872 | +| value_loss | 12.5 | +----------------------------------------- + +Current state: ChampionX.Level12.ChunLiVsBison +----------------------------------------- +| time/ | | +| fps | 412 | +| iterations | 31 | +| time_elapsed | 3243 | +| total_timesteps | 1339200 | +| train/ | | +| approx_kl | 0.025281599 | +| clip_fraction | 0.254 | +| clip_range | 0.2 | +| entropy_loss | -8.13 | +| explained_variance | 0.773 | +| learning_rate | 0.0003 | +| loss | 1.18 | +| n_updates | 300 | +| policy_gradient_loss | -0.00679 | +| value_loss | 12.6 | +----------------------------------------- + +Current state: ChampionX.Level2.ChunLiVsChunLi +----------------------------------------- +| time/ | | +| fps | 412 | +| iterations | 32 | +| time_elapsed | 3349 | +| total_timesteps | 1382400 | +| train/ | | +| approx_kl | 0.026466375 | +| clip_fraction | 0.259 | +| clip_range | 0.2 | +| entropy_loss | -8.12 | +| explained_variance | 0.647 | +| learning_rate | 0.0003 | +| loss | 0.518 | +| n_updates | 310 | +| policy_gradient_loss | 0.000522 | +| value_loss | 18.8 | +----------------------------------------- \ No newline at end of file diff --git a/002_lstm/__pycache__/cnn_lstm.cpython-38.pyc b/002_lstm/__pycache__/cnn_lstm.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..942f36c0b40ea39e730063dd12bacfeebe9cfa4e GIT binary patch literal 1851 zcmZuyOK%%D5GJ`#ttH2`)AWrRMf%tl7GkHTA}G?TMN!ldf~39(0tj_U)$6WS3AsvI zK{+`(#~*KD>W_!emk7uV>t5-b=K)b1lqU!wkTaf{=&g_10A^! zUEc!`M9_>3sxO_e0i)yxBA9Sq65&X8;aoZc7c{Q$K;zXKPteCC_E)e2=_iQW4Nt(P zKLLkde5 z(Gt;;!A>1Pz}21pY7abE9U^!L=v~ar&TmrXGUGa(OXKT8r9!Um;{MR<+<^D)BM?qFkS-RW8*%Or)S1fGMgP&x|3=IADnTw8r?Pi zX`0KCGHje&G2?2PjZJ$jN5xD@%|$x7qNc@pl8v~ukw@OuIIE+Gr0vo(fHmtHfbrN5wMAYis& zOJ-MuXaxxg>+}^SVf7ERxTOw2R&OA@iEtC)EreSD#(kE`=MY0krg{eyF;nlMp8l>r z#7PzoQ4@@(>iI~C8!*sq07CcdZy)YI-`~1-Ju#*g1$<0Qny0<~E@zFpkLL&BRuQ%*NFV_v_hJ6?n zU>2#lV>wtd zM!SEW3L*2YL>}T=%r)Mpc0QuF=={do`ISP+Q-amF|{(P4qzFUxE4* z;WGq$D(pSgmng*uo2uH-8A*U8X+r^Y_Cn7RBlguXs7+{{@8)?$3;%&FAA>rryGaKYtT&xh$NDxB&^;vq;}3iX5UW8;muNa z=!G7|Jx=NmgD{}v4v}2CkBD>?9n>B=VO`R zrIY3dCGUfQrEB`pNR_TE4o9l}9L&dHr$&p)K!_wu3L!KaRyL!!Nb*dWQ537P5$9>D zEXTm->i!sbT^$XTFhvyam9?U;mFefHESGzh9fMml(bLgkd1<88z8Iy^q0*wSqO|Cj zE0&j!MtP>P;)Lt;4u_>@|7l9+R{o%5*{JlpX&x2d*(fjO_l(l}(J9URLYX4TdSECw zD(#kz%J%h3X#ev-J_Ljb5vP$cLVQU6_0HD!ci#pXW_Jw6s(ANy9*>8hf!V$J#!q!V z03*$=v6#5ltw3=tN_MYb-w>%OhKnV!SQ4=Xj5w$evpzc17@`Jr^CA#Jms#CeVjlId zJ04{OAO0lZldu18=&hdwBW5UN(J)Rf^eyOS2S`EYbV>^{U`L1*2S$LL0pJftu4IoL z>3oIEU~S;d{VB8P3=FS>#mkp=B${;9!jM)Ox)_ z+Nb$zn!^79iadIdnDus}bmIMuGKlw!tvtkKf!8x9;q0+}XML3%v^4OAjyt zC%vq{(YgEjj<~t=`c1uJ7x2JkQlrlRl@?NdrVdbeo9nB{Zk!5!*s3H+kqo1r3YY9B zN(zffqhE#(D)J5d^aTNE8|ge&0E1IvP_D6AkS^0T)^x7W$;Pvy^5tS;^uB zX$=P8Z0vN8>t#%Sl0nBq?R1iF!|vqce?NHwPJaRRYW$#0t-IWbQac0&34Gd6yNhNz1@|fiBGK&?+XaD6w z&RuX>K`{qPe~%vlE(eG`$MV>M$1D#q%BRj6nYl1>ryNG!)SJ3%La-5n4QPNlhEKlv{}50KIumxx z`3NE#b}aZ9;5j`aGb*uoF-su~13q_;2oyOeY!B&e^2;kWy>Lh`WFBAvl+I;>iRS(* zCmaYE@8}vxwbiLg9X0(`V3aN#WK}$Mew7d0I8~8`#VG43eGR{S6$q5Kt2n+6q@?1U zoAf5Ie+1nu0U@+bG3C~26OQ)e>NA{TRXCGr&zyn@2&>$Ibin4$5g543XKd!oIAVCt z!BK!dv^CHgjVREl zOW9gf{#d0c+-sf|g|FcjtPJ+*g9X6g^(vrC&jM;CERrx$tyrgHyaP3 z$lE%#TI4$J?