mirror of https://github.com/linyiLYi/street-fighter-ai.git
synced 2025-04-03 22:50:43 +00:00

image stack and lstm

This commit is contained in:
parent 79e148675a
commit d4fb6dbc59
BIN 001_image_stack/__pycache__/custom_cnn.cpython-38.pyc Normal file
Binary file not shown.
24 001_image_stack/custom_cnn.py Normal file
@@ -0,0 +1,24 @@
import gym
import torch
import torch.nn as nn
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor

# Custom feature extractor (CNN)
class CustomCNN(BaseFeaturesExtractor):
    def __init__(self, observation_space: gym.Space):
        super(CustomCNN, self).__init__(observation_space, features_dim=512)
        self.cnn = nn.Sequential(
            nn.Conv2d(4, 32, kernel_size=5, stride=2, padding=0),   # (4, 84, 84) -> (32, 40, 40)
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=5, stride=2, padding=0),  # -> (64, 18, 18)
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=0),  # -> (64, 16, 16)
            nn.ReLU(),
            nn.Flatten(),                                           # -> 64 * 16 * 16 = 16384
            nn.Linear(16384, self.features_dim),
            nn.ReLU()
        )

    def forward(self, observations: torch.Tensor) -> torch.Tensor:
        return self.cnn(observations)
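The hard-coded nn.Linear(16384, ...) input size is easy to verify by pushing a dummy 4x84x84 observation through the convolutional stack; a minimal sanity-check sketch:

import torch
import torch.nn as nn

# Reproduce CustomCNN's conv stack and confirm the flattened feature size.
conv = nn.Sequential(
    nn.Conv2d(4, 32, kernel_size=5, stride=2),
    nn.ReLU(),
    nn.Conv2d(32, 64, kernel_size=5, stride=2),
    nn.ReLU(),
    nn.Conv2d(64, 64, kernel_size=3, stride=1),
    nn.ReLU(),
    nn.Flatten(),
)
with torch.no_grad():
    n_flatten = conv(torch.zeros(1, 4, 84, 84)).shape[1]
print(n_flatten)  # 16384, matching nn.Linear(16384, 512)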
106 001_image_stack/street_fighter_custom_wrapper.py Normal file
@@ -0,0 +1,106 @@
import collections

import gym
import cv2
import numpy as np
import torch
from torchvision.transforms import Normalize
from gym.spaces import MultiBinary

# Custom environment wrapper
class StreetFighterCustomWrapper(gym.Wrapper):
    def __init__(self, env, testing=False, threshold=0.65):
        super(StreetFighterCustomWrapper, self).__init__(env)

        self.action_space = MultiBinary(12)

        # Use a deque to store the last 4 frames
        self.frame_stack = collections.deque(maxlen=4)

        self.threshold = threshold
        self.game_screen_gray = None

        self.prev_player_health = 1.0
        self.prev_opponent_health = 1.0

        # Update observation space to include 4 stacked grayscale images
        self.observation_space = gym.spaces.Box(
            low=0.0, high=1.0, shape=(4, 84, 84), dtype=np.float32
        )

        self.testing = testing

        # Normalization for MobileNetV3Small (kept from an earlier variant; unused below).
        self.normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    def _preprocess_observation(self, observation):
        self.game_screen_gray = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY)
        resized_image = cv2.resize(self.game_screen_gray, (84, 84), interpolation=cv2.INTER_AREA) / 255.0
        # Add the resized image to the frame stack
        self.frame_stack.append(resized_image)

        # Stack the last 4 frames and return the stacked frames
        stacked_frames = np.stack(self.frame_stack, axis=0)
        return stacked_frames

    def _get_win_or_lose_bonus(self):
        if self.prev_player_health > self.prev_opponent_health:
            # print('You win!')
            return 300
        else:
            # print('You lose!')
            return -300

    def _get_reward(self):
        player_health_area = self.game_screen_gray[15:20, 32:120]
        opponent_health_area = self.game_screen_gray[15:20, 136:224]

        # Get health points using the fraction of pixels brighter than 129.
        player_health = np.sum(player_health_area > 129) / player_health_area.size
        opponent_health = np.sum(opponent_health_area > 129) / opponent_health_area.size

        player_health_diff = self.prev_player_health - player_health
        opponent_health_diff = self.prev_opponent_health - opponent_health

        reward = (opponent_health_diff - player_health_diff) * 200  # max would be 200

        # Penalty for each step without any damage to the opponent
        if opponent_health_diff <= 0.0000001:
            reward -= 12.0 / 60.0  # -12 points per second if no damage to opponent

        self.prev_player_health = player_health
        self.prev_opponent_health = opponent_health

        # Print the health values of the player and the opponent
        # print("Player health: %f Opponent health: %f" % (player_health, opponent_health))
        return reward

    def reset(self):
        observation = self.env.reset()
        self.prev_player_health = 1.0
        self.prev_opponent_health = 1.0

        # Clear the frame stack and add the first observation 4 times
        self.frame_stack.clear()
        for _ in range(4):
            self.frame_stack.append(self._preprocess_observation(observation)[0])

        return self._preprocess_observation(observation)

    def step(self, action):
        # observation, _, _, info = self.env.step(action)
        # NOTE: the policy's action is deliberately ignored here in favor of a random one.
        observation, _reward, _done, info = self.env.step(self.env.action_space.sample())
        custom_reward = self._get_reward()
        custom_reward -= 1.0 / 60.0  # penalty for each step (-1 point per second)

        custom_done = False
        if self.prev_player_health <= 0.00001 or self.prev_opponent_health <= 0.00001:
            custom_reward += self._get_win_or_lose_bonus()
            if not self.testing:
                custom_done = True
            else:
                self.prev_player_health = 1.0
                self.prev_opponent_health = 1.0

        return self._preprocess_observation(observation), custom_reward, custom_done, info
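The reward scaling can be checked by hand: if the opponent loses 10% of their health bar in a step while the player loses none, the health term is (0.1 - 0.0) * 200 = 20. A standalone sketch of the same bookkeeping, using assumed health values rather than real screen pixels:

# Standalone sketch of the health-difference reward (assumed values, not game data).
prev_player_health, prev_opponent_health = 1.0, 1.0
player_health, opponent_health = 1.0, 0.9   # opponent just lost 10% of the bar

player_health_diff = prev_player_health - player_health        # 0.0
opponent_health_diff = prev_opponent_health - opponent_health  # 0.1
reward = (opponent_health_diff - player_health_diff) * 200     # 20.0
reward -= 1.0 / 60.0                                           # per-step time penalty
print(round(reward, 4))  # 19.9833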
70 001_image_stack/test.py Normal file
@@ -0,0 +1,70 @@
import time

import cv2
import retro
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv

from custom_cnn import CustomCNN
from street_fighter_custom_wrapper import StreetFighterCustomWrapper

def make_env(game, state):
    def _init():
        env = retro.RetroEnv(
            game=game,
            state=state,
            use_restricted_actions=retro.Actions.FILTERED,
            obs_type=retro.Observations.IMAGE
        )
        env = StreetFighterCustomWrapper(env, testing=True)
        return env
    return _init

game = "StreetFighterIISpecialChampionEdition-Genesis"
state_stages = [
    "Champion.Level1.ChunLiVsGuile",
    "Champion.Level2.ChunLiVsKen",
    "Champion.Level3.ChunLiVsChunLi",
    "Champion.Level4.ChunLiVsZangief",
    "Champion.Level5.ChunLiVsDhalsim",
    "Champion.Level6.ChunLiVsRyu",
    "Champion.Level7.ChunLiVsEHonda",
    "Champion.Level8.ChunLiVsBlanka",
    "Champion.Level9.ChunLiVsBalrog",
    "Champion.Level10.ChunLiVsVega",
    "Champion.Level11.ChunLiVsSagat",
    "Champion.Level12.ChunLiVsBison"
    # Add other stages as necessary
]

env = make_env(game, state_stages[0])()

# Wrap the environment
env = DummyVecEnv([lambda: env])

policy_kwargs = {
    'features_extractor_class': CustomCNN
}

model = PPO(
    "CnnPolicy",
    env,
    device="cuda",
    policy_kwargs=policy_kwargs,
    verbose=1
)
# PPO.load is a classmethod that returns a new model, so the result must be
# assigned; a bare model.load(...) call would silently discard the weights.
model = PPO.load(r"trained_models_continued/ppo_chunli_432000_steps", env=env)

obs = env.reset()
done = False

while True:
    timestamp = time.time()
    action, _ = model.predict(obs)
    obs, rewards, done, info = env.step(action)
    env.render()
    render_time = time.time() - timestamp
    if render_time < 0.0111:
        time.sleep(0.0111 - render_time)  # Cap rendering at roughly 90 FPS

# env.close()
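The script only ever plays state_stages[0]. To evaluate the checkpoint on every stage, one could rebuild the environment per stage; a hypothetical loop, not part of the repo, assuming the earlier emulator has been closed first (gym-retro allows one emulator instance per process) and the wrapper is built with testing=False so episodes terminate:

env.close()  # gym-retro permits a single emulator instance per process

for state in state_stages:
    eval_env = DummyVecEnv([lambda s=state: StreetFighterCustomWrapper(
        retro.RetroEnv(
            game=game,
            state=s,
            use_restricted_actions=retro.Actions.FILTERED,
            obs_type=retro.Observations.IMAGE
        ),
        testing=False  # let the episode end when either health bar empties
    )])
    obs = eval_env.reset()
    done, episode_reward = False, 0.0
    while not done:
        action, _ = model.predict(obs)
        obs, reward, done, info = eval_env.step(action)
        episode_reward += float(reward[0])
    print(state, "episode reward:", episode_reward)
    eval_env.close()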
123 001_image_stack/train.py Normal file
@@ -0,0 +1,123 @@
import os
import random

import gym
import cv2
import retro
import numpy as np
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import SubprocVecEnv
from stable_baselines3.common.preprocessing import is_image_space, is_image_space_channels_first
from stable_baselines3.common.callbacks import BaseCallback, CheckpointCallback

from custom_cnn import CustomCNN
from street_fighter_custom_wrapper import StreetFighterCustomWrapper

class RandomOpponentChangeCallback(BaseCallback):
    def __init__(self, stages, opponent_interval, verbose=0):
        super(RandomOpponentChangeCallback, self).__init__(verbose)
        self.stages = stages
        self.opponent_interval = opponent_interval

    def _on_step(self) -> bool:
        if self.n_calls % self.opponent_interval == 0:
            new_state = random.choice(self.stages)
            print("\nCurrent state:", new_state)
            self.training_env.env_method("load_state", new_state, indices=None)
        return True

def make_env(game, state, seed=0):
    def _init():
        env = retro.RetroEnv(
            game=game,
            state=state,
            use_restricted_actions=retro.Actions.FILTERED,
            obs_type=retro.Observations.IMAGE
        )
        env = StreetFighterCustomWrapper(env)
        env.seed(seed)
        return env
    return _init

def main():
    # Set up the environment and model
    game = "StreetFighterIISpecialChampionEdition-Genesis"
    state_stages = [
        "ChampionX.Level1.ChunLiVsKen",
        "ChampionX.Level2.ChunLiVsChunLi",
        "ChampionX.Level3.ChunLiVsZangief",
        "ChampionX.Level4.ChunLiVsDhalsim",
        "ChampionX.Level5.ChunLiVsRyu",
        "ChampionX.Level6.ChunLiVsEHonda",
        "ChampionX.Level7.ChunLiVsBlanka",
        "ChampionX.Level8.ChunLiVsGuile",
        "ChampionX.Level9.ChunLiVsBalrog",
        "ChampionX.Level10.ChunLiVsVega",
        "ChampionX.Level11.ChunLiVsSagat",
        "ChampionX.Level12.ChunLiVsBison"
        # Add other stages as necessary
    ]
    # Champion is at difficulty level 4, ChampionX is at difficulty level 8.

    num_envs = 8

    env = SubprocVecEnv([make_env(game, state_stages[0], seed=i) for i in range(num_envs)])

    # Using CustomCNN as the feature extractor
    policy_kwargs = {
        'features_extractor_class': CustomCNN
    }

    model = PPO(
        "CnnPolicy",
        env,
        device="cuda",
        policy_kwargs=policy_kwargs,
        verbose=1,
        n_steps=5400,
        batch_size=64,
        n_epochs=10,
        learning_rate=0.0003,
        ent_coef=0.01,
        clip_range=0.2,
        clip_range_vf=None,
        gamma=0.99,
        gae_lambda=0.95,
        max_grad_norm=0.5,
        use_sde=False,
        sde_sample_freq=-1
    )

    # Set the save directory
    save_dir = "trained_models_continued"
    os.makedirs(save_dir, exist_ok=True)

    # Load the model from file
    # Change the path to the actual path of the model file
    model_path = "trained_models/ppo_chunli_1296000_steps.zip"

    # Load the model and override the learning rate and entropy coefficient;
    # this replaces the freshly constructed model above.
    custom_objects = {
        "learning_rate": 0.00005,
        "ent_coef": 0.2
    }
    model = PPO.load(model_path, env=env, device="cuda", custom_objects=custom_objects)

    # Set up callbacks
    opponent_interval = 5400  # stage_interval * num_envs = total_steps_per_stage
    checkpoint_interval = 54000  # checkpoint_interval * num_envs = total_steps_per_checkpoint (every 80 rounds)
    checkpoint_callback = CheckpointCallback(save_freq=checkpoint_interval, save_path=save_dir, name_prefix="ppo_chunli")
    stage_increase_callback = RandomOpponentChangeCallback(state_stages, opponent_interval)

    model.learn(
        total_timesteps=int(6048000),  # total_timesteps = stage_interval * num_envs * num_stages (1120 rounds)
        callback=[checkpoint_callback, stage_increase_callback]
    )

    # Save the final model
    model.save(os.path.join(save_dir, "ppo_sf2_chunli_final.zip"))

if __name__ == "__main__":
    main()
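The opponent swap relies on VecEnv.env_method, which forwards a method call to every sub-environment; gym.Wrapper then delegates load_state to the underlying retro.RetroEnv. A minimal sketch of the mechanism, using a stand-in class instead of the emulator:

import gym
from stable_baselines3.common.vec_env import DummyVecEnv

class ToyEnv(gym.Env):
    # Stand-in for retro.RetroEnv; only load_state matters for this sketch.
    observation_space = gym.spaces.Box(low=0.0, high=1.0, shape=(1,))
    action_space = gym.spaces.Discrete(2)

    def reset(self):
        return self.observation_space.sample()

    def step(self, action):
        return self.observation_space.sample(), 0.0, False, {}

    def load_state(self, name):
        print("loading state:", name)

vec_env = DummyVecEnv([lambda: ToyEnv()] * 2)
# Same call the callback makes; it fans out to every sub-environment.
vec_env.env_method("load_state", "ChampionX.Level5.ChunLiVsRyu", indices=None)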
631 001_image_stack/training_log.txt Normal file
@@ -0,0 +1,631 @@
(StreetFighterAI) PS C:\Users\unitec\Documents\AIProjects\street-fighter-ai\001_image_stack> python .\train.py
Using cuda device

Current state: ChampionX.Level4.ChunLiVsDhalsim
------------------------------
| time/ | |
| fps | 1489 |
| iterations | 1 |
| time_elapsed | 28 |
| total_timesteps | 43200 |
------------------------------

Current state: ChampionX.Level6.ChunLiVsEHonda
-----------------------------------------
| time/ | |
| fps | 693 |
| iterations | 2 |
| time_elapsed | 124 |
| total_timesteps | 86400 |
| train/ | |
| approx_kl | 0.008018286 |
| clip_fraction | 0.0528 |
| clip_range | 0.2 |
| entropy_loss | -8.31 |
| explained_variance | -0.000782 |
| learning_rate | 0.0003 |
| loss | 189 |
| n_updates | 10 |
| policy_gradient_loss | -0.00354 |
| value_loss | 398 |
-----------------------------------------

Current state: ChampionX.Level1.ChunLiVsKen
-----------------------------------------
| time/ | |
| fps | 476 |
| iterations | 3 |
| time_elapsed | 271 |
| total_timesteps | 129600 |
| train/ | |
| approx_kl | 0.010610209 |
| clip_fraction | 0.119 |
| clip_range | 0.2 |
| entropy_loss | -8.3 |
| explained_variance | 0.0323 |
| learning_rate | 0.0003 |
| loss | 0.228 |
| n_updates | 20 |
| policy_gradient_loss | -0.00663 |
| value_loss | 103 |
-----------------------------------------

Current state: ChampionX.Level10.ChunLiVsVega
-----------------------------------------
| time/ | |
| fps | 467 |
| iterations | 4 |
| time_elapsed | 369 |
| total_timesteps | 172800 |
| train/ | |
| approx_kl | 0.011115557 |
| clip_fraction | 0.122 |
| clip_range | 0.2 |
| entropy_loss | -8.3 |
| explained_variance | 0.125 |
| learning_rate | 0.0003 |
| loss | 5.37 |
| n_updates | 30 |
| policy_gradient_loss | -0.00485 |
| value_loss | 83.8 |
-----------------------------------------

Current state: ChampionX.Level11.ChunLiVsSagat
-----------------------------------------
| time/ | |
| fps | 462 |
| iterations | 5 |
| time_elapsed | 466 |
| total_timesteps | 216000 |
| train/ | |
| approx_kl | 0.012769428 |
| clip_fraction | 0.133 |
| clip_range | 0.2 |
| entropy_loss | -8.29 |
| explained_variance | 0.409 |
| learning_rate | 0.0003 |
| loss | 18.4 |
| n_updates | 40 |
| policy_gradient_loss | -0.00746 |
| value_loss | 31.6 |
-----------------------------------------

Current state: ChampionX.Level7.ChunLiVsBlanka
-----------------------------------------
| time/ | |
| fps | 460 |
| iterations | 6 |
| time_elapsed | 563 |
| total_timesteps | 259200 |
| train/ | |
| approx_kl | 0.014561476 |
| clip_fraction | 0.184 |
| clip_range | 0.2 |
| entropy_loss | -8.29 |
| explained_variance | 0.15 |
| learning_rate | 0.0003 |
| loss | 0.66 |
| n_updates | 50 |
| policy_gradient_loss | -0.00799 |
| value_loss | 23.2 |
-----------------------------------------

Current state: ChampionX.Level12.ChunLiVsBison
-----------------------------------------
| time/ | |
| fps | 447 |
| iterations | 7 |
| time_elapsed | 675 |
| total_timesteps | 302400 |
| train/ | |
| approx_kl | 0.013581872 |
| clip_fraction | 0.147 |
| clip_range | 0.2 |
| entropy_loss | -8.28 |
| explained_variance | 0.447 |
| learning_rate | 0.0003 |
| loss | 0.794 |
| n_updates | 60 |
| policy_gradient_loss | -0.00405 |
| value_loss | 33.4 |
-----------------------------------------

Current state: ChampionX.Level4.ChunLiVsDhalsim
-----------------------------------------
| time/ | |
| fps | 440 |
| iterations | 8 |
| time_elapsed | 784 |
| total_timesteps | 345600 |
| train/ | |
| approx_kl | 0.015053411 |
| clip_fraction | 0.186 |
| clip_range | 0.2 |
| entropy_loss | -8.28 |
| explained_variance | 0.39 |
| learning_rate | 0.0003 |
| loss | 0.313 |
| n_updates | 70 |
| policy_gradient_loss | -0.00594 |
| value_loss | 22.3 |
-----------------------------------------

Current state: ChampionX.Level5.ChunLiVsRyu
----------------------------------------
| time/ | |
| fps | 433 |
| iterations | 9 |
| time_elapsed | 896 |
| total_timesteps | 388800 |
| train/ | |
| approx_kl | 0.01407744 |
| clip_fraction | 0.152 |
| clip_range | 0.2 |
| entropy_loss | -8.27 |
| explained_variance | 0.326 |
| learning_rate | 0.0003 |
| loss | 0.396 |
| n_updates | 80 |
| policy_gradient_loss | -0.00862 |
| value_loss | 15.7 |
----------------------------------------

Current state: ChampionX.Level9.ChunLiVsBalrog
-----------------------------------------
| time/ | |
| fps | 428 |
| iterations | 10 |
| time_elapsed | 1007 |
| total_timesteps | 432000 |
| train/ | |
| approx_kl | 0.013460734 |
| clip_fraction | 0.148 |
| clip_range | 0.2 |
| entropy_loss | -8.27 |
| explained_variance | 0.384 |
| learning_rate | 0.0003 |
| loss | 0.227 |
| n_updates | 90 |
| policy_gradient_loss | -0.00498 |
| value_loss | 16.7 |
-----------------------------------------

Current state: ChampionX.Level9.ChunLiVsBalrog
------------------------------------------
| time/ | |
| fps | 424 |
| iterations | 11 |
| time_elapsed | 1118 |
| total_timesteps | 475200 |
| train/ | |
| approx_kl | 0.0145851895 |
| clip_fraction | 0.165 |
| clip_range | 0.2 |
| entropy_loss | -8.26 |
| explained_variance | 0.352 |
| learning_rate | 0.0003 |
| loss | 0.147 |
| n_updates | 100 |
| policy_gradient_loss | -0.00597 |
| value_loss | 19.8 |
------------------------------------------

Current state: ChampionX.Level4.ChunLiVsDhalsim
-----------------------------------------
| time/ | |
| fps | 424 |
| iterations | 12 |
| time_elapsed | 1219 |
| total_timesteps | 518400 |
| train/ | |
| approx_kl | 0.015144574 |
| clip_fraction | 0.161 |
| clip_range | 0.2 |
| entropy_loss | -8.25 |
| explained_variance | 0.383 |
| learning_rate | 0.0003 |
| loss | 1.52 |
| n_updates | 110 |
| policy_gradient_loss | -0.00749 |
| value_loss | 24.1 |
-----------------------------------------

Current state: ChampionX.Level6.ChunLiVsEHonda
-----------------------------------------
| time/ | |
| fps | 425 |
| iterations | 13 |
| time_elapsed | 1318 |
| total_timesteps | 561600 |
| train/ | |
| approx_kl | 0.015790801 |
| clip_fraction | 0.158 |
| clip_range | 0.2 |
| entropy_loss | -8.25 |
| explained_variance | 0.555 |
| learning_rate | 0.0003 |
| loss | 0.665 |
| n_updates | 120 |
| policy_gradient_loss | -0.00889 |
| value_loss | 20.7 |
-----------------------------------------

Current state: ChampionX.Level6.ChunLiVsEHonda
-----------------------------------------
| time/ | |
| fps | 426 |
| iterations | 14 |
| time_elapsed | 1417 |
| total_timesteps | 604800 |
| train/ | |
| approx_kl | 0.016785465 |
| clip_fraction | 0.173 |
| clip_range | 0.2 |
| entropy_loss | -8.24 |
| explained_variance | 0.609 |
| learning_rate | 0.0003 |
| loss | 0.313 |
| n_updates | 130 |
| policy_gradient_loss | -0.00758 |
| value_loss | 14.9 |
-----------------------------------------

Current state: ChampionX.Level10.ChunLiVsVega
-----------------------------------------
| time/ | |
| fps | 428 |
| iterations | 15 |
| time_elapsed | 1513 |
| total_timesteps | 648000 |
| train/ | |
| approx_kl | 0.017042443 |
| clip_fraction | 0.176 |
| clip_range | 0.2 |
| entropy_loss | -8.24 |
| explained_variance | 0.759 |
| learning_rate | 0.0003 |
| loss | 0.634 |
| n_updates | 140 |
| policy_gradient_loss | -0.00617 |
| value_loss | 15.4 |
-----------------------------------------

Current state: ChampionX.Level3.ChunLiVsZangief
-----------------------------------------
| time/ | |
| fps | 427 |
| iterations | 16 |
| time_elapsed | 1618 |
| total_timesteps | 691200 |
| train/ | |
| approx_kl | 0.017714709 |
| clip_fraction | 0.176 |
| clip_range | 0.2 |
| entropy_loss | -8.23 |
| explained_variance | 0.79 |
| learning_rate | 0.0003 |
| loss | 0.941 |
| n_updates | 150 |
| policy_gradient_loss | -0.00703 |
| value_loss | 17.5 |
-----------------------------------------

Current state: ChampionX.Level7.ChunLiVsBlanka
-----------------------------------------
| time/ | |
| fps | 424 |
| iterations | 17 |
| time_elapsed | 1728 |
| total_timesteps | 734400 |
| train/ | |
| approx_kl | 0.018709755 |
| clip_fraction | 0.196 |
| clip_range | 0.2 |
| entropy_loss | -8.22 |
| explained_variance | 0.746 |
| learning_rate | 0.0003 |
| loss | 0.505 |
| n_updates | 160 |
| policy_gradient_loss | -0.00795 |
| value_loss | 11.9 |
-----------------------------------------

Current state: ChampionX.Level11.ChunLiVsSagat
-----------------------------------------
| time/ | |
| fps | 423 |
| iterations | 18 |
| time_elapsed | 1837 |
| total_timesteps | 777600 |
| train/ | |
| approx_kl | 0.017850244 |
| clip_fraction | 0.182 |
| clip_range | 0.2 |
| entropy_loss | -8.21 |
| explained_variance | 0.687 |
| learning_rate | 0.0003 |
| loss | 0.379 |
| n_updates | 170 |
| policy_gradient_loss | -0.00568 |
| value_loss | 15.2 |
-----------------------------------------

Current state: ChampionX.Level3.ChunLiVsZangief
----------------------------------------
| time/ | |
| fps | 420 |
| iterations | 19 |
| time_elapsed | 1950 |
| total_timesteps | 820800 |
| train/ | |
| approx_kl | 0.02048213 |
| clip_fraction | 0.221 |
| clip_range | 0.2 |
| entropy_loss | -8.21 |
| explained_variance | 0.707 |
| learning_rate | 0.0003 |
| loss | 0.391 |
| n_updates | 180 |
| policy_gradient_loss | -0.00419 |
| value_loss | 13 |
----------------------------------------

Current state: ChampionX.Level11.ChunLiVsSagat
-----------------------------------------
| time/ | |
| fps | 418 |
| iterations | 20 |
| time_elapsed | 2062 |
| total_timesteps | 864000 |
| train/ | |
| approx_kl | 0.016617421 |
| clip_fraction | 0.172 |
| clip_range | 0.2 |
| entropy_loss | -8.2 |
| explained_variance | 0.744 |
| learning_rate | 0.0003 |
| loss | 1.66 |
| n_updates | 190 |
| policy_gradient_loss | -0.00437 |
| value_loss | 15.8 |
-----------------------------------------

Current state: ChampionX.Level6.ChunLiVsEHonda
-----------------------------------------
| time/ | |
| fps | 417 |
| iterations | 21 |
| time_elapsed | 2174 |
| total_timesteps | 907200 |
| train/ | |
| approx_kl | 0.017259926 |
| clip_fraction | 0.171 |
| clip_range | 0.2 |
| entropy_loss | -8.19 |
| explained_variance | 0.756 |
| learning_rate | 0.0003 |
| loss | 0.457 |
| n_updates | 200 |
| policy_gradient_loss | -0.00897 |
| value_loss | 14.9 |
-----------------------------------------

Current state: ChampionX.Level10.ChunLiVsVega
-----------------------------------------
| time/ | |
| fps | 417 |
| iterations | 22 |
| time_elapsed | 2276 |
| total_timesteps | 950400 |
| train/ | |
| approx_kl | 0.018794816 |
| clip_fraction | 0.19 |
| clip_range | 0.2 |
| entropy_loss | -8.18 |
| explained_variance | 0.834 |
| learning_rate | 0.0003 |
| loss | 3.33 |
| n_updates | 210 |
| policy_gradient_loss | -0.00535 |
| value_loss | 15.5 |
-----------------------------------------

Current state: ChampionX.Level5.ChunLiVsRyu
-----------------------------------------
| time/ | |
| fps | 418 |
| iterations | 23 |
| time_elapsed | 2374 |
| total_timesteps | 993600 |
| train/ | |
| approx_kl | 0.019361915 |
| clip_fraction | 0.188 |
| clip_range | 0.2 |
| entropy_loss | -8.18 |
| explained_variance | 0.825 |
| learning_rate | 0.0003 |
| loss | 0.235 |
| n_updates | 220 |
| policy_gradient_loss | -0.00762 |
| value_loss | 13.4 |
-----------------------------------------

Current state: ChampionX.Level2.ChunLiVsChunLi
-----------------------------------------
| time/ | |
| fps | 419 |
| iterations | 24 |
| time_elapsed | 2471 |
| total_timesteps | 1036800 |
| train/ | |
| approx_kl | 0.022115083 |
| clip_fraction | 0.233 |
| clip_range | 0.2 |
| entropy_loss | -8.18 |
| explained_variance | 0.8 |
| learning_rate | 0.0003 |
| loss | 0.211 |
| n_updates | 230 |
| policy_gradient_loss | -0.00771 |
| value_loss | 11.7 |
-----------------------------------------

Current state: ChampionX.Level3.ChunLiVsZangief
-----------------------------------------
| time/ | |
| fps | 419 |
| iterations | 25 |
| time_elapsed | 2574 |
| total_timesteps | 1080000 |
| train/ | |
| approx_kl | 0.023090197 |
| clip_fraction | 0.233 |
| clip_range | 0.2 |
| entropy_loss | -8.17 |
| explained_variance | 0.759 |
| learning_rate | 0.0003 |
| loss | 0.445 |
| n_updates | 240 |
| policy_gradient_loss | -0.00523 |
| value_loss | 13.7 |
-----------------------------------------

Current state: ChampionX.Level11.ChunLiVsSagat
-----------------------------------------
| time/ | |
| fps | 418 |
| iterations | 26 |
| time_elapsed | 2683 |
| total_timesteps | 1123200 |
| train/ | |
| approx_kl | 0.024867734 |
| clip_fraction | 0.281 |
| clip_range | 0.2 |
| entropy_loss | -8.16 |
| explained_variance | 0.688 |
| learning_rate | 0.0003 |
| loss | 0.557 |
| n_updates | 250 |
| policy_gradient_loss | 0.00215 |
| value_loss | 13.9 |
-----------------------------------------

Current state: ChampionX.Level10.ChunLiVsVega
-----------------------------------------
| time/ | |
| fps | 417 |
| iterations | 27 |
| time_elapsed | 2793 |
| total_timesteps | 1166400 |
| train/ | |
| approx_kl | 0.020454599 |
| clip_fraction | 0.203 |
| clip_range | 0.2 |
| entropy_loss | -8.19 |
| explained_variance | 0.766 |
| learning_rate | 0.0003 |
| loss | 0.314 |
| n_updates | 260 |
| policy_gradient_loss | -0.0058 |
| value_loss | 17.1 |
-----------------------------------------

Current state: ChampionX.Level10.ChunLiVsVega
-----------------------------------------
| time/ | |
| fps | 416 |
| iterations | 28 |
| time_elapsed | 2904 |
| total_timesteps | 1209600 |
| train/ | |
| approx_kl | 0.020690009 |
| clip_fraction | 0.208 |
| clip_range | 0.2 |
| entropy_loss | -8.17 |
| explained_variance | 0.827 |
| learning_rate | 0.0003 |
| loss | 1.38 |
| n_updates | 270 |
| policy_gradient_loss | 2.12e-05 |
| value_loss | 20.4 |
-----------------------------------------

Current state: ChampionX.Level10.ChunLiVsVega
-----------------------------------------
| time/ | |
| fps | 415 |
| iterations | 29 |
| time_elapsed | 3015 |
| total_timesteps | 1252800 |
| train/ | |
| approx_kl | 0.020646438 |
| clip_fraction | 0.208 |
| clip_range | 0.2 |
| entropy_loss | -8.15 |
| explained_variance | 0.81 |
| learning_rate | 0.0003 |
| loss | 0.235 |
| n_updates | 280 |
| policy_gradient_loss | -0.00852 |
| value_loss | 12.5 |
-----------------------------------------

Current state: ChampionX.Level9.ChunLiVsBalrog
-----------------------------------------
| time/ | |
| fps | 414 |
| iterations | 30 |
| time_elapsed | 3128 |
| total_timesteps | 1296000 |
| train/ | |
| approx_kl | 0.021910097 |
| clip_fraction | 0.212 |
| clip_range | 0.2 |
| entropy_loss | -8.15 |
| explained_variance | 0.79 |
| learning_rate | 0.0003 |
| loss | 0.0264 |
| n_updates | 290 |
| policy_gradient_loss | -0.00872 |
| value_loss | 12.5 |
-----------------------------------------

Current state: ChampionX.Level12.ChunLiVsBison
-----------------------------------------
| time/ | |
| fps | 412 |
| iterations | 31 |
| time_elapsed | 3243 |
| total_timesteps | 1339200 |
| train/ | |
| approx_kl | 0.025281599 |
| clip_fraction | 0.254 |
| clip_range | 0.2 |
| entropy_loss | -8.13 |
| explained_variance | 0.773 |
| learning_rate | 0.0003 |
| loss | 1.18 |
| n_updates | 300 |
| policy_gradient_loss | -0.00679 |
| value_loss | 12.6 |
-----------------------------------------

Current state: ChampionX.Level2.ChunLiVsChunLi
-----------------------------------------
| time/ | |
| fps | 412 |
| iterations | 32 |
| time_elapsed | 3349 |
| total_timesteps | 1382400 |
| train/ | |
| approx_kl | 0.026466375 |
| clip_fraction | 0.259 |
| clip_range | 0.2 |
| entropy_loss | -8.12 |
| explained_variance | 0.647 |
| learning_rate | 0.0003 |
| loss | 0.518 |
| n_updates | 310 |
| policy_gradient_loss | 0.000522 |
| value_loss | 18.8 |
-----------------------------------------
BIN 002_lstm/__pycache__/cnn_lstm.cpython-38.pyc Normal file
Binary file not shown.
35 002_lstm/cnn_lstm.py Normal file
@@ -0,0 +1,35 @@
import torch
import torch.nn as nn

class CNNEncoder(nn.Module):
    def __init__(self, features_dim=512):
        super(CNNEncoder, self).__init__()
        # 16 input channels: one per frame in the 16-frame stack
        self.conv1 = nn.Conv2d(16, 32, kernel_size=5, stride=2)
        self.relu1 = nn.ReLU()
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5, stride=2)
        self.relu2 = nn.ReLU()
        self.conv3 = nn.Conv2d(64, 64, kernel_size=3, stride=1)
        self.relu3 = nn.ReLU()
        self.fc = nn.Linear(16384, 512)  # 64 * 16 * 16 for 84x84 inputs

    def forward(self, x):
        x = self.relu1(self.conv1(x))
        x = self.relu2(self.conv2(x))
        x = self.relu3(self.conv3(x))
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

class CNNLSTM(nn.Module):
    def __init__(self, features_dim=512):
        super(CNNLSTM, self).__init__()
        self.encoder = CNNEncoder(512)
        self.lstm = nn.LSTM(512, 512)

    def forward(self, x, hidden):
        x = self.encoder(x)
        # Treat the batch as a length-1 sequence: (1, batch, 512)
        x, hidden = self.lstm(x.unsqueeze(0), hidden)
        return x.squeeze(0), hidden

    def init_hidden(self, batch_size):
        return (torch.zeros(1, batch_size, 512), torch.zeros(1, batch_size, 512))
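A minimal usage sketch of this module, assuming a batch of 16-frame stacks at 84x84 (the shape produced by the 002_lstm wrapper):

import torch
from cnn_lstm import CNNLSTM

model = CNNLSTM()
batch_size = 8
hidden = model.init_hidden(batch_size)     # (h0, c0), each (1, batch, 512)

obs = torch.zeros(batch_size, 16, 84, 84)  # 16 stacked grayscale frames
features, hidden = model(obs, hidden)      # features: (batch, 512)
print(features.shape)                      # torch.Size([8, 512])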
102 002_lstm/street_fighter_custom_wrapper.py Normal file
@@ -0,0 +1,102 @@
import collections

import gym
import cv2
import numpy as np
from torchvision.transforms import Normalize  # unused in this version
from gym.spaces import MultiBinary

# Custom environment wrapper
class StreetFighterCustomWrapper(gym.Wrapper):
    def __init__(self, env, testing=False, threshold=0.65):
        super(StreetFighterCustomWrapper, self).__init__(env)

        self.action_space = MultiBinary(12)

        # Use a deque to store the last 16 frames (0.267 seconds)
        self.frame_stack = collections.deque(maxlen=16)

        self.threshold = threshold
        self.game_screen_gray = None

        self.prev_player_health = 1.0
        self.prev_opponent_health = 1.0

        # Update observation space to include 16 stacked grayscale images
        self.observation_space = gym.spaces.Box(
            low=0.0, high=1.0, shape=(16, 84, 84), dtype=np.float32
        )

        self.testing = testing

    def _preprocess_observation(self, observation):
        self.game_screen_gray = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY)
        resized_image = cv2.resize(self.game_screen_gray, (84, 84), interpolation=cv2.INTER_AREA) / 255.0
        # Add the resized image to the frame stack
        self.frame_stack.append(resized_image)

        # Stack the last 16 frames and add a batch axis.
        # NOTE: this yields (1, 16, 84, 84), which does not match the
        # (16, 84, 84) shape declared in observation_space above.
        stacked_frames = np.stack(self.frame_stack, axis=0)[np.newaxis, ...]  # Shape: (1, 16, 84, 84)
        return stacked_frames

    def _get_win_or_lose_bonus(self):
        if self.prev_player_health > self.prev_opponent_health:
            # print('You win!')
            return 300
        else:
            # print('You lose!')
            return -300

    def _get_reward(self):
        player_health_area = self.game_screen_gray[15:20, 32:120]
        opponent_health_area = self.game_screen_gray[15:20, 136:224]

        # Get health points using the fraction of pixels brighter than 129.
        player_health = np.sum(player_health_area > 129) / player_health_area.size
        opponent_health = np.sum(opponent_health_area > 129) / opponent_health_area.size

        player_health_diff = self.prev_player_health - player_health
        opponent_health_diff = self.prev_opponent_health - opponent_health

        reward = (opponent_health_diff - player_health_diff) * 200  # max would be 200

        # Penalty for each step without any damage to the opponent
        if opponent_health_diff <= 0.0000001:
            reward -= 12.0 / 60.0  # -12 points per second if no damage to opponent

        self.prev_player_health = player_health
        self.prev_opponent_health = opponent_health

        # Print the health values of the player and the opponent
        # print("Player health: %f Opponent health: %f" % (player_health, opponent_health))
        return reward

    def reset(self):
        observation = self.env.reset()
        self.prev_player_health = 1.0
        self.prev_opponent_health = 1.0

        # Clear the frame stack and add the first observation 16 times.
        # [0][0] extracts a single (84, 84) frame from the (1, 16, 84, 84)
        # result; the original [0] appended the whole stack, which breaks
        # np.stack on the next call.
        self.frame_stack.clear()
        for _ in range(16):
            self.frame_stack.append(self._preprocess_observation(observation)[0][0])

        return self._preprocess_observation(observation)

    def step(self, action):
        # observation, _, _, info = self.env.step(action)
        # NOTE: the policy's action is deliberately ignored here in favor of a random one.
        observation, _reward, _done, info = self.env.step(self.env.action_space.sample())
        custom_reward = self._get_reward()
        custom_reward -= 1.0 / 60.0  # penalty for each step (-1 point per second)

        custom_done = False
        if self.prev_player_health <= 0.00001 or self.prev_opponent_health <= 0.00001:
            custom_reward += self._get_win_or_lose_bonus()
            if not self.testing:
                custom_done = True
            else:
                self.prev_player_health = 1.0
                self.prev_opponent_health = 1.0

        return self._preprocess_observation(observation), custom_reward, custom_done, info
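The batch-axis mismatch noted above is easy to make visible with a few lines of NumPy, using zero frames as stand-ins for game screens:

import collections
import numpy as np

frame_stack = collections.deque(maxlen=16)
for _ in range(16):
    frame_stack.append(np.zeros((84, 84), dtype=np.float32))

stacked = np.stack(frame_stack, axis=0)[np.newaxis, ...]
print(stacked.shape)               # (1, 16, 84, 84)

declared = (16, 84, 84)            # shape advertised in observation_space
print(stacked.shape == declared)   # False: the extra batch axis breaks the contract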
73 002_lstm/test.py Normal file
@@ -0,0 +1,73 @@
import time

import cv2
import retro
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv

from cnn_lstm import CNNLSTM, CNNEncoder
from street_fighter_custom_wrapper import StreetFighterCustomWrapper

def make_env(game, state):
    def _init():
        # Loaded for template matching in an earlier wrapper version; unused here.
        win_template = cv2.imread('images/pattern_win_gray.png', cv2.IMREAD_GRAYSCALE)
        lose_template = cv2.imread('images/pattern_lose_gray.png', cv2.IMREAD_GRAYSCALE)
        env = retro.RetroEnv(
            game=game,
            state=state,
            use_restricted_actions=retro.Actions.FILTERED,
            obs_type=retro.Observations.IMAGE
        )
        # This wrapper version no longer accepts the template arguments.
        env = StreetFighterCustomWrapper(env, testing=True)
        return env
    return _init

game = "StreetFighterIISpecialChampionEdition-Genesis"
state_stages = [
    "Champion.Level1.ChunLiVsGuile",
    "Champion.Level2.ChunLiVsKen",
    "Champion.Level3.ChunLiVsChunLi",
    "Champion.Level4.ChunLiVsZangief",
    "Champion.Level5.ChunLiVsDhalsim",
    "Champion.Level6.ChunLiVsRyu",
    "Champion.Level7.ChunLiVsEHonda",
    "Champion.Level8.ChunLiVsBlanka",
    "Champion.Level9.ChunLiVsBalrog",
    "Champion.Level10.ChunLiVsVega",
    "Champion.Level11.ChunLiVsSagat",
    "Champion.Level12.ChunLiVsBison"
    # Add other stages as necessary
]

env = make_env(game, state_stages[0])()

# Wrap the environment
env = DummyVecEnv([lambda: env])

policy_kwargs = {
    'features_extractor_class': CNNEncoder,
    # NOTE: stable-baselines3's PPO does not understand 'lstm' entries in
    # net_arch; recurrent policies require sb3-contrib's RecurrentPPO.
    'net_arch': [512, 'lstm']
}

model = PPO(
    "CnnPolicy",
    env,
    device="cuda",
    policy_kwargs=policy_kwargs,
    verbose=1
)
# PPO.load returns a new model; the result must be assigned to take effect.
model = PPO.load(r"trained_models/ppo_chunli_1296000_steps", env=env)

obs = env.reset()
done = False

while True:
    timestamp = time.time()
    action, _ = model.predict(obs)
    obs, rewards, done, info = env.step(action)
    env.render()
    render_time = time.time() - timestamp
    if render_time < 0.0111:
        time.sleep(0.0111 - render_time)  # Cap rendering at roughly 90 FPS

# env.close()
112 002_lstm/train.py Normal file
@@ -0,0 +1,112 @@
import os
import random

import gym
import cv2
import retro
import numpy as np
from stable_baselines3 import PPO
from stable_baselines3.common.policies import ActorCriticPolicy
from stable_baselines3.common.vec_env import SubprocVecEnv
from stable_baselines3.common.callbacks import BaseCallback, CheckpointCallback

from cnn_lstm import CNNLSTM, CNNEncoder
from street_fighter_custom_wrapper import StreetFighterCustomWrapper

class RandomOpponentChangeCallback(BaseCallback):
    def __init__(self, stages, opponent_interval, verbose=0):
        super(RandomOpponentChangeCallback, self).__init__(verbose)
        self.stages = stages
        self.opponent_interval = opponent_interval

    def _on_step(self) -> bool:
        if self.n_calls % self.opponent_interval == 0:
            new_state = random.choice(self.stages)
            print("\nCurrent state:", new_state)
            self.training_env.env_method("load_state", new_state, indices=None)
        return True

def make_env(game, state, seed=0):
    def _init():
        env = retro.RetroEnv(
            game=game,
            state=state,
            use_restricted_actions=retro.Actions.FILTERED,
            obs_type=retro.Observations.IMAGE
        )
        env = StreetFighterCustomWrapper(env)
        env.seed(seed)
        return env
    return _init

def main():
    # Set up the environment and model
    game = "StreetFighterIISpecialChampionEdition-Genesis"
    state_stages = [
        "ChampionX.Level1.ChunLiVsKen",
        "ChampionX.Level2.ChunLiVsChunLi",
        "ChampionX.Level3.ChunLiVsZangief",
        "ChampionX.Level4.ChunLiVsDhalsim",
        "ChampionX.Level5.ChunLiVsRyu",
        "ChampionX.Level6.ChunLiVsEHonda",
        "ChampionX.Level7.ChunLiVsBlanka",
        "ChampionX.Level8.ChunLiVsGuile",
        "ChampionX.Level9.ChunLiVsBalrog",
        "ChampionX.Level10.ChunLiVsVega",
        "ChampionX.Level11.ChunLiVsSagat",
        "ChampionX.Level12.ChunLiVsBison"
        # Add other stages as necessary
    ]
    # Champion is at difficulty level 4, ChampionX is at difficulty level 8.

    num_envs = 8

    env = SubprocVecEnv([make_env(game, state_stages[0], seed=i) for i in range(num_envs)])

    # NOTE: swapping in CNNLSTM after ActorCriticPolicy.__init__ does not make
    # PPO recurrent: SB3 calls the extractor as extractor(obs) and keeps no
    # hidden state, while CNNLSTM.forward expects (x, hidden).
    class CustomPolicy(ActorCriticPolicy):
        def __init__(self, *args, **kwargs):
            super(CustomPolicy, self).__init__(*args, **kwargs)
            self.features_extractor = CNNLSTM()

    model = PPO(
        CustomPolicy,
        env,
        device="cuda",
        verbose=1,
        n_steps=5400,
        batch_size=64,
        n_epochs=10,
        learning_rate=0.0003,
        ent_coef=0.01,
        clip_range=0.2,
        clip_range_vf=None,
        gamma=0.99,
        gae_lambda=0.95,
        max_grad_norm=0.5,
        use_sde=False,
        sde_sample_freq=-1
    )

    # Set the save directory
    save_dir = "trained_models"
    os.makedirs(save_dir, exist_ok=True)

    # Set up callbacks
    opponent_interval = 5400  # stage_interval * num_envs = total_steps_per_stage
    checkpoint_interval = 54000  # checkpoint_interval * num_envs = total_steps_per_checkpoint (every 80 rounds)
    checkpoint_callback = CheckpointCallback(save_freq=checkpoint_interval, save_path=save_dir, name_prefix="ppo_chunli")
    stage_increase_callback = RandomOpponentChangeCallback(state_stages, opponent_interval)

    model.learn(
        total_timesteps=int(6048000),  # total_timesteps = stage_interval * num_envs * num_stages (1120 rounds)
        callback=[checkpoint_callback, stage_increase_callback]
    )

    # Save the final model
    model.save(os.path.join(save_dir, "ppo_sf2_chunli_final.zip"))

if __name__ == "__main__":
    main()
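For a supported LSTM policy, one option is RecurrentPPO from the sb3-contrib package, which manages hidden states across timesteps itself. A minimal sketch, assuming sb3-contrib is installed, as a drop-in for the PPO(...) construction inside main():

from sb3_contrib import RecurrentPPO

# "CnnLstmPolicy" pairs a CNN extractor with an LSTM over timesteps;
# hidden states are handled internally by RecurrentPPO.
model = RecurrentPPO(
    "CnnLstmPolicy",
    env,               # the SubprocVecEnv built above
    device="cuda",
    verbose=1,
    n_steps=5400,
    batch_size=64,
    learning_rate=0.0003,
)
model.learn(total_timesteps=1_000_000)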
BIN __pycache__/mobilenet_extractor.cpython-38.pyc Normal file
Binary file not shown.
@@ -3,7 +3,6 @@ import torch
 import torch.nn as nn
 from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
-from torchvision.models import mobilenet_v3_small

 # Custom feature extractor (CNN)
 class CustomCNN(BaseFeaturesExtractor):
     def __init__(self, observation_space: gym.Space):
@@ -1,13 +1,18 @@
 import gym
 import cv2
 import numpy as np
+import torch
+from torchvision.transforms import Normalize
 from gym.spaces import MultiBinary

 # Custom environment wrapper
 class StreetFighterCustomWrapper(gym.Wrapper):
     def __init__(self, env, win_template, lose_template, testing=False, threshold=0.65):
         super(StreetFighterCustomWrapper, self).__init__(env)
-        self.win_template = win_template
-        self.lose_template = lose_template
         self.action_space = MultiBinary(12)
+
+        # self.win_template = win_template
+        # self.lose_template = lose_template
         self.threshold = threshold
+        self.game_screen_gray = None
+
@@ -15,24 +20,46 @@ class StreetFighterCustomWrapper(gym.Wrapper):
         self.prev_opponent_health = 1.0

         # Update observation space to single-channel grayscale image
+        # self.observation_space = gym.spaces.Box(
+        #     low=0.0, high=1.0, shape=(84, 84, 1), dtype=np.float32
+        # )
+
+        # observation_space for mobilenet
         self.observation_space = gym.spaces.Box(
-            low=0.0, high=1.0, shape=(84, 84, 1), dtype=np.float32
+            low=0.0, high=1.0, shape=(3, 96, 96), dtype=np.float32
         )

         self.testing = testing

+        # Normalize the image for MobileNetV3Small.
+        self.normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+
     def _preprocess_observation(self, observation):
+        # self.game_screen_gray = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY)
+        # resized_image = cv2.resize(self.game_screen_gray, (84, 84), interpolation=cv2.INTER_AREA) / 255.0
+        # return np.expand_dims(resized_image, axis=-1)
+
+        # # Using MobileNetV3Small.
-        self.game_screen_gray = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY)
-        resized_image = cv2.resize(self.game_screen_gray, (84, 84), interpolation=cv2.INTER_AREA) / 255.0
-        return np.expand_dims(resized_image, axis=-1)
+        resized_image = cv2.resize(observation, (96, 96), interpolation=cv2.INTER_AREA).astype(np.float32) / 255.0
+
+        # Convert the NumPy array to a PyTorch tensor
+        resized_image = torch.from_numpy(resized_image).permute(2, 0, 1)
+
+        # Apply normalization
+        resized_image = self.normalize(resized_image)
+
+        # # Add a batch dimension to match the model input shape
+        # # resized_image = resized_image.unsqueeze(0)
+        return resized_image

     def _get_win_or_lose_bonus(self):
         if self.prev_player_health > self.prev_opponent_health:
             # print('You win!')
-            return 200
+            return 300
         else:
             # print('You lose!')
-            return -200
+            return -300

     def _get_reward(self):
         player_health_area = self.game_screen_gray[15:20, 32:120]
@@ -45,7 +72,11 @@ class StreetFighterCustomWrapper(gym.Wrapper):
         player_health_diff = self.prev_player_health - player_health
         opponent_health_diff = self.prev_opponent_health - opponent_health

-        reward = (opponent_health_diff - player_health_diff) * 100 # max would be 100
+        reward = (opponent_health_diff - player_health_diff) * 200 # max would be 200
+
+        # Penalty for each step without any change in health
+        if opponent_health_diff <= 0.0000001:
+            reward -= 12.0 / 60.0 # -12 points per second if no damage to opponent

         self.prev_player_health = player_health
         self.prev_opponent_health = opponent_health
@@ -62,8 +93,9 @@ class StreetFighterCustomWrapper(gym.Wrapper):

     def step(self, action):
         # observation, _, _, info = self.env.step(action)
-        observation, _reward, _done, info = self.env.step(action)
+        observation, _reward, _done, info = self.env.step(self.env.action_space.sample())
         custom_reward = self._get_reward()
+        custom_reward -= 1.0 / 60.0 # penalty for each step (-1 points per second)

         custom_done = False
         if self.prev_player_health <= 0.00001 or self.prev_opponent_health <= 0.00001:
@@ -74,4 +106,5 @@ class StreetFighterCustomWrapper(gym.Wrapper):
                 self.prev_player_health = 1.0
                 self.prev_opponent_health = 1.0

-        return self._preprocess_observation(observation), custom_reward, custom_done, info
+        return self._preprocess_observation(observation), custom_reward, custom_done, info
+
21 mobilenet_extractor.py Normal file
@@ -0,0 +1,21 @@
import gym
import torch
import torch.nn as nn
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
from torchvision.models import mobilenet_v3_small

# Custom MobileNetV3 feature extractor
class MobileNetV3Extractor(BaseFeaturesExtractor):
    def __init__(self, observation_space: gym.Space):
        super(MobileNetV3Extractor, self).__init__(observation_space, features_dim=256)
        self.mobilenet = mobilenet_v3_small(pretrained=True)
        self.adaptive_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(576, self.features_dim)  # MobileNetV3-Small's last feature map has 576 channels

    def forward(self, observations: torch.Tensor) -> torch.Tensor:
        # x = observations.permute(0, 2, 3, 1)  # Swap the channel dimension
        x = self.mobilenet.features(observations)
        x = self.adaptive_pool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return x
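A quick shape check for this extractor, assuming the (3, 96, 96) observations produced by the MobileNet variant of the wrapper:

import gym
import torch
from mobilenet_extractor import MobileNetV3Extractor

obs_space = gym.spaces.Box(low=0.0, high=1.0, shape=(3, 96, 96))
extractor = MobileNetV3Extractor(obs_space)

with torch.no_grad():
    features = extractor(torch.zeros(2, 3, 96, 96))
print(features.shape)  # torch.Size([2, 256])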
18 mobilenet_extractor_no_condensing.py Normal file
@@ -0,0 +1,18 @@
import gym
import torch
import torchvision
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor

# Custom MobileNetV3 feature extractor
class MobileNetV3Extractor(BaseFeaturesExtractor):
    def __init__(self, observation_space: gym.Space):
        super(MobileNetV3Extractor, self).__init__(observation_space, features_dim=576)
        self.mobilenet = torchvision.models.mobilenet_v3_small(pretrained=True)
        # Drop the classifier head, keeping only the convolutional layers and pooling.
        self.mobilenet = torch.nn.Sequential(*list(self.mobilenet.children())[:-1])
        self.adaptive_pool = torch.nn.AdaptiveAvgPool2d((1, 1))

    def forward(self, observations: torch.Tensor) -> torch.Tensor:
        x = self.mobilenet(observations)
        x = self.adaptive_pool(x)
        x = torch.flatten(x, 1)
        return x
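Since this variant skips the condensing Linear layer, its output dimension is MobileNetV3-Small's 576 feature channels rather than 256; a quick check mirroring the earlier one:

import gym
import torch
from mobilenet_extractor_no_condensing import MobileNetV3Extractor

obs_space = gym.spaces.Box(low=0.0, high=1.0, shape=(3, 96, 96))
extractor = MobileNetV3Extractor(obs_space)

with torch.no_grad():
    features = extractor(torch.zeros(2, 3, 96, 96))
print(features.shape)  # torch.Size([2, 576])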
@@ -9,13 +9,19 @@ from stable_baselines3 import PPO
 from stable_baselines3.common.vec_env import DummyVecEnv

 from custom_cnn import CustomCNN
+from mobilenet_extractor import MobileNetV3Extractor
 from custom_sf2_cv_env import StreetFighterCustomWrapper

 def make_env(game, state, seed=0):
     def _init():
         win_template = cv2.imread('images/pattern_win_gray.png', cv2.IMREAD_GRAYSCALE)
         lose_template = cv2.imread('images/pattern_lose_gray.png', cv2.IMREAD_GRAYSCALE)
-        env = retro.RetroEnv(game=game, state=state, obs_type=retro.Observations.IMAGE)
+        env = retro.RetroEnv(
+            game=game,
+            state=state,
+            use_restricted_actions=retro.Actions.FILTERED,
+            obs_type=retro.Observations.IMAGE
+        )
         env = StreetFighterCustomWrapper(env, win_template, lose_template, testing=True)
         # env.seed(seed)
         return env
@@ -43,9 +49,15 @@ env = make_env(game, state_stages[0])()
 # Wrap the environment
 env = DummyVecEnv([lambda: env])

+# policy_kwargs = {
+#     'features_extractor_class': CustomCNN
+# }
+
+# Using MobileNetV3 as the feature extractor
 policy_kwargs = {
-    'features_extractor_class': CustomCNN
+    'features_extractor_class': MobileNetV3Extractor
 }

 model = PPO(
     "CnnPolicy",
     env,
@@ -53,7 +65,7 @@ model = PPO(
     policy_kwargs=policy_kwargs,
     verbose=1
 )
-model.load(r"trained_models_cv_test/ppo_sf2_chunli_final")
+model.load(r"trained_models_cv_mobilenet_time_penalty/ppo_chunli_1296000_steps")

 obs = env.reset()
 done = False
@@ -6,13 +6,12 @@ import cv2
 import retro
 import numpy as np
 from stable_baselines3 import PPO
-from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
+from stable_baselines3.common.vec_env import SubprocVecEnv
 from stable_baselines3.common.preprocessing import is_image_space, is_image_space_channels_first
 from stable_baselines3.common.callbacks import BaseCallback, CheckpointCallback
-import torch
-import torch.nn as nn

 from custom_cnn import CustomCNN
+from mobilenet_extractor import MobileNetV3Extractor
 from custom_sf2_cv_env import StreetFighterCustomWrapper

 class RandomOpponentChangeCallback(BaseCallback):
@@ -32,7 +31,12 @@ def make_env(game, state, seed=0):
     def _init():
         win_template = cv2.imread('images/pattern_win_gray.png', cv2.IMREAD_GRAYSCALE)
         lose_template = cv2.imread('images/pattern_lose_gray.png', cv2.IMREAD_GRAYSCALE)
-        env = retro.RetroEnv(game=game, state=state, obs_type=retro.Observations.IMAGE)
+        env = retro.RetroEnv(
+            game=game,
+            state=state,
+            use_restricted_actions=retro.Actions.FILTERED,
+            obs_type=retro.Observations.IMAGE
+        )
         env = StreetFighterCustomWrapper(env, win_template, lose_template)
         # env.seed(seed)
         return env
@@ -42,28 +46,35 @@ def main():
     # Set up the environment and model
     game = "StreetFighterIISpecialChampionEdition-Genesis"
     state_stages = [
-        "Champion.Level1.ChunLiVsGuile",
-        "Champion.Level2.ChunLiVsKen",
-        "Champion.Level3.ChunLiVsChunLi",
-        "Champion.Level4.ChunLiVsZangief",
-        "Champion.Level5.ChunLiVsDhalsim",
-        "Champion.Level6.ChunLiVsRyu",
-        "Champion.Level7.ChunLiVsEHonda",
-        "Champion.Level8.ChunLiVsBlanka",
-        "Champion.Level9.ChunLiVsBalrog",
-        "Champion.Level10.ChunLiVsVega",
-        "Champion.Level11.ChunLiVsSagat",
-        "Champion.Level12.ChunLiVsBison"
+        "ChampionX.Level1.ChunLiVsKen",
+        "ChampionX.Level2.ChunLiVsChunLi",
+        "ChampionX.Level3.ChunLiVsZangief",
+        "ChampionX.Level4.ChunLiVsDhalsim",
+        "ChampionX.Level5.ChunLiVsRyu",
+        "ChampionX.Level6.ChunLiVsEHonda",
+        "ChampionX.Level7.ChunLiVsBlanka",
+        "ChampionX.Level8.ChunLiVsGuile",
+        "ChampionX.Level9.ChunLiVsBalrog",
+        "ChampionX.Level10.ChunLiVsVega",
+        "ChampionX.Level11.ChunLiVsSagat",
+        "ChampionX.Level12.ChunLiVsBison"
         # Add other stages as necessary
     ]
+    # Champion is at difficulty level 4, ChampionX is at difficulty level 8.

     num_envs = 8

     # env = SubprocVecEnv([make_env(game, state_stages[0], seed=i) for i in range(num_envs)])
     env = SubprocVecEnv([make_env(game, state_stages[0], seed=i) for i in range(num_envs)])

     # Using CustomCNN as the feature extractor
+    # policy_kwargs = {
+    #     'features_extractor_class': CustomCNN
+    # }
+
+    # Using MobileNetV3 as the feature extractor
     policy_kwargs = {
-        'features_extractor_class': CustomCNN
+        'features_extractor_class': MobileNetV3Extractor
     }

     model = PPO(
@@ -87,7 +98,7 @@ def main():
     )

     # Set the save directory
-    save_dir = "trained_models_cv_level6up"
+    save_dir = "trained_models_cv_customcnn_time_penalty"
     os.makedirs(save_dir, exist_ok=True)

     # Set up callbacks
291 trained_models_cv_mobilenet_random/train_log.txt Normal file
@@ -0,0 +1,291 @@
(StreetFighterAI) PS C:\Users\unitec\Documents\AIProjects\street-fighter-ai> python .\train_cv_sf2_ai.py
Using cuda device

Current state: Champion.Level8.ChunLiVsBlanka
------------------------------
| time/ | |
| fps | 186 |
| iterations | 1 |
| time_elapsed | 115 |
| total_timesteps | 21600 |
------------------------------
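
These first-iteration figures are mutually consistent: with num_envs = 8, total_timesteps = 21600 after one iteration implies 21600 / 8 = 2700 steps collected per environment per rollout, i.e. PPO appears to run with n_steps = 2700 (the PPO(...) arguments themselves are elided above), and fps counts frames summed across all eight environments.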

Current state: Champion.Level12.ChunLiVsBison
------------------------------------------
| time/ | |
| fps | 65 |
| iterations | 2 |
| time_elapsed | 655 |
| total_timesteps | 43200 |
| train/ | |
| approx_kl | 0.0053780936 |
| clip_fraction | 0.0431 |
| clip_range | 0.2 |
| entropy_loss | -8.31 |
| explained_variance | 2.93e-05 |
| learning_rate | 0.0003 |
| loss | 166 |
| n_updates | 10 |
| policy_gradient_loss | -0.0016 |
| value_loss | 92 |
------------------------------------------
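
Reading these diagnostics: approx_kl around 0.005 with clip_fraction around 0.05 means each update changes the policy conservatively, while explained_variance near zero says the value function is not yet predicting returns, which fits the large early value_loss that falls steadily over the iterations below.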

Current state: Champion.Level6.ChunLiVsRyu
-----------------------------------------
| time/ | |
| fps | 54 |
| iterations | 3 |
| time_elapsed | 1196 |
| total_timesteps | 64800 |
| train/ | |
| approx_kl | 0.005385526 |
| clip_fraction | 0.0472 |
| clip_range | 0.2 |
| entropy_loss | -8.31 |
| explained_variance | -0.00129 |
| learning_rate | 0.0003 |
| loss | 0.0349 |
| n_updates | 20 |
| policy_gradient_loss | -0.000679 |
| value_loss | 58.4 |
-----------------------------------------

Current state: Champion.Level10.ChunLiVsVega
-----------------------------------------
| time/ | |
| fps | 49 |
| iterations | 4 |
| time_elapsed | 1736 |
| total_timesteps | 86400 |
| train/ | |
| approx_kl | 0.005108807 |
| clip_fraction | 0.0236 |
| clip_range | 0.2 |
| entropy_loss | -8.3 |
| explained_variance | 0.000588 |
| learning_rate | 0.0003 |
| loss | 0.236 |
| n_updates | 30 |
| policy_gradient_loss | -0.00157 |
| value_loss | 82.7 |
-----------------------------------------

Current state: Champion.Level12.ChunLiVsBison
------------------------------------------
| time/ | |
| fps | 47 |
| iterations | 5 |
| time_elapsed | 2274 |
| total_timesteps | 108000 |
| train/ | |
| approx_kl | 0.0060475296 |
| clip_fraction | 0.054 |
| clip_range | 0.2 |
| entropy_loss | -8.3 |
| explained_variance | 0.00555 |
| learning_rate | 0.0003 |
| loss | 0.536 |
| n_updates | 40 |
| policy_gradient_loss | -0.000733 |
| value_loss | 40.5 |
------------------------------------------

Current state: Champion.Level4.ChunLiVsZangief
------------------------------------------
| time/ | |
| fps | 46 |
| iterations | 6 |
| time_elapsed | 2811 |
| total_timesteps | 129600 |
| train/ | |
| approx_kl | 0.0059517785 |
| clip_fraction | 0.0546 |
| clip_range | 0.2 |
| entropy_loss | -8.29 |
| explained_variance | 0.0188 |
| learning_rate | 0.0003 |
| loss | 7.26 |
| n_updates | 50 |
| policy_gradient_loss | -0.00102 |
| value_loss | 21.8 |
------------------------------------------

Current state: Champion.Level7.ChunLiVsEHonda
------------------------------------------
| time/ | |
| fps | 45 |
| iterations | 7 |
| time_elapsed | 3348 |
| total_timesteps | 151200 |
| train/ | |
| approx_kl | 0.0057667145 |
| clip_fraction | 0.0337 |
| clip_range | 0.2 |
| entropy_loss | -8.29 |
| explained_variance | 0.0069 |
| learning_rate | 0.0003 |
| loss | 0.832 |
| n_updates | 60 |
| policy_gradient_loss | -0.00133 |
| value_loss | 36.7 |
------------------------------------------

Current state: Champion.Level8.ChunLiVsBlanka
------------------------------------------
| time/ | |
| fps | 44 |
| iterations | 8 |
| time_elapsed | 3883 |
| total_timesteps | 172800 |
| train/ | |
| approx_kl | 0.0047547054 |
| clip_fraction | 0.0291 |
| clip_range | 0.2 |
| entropy_loss | -8.29 |
| explained_variance | 0.0128 |
| learning_rate | 0.0003 |
| loss | 0.424 |
| n_updates | 70 |
| policy_gradient_loss | -0.00186 |
| value_loss | 21.2 |
------------------------------------------

Current state: Champion.Level8.ChunLiVsBlanka
------------------------------------------
| time/ | |
| fps | 43 |
| iterations | 9 |
| time_elapsed | 4422 |
| total_timesteps | 194400 |
| train/ | |
| approx_kl | 0.0063244957 |
| clip_fraction | 0.049 |
| clip_range | 0.2 |
| entropy_loss | -8.28 |
| explained_variance | 0.00417 |
| learning_rate | 0.0003 |
| loss | 0.146 |
| n_updates | 80 |
| policy_gradient_loss | -0.00167 |
| value_loss | 17 |
------------------------------------------

Current state: Champion.Level8.ChunLiVsBlanka
-----------------------------------------
| time/ | |
| fps | 43 |
| iterations | 10 |
| time_elapsed | 4960 |
| total_timesteps | 216000 |
| train/ | |
| approx_kl | 0.008022586 |
| clip_fraction | 0.0632 |
| clip_range | 0.2 |
| entropy_loss | -8.29 |
| explained_variance | 0.0241 |
| learning_rate | 0.0003 |
| loss | 0.902 |
| n_updates | 90 |
| policy_gradient_loss | -0.00295 |
| value_loss | 13.5 |
-----------------------------------------

Current state: Champion.Level10.ChunLiVsVega
-----------------------------------------
| time/ | |
| fps | 43 |
| iterations | 11 |
| time_elapsed | 5499 |
| total_timesteps | 237600 |
| train/ | |
| approx_kl | 0.006470734 |
| clip_fraction | 0.0596 |
| clip_range | 0.2 |
| entropy_loss | -8.3 |
| explained_variance | 0.0611 |
| learning_rate | 0.0003 |
| loss | 5.72 |
| n_updates | 100 |
| policy_gradient_loss | -0.000976 |
| value_loss | 8.5 |
-----------------------------------------

Current state: Champion.Level7.ChunLiVsEHonda
----------------------------------------
| time/ | |
| fps | 42 |
| iterations | 12 |
| time_elapsed | 6040 |
| total_timesteps | 259200 |
| train/ | |
| approx_kl | 0.00540813 |
| clip_fraction | 0.0547 |
| clip_range | 0.2 |
| entropy_loss | -8.29 |
| explained_variance | -0.0271 |
| learning_rate | 0.0003 |
| loss | 0.75 |
| n_updates | 110 |
| policy_gradient_loss | -0.00104 |
| value_loss | 9.12 |
----------------------------------------

Current state: Champion.Level8.ChunLiVsBlanka
---------------------------------------
| time/ | |
| fps | 42 |
| iterations | 13 |
| time_elapsed | 6582 |
| total_timesteps | 280800 |
| train/ | |
| approx_kl | 0.0058568 |
| clip_fraction | 0.0608 |
| clip_range | 0.2 |
| entropy_loss | -8.29 |
| explained_variance | 0.0277 |
| learning_rate | 0.0003 |
| loss | 0.256 |
| n_updates | 120 |
| policy_gradient_loss | -0.00284 |
| value_loss | 6.99 |
---------------------------------------

Current state: Champion.Level2.ChunLiVsKen
------------------------------------------
| time/ | |
| fps | 42 |
| iterations | 14 |
| time_elapsed | 7126 |
| total_timesteps | 302400 |
| train/ | |
| approx_kl | 0.0066813217 |
| clip_fraction | 0.0554 |
| clip_range | 0.2 |
| entropy_loss | -8.3 |
| explained_variance | 0.0689 |
| learning_rate | 0.0003 |
| loss | 0.504 |
| n_updates | 130 |
| policy_gradient_loss | -0.00243 |
| value_loss | 9.48 |
------------------------------------------

Current state: Champion.Level8.ChunLiVsBlanka
------------------------------------------
| time/ | |
| fps | 42 |
| iterations | 15 |
| time_elapsed | 7670 |
| total_timesteps | 324000 |
| train/ | |
| approx_kl | 0.0069995625 |
| clip_fraction | 0.0637 |
| clip_range | 0.2 |
| entropy_loss | -8.3 |
| explained_variance | -0.0336 |
| learning_rate | 0.0003 |
| loss | -0.000532 |
| n_updates | 140 |
| policy_gradient_loss | -0.00198 |
| value_loss | 9.39 |
------------------------------------------
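
The next file in this commit, 2,011 lines of CustomCNN training logs, is suppressed by the diff viewer. For comparing runs like these two, a few lines of Python suffice to pull a learning curve out of an SB3 console log (a throwaway sketch; the regexes assume the table format shown above):

import re

def parse_sb3_log(path):
    # Extract (total_timesteps, value_loss) pairs from an SB3 console log.
    steps, losses = [], []
    text = open(path, encoding="utf-8").read()
    for block in text.split("Current state:")[1:]:
        ts = re.search(r"total_timesteps\s*\|\s*([\d.e+-]+)", block)
        vl = re.search(r"value_loss\s*\|\s*([\d.e+-]+)", block)
        if ts and vl:
            steps.append(int(float(ts.group(1))))
            losses.append(float(vl.group(1)))
    return steps, losses

print(parse_sb3_log("trained_models_cv_mobilenet_random/train_log.txt"))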
2011
trained_models_cv_random/trainin_logs_CustomCNN_random.txt
Normal file
File diff suppressed because it is too large