image stack and lstm

linyiLYi 2023-03-30 01:14:39 +08:00
parent 79e148675a
commit d4fb6dbc59
24 changed files with 3704 additions and 32 deletions

Binary file not shown.

@@ -0,0 +1,24 @@
import gym
import torch
import torch.nn as nn
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor

# Custom feature extractor (CNN)
class CustomCNN(BaseFeaturesExtractor):
    def __init__(self, observation_space: gym.Space):
        super(CustomCNN, self).__init__(observation_space, features_dim=512)
        self.cnn = nn.Sequential(
            nn.Conv2d(4, 32, kernel_size=5, stride=2, padding=0),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=5, stride=2, padding=0),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=0),
            nn.ReLU(),
            nn.Flatten(),
            nn.Linear(16384, self.features_dim),
            nn.ReLU()
        )

    def forward(self, observations: torch.Tensor) -> torch.Tensor:
        return self.cnn(observations)
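The hard-coded Linear(16384, ...) input size comes from pushing a 4x84x84 observation through the three convolutions (spatial size 84 -> 40 -> 18 -> 16 with 64 output channels). A minimal sketch, not part of this commit, that verifies the number:

import torch
import torch.nn as nn

# Sketch: confirm that a (4, 84, 84) observation flattens to 64 * 16 * 16 = 16384 features.
conv_stack = nn.Sequential(
    nn.Conv2d(4, 32, kernel_size=5, stride=2),   # 84 -> 40
    nn.Conv2d(32, 64, kernel_size=5, stride=2),  # 40 -> 18
    nn.Conv2d(64, 64, kernel_size=3, stride=1),  # 18 -> 16
    nn.Flatten(),
)
print(conv_stack(torch.zeros(1, 4, 84, 84)).shape)  # torch.Size([1, 16384])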

@@ -0,0 +1,106 @@
import collections
import gym
import cv2
import numpy as np
import torch
from torchvision.transforms import Normalize
from gym.spaces import MultiBinary

# Custom environment wrapper
class StreetFighterCustomWrapper(gym.Wrapper):
    def __init__(self, env, testing=False, threshold=0.65):
        super(StreetFighterCustomWrapper, self).__init__(env)
        self.action_space = MultiBinary(12)

        # Use a deque to store the last 4 frames
        self.frame_stack = collections.deque(maxlen=4)

        self.threshold = threshold
        self.game_screen_gray = None

        self.prev_player_health = 1.0
        self.prev_opponent_health = 1.0

        # Update observation space to include 4 stacked grayscale images
        self.observation_space = gym.spaces.Box(
            low=0.0, high=1.0, shape=(4, 84, 84), dtype=np.float32
        )

        self.testing = testing

        # Normalize the image for MobileNetV3Small.
        self.normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    def _preprocess_observation(self, observation):
        self.game_screen_gray = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY)
        resized_image = cv2.resize(self.game_screen_gray, (84, 84), interpolation=cv2.INTER_AREA) / 255.0
        # Add the resized image to the frame stack
        self.frame_stack.append(resized_image)
        # Stack the last 4 frames and return the stacked frames
        stacked_frames = np.stack(self.frame_stack, axis=0)
        return stacked_frames

    def _get_win_or_lose_bonus(self):
        if self.prev_player_health > self.prev_opponent_health:
            # print('You win!')
            return 300
        else:
            # print('You lose!')
            return -300

    def _get_reward(self):
        player_health_area = self.game_screen_gray[15:20, 32:120]
        oppoent_health_area = self.game_screen_gray[15:20, 136:224]

        # Get health points using the number of pixels above 129.
        player_health = np.sum(player_health_area > 129) / player_health_area.size
        opponent_health = np.sum(oppoent_health_area > 129) / oppoent_health_area.size

        player_health_diff = self.prev_player_health - player_health
        opponent_health_diff = self.prev_opponent_health - opponent_health

        reward = (opponent_health_diff - player_health_diff) * 200  # max would be 200

        # Penalty for each step without any change in health
        if opponent_health_diff <= 0.0000001:
            reward -= 12.0 / 60.0  # -12 points per second if no damage to opponent

        self.prev_player_health = player_health
        self.prev_opponent_health = opponent_health

        # Print the health values of the player and the opponent
        # print("Player health: %f Opponent health:%f" % (player_health, opponent_health))

        return reward

    def reset(self):
        observation = self.env.reset()
        self.prev_player_health = 1.0
        self.prev_opponent_health = 1.0

        # Clear the frame stack and add the first observation 4 times
        self.frame_stack.clear()
        for _ in range(4):
            self.frame_stack.append(self._preprocess_observation(observation)[0])

        return self._preprocess_observation(observation)

    def step(self, action):
        # observation, _, _, info = self.env.step(action)
        observation, _reward, _done, info = self.env.step(self.env.action_space.sample())
        custom_reward = self._get_reward()
        custom_reward -= 1.0 / 60.0  # penalty for each step (-1 points per second)

        custom_done = False
        if self.prev_player_health <= 0.00001 or self.prev_opponent_health <= 0.00001:
            custom_reward += self._get_win_or_lose_bonus()
            if not self.testing:
                custom_done = True
            else:
                self.prev_player_health = 1.0
                self.prev_opponent_health = 1.0

        return self._preprocess_observation(observation), custom_reward, custom_done, info
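The reward signal is read straight off the pixels: the grayscale slices [15:20, 32:120] and [15:20, 136:224] cover the two health bars, and the fraction of pixels brighter than 129 is taken as the remaining health. A standalone sketch of that idea (not part of this commit), using a synthetic frame instead of an emulator screenshot; the exact screen resolution does not matter for the slice arithmetic:

import numpy as np

# Synthetic grayscale frame standing in for the game screen.
frame = np.zeros((224, 320), dtype=np.uint8)
frame[15:20, 32:120] = 200   # player bar fully lit
frame[15:20, 136:180] = 200  # opponent bar only half lit

player_area = frame[15:20, 32:120]
opponent_area = frame[15:20, 136:224]
print(np.sum(player_area > 129) / player_area.size)      # 1.0 -> full health
print(np.sum(opponent_area > 129) / opponent_area.size)  # 0.5 -> half health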

001_image_stack/test.py (new file)
@@ -0,0 +1,70 @@
import time
import cv2
import retro
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from custom_cnn import CustomCNN
from street_fighter_custom_wrapper import StreetFighterCustomWrapper

def make_env(game, state):
    def _init():
        env = retro.RetroEnv(
            game=game,
            state=state,
            use_restricted_actions=retro.Actions.FILTERED,
            obs_type=retro.Observations.IMAGE
        )
        env = StreetFighterCustomWrapper(env, testing=True)
        return env
    return _init

game = "StreetFighterIISpecialChampionEdition-Genesis"
state_stages = [
    "Champion.Level1.ChunLiVsGuile",
    "Champion.Level2.ChunLiVsKen",
    "Champion.Level3.ChunLiVsChunLi",
    "Champion.Level4.ChunLiVsZangief",
    "Champion.Level5.ChunLiVsDhalsim",
    "Champion.Level6.ChunLiVsRyu",
    "Champion.Level7.ChunLiVsEHonda",
    "Champion.Level8.ChunLiVsBlanka",
    "Champion.Level9.ChunLiVsBalrog",
    "Champion.Level10.ChunLiVsVega",
    "Champion.Level11.ChunLiVsSagat",
    "Champion.Level12.ChunLiVsBison"
    # Add other stages as necessary
]

env = make_env(game, state_stages[0])()

# Wrap the environment
env = DummyVecEnv([lambda: env])

policy_kwargs = {
    'features_extractor_class': CustomCNN
}

model = PPO(
    "CnnPolicy",
    env,
    device="cuda",
    policy_kwargs=policy_kwargs,
    verbose=1
)
model.load(r"trained_models_continued/ppo_chunli_432000_steps")

obs = env.reset()
done = False

while True:
    timestamp = time.time()
    action, _ = model.predict(obs)
    obs, rewards, done, info = env.step(action)
    env.render()
    render_time = time.time() - timestamp
    if render_time < 0.0111:
        time.sleep(0.0111 - render_time)  # Add a delay for 90 FPS

# env.close()
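One Stable-Baselines3 detail worth keeping in mind for this script: PPO.load is a classmethod that returns a new model, so calling model.load(...) on an existing instance leaves that instance's weights untouched. The train.py in this commit already uses the returning form; the equivalent for this test script would be a sketch like:

# Sketch: bind the loaded checkpoint to the model actually used for prediction.
model = PPO.load(r"trained_models_continued/ppo_chunli_432000_steps", env=env, device="cuda")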

001_image_stack/train.py (new file)
@@ -0,0 +1,123 @@
import os
import random
import gym
import cv2
import retro
import numpy as np
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import SubprocVecEnv
from stable_baselines3.common.preprocessing import is_image_space, is_image_space_channels_first
from stable_baselines3.common.callbacks import BaseCallback, CheckpointCallback
from custom_cnn import CustomCNN
from street_fighter_custom_wrapper import StreetFighterCustomWrapper

class RandomOpponentChangeCallback(BaseCallback):
    def __init__(self, stages, opponent_interval, verbose=0):
        super(RandomOpponentChangeCallback, self).__init__(verbose)
        self.stages = stages
        self.opponent_interval = opponent_interval

    def _on_step(self) -> bool:
        if self.n_calls % self.opponent_interval == 0:
            new_state = random.choice(self.stages)
            print("\nCurrent state:", new_state)
            self.training_env.env_method("load_state", new_state, indices=None)
        return True

def make_env(game, state, seed=0):
    def _init():
        env = retro.RetroEnv(
            game=game,
            state=state,
            use_restricted_actions=retro.Actions.FILTERED,
            obs_type=retro.Observations.IMAGE
        )
        env = StreetFighterCustomWrapper(env)
        env.seed(seed)
        return env
    return _init

def main():
    # Set up the environment and model
    game = "StreetFighterIISpecialChampionEdition-Genesis"
    state_stages = [
        "ChampionX.Level1.ChunLiVsKen",
        "ChampionX.Level2.ChunLiVsChunLi",
        "ChampionX.Level3.ChunLiVsZangief",
        "ChampionX.Level4.ChunLiVsDhalsim",
        "ChampionX.Level5.ChunLiVsRyu",
        "ChampionX.Level6.ChunLiVsEHonda",
        "ChampionX.Level7.ChunLiVsBlanka",
        "ChampionX.Level8.ChunLiVsGuile",
        "ChampionX.Level9.ChunLiVsBalrog",
        "ChampionX.Level10.ChunLiVsVega",
        "ChampionX.Level11.ChunLiVsSagat",
        "ChampionX.Level12.ChunLiVsBison"
        # Add other stages as necessary
    ]
    # Champion is at difficulty level 4, ChampionX is at difficulty level 8.

    num_envs = 8

    # env = SubprocVecEnv([make_env(game, state_stages[0], seed=i) for i in range(num_envs)])
    env = SubprocVecEnv([make_env(game, state_stages[0], seed=i) for i in range(num_envs)])

    # Using CustomCNN as the feature extractor
    policy_kwargs = {
        'features_extractor_class': CustomCNN
    }

    model = PPO(
        "CnnPolicy",
        env,
        device="cuda",
        policy_kwargs=policy_kwargs,
        verbose=1,
        n_steps=5400,
        batch_size=64,
        n_epochs=10,
        learning_rate=0.0003,
        ent_coef=0.01,
        clip_range=0.2,
        clip_range_vf=None,
        gamma=0.99,
        gae_lambda=0.95,
        max_grad_norm=0.5,
        use_sde=False,
        sde_sample_freq=-1
    )

    # Set the save directory
    save_dir = "trained_models_continued"
    os.makedirs(save_dir, exist_ok=True)

    # Load the model from file
    # Change the path to the actual path of the model file
    model_path = "trained_models/ppo_chunli_1296000_steps.zip"

    # Load model and modify the learning rate and entropy coefficient
    custom_objects = {
        "learning_rate": 0.00005,
        "ent_coef": 0.2
    }
    model = PPO.load(model_path, env=env, device="cuda", custom_objects=custom_objects)

    # Set up callbacks
    opponent_interval = 5400  # stage_interval * num_envs = total_steps_per_stage
    checkpoint_interval = 54000  # checkpoint_interval * num_envs = total_steps_per_checkpoint (Every 80 rounds)
    checkpoint_callback = CheckpointCallback(save_freq=checkpoint_interval, save_path=save_dir, name_prefix="ppo_chunli")
    stage_increase_callback = RandomOpponentChangeCallback(state_stages, opponent_interval, save_dir)

    model.learn(
        total_timesteps=int(6048000),  # total_timesteps = stage_interval * num_envs * num_stages (1120 rounds)
        callback=[checkpoint_callback, stage_increase_callback]
    )

    # Save the final model
    model.save(os.path.join(save_dir, "ppo_sf2_chunli_final.zip"))

if __name__ == "__main__":
    main()
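The interval comments count per-environment callback steps, so the numbers that show up in checkpoint names and in the training log are those values multiplied by num_envs. A quick check of the arithmetic (which is also why the test script above loads ppo_chunli_432000_steps):

num_envs = 8
opponent_interval = 5400     # callback steps between opponent changes
checkpoint_interval = 54000  # callback steps between checkpoints

print(opponent_interval * num_envs)    # 43200 env steps per opponent change (one log iteration)
print(checkpoint_interval * num_envs)  # 432000 env steps per checkpoint file
print(6048000 // (opponent_interval * num_envs))  # 140 opponent changes over the full run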

@@ -0,0 +1,631 @@
(StreetFighterAI) PS C:\Users\unitec\Documents\AIProjects\street-fighter-ai\001_image_stack> python .\train.py
Using cuda device
Current state: ChampionX.Level4.ChunLiVsDhalsim
------------------------------
| time/ | |
| fps | 1489 |
| iterations | 1 |
| time_elapsed | 28 |
| total_timesteps | 43200 |
------------------------------
Current state: ChampionX.Level6.ChunLiVsEHonda
-----------------------------------------
| time/ | |
| fps | 693 |
| iterations | 2 |
| time_elapsed | 124 |
| total_timesteps | 86400 |
| train/ | |
| approx_kl | 0.008018286 |
| clip_fraction | 0.0528 |
| clip_range | 0.2 |
| entropy_loss | -8.31 |
| explained_variance | -0.000782 |
| learning_rate | 0.0003 |
| loss | 189 |
| n_updates | 10 |
| policy_gradient_loss | -0.00354 |
| value_loss | 398 |
-----------------------------------------
Current state: ChampionX.Level1.ChunLiVsKen
-----------------------------------------
| time/ | |
| fps | 476 |
| iterations | 3 |
| time_elapsed | 271 |
| total_timesteps | 129600 |
| train/ | |
| approx_kl | 0.010610209 |
| clip_fraction | 0.119 |
| clip_range | 0.2 |
| entropy_loss | -8.3 |
| explained_variance | 0.0323 |
| learning_rate | 0.0003 |
| loss | 0.228 |
| n_updates | 20 |
| policy_gradient_loss | -0.00663 |
| value_loss | 103 |
-----------------------------------------
Current state: ChampionX.Level10.ChunLiVsVega
-----------------------------------------
| time/ | |
| fps | 467 |
| iterations | 4 |
| time_elapsed | 369 |
| total_timesteps | 172800 |
| train/ | |
| approx_kl | 0.011115557 |
| clip_fraction | 0.122 |
| clip_range | 0.2 |
| entropy_loss | -8.3 |
| explained_variance | 0.125 |
| learning_rate | 0.0003 |
| loss | 5.37 |
| n_updates | 30 |
| policy_gradient_loss | -0.00485 |
| value_loss | 83.8 |
-----------------------------------------
Current state: ChampionX.Level11.ChunLiVsSagat
-----------------------------------------
| time/ | |
| fps | 462 |
| iterations | 5 |
| time_elapsed | 466 |
| total_timesteps | 216000 |
| train/ | |
| approx_kl | 0.012769428 |
| clip_fraction | 0.133 |
| clip_range | 0.2 |
| entropy_loss | -8.29 |
| explained_variance | 0.409 |
| learning_rate | 0.0003 |
| loss | 18.4 |
| n_updates | 40 |
| policy_gradient_loss | -0.00746 |
| value_loss | 31.6 |
-----------------------------------------
Current state: ChampionX.Level7.ChunLiVsBlanka
-----------------------------------------
| time/ | |
| fps | 460 |
| iterations | 6 |
| time_elapsed | 563 |
| total_timesteps | 259200 |
| train/ | |
| approx_kl | 0.014561476 |
| clip_fraction | 0.184 |
| clip_range | 0.2 |
| entropy_loss | -8.29 |
| explained_variance | 0.15 |
| learning_rate | 0.0003 |
| loss | 0.66 |
| n_updates | 50 |
| policy_gradient_loss | -0.00799 |
| value_loss | 23.2 |
-----------------------------------------
Current state: ChampionX.Level12.ChunLiVsBison
-----------------------------------------
| time/ | |
| fps | 447 |
| iterations | 7 |
| time_elapsed | 675 |
| total_timesteps | 302400 |
| train/ | |
| approx_kl | 0.013581872 |
| clip_fraction | 0.147 |
| clip_range | 0.2 |
| entropy_loss | -8.28 |
| explained_variance | 0.447 |
| learning_rate | 0.0003 |
| loss | 0.794 |
| n_updates | 60 |
| policy_gradient_loss | -0.00405 |
| value_loss | 33.4 |
-----------------------------------------
Current state: ChampionX.Level4.ChunLiVsDhalsim
-----------------------------------------
| time/ | |
| fps | 440 |
| iterations | 8 |
| time_elapsed | 784 |
| total_timesteps | 345600 |
| train/ | |
| approx_kl | 0.015053411 |
| clip_fraction | 0.186 |
| clip_range | 0.2 |
| entropy_loss | -8.28 |
| explained_variance | 0.39 |
| learning_rate | 0.0003 |
| loss | 0.313 |
| n_updates | 70 |
| policy_gradient_loss | -0.00594 |
| value_loss | 22.3 |
-----------------------------------------
Current state: ChampionX.Level5.ChunLiVsRyu
----------------------------------------
| time/ | |
| fps | 433 |
| iterations | 9 |
| time_elapsed | 896 |
| total_timesteps | 388800 |
| train/ | |
| approx_kl | 0.01407744 |
| clip_fraction | 0.152 |
| clip_range | 0.2 |
| entropy_loss | -8.27 |
| explained_variance | 0.326 |
| learning_rate | 0.0003 |
| loss | 0.396 |
| n_updates | 80 |
| policy_gradient_loss | -0.00862 |
| value_loss | 15.7 |
----------------------------------------
Current state: ChampionX.Level9.ChunLiVsBalrog
-----------------------------------------
| time/ | |
| fps | 428 |
| iterations | 10 |
| time_elapsed | 1007 |
| total_timesteps | 432000 |
| train/ | |
| approx_kl | 0.013460734 |
| clip_fraction | 0.148 |
| clip_range | 0.2 |
| entropy_loss | -8.27 |
| explained_variance | 0.384 |
| learning_rate | 0.0003 |
| loss | 0.227 |
| n_updates | 90 |
| policy_gradient_loss | -0.00498 |
| value_loss | 16.7 |
-----------------------------------------
Current state: ChampionX.Level9.ChunLiVsBalrog
------------------------------------------
| time/ | |
| fps | 424 |
| iterations | 11 |
| time_elapsed | 1118 |
| total_timesteps | 475200 |
| train/ | |
| approx_kl | 0.0145851895 |
| clip_fraction | 0.165 |
| clip_range | 0.2 |
| entropy_loss | -8.26 |
| explained_variance | 0.352 |
| learning_rate | 0.0003 |
| loss | 0.147 |
| n_updates | 100 |
| policy_gradient_loss | -0.00597 |
| value_loss | 19.8 |
------------------------------------------
Current state: ChampionX.Level4.ChunLiVsDhalsim
-----------------------------------------
| time/ | |
| fps | 424 |
| iterations | 12 |
| time_elapsed | 1219 |
| total_timesteps | 518400 |
| train/ | |
| approx_kl | 0.015144574 |
| clip_fraction | 0.161 |
| clip_range | 0.2 |
| entropy_loss | -8.25 |
| explained_variance | 0.383 |
| learning_rate | 0.0003 |
| loss | 1.52 |
| n_updates | 110 |
| policy_gradient_loss | -0.00749 |
| value_loss | 24.1 |
-----------------------------------------
Current state: ChampionX.Level6.ChunLiVsEHonda
-----------------------------------------
| time/ | |
| fps | 425 |
| iterations | 13 |
| time_elapsed | 1318 |
| total_timesteps | 561600 |
| train/ | |
| approx_kl | 0.015790801 |
| clip_fraction | 0.158 |
| clip_range | 0.2 |
| entropy_loss | -8.25 |
| explained_variance | 0.555 |
| learning_rate | 0.0003 |
| loss | 0.665 |
| n_updates | 120 |
| policy_gradient_loss | -0.00889 |
| value_loss | 20.7 |
-----------------------------------------
Current state: ChampionX.Level6.ChunLiVsEHonda
-----------------------------------------
| time/ | |
| fps | 426 |
| iterations | 14 |
| time_elapsed | 1417 |
| total_timesteps | 604800 |
| train/ | |
| approx_kl | 0.016785465 |
| clip_fraction | 0.173 |
| clip_range | 0.2 |
| entropy_loss | -8.24 |
| explained_variance | 0.609 |
| learning_rate | 0.0003 |
| loss | 0.313 |
| n_updates | 130 |
| policy_gradient_loss | -0.00758 |
| value_loss | 14.9 |
-----------------------------------------
Current state: ChampionX.Level10.ChunLiVsVega
-----------------------------------------
| time/ | |
| fps | 428 |
| iterations | 15 |
| time_elapsed | 1513 |
| total_timesteps | 648000 |
| train/ | |
| approx_kl | 0.017042443 |
| clip_fraction | 0.176 |
| clip_range | 0.2 |
| entropy_loss | -8.24 |
| explained_variance | 0.759 |
| learning_rate | 0.0003 |
| loss | 0.634 |
| n_updates | 140 |
| policy_gradient_loss | -0.00617 |
| value_loss | 15.4 |
-----------------------------------------
Current state: ChampionX.Level3.ChunLiVsZangief
-----------------------------------------
| time/ | |
| fps | 427 |
| iterations | 16 |
| time_elapsed | 1618 |
| total_timesteps | 691200 |
| train/ | |
| approx_kl | 0.017714709 |
| clip_fraction | 0.176 |
| clip_range | 0.2 |
| entropy_loss | -8.23 |
| explained_variance | 0.79 |
| learning_rate | 0.0003 |
| loss | 0.941 |
| n_updates | 150 |
| policy_gradient_loss | -0.00703 |
| value_loss | 17.5 |
-----------------------------------------
Current state: ChampionX.Level7.ChunLiVsBlanka
-----------------------------------------
| time/ | |
| fps | 424 |
| iterations | 17 |
| time_elapsed | 1728 |
| total_timesteps | 734400 |
| train/ | |
| approx_kl | 0.018709755 |
| clip_fraction | 0.196 |
| clip_range | 0.2 |
| entropy_loss | -8.22 |
| explained_variance | 0.746 |
| learning_rate | 0.0003 |
| loss | 0.505 |
| n_updates | 160 |
| policy_gradient_loss | -0.00795 |
| value_loss | 11.9 |
-----------------------------------------
Current state: ChampionX.Level11.ChunLiVsSagat
-----------------------------------------
| time/ | |
| fps | 423 |
| iterations | 18 |
| time_elapsed | 1837 |
| total_timesteps | 777600 |
| train/ | |
| approx_kl | 0.017850244 |
| clip_fraction | 0.182 |
| clip_range | 0.2 |
| entropy_loss | -8.21 |
| explained_variance | 0.687 |
| learning_rate | 0.0003 |
| loss | 0.379 |
| n_updates | 170 |
| policy_gradient_loss | -0.00568 |
| value_loss | 15.2 |
-----------------------------------------
Current state: ChampionX.Level3.ChunLiVsZangief
----------------------------------------
| time/ | |
| fps | 420 |
| iterations | 19 |
| time_elapsed | 1950 |
| total_timesteps | 820800 |
| train/ | |
| approx_kl | 0.02048213 |
| clip_fraction | 0.221 |
| clip_range | 0.2 |
| entropy_loss | -8.21 |
| explained_variance | 0.707 |
| learning_rate | 0.0003 |
| loss | 0.391 |
| n_updates | 180 |
| policy_gradient_loss | -0.00419 |
| value_loss | 13 |
----------------------------------------
Current state: ChampionX.Level11.ChunLiVsSagat
-----------------------------------------
| time/ | |
| fps | 418 |
| iterations | 20 |
| time_elapsed | 2062 |
| total_timesteps | 864000 |
| train/ | |
| approx_kl | 0.016617421 |
| clip_fraction | 0.172 |
| clip_range | 0.2 |
| entropy_loss | -8.2 |
| explained_variance | 0.744 |
| learning_rate | 0.0003 |
| loss | 1.66 |
| n_updates | 190 |
| policy_gradient_loss | -0.00437 |
| value_loss | 15.8 |
-----------------------------------------
Current state: ChampionX.Level6.ChunLiVsEHonda
-----------------------------------------
| time/ | |
| fps | 417 |
| iterations | 21 |
| time_elapsed | 2174 |
| total_timesteps | 907200 |
| train/ | |
| approx_kl | 0.017259926 |
| clip_fraction | 0.171 |
| clip_range | 0.2 |
| entropy_loss | -8.19 |
| explained_variance | 0.756 |
| learning_rate | 0.0003 |
| loss | 0.457 |
| n_updates | 200 |
| policy_gradient_loss | -0.00897 |
| value_loss | 14.9 |
-----------------------------------------
Current state: ChampionX.Level10.ChunLiVsVega
-----------------------------------------
| time/ | |
| fps | 417 |
| iterations | 22 |
| time_elapsed | 2276 |
| total_timesteps | 950400 |
| train/ | |
| approx_kl | 0.018794816 |
| clip_fraction | 0.19 |
| clip_range | 0.2 |
| entropy_loss | -8.18 |
| explained_variance | 0.834 |
| learning_rate | 0.0003 |
| loss | 3.33 |
| n_updates | 210 |
| policy_gradient_loss | -0.00535 |
| value_loss | 15.5 |
-----------------------------------------
Current state: ChampionX.Level5.ChunLiVsRyu
-----------------------------------------
| time/ | |
| fps | 418 |
| iterations | 23 |
| time_elapsed | 2374 |
| total_timesteps | 993600 |
| train/ | |
| approx_kl | 0.019361915 |
| clip_fraction | 0.188 |
| clip_range | 0.2 |
| entropy_loss | -8.18 |
| explained_variance | 0.825 |
| learning_rate | 0.0003 |
| loss | 0.235 |
| n_updates | 220 |
| policy_gradient_loss | -0.00762 |
| value_loss | 13.4 |
-----------------------------------------
Current state: ChampionX.Level2.ChunLiVsChunLi
-----------------------------------------
| time/ | |
| fps | 419 |
| iterations | 24 |
| time_elapsed | 2471 |
| total_timesteps | 1036800 |
| train/ | |
| approx_kl | 0.022115083 |
| clip_fraction | 0.233 |
| clip_range | 0.2 |
| entropy_loss | -8.18 |
| explained_variance | 0.8 |
| learning_rate | 0.0003 |
| loss | 0.211 |
| n_updates | 230 |
| policy_gradient_loss | -0.00771 |
| value_loss | 11.7 |
-----------------------------------------
Current state: ChampionX.Level3.ChunLiVsZangief
-----------------------------------------
| time/ | |
| fps | 419 |
| iterations | 25 |
| time_elapsed | 2574 |
| total_timesteps | 1080000 |
| train/ | |
| approx_kl | 0.023090197 |
| clip_fraction | 0.233 |
| clip_range | 0.2 |
| entropy_loss | -8.17 |
| explained_variance | 0.759 |
| learning_rate | 0.0003 |
| loss | 0.445 |
| n_updates | 240 |
| policy_gradient_loss | -0.00523 |
| value_loss | 13.7 |
-----------------------------------------
Current state: ChampionX.Level11.ChunLiVsSagat
-----------------------------------------
| time/ | |
| fps | 418 |
| iterations | 26 |
| time_elapsed | 2683 |
| total_timesteps | 1123200 |
| train/ | |
| approx_kl | 0.024867734 |
| clip_fraction | 0.281 |
| clip_range | 0.2 |
| entropy_loss | -8.16 |
| explained_variance | 0.688 |
| learning_rate | 0.0003 |
| loss | 0.557 |
| n_updates | 250 |
| policy_gradient_loss | 0.00215 |
| value_loss | 13.9 |
-----------------------------------------
Current state: ChampionX.Level10.ChunLiVsVega
-----------------------------------------
| time/ | |
| fps | 417 |
| iterations | 27 |
| time_elapsed | 2793 |
| total_timesteps | 1166400 |
| train/ | |
| approx_kl | 0.020454599 |
| clip_fraction | 0.203 |
| clip_range | 0.2 |
| entropy_loss | -8.19 |
| explained_variance | 0.766 |
| learning_rate | 0.0003 |
| loss | 0.314 |
| n_updates | 260 |
| policy_gradient_loss | -0.0058 |
| value_loss | 17.1 |
-----------------------------------------
Current state: ChampionX.Level10.ChunLiVsVega
-----------------------------------------
| time/ | |
| fps | 416 |
| iterations | 28 |
| time_elapsed | 2904 |
| total_timesteps | 1209600 |
| train/ | |
| approx_kl | 0.020690009 |
| clip_fraction | 0.208 |
| clip_range | 0.2 |
| entropy_loss | -8.17 |
| explained_variance | 0.827 |
| learning_rate | 0.0003 |
| loss | 1.38 |
| n_updates | 270 |
| policy_gradient_loss | 2.12e-05 |
| value_loss | 20.4 |
-----------------------------------------
Current state: ChampionX.Level10.ChunLiVsVega
-----------------------------------------
| time/ | |
| fps | 415 |
| iterations | 29 |
| time_elapsed | 3015 |
| total_timesteps | 1252800 |
| train/ | |
| approx_kl | 0.020646438 |
| clip_fraction | 0.208 |
| clip_range | 0.2 |
| entropy_loss | -8.15 |
| explained_variance | 0.81 |
| learning_rate | 0.0003 |
| loss | 0.235 |
| n_updates | 280 |
| policy_gradient_loss | -0.00852 |
| value_loss | 12.5 |
-----------------------------------------
Current state: ChampionX.Level9.ChunLiVsBalrog
-----------------------------------------
| time/ | |
| fps | 414 |
| iterations | 30 |
| time_elapsed | 3128 |
| total_timesteps | 1296000 |
| train/ | |
| approx_kl | 0.021910097 |
| clip_fraction | 0.212 |
| clip_range | 0.2 |
| entropy_loss | -8.15 |
| explained_variance | 0.79 |
| learning_rate | 0.0003 |
| loss | 0.0264 |
| n_updates | 290 |
| policy_gradient_loss | -0.00872 |
| value_loss | 12.5 |
-----------------------------------------
Current state: ChampionX.Level12.ChunLiVsBison
-----------------------------------------
| time/ | |
| fps | 412 |
| iterations | 31 |
| time_elapsed | 3243 |
| total_timesteps | 1339200 |
| train/ | |
| approx_kl | 0.025281599 |
| clip_fraction | 0.254 |
| clip_range | 0.2 |
| entropy_loss | -8.13 |
| explained_variance | 0.773 |
| learning_rate | 0.0003 |
| loss | 1.18 |
| n_updates | 300 |
| policy_gradient_loss | -0.00679 |
| value_loss | 12.6 |
-----------------------------------------
Current state: ChampionX.Level2.ChunLiVsChunLi
-----------------------------------------
| time/ | |
| fps | 412 |
| iterations | 32 |
| time_elapsed | 3349 |
| total_timesteps | 1382400 |
| train/ | |
| approx_kl | 0.026466375 |
| clip_fraction | 0.259 |
| clip_range | 0.2 |
| entropy_loss | -8.12 |
| explained_variance | 0.647 |
| learning_rate | 0.0003 |
| loss | 0.518 |
| n_updates | 310 |
| policy_gradient_loss | 0.000522 |
| value_loss | 18.8 |
-----------------------------------------

Binary file not shown.

002_lstm/cnn_lstm.py (new file)
@@ -0,0 +1,35 @@
import torch
import torch.nn as nn

class CNNEncoder(nn.Module):
    def __init__(self, features_dim=512):
        super(CNNEncoder, self).__init__()
        self.conv1 = nn.Conv2d(16, 32, kernel_size=5, stride=2)
        self.relu1 = nn.ReLU()
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5, stride=2)
        self.relu2 = nn.ReLU()
        self.conv3 = nn.Conv2d(64, 64, kernel_size=3, stride=1)
        self.relu3 = nn.ReLU()
        self.fc = nn.Linear(16384, 512)

    def forward(self, x):
        x = self.relu1(self.conv1(x))
        x = self.relu2(self.conv2(x))
        x = self.relu3(self.conv3(x))
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

class CNNLSTM(nn.Module):
    def __init__(self, features_dim=512):
        super(CNNLSTM, self).__init__()
        self.encoder = CNNEncoder(512)
        self.lstm = nn.LSTM(512, 512)

    def forward(self, x, hidden):
        x = self.encoder(x)
        x, hidden = self.lstm(x.unsqueeze(0), hidden)
        return x.squeeze(0), hidden

    def init_hidden(self, batch_size):
        return (torch.zeros(1, batch_size, 512), torch.zeros(1, batch_size, 512))
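A standalone usage sketch (not part of this commit) showing how CNNLSTM is meant to be stepped, with the hidden state carried between calls; it assumes the 16-frame, 84x84 observations produced by the wrapper below:

import torch
from cnn_lstm import CNNLSTM

model = CNNLSTM()
batch_size = 4
hidden = model.init_hidden(batch_size)

obs = torch.zeros(batch_size, 16, 84, 84)  # one stacked observation per env
features, hidden = model(obs, hidden)      # features: (4, 512)
features, hidden = model(obs, hidden)      # next step reuses the carried hidden state
print(features.shape)                      # torch.Size([4, 512])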

@@ -0,0 +1,102 @@
import collections
import gym
import cv2
import numpy as np
from torchvision.transforms import Normalize
from gym.spaces import MultiBinary

# Custom environment wrapper
class StreetFighterCustomWrapper(gym.Wrapper):
    def __init__(self, env, testing=False, threshold=0.65):
        super(StreetFighterCustomWrapper, self).__init__(env)
        self.action_space = MultiBinary(12)

        # Use a deque to store the last 16 frames (0.267 seconds)
        self.frame_stack = collections.deque(maxlen=16)

        self.threshold = threshold
        self.game_screen_gray = None

        self.prev_player_health = 1.0
        self.prev_opponent_health = 1.0

        # Update observation space to include 16 stacked grayscale images
        self.observation_space = gym.spaces.Box(
            low=0.0, high=1.0, shape=(16, 84, 84), dtype=np.float32
        )

        self.testing = testing

    def _preprocess_observation(self, observation):
        self.game_screen_gray = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY)
        resized_image = cv2.resize(self.game_screen_gray, (84, 84), interpolation=cv2.INTER_AREA) / 255.0
        # Add the resized image to the frame stack
        self.frame_stack.append(resized_image)
        # Stack the last 16 frames and return the stacked frames
        stacked_frames = np.stack(self.frame_stack, axis=0)[np.newaxis, ...]  # Shape: (1, 16, 84, 84)
        return stacked_frames

    def _get_win_or_lose_bonus(self):
        if self.prev_player_health > self.prev_opponent_health:
            # print('You win!')
            return 300
        else:
            # print('You lose!')
            return -300

    def _get_reward(self):
        player_health_area = self.game_screen_gray[15:20, 32:120]
        oppoent_health_area = self.game_screen_gray[15:20, 136:224]

        # Get health points using the number of pixels above 129.
        player_health = np.sum(player_health_area > 129) / player_health_area.size
        opponent_health = np.sum(oppoent_health_area > 129) / oppoent_health_area.size

        player_health_diff = self.prev_player_health - player_health
        opponent_health_diff = self.prev_opponent_health - opponent_health

        reward = (opponent_health_diff - player_health_diff) * 200  # max would be 200

        # Penalty for each step without any change in health
        if opponent_health_diff <= 0.0000001:
            reward -= 12.0 / 60.0  # -12 points per second if no damage to opponent

        self.prev_player_health = player_health
        self.prev_opponent_health = opponent_health

        # Print the health values of the player and the opponent
        # print("Player health: %f Opponent health:%f" % (player_health, opponent_health))

        return reward

    def reset(self):
        observation = self.env.reset()
        self.prev_player_health = 1.0
        self.prev_opponent_health = 1.0

        # Clear the frame stack and add the first observation 16 times
        self.frame_stack.clear()
        for _ in range(16):
            self.frame_stack.append(self._preprocess_observation(observation)[0])

        return self._preprocess_observation(observation)

    def step(self, action):
        # observation, _, _, info = self.env.step(action)
        observation, _reward, _done, info = self.env.step(self.env.action_space.sample())
        custom_reward = self._get_reward()
        custom_reward -= 1.0 / 60.0  # penalty for each step (-1 points per second)

        custom_done = False
        if self.prev_player_health <= 0.00001 or self.prev_opponent_health <= 0.00001:
            custom_reward += self._get_win_or_lose_bonus()
            if not self.testing:
                custom_done = True
            else:
                self.prev_player_health = 1.0
                self.prev_opponent_health = 1.0

        return self._preprocess_observation(observation), custom_reward, custom_done, info
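The 16-frame stack is a fixed-length deque of grayscale frames: each new frame pushes out the oldest, and np.stack plus a batch axis turns the deque into the (1, 16, 84, 84) array the encoder above expects. A minimal sketch of that mechanism on its own (not part of this commit), with random arrays standing in for emulator frames:

import collections
import numpy as np

frame_stack = collections.deque(maxlen=16)

first = np.random.rand(84, 84).astype(np.float32)
for _ in range(16):
    frame_stack.append(first)  # reset() pre-fills the stack with the first frame

for _ in range(5):
    frame_stack.append(np.random.rand(84, 84).astype(np.float32))  # newer frames displace the oldest

stacked = np.stack(frame_stack, axis=0)[np.newaxis, ...]
print(stacked.shape)  # (1, 16, 84, 84): 11 copies of the first frame plus 5 newer ones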

002_lstm/test.py (new file)
@@ -0,0 +1,73 @@
import time
import cv2
import retro
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from cnn_lstm import CNNLSTM, CNNEncoder
from street_fighter_custom_wrapper import StreetFighterCustomWrapper

def make_env(game, state):
    def _init():
        win_template = cv2.imread('images/pattern_win_gray.png', cv2.IMREAD_GRAYSCALE)
        lose_template = cv2.imread('images/pattern_lose_gray.png', cv2.IMREAD_GRAYSCALE)
        env = retro.RetroEnv(
            game=game,
            state=state,
            use_restricted_actions=retro.Actions.FILTERED,
            obs_type=retro.Observations.IMAGE
        )
        env = StreetFighterCustomWrapper(env, win_template, lose_template, testing=True)
        return env
    return _init

game = "StreetFighterIISpecialChampionEdition-Genesis"
state_stages = [
    "Champion.Level1.ChunLiVsGuile",
    "Champion.Level2.ChunLiVsKen",
    "Champion.Level3.ChunLiVsChunLi",
    "Champion.Level4.ChunLiVsZangief",
    "Champion.Level5.ChunLiVsDhalsim",
    "Champion.Level6.ChunLiVsRyu",
    "Champion.Level7.ChunLiVsEHonda",
    "Champion.Level8.ChunLiVsBlanka",
    "Champion.Level9.ChunLiVsBalrog",
    "Champion.Level10.ChunLiVsVega",
    "Champion.Level11.ChunLiVsSagat",
    "Champion.Level12.ChunLiVsBison"
    # Add other stages as necessary
]

env = make_env(game, state_stages[0])()

# Wrap the environment
env = DummyVecEnv([lambda: env])

policy_kwargs = {
    'features_extractor_class': CNNEncoder,
    'net_arch': [512, 'lstm']
}

model = PPO(
    "CnnPolicy",
    env,
    device="cuda",
    policy_kwargs=policy_kwargs,
    verbose=1
)
model.load(r"trained_models/ppo_chunli_1296000_steps")

obs = env.reset()
done = False

while True:
    timestamp = time.time()
    action, _ = model.predict(obs)
    obs, rewards, done, info = env.step(action)
    env.render()
    render_time = time.time() - timestamp
    if render_time < 0.0111:
        time.sleep(0.0111 - render_time)  # Add a delay for 90 FPS

# env.close()
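The 0.0111-second constant in the render loop is just the per-frame time budget for roughly 90 FPS; a one-liner makes the relationship explicit:

target_fps = 90
print(round(1.0 / target_fps, 4))  # 0.0111 seconds per frame, as hard-coded above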

002_lstm/train.py (new file)
@@ -0,0 +1,112 @@
import os
import random
import gym
import cv2
import retro
import numpy as np
from stable_baselines3 import PPO
from stable_baselines3.common.policies import ActorCriticPolicy
from stable_baselines3.common.vec_env import SubprocVecEnv
from stable_baselines3.common.callbacks import BaseCallback, CheckpointCallback
from cnn_lstm import CNNLSTM, CNNEncoder
from street_fighter_custom_wrapper import StreetFighterCustomWrapper

class RandomOpponentChangeCallback(BaseCallback):
    def __init__(self, stages, opponent_interval, verbose=0):
        super(RandomOpponentChangeCallback, self).__init__(verbose)
        self.stages = stages
        self.opponent_interval = opponent_interval

    def _on_step(self) -> bool:
        if self.n_calls % self.opponent_interval == 0:
            new_state = random.choice(self.stages)
            print("\nCurrent state:", new_state)
            self.training_env.env_method("load_state", new_state, indices=None)
        return True

def make_env(game, state, seed=0):
    def _init():
        env = retro.RetroEnv(
            game=game,
            state=state,
            use_restricted_actions=retro.Actions.FILTERED,
            obs_type=retro.Observations.IMAGE
        )
        env = StreetFighterCustomWrapper(env)
        env.seed(seed)
        return env
    return _init

def main():
    # Set up the environment and model
    game = "StreetFighterIISpecialChampionEdition-Genesis"
    state_stages = [
        "ChampionX.Level1.ChunLiVsKen",
        "ChampionX.Level2.ChunLiVsChunLi",
        "ChampionX.Level3.ChunLiVsZangief",
        "ChampionX.Level4.ChunLiVsDhalsim",
        "ChampionX.Level5.ChunLiVsRyu",
        "ChampionX.Level6.ChunLiVsEHonda",
        "ChampionX.Level7.ChunLiVsBlanka",
        "ChampionX.Level8.ChunLiVsGuile",
        "ChampionX.Level9.ChunLiVsBalrog",
        "ChampionX.Level10.ChunLiVsVega",
        "ChampionX.Level11.ChunLiVsSagat",
        "ChampionX.Level12.ChunLiVsBison"
        # Add other stages as necessary
    ]
    # Champion is at difficulty level 4, ChampionX is at difficulty level 8.

    num_envs = 8

    # env = SubprocVecEnv([make_env(game, state_stages[0], seed=i) for i in range(num_envs)])
    env = SubprocVecEnv([make_env(game, state_stages[0], seed=i) for i in range(num_envs)])

    class CustomPolicy(ActorCriticPolicy):
        def __init__(self, *args, **kwargs):
            super(CustomPolicy, self).__init__(*args, **kwargs)
            self.features_extractor = CNNLSTM()

    model = PPO(
        CustomPolicy,
        env,
        device="cuda",
        verbose=1,
        n_steps=5400,
        batch_size=64,
        n_epochs=10,
        learning_rate=0.0003,
        ent_coef=0.01,
        clip_range=0.2,
        clip_range_vf=None,
        gamma=0.99,
        gae_lambda=0.95,
        max_grad_norm=0.5,
        use_sde=False,
        sde_sample_freq=-1
    )

    # Set the save directory
    save_dir = "trained_models"
    os.makedirs(save_dir, exist_ok=True)

    # Set up callbacks
    opponent_interval = 5400  # stage_interval * num_envs = total_steps_per_stage
    checkpoint_interval = 54000  # checkpoint_interval * num_envs = total_steps_per_checkpoint (Every 80 rounds)
    checkpoint_callback = CheckpointCallback(save_freq=checkpoint_interval, save_path=save_dir, name_prefix="ppo_chunli")
    stage_increase_callback = RandomOpponentChangeCallback(state_stages, opponent_interval, save_dir)

    model.learn(
        total_timesteps=int(6048000),  # total_timesteps = stage_interval * num_envs * num_stages (1120 rounds)
        callback=[checkpoint_callback, stage_increase_callback]
    )

    # Save the final model
    model.save(os.path.join(save_dir, "ppo_sf2_chunli_final.zip"))

if __name__ == "__main__":
    main()
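Swapping the features extractor inside a plain ActorCriticPolicy gives the network an LSTM, but stock PPO in Stable-Baselines3 calls the extractor with the observation alone and does not manage a recurrent hidden state across rollout steps, so extra plumbing is needed for the LSTM to see history beyond the stacked frames. For a genuinely recurrent policy, one alternative (not used in this commit) is RecurrentPPO from sb3-contrib; a minimal sketch, assuming sb3-contrib is installed:

# Sketch only: recurrent PPO variant from sb3-contrib, not part of this commit.
from sb3_contrib import RecurrentPPO

model = RecurrentPPO(
    "CnnLstmPolicy",
    env,
    device="cuda",
    verbose=1,
    n_steps=5400,
    batch_size=64,
    learning_rate=0.0003,
)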

Binary file not shown.

@@ -3,7 +3,6 @@ import torch
import torch.nn as nn
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
from torchvision.models import mobilenet_v3_small
# Custom feature extractor (CNN)
class CustomCNN(BaseFeaturesExtractor):
def __init__(self, observation_space: gym.Space):

@@ -1,13 +1,18 @@
import gym
import cv2
import numpy as np
import torch
from torchvision.transforms import Normalize
from gym.spaces import MultiBinary
# Custom environment wrapper
class StreetFighterCustomWrapper(gym.Wrapper):
def __init__(self, env, win_template, lose_template, testing=False, threshold=0.65):
super(StreetFighterCustomWrapper, self).__init__(env)
self.win_template = win_template
self.lose_template = lose_template
self.action_space = MultiBinary(12)
# self.win_template = win_template
# self.lose_template = lose_template
self.threshold = threshold
self.game_screen_gray = None
@@ -15,24 +20,46 @@ class StreetFighterCustomWrapper(gym.Wrapper):
self.prev_opponent_health = 1.0
# Update observation space to single-channel grayscale image
# self.observation_space = gym.spaces.Box(
# low=0.0, high=1.0, shape=(84, 84, 1), dtype=np.float32
# )
# observation_space for mobilenet
self.observation_space = gym.spaces.Box(
low=0.0, high=1.0, shape=(84, 84, 1), dtype=np.float32
low=0.0, high=1.0, shape=(3, 96, 96), dtype=np.float32
)
self.testing = testing
# Normalize the image for MobileNetV3Small.
self.normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
def _preprocess_observation(self, observation):
# self.game_screen_gray = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY)
# resized_image = cv2.resize(self.game_screen_gray, (84, 84), interpolation=cv2.INTER_AREA) / 255.0
# return np.expand_dims(resized_image, axis=-1)
# # Using MobileNetV3Small.
self.game_screen_gray = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY)
resized_image = cv2.resize(self.game_screen_gray, (84, 84), interpolation=cv2.INTER_AREA) / 255.0
return np.expand_dims(resized_image, axis=-1)
resized_image = cv2.resize(observation, (96, 96), interpolation=cv2.INTER_AREA).astype(np.float32) / 255.0
# Convert the NumPy array to a PyTorch tensor
resized_image = torch.from_numpy(resized_image).permute(2, 0, 1)
# Apply normalization
resized_image = self.normalize(resized_image)
# # Add a batch dimension to match the model input shape
# # resized_image = resized_image.unsqueeze(0)
return resized_image
def _get_win_or_lose_bonus(self):
if self.prev_player_health > self.prev_opponent_health:
# print('You win!')
return 200
return 300
else:
# print('You lose!')
return -200
return -300
def _get_reward(self):
player_health_area = self.game_screen_gray[15:20, 32:120]
@@ -45,7 +72,11 @@ class StreetFighterCustomWrapper(gym.Wrapper):
player_health_diff = self.prev_player_health - player_health
opponent_health_diff = self.prev_opponent_health - opponent_health
reward = (opponent_health_diff - player_health_diff) * 100 # max would be 100
reward = (opponent_health_diff - player_health_diff) * 200 # max would be 200
# Penalty for each step without any change in health
if opponent_health_diff <= 0.0000001:
reward -= 12.0 / 60.0 # -12 points per second if no damage to opponent
self.prev_player_health = player_health
self.prev_opponent_health = opponent_health
@@ -62,8 +93,9 @@ class StreetFighterCustomWrapper(gym.Wrapper):
def step(self, action):
# observation, _, _, info = self.env.step(action)
observation, _reward, _done, info = self.env.step(action)
observation, _reward, _done, info = self.env.step(self.env.action_space.sample())
custom_reward = self._get_reward()
custom_reward -= 1.0 / 60.0 # penalty for each step (-1 points per second)
custom_done = False
if self.prev_player_health <= 0.00001 or self.prev_opponent_health <= 0.00001:
@@ -74,4 +106,5 @@ class StreetFighterCustomWrapper(gym.Wrapper):
self.prev_player_health = 1.0
self.prev_opponent_health = 1.0
return self._preprocess_observation(observation), custom_reward, custom_done, info
return self._preprocess_observation(observation), custom_reward, custom_done, info

mobilenet_extractor.py (new file)
@@ -0,0 +1,21 @@
import gym
import torch
import torch.nn as nn
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
from torchvision.models import mobilenet_v3_small

# Custom MobileNetV3 Feature Extractor
class MobileNetV3Extractor(BaseFeaturesExtractor):
    def __init__(self, observation_space: gym.Space):
        super(MobileNetV3Extractor, self).__init__(observation_space, features_dim=256)
        self.mobilenet = mobilenet_v3_small(pretrained=True)
        self.adaptive_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(576, self.features_dim)

    def forward(self, observations: torch.Tensor) -> torch.Tensor:
        # x = observations.permute(0, 2, 3, 1)  # Swap the channel dimension
        x = self.mobilenet.features(observations)
        x = self.adaptive_pool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return x
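A standalone shape check (not part of this commit) for the assumptions baked into this extractor: mobilenet_v3_small's feature trunk ends in 576 channels, which the adaptive pool and the Linear(576, 256) head reduce to the 256-dimensional feature vector; the 3x96x96 input matches the wrapper's updated observation space shown above.

import torch
from torchvision.models import mobilenet_v3_small

backbone = mobilenet_v3_small(pretrained=True)
x = torch.zeros(1, 3, 96, 96)
feats = backbone.features(x)                        # (1, 576, 3, 3) for a 96x96 input
pooled = torch.nn.AdaptiveAvgPool2d((1, 1))(feats)  # (1, 576, 1, 1)
print(torch.flatten(pooled, 1).shape)               # (1, 576), then Linear(576, 256) -> (1, 256)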

@@ -0,0 +1,18 @@
import gym
import torch
import torchvision
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor

# Custom MobileNetV3 Feature Extractor
class MobileNetV3Extractor(BaseFeaturesExtractor):
    def __init__(self, observation_space: gym.Space):
        super(MobileNetV3Extractor, self).__init__(observation_space, features_dim=576)
        self.mobilenet = torchvision.models.mobilenet_v3_small(pretrained=True)
        self.mobilenet = torch.nn.Sequential(*list(self.mobilenet.children())[:-1])
        self.adaptive_pool = torch.nn.AdaptiveAvgPool2d((1, 1))

    def forward(self, observations: torch.Tensor) -> torch.Tensor:
        x = self.mobilenet(observations)
        x = self.adaptive_pool(x)
        x = torch.flatten(x, 1)
        return x

@@ -9,13 +9,19 @@ from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from custom_cnn import CustomCNN
from mobilenet_extractor import MobileNetV3Extractor
from custom_sf2_cv_env import StreetFighterCustomWrapper
def make_env(game, state, seed=0):
def _init():
win_template = cv2.imread('images/pattern_win_gray.png', cv2.IMREAD_GRAYSCALE)
lose_template = cv2.imread('images/pattern_lose_gray.png', cv2.IMREAD_GRAYSCALE)
env = retro.RetroEnv(game=game, state=state, obs_type=retro.Observations.IMAGE)
env = retro.RetroEnv(
game=game,
state=state,
use_restricted_actions=retro.Actions.FILTERED,
obs_type=retro.Observations.IMAGE
)
env = StreetFighterCustomWrapper(env, win_template, lose_template, testing=True)
# env.seed(seed)
return env
@@ -43,9 +49,15 @@ env = make_env(game, state_stages[0])()
# Wrap the environment
env = DummyVecEnv([lambda: env])
# policy_kwargs = {
# 'features_extractor_class': CustomCNN
# }
# Using MobileNetV3 as the feature extractor
policy_kwargs = {
'features_extractor_class': CustomCNN
'features_extractor_class': MobileNetV3Extractor
}
model = PPO(
"CnnPolicy",
env,
@@ -53,7 +65,7 @@ model = PPO(
policy_kwargs=policy_kwargs,
verbose=1
)
model.load(r"trained_models_cv_test/ppo_sf2_chunli_final")
model.load(r"trained_models_cv_mobilenet_time_penalty/ppo_chunli_1296000_steps")
obs = env.reset()
done = False

@@ -6,13 +6,12 @@ import cv2
import retro
import numpy as np
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
from stable_baselines3.common.vec_env import SubprocVecEnv
from stable_baselines3.common.preprocessing import is_image_space, is_image_space_channels_first
from stable_baselines3.common.callbacks import BaseCallback, CheckpointCallback
import torch
import torch.nn as nn
from custom_cnn import CustomCNN
from mobilenet_extractor import MobileNetV3Extractor
from custom_sf2_cv_env import StreetFighterCustomWrapper
class RandomOpponentChangeCallback(BaseCallback):
@@ -32,7 +31,12 @@ def make_env(game, state, seed=0):
def _init():
win_template = cv2.imread('images/pattern_win_gray.png', cv2.IMREAD_GRAYSCALE)
lose_template = cv2.imread('images/pattern_lose_gray.png', cv2.IMREAD_GRAYSCALE)
env = retro.RetroEnv(game=game, state=state, obs_type=retro.Observations.IMAGE)
env = retro.RetroEnv(
game=game,
state=state,
use_restricted_actions=retro.Actions.FILTERED,
obs_type=retro.Observations.IMAGE
)
env = StreetFighterCustomWrapper(env, win_template, lose_template)
# env.seed(seed)
return env
@@ -42,28 +46,35 @@ def main():
# Set up the environment and model
game = "StreetFighterIISpecialChampionEdition-Genesis"
state_stages = [
"Champion.Level1.ChunLiVsGuile",
"Champion.Level2.ChunLiVsKen",
"Champion.Level3.ChunLiVsChunLi",
"Champion.Level4.ChunLiVsZangief",
"Champion.Level5.ChunLiVsDhalsim",
"Champion.Level6.ChunLiVsRyu",
"Champion.Level7.ChunLiVsEHonda",
"Champion.Level8.ChunLiVsBlanka",
"Champion.Level9.ChunLiVsBalrog",
"Champion.Level10.ChunLiVsVega",
"Champion.Level11.ChunLiVsSagat",
"Champion.Level12.ChunLiVsBison"
"ChampionX.Level1.ChunLiVsKen",
"ChampionX.Level2.ChunLiVsChunLi",
"ChampionX.Level3.ChunLiVsZangief",
"ChampionX.Level4.ChunLiVsDhalsim",
"ChampionX.Level5.ChunLiVsRyu",
"ChampionX.Level6.ChunLiVsEHonda",
"ChampionX.Level7.ChunLiVsBlanka",
"ChampionX.Level8.ChunLiVsGuile",
"ChampionX.Level9.ChunLiVsBalrog",
"ChampionX.Level10.ChunLiVsVega",
"ChampionX.Level11.ChunLiVsSagat",
"ChampionX.Level12.ChunLiVsBison"
# Add other stages as necessary
]
# Champion is at difficulty level 4, ChampionX is at difficulty level 8.
num_envs = 8
# env = SubprocVecEnv([make_env(game, state_stages[0], seed=i) for i in range(num_envs)])
env = SubprocVecEnv([make_env(game, state_stages[0], seed=i) for i in range(num_envs)])
# Using CustomCNN as the feature extractor
# policy_kwargs = {
# 'features_extractor_class': CustomCNN
# }
# Using MobileNetV3 as the feature extractor
policy_kwargs = {
'features_extractor_class': CustomCNN
'features_extractor_class': MobileNetV3Extractor
}
model = PPO(
@@ -87,7 +98,7 @@ def main():
)
# Set the save directory
save_dir = "trained_models_cv_level6up"
save_dir = "trained_models_cv_customcnn_time_penalty"
os.makedirs(save_dir, exist_ok=True)
# Set up callbacks

@@ -0,0 +1,291 @@
(StreetFighterAI) PS C:\Users\unitec\Documents\AIProjects\street-fighter-ai> python .\train_cv_sf2_ai.py
Using cuda device
Current state: Champion.Level8.ChunLiVsBlanka
------------------------------
| time/ | |
| fps | 186 |
| iterations | 1 |
| time_elapsed | 115 |
| total_timesteps | 21600 |
------------------------------
Current state: Champion.Level12.ChunLiVsBison
------------------------------------------
| time/ | |
| fps | 65 |
| iterations | 2 |
| time_elapsed | 655 |
| total_timesteps | 43200 |
| train/ | |
| approx_kl | 0.0053780936 |
| clip_fraction | 0.0431 |
| clip_range | 0.2 |
| entropy_loss | -8.31 |
| explained_variance | 2.93e-05 |
| learning_rate | 0.0003 |
| loss | 166 |
| n_updates | 10 |
| policy_gradient_loss | -0.0016 |
| value_loss | 92 |
------------------------------------------
Current state: Champion.Level6.ChunLiVsRyu
-----------------------------------------
| time/ | |
| fps | 54 |
| iterations | 3 |
| time_elapsed | 1196 |
| total_timesteps | 64800 |
| train/ | |
| approx_kl | 0.005385526 |
| clip_fraction | 0.0472 |
| clip_range | 0.2 |
| entropy_loss | -8.31 |
| explained_variance | -0.00129 |
| learning_rate | 0.0003 |
| loss | 0.0349 |
| n_updates | 20 |
| policy_gradient_loss | -0.000679 |
| value_loss | 58.4 |
-----------------------------------------
Current state: Champion.Level10.ChunLiVsVega
-----------------------------------------
| time/ | |
| fps | 49 |
| iterations | 4 |
| time_elapsed | 1736 |
| total_timesteps | 86400 |
| train/ | |
| approx_kl | 0.005108807 |
| clip_fraction | 0.0236 |
| clip_range | 0.2 |
| entropy_loss | -8.3 |
| explained_variance | 0.000588 |
| learning_rate | 0.0003 |
| loss | 0.236 |
| n_updates | 30 |
| policy_gradient_loss | -0.00157 |
| value_loss | 82.7 |
-----------------------------------------
Current state: Champion.Level12.ChunLiVsBison
------------------------------------------
| time/ | |
| fps | 47 |
| iterations | 5 |
| time_elapsed | 2274 |
| total_timesteps | 108000 |
| train/ | |
| approx_kl | 0.0060475296 |
| clip_fraction | 0.054 |
| clip_range | 0.2 |
| entropy_loss | -8.3 |
| explained_variance | 0.00555 |
| learning_rate | 0.0003 |
| loss | 0.536 |
| n_updates | 40 |
| policy_gradient_loss | -0.000733 |
| value_loss | 40.5 |
------------------------------------------
Current state: Champion.Level4.ChunLiVsZangief
------------------------------------------
| time/ | |
| fps | 46 |
| iterations | 6 |
| time_elapsed | 2811 |
| total_timesteps | 129600 |
| train/ | |
| approx_kl | 0.0059517785 |
| clip_fraction | 0.0546 |
| clip_range | 0.2 |
| entropy_loss | -8.29 |
| explained_variance | 0.0188 |
| learning_rate | 0.0003 |
| loss | 7.26 |
| n_updates | 50 |
| policy_gradient_loss | -0.00102 |
| value_loss | 21.8 |
------------------------------------------
Current state: Champion.Level7.ChunLiVsEHonda
------------------------------------------
| time/ | |
| fps | 45 |
| iterations | 7 |
| time_elapsed | 3348 |
| total_timesteps | 151200 |
| train/ | |
| approx_kl | 0.0057667145 |
| clip_fraction | 0.0337 |
| clip_range | 0.2 |
| entropy_loss | -8.29 |
| explained_variance | 0.0069 |
| learning_rate | 0.0003 |
| loss | 0.832 |
| n_updates | 60 |
| policy_gradient_loss | -0.00133 |
| value_loss | 36.7 |
------------------------------------------
Current state: Champion.Level8.ChunLiVsBlanka
------------------------------------------
| time/ | |
| fps | 44 |
| iterations | 8 |
| time_elapsed | 3883 |
| total_timesteps | 172800 |
| train/ | |
| approx_kl | 0.0047547054 |
| clip_fraction | 0.0291 |
| clip_range | 0.2 |
| entropy_loss | -8.29 |
| explained_variance | 0.0128 |
| learning_rate | 0.0003 |
| loss | 0.424 |
| n_updates | 70 |
| policy_gradient_loss | -0.00186 |
| value_loss | 21.2 |
------------------------------------------
Current state: Champion.Level8.ChunLiVsBlanka
------------------------------------------
| time/ | |
| fps | 43 |
| iterations | 9 |
| time_elapsed | 4422 |
| total_timesteps | 194400 |
| train/ | |
| approx_kl | 0.0063244957 |
| clip_fraction | 0.049 |
| clip_range | 0.2 |
| entropy_loss | -8.28 |
| explained_variance | 0.00417 |
| learning_rate | 0.0003 |
| loss | 0.146 |
| n_updates | 80 |
| policy_gradient_loss | -0.00167 |
| value_loss | 17 |
------------------------------------------
Current state: Champion.Level8.ChunLiVsBlanka
-----------------------------------------
| time/ | |
| fps | 43 |
| iterations | 10 |
| time_elapsed | 4960 |
| total_timesteps | 216000 |
| train/ | |
| approx_kl | 0.008022586 |
| clip_fraction | 0.0632 |
| clip_range | 0.2 |
| entropy_loss | -8.29 |
| explained_variance | 0.0241 |
| learning_rate | 0.0003 |
| loss | 0.902 |
| n_updates | 90 |
| policy_gradient_loss | -0.00295 |
| value_loss | 13.5 |
-----------------------------------------
Current state: Champion.Level10.ChunLiVsVega
-----------------------------------------
| time/ | |
| fps | 43 |
| iterations | 11 |
| time_elapsed | 5499 |
| total_timesteps | 237600 |
| train/ | |
| approx_kl | 0.006470734 |
| clip_fraction | 0.0596 |
| clip_range | 0.2 |
| entropy_loss | -8.3 |
| explained_variance | 0.0611 |
| learning_rate | 0.0003 |
| loss | 5.72 |
| n_updates | 100 |
| policy_gradient_loss | -0.000976 |
| value_loss | 8.5 |
-----------------------------------------
Current state: Champion.Level7.ChunLiVsEHonda
----------------------------------------
| time/ | |
| fps | 42 |
| iterations | 12 |
| time_elapsed | 6040 |
| total_timesteps | 259200 |
| train/ | |
| approx_kl | 0.00540813 |
| clip_fraction | 0.0547 |
| clip_range | 0.2 |
| entropy_loss | -8.29 |
| explained_variance | -0.0271 |
| learning_rate | 0.0003 |
| loss | 0.75 |
| n_updates | 110 |
| policy_gradient_loss | -0.00104 |
| value_loss | 9.12 |
----------------------------------------
Current state: Champion.Level8.ChunLiVsBlanka
---------------------------------------
| time/ | |
| fps | 42 |
| iterations | 13 |
| time_elapsed | 6582 |
| total_timesteps | 280800 |
| train/ | |
| approx_kl | 0.0058568 |
| clip_fraction | 0.0608 |
| clip_range | 0.2 |
| entropy_loss | -8.29 |
| explained_variance | 0.0277 |
| learning_rate | 0.0003 |
| loss | 0.256 |
| n_updates | 120 |
| policy_gradient_loss | -0.00284 |
| value_loss | 6.99 |
---------------------------------------
Current state: Champion.Level2.ChunLiVsKen
------------------------------------------
| time/ | |
| fps | 42 |
| iterations | 14 |
| time_elapsed | 7126 |
| total_timesteps | 302400 |
| train/ | |
| approx_kl | 0.0066813217 |
| clip_fraction | 0.0554 |
| clip_range | 0.2 |
| entropy_loss | -8.3 |
| explained_variance | 0.0689 |
| learning_rate | 0.0003 |
| loss | 0.504 |
| n_updates | 130 |
| policy_gradient_loss | -0.00243 |
| value_loss | 9.48 |
------------------------------------------
Current state: Champion.Level8.ChunLiVsBlanka
------------------------------------------
| time/ | |
| fps | 42 |
| iterations | 15 |
| time_elapsed | 7670 |
| total_timesteps | 324000 |
| train/ | |
| approx_kl | 0.0069995625 |
| clip_fraction | 0.0637 |
| clip_range | 0.2 |
| entropy_loss | -8.3 |
| explained_variance | -0.0336 |
| learning_rate | 0.0003 |
| loss | -0.000532 |
| n_updates | 140 |
| policy_gradient_loss | -0.00198 |
| value_loss | 9.39 |
------------------------------------------

File diff suppressed because it is too large.