import time
import retro
from stable_baselines3 import PPO
from street_fighter_custom_wrapper import StreetFighterCustomWrapper
def make_env(game, state):
    def _init():
        env = retro.make(
            game=game,
            state=state,
            use_restricted_actions=retro.Actions.FILTERED,
            obs_type=retro.Observations.IMAGE
        )
        env = StreetFighterCustomWrapper(env)
        return env
    return _init

game = "StreetFighterIISpecialChampionEdition-Genesis"
state_stages = [
    "Champion.Level1.RyuVsGuile",
    "Champion.Level2.RyuVsKen",
    "Champion.Level3.RyuVsChunLi",
    "Champion.Level4.RyuVsZangief",
    "Champion.Level5.RyuVsDhalsim",
    "Champion.Level6.RyuVsRyu",
    "Champion.Level7.RyuVsEHonda",
    "Champion.Level8.RyuVsBlanka",
    "Champion.Level9.RyuVsBalrog",
    "Champion.Level10.RyuVsVega",
    "Champion.Level11.RyuVsSagat",
    "Champion.Level12.RyuVsBison"
]
# state_stages = [
# "Champion.Level1.RyuVsGuile",
# "Champion.Level1.ChunLiVsGuile", # Average reward for random strategy: -102.3 | -20.4
# "ChampionX.Level1.ChunLiVsKen", # Average reward for random strategy: -247.6
# "Champion.Level2.ChunLiVsKen",
# "Champion.Level3.ChunLiVsChunLi",
# "Champion.Level4.ChunLiVsZangief",
# "Champion.Level5.ChunLiVsDhalsim",
# "Champion.Level6.ChunLiVsRyu",
# "Champion.Level7.ChunLiVsEHonda",
# "Champion.Level8.ChunLiVsBlanka",
# "Champion.Level9.ChunLiVsBalrog",
# "Champion.Level10.ChunLiVsVega",
# "Champion.Level11.ChunLiVsSagat",
# "Champion.Level12.ChunLiVsBison"
# # Add other stages as necessary
# ]
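
# make_env returns the zero-argument _init rather than a live env so the same
# factory can feed Stable-Baselines3 vectorized environments, which expect a
# list of callables. A minimal sketch, not used by this script (gym-retro only
# allows one emulator instance per process, so truly parallel copies would
# need SubprocVecEnv instead):
# from stable_baselines3.common.vec_env import DummyVecEnv
# vec_env = DummyVecEnv([make_env(game, state_stages[0])])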

# Instantiate a single evaluation environment on the first stage.
env = make_env(game, state_stages[0])()

# PPO skeleton with the CNN policy; the trained weights are loaded below.
model = PPO(
    "CnnPolicy",
    env,
    verbose=1
)

model_path = r"trained_models_ryu_level_1_time_reward_small_loop_continue/ppo_ryu_5000000_steps.zip"
# PPO.load is a classmethod that returns a new model, so calling it on an
# instance would silently discard the loaded weights; set_parameters loads the
# checkpoint into this model in place.
model.set_parameters(model_path)
# Average reward for optuna/trial_1_best_model: -82.3
# Average reward for optuna/trial_9_best_model: 36.7 | -86.23
# Average reward for trained_models/ppo_chunli_5376000_steps: -77.8
# Level_1 Average reward for trained_models_ryu_level_1_time_reward_small_random/ppo_ryu_4200000_steps: 0.35772262101207986 Winning rate: 0.5666666666666667
# Level_2 Average reward for trained_models_ryu_level_1_time_reward_small_random/ppo_ryu_4200000_steps: 0.18094390738868166 Winning rate: 0.16666666666666666
# obs = env.reset()
done = False
num_episodes = 12
episode_reward_sum = 0
num_victory = 0

for _ in range(num_episodes):
    done = False
    obs = env.reset()
    total_reward = 0
    while not done:
        # while True:
        timestamp = time.time()
        # predict samples from the policy by default; pass deterministic=True
        # for greedy evaluation.
        action, _states = model.predict(obs)
        obs, reward, done, info = env.step(action)
        if reward != 0:
            total_reward += reward
            print("Reward: {}, playerHP: {}, enemyHP: {}".format(reward, info['agent_hp'], info['enemy_hp']))
        env.render()
        # time.sleep(0.005)

    # Check the episode's final info: negative enemy HP marks a KO win, so
    # this sits after the while loop to count at most one victory per episode.
    if info['enemy_hp'] < 0:
        print("Victory!")
        num_victory += 1
    print("Total reward: {}".format(total_reward))
    episode_reward_sum += total_reward

env.close()
print("Winning rate: {}".format(1.0 * num_victory / num_episodes))
print("Average reward for {}: {}".format(model_path, episode_reward_sum / num_episodes))