street-fighter-ai/004_image_stack_ram_based_reward_custom/test.py
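
# Evaluation script: build one Street Fighter II Special Champion Edition
# (Genesis) stage, load a trained PPO checkpoint, and play it for a fixed
# number of episodes while printing per-step and per-episode rewards.
# Assumes the game ROM has already been imported into gym-retro
# (e.g. via `python -m retro.import`).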

import time
import retro
from stable_baselines3 import PPO
from street_fighter_custom_wrapper import StreetFighterCustomWrapper


def make_env(game, state):
    def _init():
        env = retro.make(
            game=game,
            state=state,
            use_restricted_actions=retro.Actions.FILTERED,
            obs_type=retro.Observations.IMAGE
        )
        env = StreetFighterCustomWrapper(env)
        return env
    return _init

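
# The gym-retro ROM id and the save states for the Champion Edition arcade
# ladder on Ryu's route, one state per stage.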
game = "StreetFighterIISpecialChampionEdition-Genesis"
state_stages = [
    "Champion.Level1.RyuVsGuile",
    "Champion.Level2.RyuVsKen",
    "Champion.Level3.RyuVsChunLi",
    "Champion.Level4.RyuVsZangief",
    "Champion.Level5.RyuVsDhalsim",
    "Champion.Level6.RyuVsRyu",
    "Champion.Level7.RyuVsEHonda",
    "Champion.Level8.RyuVsBlanka",
    "Champion.Level9.RyuVsBalrog",
    "Champion.Level10.RyuVsVega",
    "Champion.Level11.RyuVsSagat",
    "Champion.Level12.RyuVsBison"
]
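
# Alternative save states from the earlier ChunLi experiments, kept here for
# reference: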
# state_stages = [
#     "Champion.Level1.RyuVsGuile",
#     "Champion.Level1.ChunLiVsGuile",  # Average reward for random strategy: -102.3 | -20.4
#     "ChampionX.Level1.ChunLiVsKen",   # Average reward for random strategy: -247.6
#     "Champion.Level2.ChunLiVsKen",
#     "Champion.Level3.ChunLiVsChunLi",
#     "Champion.Level4.ChunLiVsZangief",
#     "Champion.Level5.ChunLiVsDhalsim",
#     "Champion.Level6.ChunLiVsRyu",
#     "Champion.Level7.ChunLiVsEHonda",
#     "Champion.Level8.ChunLiVsBlanka",
#     "Champion.Level9.ChunLiVsBalrog",
#     "Champion.Level10.ChunLiVsVega",
#     "Champion.Level11.ChunLiVsSagat",
#     "Champion.Level12.ChunLiVsBison"
#     # Add other stages as necessary
# ]
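
# state_stages[11] selects the last entry in the active list above,
# "Champion.Level12.RyuVsBison", i.e. the final stage of the arcade ladder.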
env = make_env(game, state_stages[11])()

model = PPO(
    "CnnPolicy",
    env,
    verbose=1
)

model_path = r"trained_models_ryu_level_1_time_reward_small_random/ppo_ryu_2600000_steps"

# PPO.load is a classmethod that returns a new model rather than loading the
# weights into an existing instance, so rebind `model` to the loaded checkpoint.
model = PPO.load(model_path, env=env)
# Average reward for optuna/trial_1_best_model: -82.3
# Average reward for optuna/trial_9_best_model: 36.7 | -86.23
# Average reward for trained_models/ppo_chunli_5376000_steps: -77.8
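
# Roll the loaded policy out for num_episodes full matches, printing every
# non-zero shaped reward together with both fighters' HP from the info dict,
# and accumulate the per-episode totals.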
obs = env.reset()
done = False

num_episodes = 30
episode_reward_sum = 0
for _ in range(num_episodes):
    done = False
    obs = env.reset()
    total_reward = 0
    while not done:
    # while True:
        timestamp = time.time()
        action, _states = model.predict(obs)
        obs, reward, done, info = env.step(action)
        if reward != 0:
            total_reward += reward
            print("Reward: {}, playerHP: {}, enemyHP: {}".format(reward, info['health'], info['enemy_health']))
        env.render()
        # time.sleep(0.005)
    print("Total reward: {}".format(total_reward))
    episode_reward_sum += total_reward

# Summarize the evaluation and release the emulator.
env.close()
print("Average reward for {}: {}".format(model_path, episode_reward_sum / num_episodes))