street-fighter-ai/000_image_stack_ram_based_reward_ai_generated/evaluate.py

53 lines
1.7 KiB
Python
Raw Normal View History

2023-03-29 17:14:39 +00:00
import retro
2023-03-30 18:10:25 +00:00
2023-03-29 17:14:39 +00:00
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
2023-03-30 18:10:25 +00:00
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.evaluation import evaluate_policy
2023-03-29 17:14:39 +00:00
from custom_cnn import CustomCNN
from street_fighter_custom_wrapper import StreetFighterCustomWrapper
2023-03-30 18:10:25 +00:00
2023-03-29 17:14:39 +00:00
def make_env(game, state):
    """Build a zero-argument factory for a wrapped Street Fighter environment.

    Args:
        game: Game name, forwarded to ``retro.make`` as ``game``.
        state: Save-state name, forwarded to ``retro.make`` as ``state``.

    Returns:
        A callable taking no arguments. Each call creates a retro
        environment (filtered action set, image observations) and returns
        it wrapped in ``StreetFighterCustomWrapper``.
    """
    def _init():
        # The emulator is only created when the factory is invoked, so
        # building the factory itself has no side effects.
        env = retro.make(
            game=game,
            state=state,
            use_restricted_actions=retro.Actions.FILTERED,
            obs_type=retro.Observations.IMAGE,
        )
        return StreetFighterCustomWrapper(env)

    return _init
# Game name handed to make_env for every stage.
game = "StreetFighterIISpecialChampionEdition-Genesis"

# Save-state names, one per level, numbered from 1 in the order the
# opponents are listed.
state_stages = [
    f"Champion.Level{level}.ChunLiVs{opponent}"
    for level, opponent in enumerate(
        (
            "Guile",
            "Ken",
            "ChunLi",
            "Zangief",
            "Dhalsim",
            "Ryu",
            "EHonda",
            "Blanka",
            "Balrog",
            "Vega",
            "Sagat",
            "Bison",
            # Add other stages as necessary
        ),
        start=1,
    )
]
# Build the evaluation environment on the first stage in state_stages.
env = make_env(game, state_stages[0])()
# Optional Monitor wrapping, currently disabled:
# env = Monitor(env, 'logs/')

try:
    # PPO.load is a classmethod that returns a new model, so no PPO
    # instance needs to be constructed beforehand; one built here would
    # simply be discarded by the assignment below.
    model = PPO.load(r"dummy_model_ppo_chunli")
    # Alternative checkpoint: r"trained_models/ppo_chunli_864000_steps"

    # Without return_episode_rewards, evaluate_policy returns the scalar
    # (mean, std) pair that the formatted print below expects. With that
    # flag set it returns per-episode lists, which cannot be formatted
    # with ":.2f".
    mean_reward, std_reward = evaluate_policy(
        model,
        env,
        render=True,
        n_eval_episodes=10,
        deterministic=False,
    )
    print(f"Mean reward: {mean_reward:.2f} +/- {std_reward:.2f}")
finally:
    # Release the environment even if loading or evaluation fails.
    env.close()