mirror of https://github.com/linyiLYi/street-fighter-ai.git
synced 2025-04-04 15:10:43 +00:00
requirements.txt
This commit is contained in:
parent f09e69d05c
commit 45f8e4649a
5
main/requirements.txt
Normal file
@@ -0,0 +1,5 @@
gym==0.18.3
gym-retro==0.8.0
opencv-python==4.7.0.72
stable-baselines3==1.1.0
tensorboard==2.12.1
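The five pins above are the project's entire dependency set. As a quick sanity check (not part of this commit; everything except the path main/requirements.txt is illustrative), a short script along these lines could confirm the active environment matches the pins:

# Hypothetical helper, not in the repository: compare installed
# package versions against the pins in main/requirements.txt.
from importlib.metadata import version, PackageNotFoundError

with open("main/requirements.txt") as f:
    pins = [line.strip().split("==") for line in f if "==" in line]

for name, wanted in pins:
    try:
        found = version(name)
    except PackageNotFoundError:
        print(f"{name}: not installed")
        continue
    status = "OK" if found == wanted else f"got {found}, want {wanted}"
    print(f"{name}=={wanted}: {status}")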
@@ -1,68 +0,0 @@
import os

import retro
import optuna
from stable_baselines3 import PPO
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.evaluation import evaluate_policy

from street_fighter_custom_wrapper import StreetFighterCustomWrapper

LOG_DIR = 'logs/'
OPT_DIR = 'optuna/'
os.makedirs(LOG_DIR, exist_ok=True)
os.makedirs(OPT_DIR, exist_ok=True)

def optimize_ppo(trial):
    return {
        'n_steps': trial.suggest_int('n_steps', 512, 2048, log=True),
        'gamma': trial.suggest_float('gamma', 0.9, 0.9999),
        'learning_rate': trial.suggest_float('learning_rate', 5e-5, 5e-4, log=True),
        'gae_lambda': trial.suggest_float('gae_lambda', 0.8, 0.9999)
    }

def make_env(game, state):
    def _init():
        env = retro.make(
            game=game,
            state=state,
            use_restricted_actions=retro.Actions.FILTERED,
            obs_type=retro.Observations.IMAGE
        )
        env = StreetFighterCustomWrapper(env)
        return env
    return _init

def optimize_agent(trial):
    game = "StreetFighterIISpecialChampionEdition-Genesis"
    state = "Champion.Level1.ChunLiVsGuile"  # "ChampionX.Level1.ChunLiVsKen"

    try:
        model_params = optimize_ppo(trial)

        # Create environment
        env = make_env(game, state)()
        env = Monitor(env, LOG_DIR)

        # Create algo
        model = PPO('CnnPolicy', env, verbose=1, **model_params)
        model.learn(total_timesteps=500000)

        # Evaluate model
        mean_reward, _ = evaluate_policy(model, env, n_eval_episodes=30, deterministic=False)
        env.close()

        SAVE_PATH = os.path.join(OPT_DIR, 'trial_{}_best_model'.format(trial.number))
        model.save(SAVE_PATH)

        return mean_reward

    except Exception as e:
        return -1

# Creating the experiment
study = optuna.create_study(direction='maximize')
study.optimize(optimize_agent, n_trials=10, n_jobs=1)

print(study.best_params)
print(study.best_trial)
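The file removed above was the repository's Optuna tuning script: each trial samples PPO hyperparameters, trains for 500k timesteps, evaluates over 30 episodes, and saves the model as trial_{n}_best_model under optuna/. A hedged sketch of how the winning trial could be picked back up afterwards (this code is not in the repository; it assumes the study object and the make_env factory from the script above are still in scope):

# Illustrative follow-up only, not part of the commit: reload the model
# saved by the best trial and re-evaluate it. `study` and `make_env`
# come from the deleted script above; the save path mirrors its
# SAVE_PATH convention.
import os
from stable_baselines3 import PPO
from stable_baselines3.common.evaluation import evaluate_policy

game = "StreetFighterIISpecialChampionEdition-Genesis"
state = "Champion.Level1.ChunLiVsGuile"

best_path = os.path.join('optuna/', 'trial_{}_best_model'.format(study.best_trial.number))
env = make_env(game, state)()          # same env factory as in the script
model = PPO.load(best_path, env=env)   # restores the tuned policy weights
mean_reward, _ = evaluate_policy(model, env, n_eval_episodes=10, deterministic=False)
env.close()
print('best trial mean reward:', mean_reward)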