diff --git a/__pycache__/custom_cnn.cpython-38.pyc b/__pycache__/custom_cnn.cpython-38.pyc index 9adaf11..87ffae4 100644 Binary files a/__pycache__/custom_cnn.cpython-38.pyc and b/__pycache__/custom_cnn.cpython-38.pyc differ diff --git a/__pycache__/custom_sf2_cv_env.cpython-38.pyc b/__pycache__/custom_sf2_cv_env.cpython-38.pyc index e8de06b..c708a97 100644 Binary files a/__pycache__/custom_sf2_cv_env.cpython-38.pyc and b/__pycache__/custom_sf2_cv_env.cpython-38.pyc differ diff --git a/custom_cnn.py b/custom_cnn.py index bad8a1b..8daa92e 100644 --- a/custom_cnn.py +++ b/custom_cnn.py @@ -8,16 +8,16 @@ class CustomCNN(BaseFeaturesExtractor): def __init__(self, observation_space: gym.Space): super(CustomCNN, self).__init__(observation_space, features_dim=512) self.cnn = nn.Sequential( - nn.Conv2d(1, 32, kernel_size=8, stride=4, padding=0), + nn.Conv2d(1, 32, kernel_size=5, stride=2, padding=0), nn.ReLU(), - nn.Conv2d(32, 64, kernel_size=4, stride=2, padding=0), + nn.Conv2d(32, 64, kernel_size=5, stride=2, padding=0), nn.ReLU(), nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=0), nn.ReLU(), nn.Flatten(), - nn.Linear(3136, self.features_dim), + nn.Linear(16384, self.features_dim), nn.ReLU() ) def forward(self, observations: torch.Tensor) -> torch.Tensor: - return self.cnn(observations) \ No newline at end of file + return self.cnn(observations.permute(0, 3, 1, 2)) # Swap the channel dimension \ No newline at end of file diff --git a/custom_sf2_cv_env.py b/custom_sf2_cv_env.py index 017605b..55b3d0f 100644 --- a/custom_sf2_cv_env.py +++ b/custom_sf2_cv_env.py @@ -9,7 +9,6 @@ class StreetFighterCustomWrapper(gym.Wrapper): self.win_template = win_template self.lose_template = lose_template self.threshold = threshold - self.game_screen_gray = None self.prev_player_health = 1.0 @@ -17,7 +16,7 @@ class StreetFighterCustomWrapper(gym.Wrapper): # Update observation space to single-channel grayscale image self.observation_space = gym.spaces.Box( - low=0, high=255, shape=(84, 84, 1), dtype=np.uint8 + low=0.0, high=1.0, shape=(84, 84, 1), dtype=np.float32 ) def _preprocess_observation(self, observation): @@ -26,7 +25,7 @@ class StreetFighterCustomWrapper(gym.Wrapper): # print("self.game_screen_gray size: ", self.game_screen_gray.shape) # Print the size of the observation # print("Observation size: ", observation.shape) - resized_image = cv2.resize(self.game_screen_gray, (84, 84), interpolation=cv2.INTER_AREA) + resized_image = cv2.resize(self.game_screen_gray, (84, 84), interpolation=cv2.INTER_AREA) / 255.0 return np.expand_dims(resized_image, axis=-1) def _check_game_over(self): @@ -46,11 +45,26 @@ class StreetFighterCustomWrapper(gym.Wrapper): player_health = np.sum(player_health_area > 129) / player_health_area.size opponent_health = np.sum(oppoent_health_area > 129) / oppoent_health_area.size - reward = player_health - opponent_health + player_health_diff = self.prev_player_health - player_health + opponent_health_diff = self.prev_opponent_health - opponent_health + + reward = (opponent_health_diff - player_health_diff) * 100 + + # Add bonus for successful attacks or penalize for taking damage + if opponent_health_diff > player_health_diff: + reward += 10 # Bonus for successful attacks + elif opponent_health_diff < player_health_diff: + reward -= 10 # Penalty for taking damage + + self.prev_player_health = player_health + self.prev_opponent_health = opponent_health + return reward def reset(self): observation = self.env.reset() + self.prev_player_health = 1.0 + self.prev_opponent_health = 1.0 return self._preprocess_observation(observation) def step(self, action): diff --git a/test_cv_sf2_ai.py b/test_cv_sf2_ai.py index fc29a8a..5410f39 100644 --- a/test_cv_sf2_ai.py +++ b/test_cv_sf2_ai.py @@ -45,12 +45,12 @@ model = PPO( policy_kwargs=policy_kwargs, verbose=1 ) -model.load("ppo_sf2_cnn") +model.load("ppo_sf2_cnn_new") obs = env.reset() done = False -while not done: +while True: timestamp = time.time() action, _ = model.predict(obs) obs, rewards, done, info = env.step(action) @@ -59,4 +59,4 @@ while not done: if render_time < 0.0111: time.sleep(0.0111 - render_time) # Add a delay for 90 FPS -env.close() \ No newline at end of file +# env.close() \ No newline at end of file diff --git a/train_cv_sf2_ai.py b/train_cv_sf2_ai.py index 5036a19..80c5253 100644 --- a/train_cv_sf2_ai.py +++ b/train_cv_sf2_ai.py @@ -45,11 +45,23 @@ def main(): env, device="cuda", policy_kwargs=policy_kwargs, - verbose=1 + verbose=1, + n_steps=2048, + batch_size=64, + n_epochs=10, + learning_rate=0.0003, + ent_coef=0.01, + clip_range=0.2, + clip_range_vf=None, + gamma=0.99, + gae_lambda=0.95, + max_grad_norm=0.5, + use_sde=False, + sde_sample_freq=-1 ) - model.learn(total_timesteps=int(1000)) + model.learn(total_timesteps=int(500000)) - model.save("ppo_sf2_cnn") + model.save("ppo_sf2_cnn_new") if __name__ == "__main__": main()