street-fighter-ai/002_lstm/street_fighter_custom_wrapper.py

102 lines
3.9 KiB
Python
Raw Normal View History

2023-03-29 17:14:39 +00:00
import collections
import gym
import cv2
import numpy as np
from torchvision.transforms import Normalize
from gym.spaces import MultiBinary
# Custom environment wrapper
class StreetFighterCustomWrapper(gym.Wrapper):
    """Custom wrapper for the Street Fighter retro environment.

    Stacks the last 16 grayscale 84x84 frames as the observation and shapes
    a dense reward from the on-screen health bars, which are read directly
    from pixel brightness in fixed screen regions.
    """

    # NOTE(review): `threshold` is stored but never read anywhere in this
    # class, and the `Normalize` import at the top of the file is unused.
    def __init__(self, env, testing=False, threshold=0.65):
        super(StreetFighterCustomWrapper, self).__init__(env)
        self.action_space = MultiBinary(12)
        # Deque holding the last 16 frames (~0.267 seconds at 60 fps).
        self.frame_stack = collections.deque(maxlen=16)
        self.threshold = threshold
        # Full-resolution grayscale screen, cached each step for the
        # health-bar reading in _get_reward().
        self.game_screen_gray = None
        # Health fractions in [0, 1], estimated from health-bar pixels.
        self.prev_player_health = 1.0
        self.prev_opponent_health = 1.0
        # 16 stacked grayscale frames.
        # NOTE(review): _preprocess_observation actually returns shape
        # (1, 16, 84, 84) with a leading batch axis, which disagrees with
        # this declared shape — confirm which one the model expects.
        self.observation_space = gym.spaces.Box(
            low=0.0, high=1.0, shape=(16, 84, 84), dtype=np.float32
        )
        self.testing = testing

    def _to_frame(self, observation):
        """Convert a raw screen image to a normalized 84x84 grayscale frame.

        Side effect: caches the full-resolution grayscale screen in
        self.game_screen_gray for the subsequent _get_reward() call.
        """
        self.game_screen_gray = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY)
        return cv2.resize(
            self.game_screen_gray, (84, 84), interpolation=cv2.INTER_AREA
        ) / 255.0

    def _preprocess_observation(self, observation):
        """Append the new frame to the stack and return the stacked frames.

        Returns an array of shape (1, 16, 84, 84) — the 16 most recent
        frames with a leading batch axis.
        """
        self.frame_stack.append(self._to_frame(observation))
        return np.stack(self.frame_stack, axis=0)[np.newaxis, ...]

    def _get_win_or_lose_bonus(self):
        """Terminal bonus: +300 if the player kept more health, else -300."""
        if self.prev_player_health > self.prev_opponent_health:
            # print('You win!')
            return 300
        else:
            # print('You lose!')
            return -300

    def _get_reward(self):
        """Dense reward from the change in both health bars this step.

        Reads the cached grayscale screen, so _to_frame (via
        _preprocess_observation or reset) must have run for this frame.
        Also updates prev_player_health / prev_opponent_health.
        """
        # Fixed screen regions of the two health bars.
        player_health_area = self.game_screen_gray[15:20, 32:120]
        opponent_health_area = self.game_screen_gray[15:20, 136:224]
        # Health fraction = share of bright (>129) pixels in the bar region.
        player_health = np.sum(player_health_area > 129) / player_health_area.size
        opponent_health = np.sum(opponent_health_area > 129) / opponent_health_area.size
        player_health_diff = self.prev_player_health - player_health
        opponent_health_diff = self.prev_opponent_health - opponent_health
        # Reward damage dealt minus damage taken; max magnitude is 200.
        reward = (opponent_health_diff - player_health_diff) * 200
        # Penalty for each step without dealing damage (-12 points/second).
        if opponent_health_diff <= 0.0000001:
            reward -= 12.0 / 60.0
        self.prev_player_health = player_health
        self.prev_opponent_health = opponent_health
        # print("Player health: %f Opponent health:%f" % (player_health, opponent_health))
        return reward

    def reset(self):
        """Reset the env and fill the stack with 16 copies of the first frame.

        Returns stacked frames of shape (1, 16, 84, 84).
        """
        observation = self.env.reset()
        self.prev_player_health = 1.0
        self.prev_opponent_health = 1.0
        self.frame_stack.clear()
        # BUG FIX: the original appended _preprocess_observation(...)[0] — a
        # (1, 84, 84) stacked array — back into the deque alongside plain
        # (84, 84) frames, so np.stack over the mixed shapes raised
        # ValueError. Preprocess once and duplicate the single frame instead.
        frame = self._to_frame(observation)
        for _ in range(16):
            self.frame_stack.append(frame)
        return np.stack(self.frame_stack, axis=0)[np.newaxis, ...]

    def step(self, action):
        """Step the env with `action`; return (obs, custom_reward, done, info)."""
        # BUG FIX: the original stepped with self.env.action_space.sample(),
        # ignoring `action` entirely, so the agent's policy had no effect on
        # the environment (the intended call was left commented out above it).
        observation, _reward, _done, info = self.env.step(action)
        custom_reward = self._get_reward()
        custom_reward -= 1.0 / 60.0  # time penalty: -1 point per second
        custom_done = False
        # Round is over when either health bar reads (near) empty.
        if self.prev_player_health <= 0.00001 or self.prev_opponent_health <= 0.00001:
            custom_reward += self._get_win_or_lose_bonus()
            if not self.testing:
                custom_done = True
            else:
                # During testing, keep playing across rounds; reset the
                # tracked health so the next round starts from full bars.
                self.prev_player_health = 1.0
                self.prev_opponent_health = 1.0
        return self._preprocess_observation(observation), custom_reward, custom_done, info