First version, not tested yet.
This commit is contained in:
parent
005ddfed49
commit
81e272763e
73
README.md
73
README.md
@ -1,3 +1,72 @@
|
|||||||
# transformer-trading
|
# AutoTradingSystem
|
||||||
|
|
||||||
|
Trial project for deep learning trading model with ChatGPT-4
|
||||||
|
|
||||||
|
|
||||||
|
This project is an automatic trading system based on a Transformer and Reinforcement Learning hybrid model.
|
||||||
|
|
||||||
|
Trial project for deep learning trading model with ChatGPT-4
|
||||||
|
|
||||||
|
## Setup
|
||||||
|
|
||||||
|
1. Install the required packages: `pip install -r requirements.txt`
|
||||||
|
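2. Run the main script from the project root: `python -m src.main` (the modules import each other as `src.*`, so the project root must be on the import path)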
|
||||||
|
|
||||||
|
## Structure
|
||||||
|
|
||||||
|
trading-system/
|
||||||
|
│
|
||||||
|
├── data/
|
||||||
|
│ ├── raw/ # Raw data files
|
||||||
|
│ └── processed/ # Processed data files
|
||||||
|
│
|
||||||
|
├── models/ # Trained models and model checkpoints
|
||||||
|
│
|
||||||
|
├── logs/ # Training logs, evaluation results, etc.
|
||||||
|
│
|
||||||
|
├── notebooks/ # Jupyter notebooks
|
||||||
|
│ ├── data_exploration.py
|
||||||
|
│ ├── model_training.py
|
||||||
|
│ ├── model_evaluation.py
|
||||||
|
│ └── demo.py
|
||||||
|
│
|
||||||
|
├── src/
|
||||||
|
│ ├── data/ # Data-related modules
|
||||||
|
│ │ ├── __init__.py
|
||||||
|
│ │ ├── data_collection.py
|
||||||
|
│ │ └── data_preprocessing.py
|
||||||
|
│ │
|
||||||
|
│ ├── models/ # Model-related modules
|
||||||
|
│ │ ├── __init__.py
|
||||||
|
│ │ ├── transformer_model.py
|
||||||
|
│ │ ├── rl_model.py
|
||||||
|
│ │ └── trading_agent.py
|
||||||
|
│ │
|
||||||
|
│ ├── training/ # Training-related modules
|
||||||
|
│ │ ├── __init__.py
|
||||||
|
│ │ └── train.py
|
||||||
|
│ │
|
||||||
|
│ ├── evaluation/ # Evaluation-related modules
|
||||||
|
│ │ ├── __init__.py
|
||||||
|
│ │ └── evaluate.py
|
||||||
|
│ │
|
||||||
|
│ ├── utils/ # Utility modules
|
||||||
|
│ │ ├── __init__.py
|
||||||
|
│ │ ├── metrics.py
|
||||||
|
│ │ └── utils.py
|
||||||
|
│ │
|
||||||
|
│ └── main.py # Main entry point for the project
|
||||||
|
│
|
||||||
|
├── tests/ # Test-related modules
|
||||||
|
│ ├── __init__.py
|
||||||
|
│ ├── test_data_collection.py
|
||||||
|
│ ├── test_data_preprocessing.py
|
||||||
|
│ ├── test_transformer_model.py
|
||||||
|
│ ├── test_rl_model.py
|
||||||
|
│ ├── test_trading_model.py
|
||||||
|
│ └── test_metrics.py
|
||||||
|
│
|
||||||
|
├── requirements.txt # Required Python packages
|
||||||
|
│
|
||||||
|
└── README.md # Project documentation
|
||||||
|
|
||||||
Trial project for deep learning trading model with ChatGPT-4
|
|
51
notebooks/data_exploration.py
Normal file
51
notebooks/data_exploration.py
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
# %% Import required packages
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# %% Load data
# Assume that we have a CSV file in the processed data folder
data = pd.read_csv('./data/processed/processed_data.csv')

# %% Display the first few rows of the data
print(data.head())

# %% Display data summary
print(data.describe())

# %% Check for missing values
print(data.isnull().sum())

# %% Visualize the closing prices
plt.figure(figsize=(14, 7))
plt.plot(data['Close'])
plt.title('Closing Prices Over Time')
plt.xlabel('Time')
plt.ylabel('Price')
plt.show()

# %% Display the distribution of daily returns
daily_returns = data['Close'].pct_change().dropna()
sns.histplot(daily_returns, bins=50, kde=True)
plt.title('Distribution of Daily Returns')
plt.show()

# %% Display correlation between different features
# Correlate numeric columns only: a non-numeric column (for example a date
# string read from the CSV) makes DataFrame.corr() raise on pandas >= 2.0.
correlation_matrix = data.select_dtypes(include='number').corr()
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')
plt.title('Correlation Matrix of Features')
plt.show()

# %% Display a scatter plot of volume vs closing price
plt.scatter(data['Volume'], data['Close'])
plt.title('Volume vs Closing Price')
plt.xlabel('Volume')
plt.ylabel('Closing Price')
plt.show()

# %% Display time series decomposition if applicable
# You might need to install and import statsmodels for this
# from statsmodels.tsa.seasonal import seasonal_decompose
# decomposed = seasonal_decompose(data['Close'], model='multiplicative', period=252)  # Assume that period is 252 for trading days in a year
# decomposed.plot()
# plt.show()
|
0
notebooks/demo.py
Normal file
0
notebooks/demo.py
Normal file
35
notebooks/model_evaluation.py
Normal file
35
notebooks/model_evaluation.py
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
# %% Import required packages
import os

import torch
from src.models.transformer_model import TransformerModel
from src.models.rl_model import RLModel
from src.models.trading_agent import TradingAgent
from src.evaluation.evaluate import evaluate_trading_agent
from src.data.data_preprocessing import load_processed_data
# NOTE(review): this commit's evaluate.py defines evaluate_transformer_model and
# evaluate_rl_model only, and data_preprocessing.py defines preprocess_data
# only - confirm where evaluate_trading_agent / load_processed_data come from.

# %% Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# %% Load processed data
data = load_processed_data('./data/processed/processed_data.csv')

# %% Initialize models
# NOTE(review): RLModel.__init__ in this commit requires an `env` argument, and
# neither wrapper class defines .to() - confirm the intended constructors.
transformer_model = TransformerModel().to(device)
rl_model = RLModel().to(device)
trading_agent = TradingAgent(transformer_model, rl_model)

# %% Load model weights
# map_location lets a checkpoint saved on a GPU machine load on a CPU-only one.
transformer_model.load_state_dict(
    torch.load('./models/transformer_model.pth', map_location=device)
)
rl_model.load_state_dict(torch.load('./models/rl_model.pth', map_location=device))

# %% Evaluate the trading agent
trading_agent_results = evaluate_trading_agent(trading_agent, data)

# %% Display evaluation results
print("Total Profit: ", trading_agent_results['total_profit'])
print("Total Trades Made: ", trading_agent_results['total_trades'])
print("Successful Trades: ", trading_agent_results['successful_trades'])

# %% Save evaluation results
# Create the log directory first so a fresh checkout does not fail on open().
os.makedirs('./logs', exist_ok=True)
with open('./logs/evaluation_results.txt', 'w') as f:
    for key, value in trading_agent_results.items():
        f.write(f'{key}: {value}\n')
|
46
notebooks/model_training.py
Normal file
46
notebooks/model_training.py
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
# %% Import required packages
import os

import torch
from src.models.transformer_model import TransformerModel
from src.models.rl_model import RLModel
from src.models.trading_agent import TradingAgent
from src.training.train import train_transformer, train_rl
from src.data.data_preprocessing import load_processed_data
# NOTE(review): this commit's train.py defines train_transformer_model(model,
# train_data, epochs, learning_rate) and train_rl_model(rl_model, env,
# timesteps); the names and call shapes used below need reconciling with it.

# %% Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# %% Load processed data
data = load_processed_data('./data/processed/processed_data.csv')

# %% Initialize models
# NOTE(review): RLModel.__init__ in this commit requires an `env` argument -
# confirm the intended constructor call.
transformer_model = TransformerModel().to(device)
rl_model = RLModel().to(device)
trading_agent = TradingAgent(transformer_model, rl_model)

# Checkpoints are written under ./models; create it so torch.save cannot fail
# on a fresh checkout.
os.makedirs('./models', exist_ok=True)

# %% Train Transformer Model
# Set the appropriate hyperparameters
transformer_model_hyperparams = {
    "epochs": 10,
    "batch_size": 32,
    "learning_rate": 0.001,
}
train_transformer(transformer_model, data, transformer_model_hyperparams)

# %% Save Transformer Model
torch.save(transformer_model.state_dict(), './models/transformer_model.pth')

# %% Train RL Model
# Set the appropriate hyperparameters
rl_model_hyperparams = {
    "epochs": 500,
    "batch_size": 32,
    "learning_rate": 0.001,
    "gamma": 0.99,  # discount factor
    "epsilon_start": 1.0,  # exploration rate at the beginning
    "epsilon_end": 0.01,  # minimum exploration rate
    "epsilon_decay": 0.995,  # exponential decay rate for exploration probability
}
train_rl(trading_agent, data, rl_model_hyperparams)

# %% Save RL Model
torch.save(rl_model.state_dict(), './models/rl_model.pth')
|
8
requirements.txt
Normal file
8
requirements.txt
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
numpy
|
||||||
|
pandas
|
||||||
|
torch
|
||||||
|
transformers
|
||||||
|
gym
|
||||||
|
stable-baselines3
|
||||||
|
yfinance
|
||||||
|
matplotlib
|
0
src/data/__init__.py
Normal file
0
src/data/__init__.py
Normal file
19
src/data/data_collection.py
Normal file
19
src/data/data_collection.py
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
import yfinance as yf
|
||||||
|
|
||||||
|
def collect_data(tickers, start_date, end_date):
    """
    Collects data for the given tickers and date range.

    Parameters:
    tickers (list of str or str): Ticker symbols. A single symbol given as a
        plain string is treated as a one-element list instead of being
        iterated character by character.
    start_date (str): Start date in format 'YYYY-MM-DD'.
    end_date (str): End date in format 'YYYY-MM-DD'.

    Returns:
    dict: Dictionary where the keys are ticker symbols and the values are the
        objects returned by yf.download for that symbol.
    """
    if isinstance(tickers, str):
        tickers = [tickers]

    return {
        ticker: yf.download(ticker, start=start_date, end=end_date)
        for ticker in tickers
    }
|
16
src/data/data_preprocessing.py
Normal file
16
src/data/data_preprocessing.py
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
from sklearn.preprocessing import MinMaxScaler
|
||||||
|
|
||||||
|
def preprocess_data(data):
    """
    Preprocesses the collected data.

    The input frames are left untouched; scaling is applied to copies.

    Parameters:
    data (dict): The data collected from collect_data function. Keys are tickers and values are pandas DataFrames.

    Returns:
    dict: New dictionary with the same keys, where each DataFrame is a copy
        whose 'Close' prices have been scaled to be between 0 and 1.
    """
    processed = {}
    for ticker, frame in data.items():
        scaled_frame = frame.copy()
        # Each ticker is min-max scaled independently (fit on its own prices).
        # fit_transform returns an (n, 1) array; ravel() gives the 1-D column.
        scaled_close = MinMaxScaler().fit_transform(scaled_frame[['Close']])
        scaled_frame['Close'] = scaled_close.ravel()
        processed[ticker] = scaled_frame
    return processed
|
0
src/evaluation/__init__.py
Normal file
0
src/evaluation/__init__.py
Normal file
68
src/evaluation/evaluate.py
Normal file
68
src/evaluation/evaluate.py
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
import torch
|
||||||
|
from torch.utils.data import DataLoader
|
||||||
|
from src.models.transformer_model import TransformerModel
|
||||||
|
from src.models.rl_model import RLModel
|
||||||
|
from src.data.data_preprocessing import Dataset
|
||||||
|
|
||||||
|
def evaluate_transformer_model(transformer_model, test_data):
    """
    Evaluate the Transformer model.

    Runs the model over test_data in batches of 32 without gradient tracking,
    prints the mean per-batch cross-entropy loss and returns it.

    Parameters:
    transformer_model (TransformerModel): The transformer model to evaluate.
    test_data (Dataset): The test data, yielding (inputs, targets) pairs.

    Returns:
    float: The average loss over all batches.

    Raises:
    ValueError: If test_data yields no batches.
    """
    # Create data loader
    dataloader = DataLoader(test_data, batch_size=32, shuffle=False)

    # Set device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    transformer_model.to(device)

    # Define loss function
    criterion = torch.nn.CrossEntropyLoss()

    # Evaluation mode
    transformer_model.eval()

    # Evaluation loop
    total_loss = 0.0
    num_batches = 0
    with torch.no_grad():
        for inputs, targets in dataloader:
            inputs, targets = inputs.to(device), targets.to(device)

            # Forward pass
            outputs = transformer_model(inputs)

            # Compute loss
            total_loss += criterion(outputs, targets).item()
            num_batches += 1

    if num_batches == 0:
        # The original divided by zero here; fail with an explicit message.
        raise ValueError('test_data is empty; cannot compute an average loss')

    # Compute average loss
    average_loss = total_loss / num_batches
    print(f'Average loss: {average_loss}')
    return average_loss
|
||||||
|
|
||||||
|
def evaluate_rl_model(rl_model, env, episodes):
    """
    Evaluate the RL model.

    Plays the given number of episodes, printing each episode's total reward,
    then prints and returns the average total reward per episode.

    Parameters:
    rl_model (RLModel): The RL model to evaluate.
    env (gym.Env): The Gym environment to use for evaluation. Its step() is
        unpacked as the 4-tuple (state, reward, done, info).
    episodes (int): The number of episodes to evaluate for.

    Returns:
    float: The average total reward per episode.

    Raises:
    ValueError: If episodes is not positive.
    """
    if episodes <= 0:
        raise ValueError('episodes must be a positive integer')

    total_rewards = 0
    for i_episode in range(episodes):
        state = env.reset()
        done = False
        episode_reward = 0
        while not done:
            action = rl_model.predict(state)
            state, reward, done, _ = env.step(action)
            episode_reward += reward
        total_rewards += episode_reward

        # Report the whole episode's reward, not just the final step's.
        print(f'Episode: {i_episode+1}, Reward: {episode_reward}')

    # Compute average reward
    average_reward = total_rewards / episodes
    print(f'Average reward: {average_reward}')
    return average_reward
|
40
src/main.py
Normal file
40
src/main.py
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
import argparse
|
||||||
|
from src.data import data_collection, data_preprocessing
|
||||||
|
from src.models import transformer_model, rl_model, trading_model
|
||||||
|
from src.training import train
|
||||||
|
from src.evaluation import evaluate
|
||||||
|
from src.utils import utils, metrics
|
||||||
|
|
||||||
|
def main(args):
    """
    Run the end-to-end pipeline: seed, collect, preprocess, build, train, evaluate.

    Parameters:
    args (argparse.Namespace): Parsed options; reads seed, data_source,
        transformer_config, rl_config and training_config.
    """
    # Reproducibility first, before any stage draws random numbers.
    utils.seed_everything(args.seed)

    # Data: collect, then preprocess.
    # NOTE(review): collect_data in this commit takes (tickers, start_date,
    # end_date); this one-argument call needs confirming.
    processed_data = data_preprocessing.preprocess_data(
        data_collection.collect_data(args.data_source)
    )

    # Models: the trading agent wraps the Transformer and the RL model.
    # NOTE(review): this commit ships src/models/trading_agent.py; confirm the
    # `trading_model` module imported by this file exists.
    transformer = transformer_model.TransformerModel(args.transformer_config)
    rl_agent = rl_model.RLModel(args.rl_config)
    trading_agent = trading_model.TradingAgent(transformer, rl_agent)

    # Training, followed by evaluation on the same processed data.
    # NOTE(review): train.train / evaluate.evaluate are not among the functions
    # this commit defines in train.py / evaluate.py - confirm.
    train.train(trading_agent, processed_data, args.training_config)
    evaluation_results = evaluate.evaluate(trading_agent, processed_data, metrics)

    print(evaluation_results)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # One (flag, type, default, help) row per command-line option.
    cli_options = (
        ("--seed", int, 42, "Random seed for reproducibility."),
        ("--data_source", str, "data/raw/", "Data source for the trading data."),
        ("--transformer_config", str, "config/transformer.json", "Path to the Transformer model configuration file."),
        ("--rl_config", str, "config/rl.json", "Path to the RL model configuration file."),
        ("--training_config", str, "config/training.json", "Path to the training configuration file."),
    )

    parser = argparse.ArgumentParser()
    for flag, value_type, default_value, help_text in cli_options:
        parser.add_argument(flag, type=value_type, default=default_value, help=help_text)

    main(parser.parse_args())
|
0
src/models/__init__.py
Normal file
0
src/models/__init__.py
Normal file
35
src/models/rl_model.py
Normal file
35
src/models/rl_model.py
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
from stable_baselines3 import PPO
# DummyVecEnv lives in the vec_env package; `stable_baselines3.common.envs`
# does not export it, so the previous import failed at module load.
from stable_baselines3.common.vec_env import DummyVecEnv


class RLModel:
    """PPO agent (MlpPolicy) trained on a single vectorized Gym environment."""

    def __init__(self, env):
        """
        Initializes the RLModel with a given environment.

        Parameters:
        env (gym.Env): The Gym environment to use for training.
        """
        self.env = DummyVecEnv([lambda: env])  # The environment must be vectorized
        self.model = PPO('MlpPolicy', self.env, verbose=1)

    def train(self, timesteps):
        """
        Trains the model for a given number of timesteps.

        Parameters:
        timesteps (int): The number of timesteps to train for.
        """
        self.model.learn(total_timesteps=timesteps)

    def predict(self, obs):
        """
        Makes a prediction based on the given observations.

        Parameters:
        obs (np.array): The observations to base the prediction on.

        Returns:
        np.array: The action predicted by the model.
        """
        # PPO.predict also returns recurrent state, which is discarded here.
        action, _states = self.model.predict(obs)
        return action
|
33
src/models/trading_agent.py
Normal file
33
src/models/trading_agent.py
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
import numpy as np


class TradingAgent:
    """Combines text embeddings with market observations to pick an action."""

    def __init__(self, transformer_model, rl_model):
        """
        Initializes the TradingAgent with the Transformer and RL models.

        Parameters:
        transformer_model (TransformerModel): The Transformer model to use for predictions.
        rl_model (RLModel): The RL model to use for predictions.
        """
        self.transformer_model = transformer_model
        self.rl_model = rl_model

    def make_decision(self, text, obs):
        """
        Makes a trading decision based on the given text and observations.

        Parameters:
        text (str): The text to feed to the Transformer model.
        obs (np.array): The observations to feed to the RL model.

        Returns:
        The value returned by rl_model.predict for the combined input
        (intended encoding: 0: hold, 1: buy, 2: sell).
        """
        # Get embeddings from transformer model
        embeddings = self.transformer_model.get_embeddings(text)

        # Combine embeddings with observations.
        # .cpu() makes the conversion work when the encoder runs on a GPU.
        # Both parts are flattened because np.concatenate rejects inputs of
        # different rank, such as a (batch, tokens, hidden) state and a 1-D
        # observation vector.
        # NOTE(review): the flattened length grows with the token count -
        # confirm the RL policy's expected observation size.
        flat_embeddings = embeddings.detach().cpu().numpy().reshape(-1)
        flat_obs = np.asarray(obs).reshape(-1)
        combined_input = np.concatenate((flat_embeddings, flat_obs))

        # Get action from RL model
        action = self.rl_model.predict(combined_input)

        return action
|
20
src/models/transformer_model.py
Normal file
20
src/models/transformer_model.py
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
from transformers import BertModel, BertTokenizer
|
||||||
|
|
||||||
|
class TransformerModel:
    """
    Wrapper pairing a pretrained BERT encoder with its tokenizer.

    The delegating methods (to, train, eval, parameters, state_dict,
    load_state_dict) forward to the wrapped encoder, so callers can treat the
    wrapper the way the training and evaluation code treats a torch module.
    """

    def __init__(self, pretrained_model_name='bert-base-uncased'):
        self.tokenizer = BertTokenizer.from_pretrained(pretrained_model_name)
        self.model = BertModel.from_pretrained(pretrained_model_name)

    def to(self, device):
        """Moves the encoder to `device` and returns this wrapper."""
        self.model.to(device)
        return self

    def train(self, mode=True):
        """Sets the encoder's training mode and returns this wrapper."""
        self.model.train(mode)
        return self

    def eval(self):
        """Puts the encoder in evaluation mode and returns this wrapper."""
        self.model.eval()
        return self

    def parameters(self):
        """Returns the encoder's parameters."""
        return self.model.parameters()

    def state_dict(self):
        """Returns the encoder's state dict."""
        return self.model.state_dict()

    def load_state_dict(self, state_dict):
        """Loads `state_dict` into the encoder."""
        return self.model.load_state_dict(state_dict)

    def get_embeddings(self, text):
        """
        Returns the embeddings generated by the transformer model.

        Parameters:
        text (str): Text to get embeddings for.

        Returns:
        torch.Tensor: The encoder's last hidden state for the input text.
        """
        inputs = self.tokenizer(text, return_tensors='pt', truncation=True, padding=True)
        # The tokenizer builds CPU tensors; move them next to the encoder's
        # weights so the forward pass still works after .to(device).
        device = next(self.model.parameters()).device
        inputs = {name: tensor.to(device) for name, tensor in inputs.items()}
        outputs = self.model(**inputs)
        return outputs.last_hidden_state
|
0
src/training/__init__.py
Normal file
0
src/training/__init__.py
Normal file
57
src/training/train.py
Normal file
57
src/training/train.py
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
import torch
|
||||||
|
from torch.utils.data import DataLoader
|
||||||
|
from src.models.transformer_model import TransformerModel
|
||||||
|
from src.models.rl_model import RLModel
|
||||||
|
from src.data.data_preprocessing import Dataset
|
||||||
|
|
||||||
|
def train_transformer_model(transformer_model, train_data, epochs, learning_rate, batch_size=32):
    """
    Train the Transformer model.

    Optimizes the model with Adam against a cross-entropy loss, printing the
    loss every 100 batches.

    Parameters:
    transformer_model (TransformerModel): The transformer model to train.
    train_data (Dataset): The training data, yielding (inputs, targets) pairs.
    epochs (int): The number of epochs to train for.
    learning_rate (float): The learning rate for the optimizer.
    batch_size (int): Number of samples per batch. Defaults to 32.
    """
    # Create data loader
    dataloader = DataLoader(train_data, batch_size=batch_size, shuffle=True)

    # Set device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    transformer_model.to(device)

    # Define loss function and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(transformer_model.parameters(), lr=learning_rate)

    # Switch to training mode: an earlier evaluation pass may have left the
    # model in eval mode.
    transformer_model.train()

    # Training loop
    for epoch in range(epochs):
        for i, (inputs, targets) in enumerate(dataloader):
            inputs, targets = inputs.to(device), targets.to(device)

            # Forward pass
            outputs = transformer_model(inputs)

            # Compute loss
            loss = criterion(outputs, targets)

            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Print loss every 100 batches
            if i % 100 == 0:
                print(f'Epoch [{epoch+1}/{epochs}], Step [{i+1}/{len(dataloader)}], Loss: {loss.item()}')
|
||||||
|
|
||||||
|
def train_rl_model(rl_model, env, timesteps):
    """
    Run the RL model's own training routine for a fixed step budget.

    Parameters:
    rl_model (RLModel): Model whose train() method is invoked.
    env (gym.Env): Accepted for interface symmetry; not used by this function.
    timesteps (int): Step budget forwarded to rl_model.train.
    """
    # The model already holds its environment, so only the budget is passed on.
    rl_model.train(timesteps)
|
0
src/utils/__init__.py
Normal file
0
src/utils/__init__.py
Normal file
36
src/utils/metrics.py
Normal file
36
src/utils/metrics.py
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
|
||||||
|
|
||||||
|
def compute_classification_metrics(y_true, y_pred):
    """
    Score predictions with accuracy and weighted precision, recall and F1.

    Parameters:
    y_true (np.array): Ground truth labels.
    y_pred (np.array): Predicted labels.

    Returns:
    dict: Scores under the keys 'accuracy', 'precision', 'recall' and 'f1'.
    """
    # Shared settings for the three averaged scores: weight classes by
    # support, and score 0 rather than warn when a class has no samples.
    averaged = {'average': 'weighted', 'zero_division': 0}

    return {
        'accuracy': accuracy_score(y_true, y_pred),
        'precision': precision_score(y_true, y_pred, **averaged),
        'recall': recall_score(y_true, y_pred, **averaged),
        'f1': f1_score(y_true, y_pred, **averaged),
    }
|
||||||
|
|
||||||
|
def compute_reward_metrics(total_rewards, num_episodes):
    """
    Compute reward metrics.

    Parameters:
    total_rewards (list): Total rewards per episode. Any iterable is accepted.
    num_episodes (int): Total number of episodes (the divisor for the average).

    Returns:
    dict: A dictionary with 'average_reward', 'max_reward' and 'min_reward'.

    Raises:
    ValueError: If total_rewards is empty or num_episodes is not positive.
    """
    # Materialize once: the rewards are traversed three times, which would
    # exhaust a generator after the first pass.
    rewards = list(total_rewards)
    if not rewards:
        raise ValueError('total_rewards must contain at least one episode reward')
    if num_episodes <= 0:
        raise ValueError('num_episodes must be a positive integer')

    return {
        'average_reward': sum(rewards) / num_episodes,
        'max_reward': max(rewards),
        'min_reward': min(rewards),
    }
|
52
src/utils/utils.py
Normal file
52
src/utils/utils.py
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
import numpy as np
|
||||||
|
import torch
|
||||||
|
from sklearn.preprocessing import MinMaxScaler
|
||||||
|
|
||||||
|
def seed_everything(seed):
    """
    Set a seed for all random number generators to ensure reproducibility.

    Seeds Python's `random` module, NumPy's global generator and PyTorch.

    Parameters:
    seed (int): The seed to use.
    """
    # Imported locally so the module's import block stays untouched.
    import random

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
|
||||||
|
|
||||||
|
def scale_data(data):
    """
    Min-max scale the data and hand back the fitted scaler with it.

    Parameters:
    data (np.array): The data to scale.

    Returns:
    tuple: (scaled_data, scaler) - the scaled array and the fitted
        MinMaxScaler that produced it.
    """
    fitted_scaler = MinMaxScaler().fit(data)
    return fitted_scaler.transform(data), fitted_scaler
|
||||||
|
|
||||||
|
def save_model(model, path):
    """
    Write a PyTorch model's state dict to disk.

    Parameters:
    model (torch.nn.Module): Model whose state_dict() is serialized.
    path (str): Destination passed straight to torch.save.
    """
    weights = model.state_dict()
    torch.save(weights, path)
|
||||||
|
|
||||||
|
def load_model(model, path, map_location='cpu'):
    """
    Load a PyTorch model.

    Parameters:
    model (torch.nn.Module): The model to load.
    path (str): The path from where to load the model.
    map_location: Where torch.load places the checkpoint tensors. Defaults to
        'cpu' so a checkpoint saved on a GPU machine also loads on a CPU-only
        one; load_state_dict then copies the values into the model's own
        parameters.

    Returns:
    torch.nn.Module: The loaded model.
    """
    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    state_dict = torch.load(path, map_location=map_location)
    model.load_state_dict(state_dict)
    # Returned in eval mode, as before.
    model.eval()
    return model
|
0
tests/__init__.py
Normal file
0
tests/__init__.py
Normal file
13
tests/test_data_collection.py
Normal file
13
tests/test_data_collection.py
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
import pytest
|
||||||
|
from src.data import data_collection
|
||||||
|
|
||||||
|
def test_collect_data(monkeypatch):
    """collect_data returns one downloaded frame per requested ticker."""
    import pandas as pd

    expected_columns = ['Open', 'High', 'Low', 'Close', 'Volume']
    fake_frame = pd.DataFrame(
        [[1.0, 1.1, 0.9, 1.0, 1000], [2.0, 2.1, 1.9, 2.0, 2000]],
        columns=expected_columns,
    )
    requested = []

    def fake_download(ticker, start=None, end=None):
        # Stand-in for yf.download so the test never touches the network.
        requested.append((ticker, start, end))
        return fake_frame.copy()

    monkeypatch.setattr(data_collection.yf, 'download', fake_download)

    # Test the collect_data function
    data = data_collection.collect_data(['AAA', 'BBB'], '2020-01-01', '2020-01-03')

    # Check that every ticker was requested with the given date range
    assert requested == [
        ('AAA', '2020-01-01', '2020-01-03'),
        ('BBB', '2020-01-01', '2020-01-03'),
    ]
    assert set(data) == {'AAA', 'BBB'}

    for frame in data.values():
        # Check that the data has the expected shape
        assert frame.shape == fake_frame.shape

        # Check that the data has the expected columns
        assert all(column in frame.columns for column in expected_columns)
|
26
tests/test_data_preprocessing.py
Normal file
26
tests/test_data_preprocessing.py
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
import pytest
|
||||||
|
import pandas as pd
|
||||||
|
from src.data import data_preprocessing
|
||||||
|
|
||||||
|
def test_preprocess_data():
    """preprocess_data scales 'Close' into [0, 1] and leaves other columns alone."""
    # create a mock data
    raw_data = pd.DataFrame({
        'Open': [1.0, 2.0, 3.0, 4.0, 5.0],
        'High': [1.1, 2.1, 3.1, 4.1, 5.1],
        'Low': [0.9, 1.9, 2.9, 3.9, 4.9],
        'Close': [1.0, 2.0, 3.0, 4.0, 5.0],
        'Volume': [1000, 2000, 3000, 4000, 5000]
    })
    # Snapshot taken before the call, so the checks hold whether or not
    # preprocess_data modifies its input.
    expected_shape = raw_data.shape
    untouched = raw_data[['Open', 'High', 'Low', 'Volume']].copy()

    # perform preprocessing (the function expects a {ticker: DataFrame} dict)
    processed_data = data_preprocessing.preprocess_data({'TEST': raw_data})
    frame = processed_data['TEST']

    # check that the data has the expected columns
    expected_columns = ['Open', 'High', 'Low', 'Close', 'Volume']
    assert all(column in frame.columns for column in expected_columns)

    # check the shape of the data
    assert frame.shape == expected_shape

    # check that the closing prices are min-max scaled into [0, 1]
    assert frame['Close'].min() == pytest.approx(0.0)
    assert frame['Close'].max() == pytest.approx(1.0)
    assert frame['Close'].between(0.0, 1.0).all()

    # check that only 'Close' was rescaled
    assert frame[['Open', 'High', 'Low', 'Volume']].equals(untouched)
|
0
tests/test_metrics.py
Normal file
0
tests/test_metrics.py
Normal file
0
tests/test_rl_model.py
Normal file
0
tests/test_rl_model.py
Normal file
0
tests/test_trading_model.py
Normal file
0
tests/test_trading_model.py
Normal file
34
tests/test_transformer_model.py
Normal file
34
tests/test_transformer_model.py
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
import pytest
|
||||||
|
import torch
|
||||||
|
from src.models import transformer_model
|
||||||
|
|
||||||
|
def test_transformer_model():
    """get_embeddings returns one finite hidden vector per token."""
    # Instantiate the model (loads the pretrained 'bert-base-uncased' weights)
    model = transformer_model.TransformerModel()

    # Forward pass through the wrapper's public entry point
    outputs = model.get_embeddings('Stocks rallied after the earnings report.')

    # Check output dimensions: (batch, tokens, hidden_size)
    assert outputs.dim() == 3
    assert outputs.size(0) == 1
    assert outputs.size(2) == model.model.config.hidden_size

    # Check that the embeddings contain no NaN or infinite values
    assert torch.isfinite(outputs).all()
|
||||||
|
|
||||||
|
def test_model_save_load(tmp_path):
    """A saved encoder and tokenizer reload with identical parameters."""
    # Instantiate the model
    model = transformer_model.TransformerModel()

    # Save the encoder and its tokenizer into a temporary directory
    save_dir = str(tmp_path / 'test_model')
    model.model.save_pretrained(save_dir)
    model.tokenizer.save_pretrained(save_dir)

    # Load the model back through the wrapper's constructor
    loaded_model = transformer_model.TransformerModel(save_dir)

    # Check that the loaded model has the same parameters as the original model
    original_params = list(model.model.parameters())
    loaded_params = list(loaded_model.model.parameters())
    assert len(original_params) == len(loaded_params)
    for p1, p2 in zip(original_params, loaded_params):
        assert torch.all(p1.eq(p2))
|
Loading…
Reference in New Issue
Block a user