# auto-trading/notebooks/data_exploration.py

# 52 lines
# 1.5 KiB
# Python

# %% Import required packages
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# %% Load data
# The processed dataset is expected as a CSV in the processed-data folder.
# The path is relative, so it resolves against the current working directory.
processed_csv_path = './data/processed/processed_data.csv'
data = pd.read_csv(filepath_or_buffer=processed_csv_path)
# %% Display the first few rows of the data
# head() with no argument returns the leading rows (five by default).
first_rows = data.head()
print(first_rows)
# %% Display data summary
# describe() produces per-column summary statistics.
summary_stats = data.describe()
print(summary_stats)
# %% Check for missing values
# Number of null entries in each column.
null_counts = data.isna().sum()
print(null_counts)
# %% Visualize the closing prices
# Line chart of the 'Close' column on a 14x7-inch figure. No explicit x values
# are passed, so the x-axis is the DataFrame's index.
price_fig, price_ax = plt.subplots(figsize=(14, 7))
price_ax.plot(data['Close'])
price_ax.set_title('Closing Prices Over Time')
price_ax.set_xlabel('Time')
price_ax.set_ylabel('Price')
plt.show()
# %% Display the distribution of daily returns
# Row-over-row fractional change of 'Close'. The first row has no predecessor
# and comes out as NaN, so NaN entries are dropped before plotting.
close_changes = data['Close'].pct_change()
daily_returns = close_changes.dropna()
# 50-bin histogram with a kernel density estimate overlaid.
sns.histplot(data=daily_returns, bins=50, kde=True)
plt.title(label='Distribution of Daily Returns')
plt.show()
# %% Display correlation between different features
# Restrict to numeric columns before correlating. Since pandas 2.0,
# DataFrame.corr() no longer drops non-numeric columns silently; it raises if
# the frame contains any (e.g. a date or ticker column read in as strings).
# select_dtypes works on every pandas version, unlike corr(numeric_only=True).
numeric_features = data.select_dtypes(include='number')
correlation_matrix = numeric_features.corr()
# Annotated heatmap of the pairwise correlation coefficients.
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')
plt.title('Correlation Matrix of Features')
plt.show()
# %% Display a scatter plot of volume vs closing price
# One point per row: 'Volume' on the x-axis against 'Close' on the y-axis.
volume_series = data['Volume']
close_series = data['Close']
plt.scatter(x=volume_series, y=close_series)
plt.title(label='Volume vs Closing Price')
plt.xlabel(xlabel='Volume')
plt.ylabel(ylabel='Closing Price')
plt.show()
# %% Display time series decomposition if applicable
# This cell needs statsmodels, which is not imported above: install it, then uncomment the lines below.
# from statsmodels.tsa.seasonal import seasonal_decompose
# period=252 assumes one row per trading day (roughly 252 trading days in a year); adjust it if the data is sampled differently.
# decomposed = seasonal_decompose(data['Close'], model='multiplicative', period=252)
# decomposed.plot()
# plt.show()