52 lines
1.5 KiB
Python
52 lines
1.5 KiB
Python
# %% Import required packages
|
|
import pandas as pd
|
|
import matplotlib.pyplot as plt
|
|
import seaborn as sns
|
|
|
|
# %% Load data
# The processed dataset is expected as a CSV at this path, which is relative
# to the current working directory.
processed_csv_path = './data/processed/processed_data.csv'
data = pd.read_csv(processed_csv_path)
|
|
|
|
# %% Display the first few rows of the data
# Quick look at the leading rows to check the columns loaded as expected.
first_rows = data.head()
print(first_rows)
|
|
|
|
# %% Display data summary
# Descriptive statistics as produced by DataFrame.describe().
summary_stats = data.describe()
print(summary_stats)
|
|
|
|
# %% Check for missing values
# Count of missing entries per column.
missing_per_column = data.isna().sum()
print(missing_per_column)
|
|
|
|
# %% Visualize the closing prices
# Line plot of the 'Close' column against the DataFrame index.
price_fig, price_ax = plt.subplots(figsize=(14, 7))
price_ax.plot(data['Close'])
price_ax.set_title('Closing Prices Over Time')
price_ax.set_xlabel('Time')
price_ax.set_ylabel('Price')
plt.show()
|
|
|
|
# %% Display the distribution of daily returns
# Row-over-row percentage change of 'Close'; the first row has no predecessor
# and yields NaN, which dropna() removes.
close_prices = data['Close']
daily_returns = close_prices.pct_change().dropna()

# Histogram with a KDE overlay; histplot returns the Axes it drew on.
returns_ax = sns.histplot(daily_returns, bins=50, kde=True)
returns_ax.set_title('Distribution of Daily Returns')
plt.show()
|
|
|
|
# %% Display correlation between different features
# Correlate numeric (and boolean) columns only. Since pandas 2.0,
# DataFrame.corr() defaults to numeric_only=False and raises if the frame
# contains a non-numeric column (for example a date or ticker column, if
# present). Selecting the columns explicitly works on old and new pandas alike.
numeric_data = data.select_dtypes(include=['number', 'bool'])
correlation_matrix = numeric_data.corr()

# Annotated heatmap of the pairwise correlation coefficients.
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')
plt.title('Correlation Matrix of Features')
plt.show()
|
|
|
|
# %% Display a scatter plot of volume vs closing price
# One point per row: 'Volume' on the x-axis, 'Close' on the y-axis.
volume_values = data['Volume']
close_values = data['Close']

scatter_fig, scatter_ax = plt.subplots()
scatter_ax.scatter(volume_values, close_values)
scatter_ax.set(
    title='Volume vs Closing Price',
    xlabel='Volume',
    ylabel='Closing Price',
)
plt.show()
|
|
|
|
# %% Display time series decomposition if applicable
# Optional cell: requires the statsmodels package. Uncomment the lines below to run it.
# from statsmodels.tsa.seasonal import seasonal_decompose
# period=252 assumes roughly 252 trading days per year. The multiplicative model
# needs strictly positive values, and the series must span at least two full
# periods (2 * 252 = 504 observations).
# decomposed = seasonal_decompose(data['Close'], model='multiplicative', period=252)
# decomposed.plot()
# plt.show()
|