77 lines
2.8 KiB
Python
77 lines
2.8 KiB
Python
# %% Import required packages
|
|
import pandas as pd
|
|
import matplotlib.pyplot as plt
|
|
import seaborn as sns
|
|
|
|
# %% Load data
# Read the processed dataset into a DataFrame used by every later cell.
# NOTE(review): the path is relative, so it resolves against the current
# working directory -- confirm where this script is expected to be run from.
PROCESSED_CSV = '../data/processed/processed_data.csv'
data = pd.read_csv(PROCESSED_CSV)
|
|
|
|
# %% Display the first few rows of the data
# A quick look at the leading rows shows the columns and how values are laid out.
first_rows = data.head()
print(first_rows)
|
|
|
|
# %% Display data summary
# Descriptive statistics (count, mean, standard deviation, quartiles, ...)
# as reported by DataFrame.describe().
summary_stats = data.describe()
print(summary_stats)
|
|
|
|
# %% Check for missing values
# Count the missing entries in each column; gaps can hurt model performance
# and should be handled before modelling.
missing_per_column = data.isna().sum()
print(missing_per_column)
|
|
|
|
# %% Visualize the closing prices
# A line plot of the 'Close' column makes trend and seasonality visible.
# The series is drawn against the DataFrame index.
price_fig = plt.figure(figsize=(14, 7))
price_ax = price_fig.gca()
price_ax.plot(data['Close'])
price_ax.set_title('Closing Prices Over Time')
price_ax.set_xlabel('Time')
price_ax.set_ylabel('Price')
plt.show()
|
|
|
|
# %% Display the distribution of daily returns
# The spread of row-over-row percentage changes in 'Close' gives an idea of
# the stock's volatility.
close_pct_change = data['Close'].pct_change()
# The first row has no predecessor, so its change is NaN and is dropped.
daily_returns = close_pct_change.dropna()
sns.histplot(daily_returns, bins=50, kde=True)
plt.title('Distribution of Daily Returns')
plt.show()
|
|
|
|
# %% Display correlation between different features
# Correlation can indicate dependent relationships between the variables.
#
# Only numeric/boolean columns are correlated. On pandas 2.0+ DataFrame.corr()
# no longer drops non-numeric columns silently and raises instead, so any
# text-like column in the CSV would break this cell. Selecting the columns
# explicitly works on both old and new pandas versions.
numeric_columns = data.select_dtypes(include=['number', 'bool'])
correlation_matrix = numeric_columns.corr()
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')
plt.title('Correlation Matrix of Features')
plt.show()
|
|
|
|
# %% Display a scatter plot of volume vs closing price
# Each point is one row: traded volume on the x-axis, closing price on the
# y-axis, to show how the two variables relate.
scatter_ax = plt.gca()
scatter_ax.scatter(data['Volume'], data['Close'])
scatter_ax.set_title('Volume vs Closing Price')
scatter_ax.set_xlabel('Volume')
scatter_ax.set_ylabel('Closing Price')
plt.show()
|
|
|
|
# %% Display time series decomposition if applicable
# Time series decomposition helps in understanding the trend, seasonality,
# and noise in the data. This cell requires the statsmodels library.
from statsmodels.tsa.seasonal import seasonal_decompose

# Assume that period is 252 for trading days in a year.
TRADING_DAYS_PER_YEAR = 252

# seasonal_decompose raises ValueError unless the series covers at least two
# complete cycles, so the decomposition only runs when enough rows exist.
close_row_count = len(data['Close'])
required_row_count = 2 * TRADING_DAYS_PER_YEAR
if close_row_count >= required_row_count:
    decomposed = seasonal_decompose(
        data['Close'], model='multiplicative', period=TRADING_DAYS_PER_YEAR
    )
    decomposed.plot()
    plt.show()
else:
    print(
        'Skipping seasonal decomposition: need at least '
        f'{required_row_count} rows, found {close_row_count}.'
    )
|
|
|
|
# %% Display moving averages
# Rolling means over short, medium and long windows smooth the closing price
# and expose the trend at different time scales.
# NOTE(review): windows count rows, so "Day" in the labels assumes one row
# per day -- confirm against the dataset.
moving_average_specs = (
    (7, '7 Day Average'),
    (30, '30 Day Average'),
    (90, '90 Day Average'),
)
for window_size, legend_label in moving_average_specs:
    rolling_mean = data['Close'].rolling(window=window_size).mean()
    rolling_mean.plot(label=legend_label)
plt.legend()
plt.title('Moving Averages of Closing Prices')
plt.show()
|
|
|
|
# %% Display Autocorrelation plot
# The autocorrelation plot shows whether the closing prices look random or
# follow a pattern across lags.
# The plotting helper ships with pandas (pandas.plotting).
from pandas.plotting import autocorrelation_plot

# autocorrelation_plot returns the Axes it drew on; the title is set there.
autocorr_ax = autocorrelation_plot(data['Close'])
autocorr_ax.set_title('Autocorrelation of Closing Prices')
plt.show()
|