"""Exploratory analysis of the processed price data.

The ``# %%`` markers delimit cells (percent-format script). Every cell after
"Load data" relies on the ``data`` frame created there, and the plots use its
``Close`` and ``Volume`` columns.
"""

# %% Import required packages
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from pandas.plotting import autocorrelation_plot

# %% Load data
# Assume that we have a CSV file in the processed data folder.
# The path is relative, so it is resolved against the current working
# directory of the interpreter/kernel.
data = pd.read_csv('../data/processed/processed_data.csv')

# %% Display the first few rows of the data
# This gives a snapshot of the data and its structure.
print(data.head())

# %% Display data summary
# This gives statistical details of the data like mean, standard deviation, etc.
print(data.describe())

# %% Check for missing values
# Missing values can affect the performance of the model and should be
# handled appropriately.
print(data.isnull().sum())

# %% Visualize the closing prices
# Plotting the data helps in understanding the trend and seasonality in the data.
# NOTE(review): the x-axis is the frame's row index, not a date column --
# confirm whether the CSV carries a date column that should be used instead.
plt.figure(figsize=(14, 7))
plt.plot(data['Close'])
plt.title('Closing Prices Over Time')
plt.xlabel('Time')
plt.ylabel('Price')
plt.show()

# %% Display the distribution of daily returns
# This can give an idea about the volatility of the stock.
# pct_change() leaves a NaN in the first row (no previous value), hence dropna().
daily_returns = data['Close'].pct_change().dropna()
sns.histplot(daily_returns, bins=50, kde=True)
plt.title('Distribution of Daily Returns')
plt.show()

# %% Display correlation between different features
# Correlation can indicate if there are any dependent relationships between
# the variables.
# Only numeric columns are correlated: DataFrame.corr() in pandas >= 2.0 raises
# when a non-numeric column (for example a date stored as text) is present.
correlation_matrix = data.select_dtypes(include='number').corr()
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')
plt.title('Correlation Matrix of Features')
plt.show()

# %% Display a scatter plot of volume vs closing price
# Scatter plot can show the relationship between two variables.
plt.scatter(data['Volume'], data['Close'])
plt.title('Volume vs Closing Price')
plt.xlabel('Volume')
plt.ylabel('Closing Price')
plt.show()

# %% Display time series decomposition if applicable
# Time series decomposition can help in understanding the trend, seasonality,
# and noise in the data.
# Please note that this requires statsmodels library. It is imported in this
# cell rather than at the top so the other cells do not depend on it.
from statsmodels.tsa.seasonal import seasonal_decompose  # noqa: E402

# Assume that period is 252 for trading days in a year.
period = 252
close = data['Close']
# seasonal_decompose needs at least two complete cycles of observations, so the
# decomposition is only attempted when the series is long enough.
if len(close) >= 2 * period:
    decomposed = seasonal_decompose(close, model='multiplicative', period=period)
    decomposed.plot()
    plt.show()
else:
    print(
        f'Skipping decomposition: need at least {2 * period} rows, '
        f'got {len(close)}.'
    )

# %% Display moving averages
# Moving averages can help in understanding the trend in the data over
# different time periods. The three plot() calls run before plt.show(), so
# they are drawn together and share one legend.
data['Close'].rolling(window=7).mean().plot(label='7 Day Average')
data['Close'].rolling(window=30).mean().plot(label='30 Day Average')
data['Close'].rolling(window=90).mean().plot(label='90 Day Average')
plt.legend()
plt.title('Moving Averages of Closing Prices')
plt.show()

# %% Display Autocorrelation plot
# Autocorrelation can show if the data is random or if there is a pattern.
# autocorrelation_plot comes from pandas.plotting (imported in the first cell).
autocorrelation_plot(data['Close'])
plt.title('Autocorrelation of Closing Prices')
plt.show()