# %% Import required packages
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# %% Load data
# Assume that we have a CSV file in the processed data folder.
# NOTE(review): the code below reads 'Close' and 'Volume' columns from this
# file; any other columns are only used by the summary/correlation cells.
data = pd.read_csv('./data/processed/processed_data.csv')

# %% Display the first few rows of the data
print(data.head())

# %% Display data summary
print(data.describe())

# %% Check for missing values
print(data.isnull().sum())

# %% Visualize the closing prices
# The x-axis is the DataFrame index (row order), since no date column is
# parsed or set as the index when loading.
plt.figure(figsize=(14, 7))
plt.plot(data['Close'])
plt.title('Closing Prices Over Time')
plt.xlabel('Time')
plt.ylabel('Price')
plt.show()

# %% Display the distribution of daily returns
# pct_change() leaves a NaN in the first row (no previous value), so drop it
# before plotting.
daily_returns = data['Close'].pct_change().dropna()
sns.histplot(daily_returns, bins=50, kde=True)
plt.title('Distribution of Daily Returns')
plt.show()

# %% Display correlation between different features
# Restrict to numeric columns first: DataFrame.corr() raises on non-numeric
# columns (e.g. a date or ticker string) in pandas >= 2.0, and select_dtypes
# works the same way on older pandas versions too.
numeric_data = data.select_dtypes(include='number')
correlation_matrix = numeric_data.corr()
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')
plt.title('Correlation Matrix of Features')
plt.show()

# %% Display a scatter plot of volume vs closing price
plt.scatter(data['Volume'], data['Close'])
plt.title('Volume vs Closing Price')
plt.xlabel('Volume')
plt.ylabel('Closing Price')
plt.show()

# %% Display time series decomposition if applicable
# You might need to install and import statsmodels for this.
# The period of 252 assumes roughly 252 trading days in a year.
# from statsmodels.tsa.seasonal import seasonal_decompose
# decomposed = seasonal_decompose(data['Close'], model='multiplicative', period=252)
# decomposed.plot()
# plt.show()