"""Tests for the collection and preprocessing steps exposed by ``src.data``."""

import pandas as pd
import numpy as np

from src.data import data_collection, data_preprocessing

# Single source of truth for the dataset that both tests load.
# NOTE(review): collect_data is handed a remote URL, so these tests
# presumably require network access -- confirm before running in offline CI.
TESLA_STOCK_URL = (
    "https://github.com/plotly/datasets/raw/master/tesla-stock-price.csv"
)

# Columns the raw dataset is expected to expose (order is not checked).
RAW_COLUMNS = {"Date", "Open", "High", "Low", "Close", "Volume"}

# Columns expected after preprocessing. Kept as a tuple so the dtype checks
# below run in a fixed, predictable order; every one must be numeric.
PROCESSED_COLUMNS = (
    "Processed_Open",
    "Processed_High",
    "Processed_Low",
    "Processed_Close",
    "Processed_Volume",
)


def test_collect_data():
    """Check that ``collect_data`` returns a non-empty DataFrame with the raw columns."""
    data = data_collection.collect_data(TESLA_STOCK_URL)

    assert isinstance(data, pd.DataFrame), (
        f"expected a DataFrame, got {type(data).__name__}"
    )
    assert data.shape[0] > 0, "collected DataFrame is empty"
    assert set(data.columns) == RAW_COLUMNS, (
        f"unexpected raw columns: {sorted(map(str, data.columns))}"
    )


def test_preprocess_data():
    """Check that ``preprocess_data`` yields a non-empty, all-numeric DataFrame.

    The raw frame is obtained through ``collect_data`` and passed straight to
    ``preprocess_data``; the result must contain exactly the columns listed in
    ``PROCESSED_COLUMNS`` and each of them must have a numeric dtype.
    """
    raw_data = data_collection.collect_data(TESLA_STOCK_URL)
    processed_data = data_preprocessing.preprocess_data(raw_data)

    assert isinstance(processed_data, pd.DataFrame), (
        f"expected a DataFrame, got {type(processed_data).__name__}"
    )
    assert processed_data.shape[0] > 0, "processed DataFrame is empty"
    assert set(processed_data.columns) == set(PROCESSED_COLUMNS), (
        f"unexpected processed columns: {sorted(map(str, processed_data.columns))}"
    )

    # One loop instead of five copy-pasted asserts; the message names the
    # offending column so a failure is immediately actionable.
    for column in PROCESSED_COLUMNS:
        assert pd.api.types.is_numeric_dtype(processed_data[column]), (
            f"column {column!r} is not numeric "
            f"(dtype={processed_data[column].dtype})"
        )


if __name__ == "__main__":
    # Allow running the checks directly, without a test runner.
    test_collect_data()
    test_preprocess_data()