31 lines
1.8 KiB
Python
31 lines
1.8 KiB
Python
import pandas as pd
|
|
import numpy as np
|
|
from src.data import data_collection, data_preprocessing
|
|
|
|
def test_collect_data():
    """Check that ``collect_data`` returns a non-empty DataFrame with the expected columns.

    Passes the Tesla stock price CSV URL to ``data_collection.collect_data``
    and inspects the type, row count and column labels of the result.
    """
    source_url = "https://github.com/plotly/datasets/raw/master/tesla-stock-price.csv"
    expected_columns = {'Date', 'Open', 'High', 'Low', 'Close', 'Volume'}

    frame = data_collection.collect_data(source_url)

    # The collector must hand back a DataFrame ...
    assert isinstance(frame, pd.DataFrame)
    # ... holding at least one row ...
    assert len(frame) > 0
    # ... whose column labels are exactly the expected set (order is ignored).
    assert set(frame.columns) == expected_columns
|
|
|
|
def test_preprocess_data():
    """Check that ``preprocess_data`` returns a non-empty DataFrame of numeric columns.

    The raw frame obtained from ``data_collection.collect_data`` is fed to
    ``data_preprocessing.preprocess_data``; the result must be a DataFrame
    with at least one row, exactly the five ``Processed_*`` columns, and a
    numeric dtype in each of them.
    """
    source_url = "https://github.com/plotly/datasets/raw/master/tesla-stock-price.csv"
    # Kept as an ordered tuple so the dtype checks run in a fixed sequence.
    expected_columns = (
        'Processed_Open',
        'Processed_High',
        'Processed_Low',
        'Processed_Close',
        'Processed_Volume',
    )

    result = data_preprocessing.preprocess_data(
        data_collection.collect_data(source_url)
    )

    assert isinstance(result, pd.DataFrame)
    # The processed frame must not be empty.
    assert len(result) > 0
    # Column labels must match the expected set exactly (order is ignored).
    assert set(result.columns) == set(expected_columns)
    # Every processed column must carry a numeric dtype.
    for column in expected_columns:
        assert pd.api.types.is_numeric_dtype(result[column])
|
|
|
|
if __name__ == "__main__":
    # Run the checks in definition order when the module is executed as a script.
    for check in (test_collect_data, test_preprocess_data):
        check()
|