diff --git a/.gitignore b/.gitignore index 7e144ff..ad14e1b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,7 @@ +# data +*.pdf +*.csv + # ---> JupyterNotebooks # gitignore template for Jupyter Notebooks # website: http://jupyter.org/ diff --git a/README.md b/README.md index 602f56e..2a340df 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,41 @@ -# personal-finance-database +# Personal Finance Database +This project aims to manage personal finance data using a Google Sheets-based database. It provides tools to import data from PDF statements, perform basic personal financial analysis, and visualize the data. + +## Features + +- **Data Ingestion:** Import data from Google Sheets and PDF statements. +- **Data Processing:** Clean and preprocess the data for further analysis. +- **Data Analysis:** Conduct basic personal financial analysis. +- **Data Visualization:** Create simple and understandable visualizations of financial data. + +## Installation + +Clone this repository to your local machine. + +```bash +git clone https://github.com/your-github-username/personal-finance-database.git +``` +Navigate to the project directory. + +```bash +cd personal-finance-database +``` +Install the necessary packages. + +```bash +pip install -r requirements.txt +``` +## Usage +[Provide instructions on how to use the project. This should include code examples and explanations of the different components.] + +## Contributing +Pull requests are welcome. For major changes, please open an issue first to discuss what you would like to change. + +## License +[Choose an open source license and mention it here.] + +## Contact +[Your Name] - [Your Email] - [Your LinkedIn/GitHub/Twitter etc.] + +Remember to replace the placeholders with your actual details. You should also include a more detailed explanation in the "Usage" section once you have more functionality built out. 
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..e69de29
diff --git a/src/__init__.py b/src/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/analysis_visualization/__init__.py b/src/analysis_visualization/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/analysis_visualization/data_visualization.py b/src/analysis_visualization/data_visualization.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/analysis_visualization/financial_analysis.py b/src/analysis_visualization/financial_analysis.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/data_ingestion/__init__.py b/src/data_ingestion/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/data_ingestion/google_sheets_api.py b/src/data_ingestion/google_sheets_api.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/data_ingestion/pdf_parser.py b/src/data_ingestion/pdf_parser.py
new file mode 100644
index 0000000..6295cef
--- /dev/null
+++ b/src/data_ingestion/pdf_parser.py
@@ -0,0 +1,27 @@
+from pdfminer.high_level import extract_text
+import tabula
+
+
+class PdfParser:
+    """Thin wrapper around pdfminer and tabula for a single PDF file.
+
+    The path given at construction time is stored as-is and handed to the
+    underlying library on each call; nothing is opened or validated up front.
+    """
+
+    def __init__(self, file_path):
+        """Remember ``file_path`` for later extraction calls."""
+        self.file_path = file_path
+
+    def extract_text(self):
+        """Return the text pdfminer extracts from the PDF.
+
+        The call below resolves to the module-level ``extract_text`` imported
+        from ``pdfminer.high_level``, not to this method: names defined in a
+        class body are not visible from inside its methods.
+        """
+        return extract_text(self.file_path)
+
+    def extract_table(self):
+        """Return the result of ``tabula.read_pdf`` run over all pages."""
+        return tabula.read_pdf(self.file_path, pages='all')
diff --git a/src/data_processing/__init__.py b/src/data_processing/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/data_processing/data_cleaner.py b/src/data_processing/data_cleaner.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_analysis_visualization.py b/tests/test_analysis_visualization.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_data_ingestion.py b/tests/test_data_ingestion.py
new file mode 100644
index 
0000000..377fbd6
--- /dev/null
+++ b/tests/test_data_ingestion.py
@@ -0,0 +1,27 @@
+import pandas as pd
+import pytest
+
+# The parser module lives at src/data_ingestion/pdf_parser.py, so import it
+# through its package path rather than as a top-level ``pdf_parser`` module.
+from src.data_ingestion.pdf_parser import PdfParser
+
+# NOTE(review): 'path_to_test_pdf' looks like a placeholder -- confirm that a
+# fixture PDF actually exists at this path before relying on these tests.
+
+
+def test_pdf_parser_text_extraction():
+    """extract_text() should return a non-empty string."""
+    pdf_parser = PdfParser('path_to_test_pdf')
+    text = pdf_parser.extract_text()
+
+    assert isinstance(text, str)
+    assert len(text) > 0
+
+
+def test_pdf_parser_table_extraction():
+    """extract_table() should return a list containing only DataFrames."""
+    pdf_parser = PdfParser('path_to_test_pdf')
+    tables = pdf_parser.extract_table()
+
+    assert isinstance(tables, list)
+    assert all(isinstance(table, pd.DataFrame) for table in tables)
diff --git a/tests/test_data_processing.py b/tests/test_data_processing.py
new file mode 100644
index 0000000..e69de29