2023-06-25 15:50:17 +00:00
|
|
|
import pytest
|
|
|
|
from pdf_parser import PdfParser
|
|
|
|
|
2023-07-30 17:45:41 +00:00
|
|
|
|
2023-06-25 15:50:17 +00:00
|
|
|
def test_pdf_parser_text_extraction():
    """Check that ``PdfParser.extract_text`` yields a non-empty string.

    The parser is built from the literal path ``"path_to_test_pdf"``.
    NOTE(review): that looks like a placeholder rather than a real fixture
    path -- confirm the file exists where the test suite is run.
    """
    extracted = PdfParser("path_to_test_pdf").extract_text()

    # Type first, then content: the length check only makes sense for a str.
    assert isinstance(extracted, str)
    assert len(extracted) > 0
|
|
|
|
|
2023-07-30 17:45:41 +00:00
|
|
|
|
2023-06-25 15:50:17 +00:00
|
|
|
def test_pdf_parser_table_extraction():
    """Check that ``PdfParser.extract_table`` returns a list of DataFrames.

    The parser is built from the literal path ``"path_to_test_pdf"``.
    NOTE(review): that looks like a placeholder rather than a real fixture
    path -- confirm the file exists where the test suite is run.
    """
    # Imported here because the module never brings ``pd`` into scope; without
    # it the DataFrame check below raises NameError on any non-empty result.
    import pandas as pd

    pdf_parser = PdfParser("path_to_test_pdf")
    tables = pdf_parser.extract_table()

    assert isinstance(tables, list)
    # An empty list passes this check vacuously (all() of nothing is True).
    assert all(isinstance(table, pd.DataFrame) for table in tables)
|