personal-finance-database/notebooks/pdf_data_extraction_test.ipynb

1523 lines
58 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "b878d67d-c5a8-4817-99f3-4179edfd535a",
"metadata": {
"ExecuteTime": {
"end_time": "2023-07-05T16:32:58.644004+00:00",
"start_time": "2023-07-05T16:32:58.438814+00:00"
},
"noteable": {
"cell_type": "code",
"output_collection_id": "accfea1e-ca89-444f-9133-f39b76f10c8e"
}
},
"outputs": [],
"source": [
"from pdfminer.high_level import extract_text\n",
"import tabula\n",
"\n",
"class PdfParser:\n",
"    \"\"\"Thin wrapper around pdfminer and tabula for a single PDF file.\n",
"\n",
"    Args:\n",
"        file_path: Location of the PDF. It is stored as given and handed\n",
"            unchanged to the extraction libraries.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, file_path):\n",
"        self.file_path = file_path\n",
"\n",
"    def extract_text(self):\n",
"        \"\"\"Return the result of pdfminer's ``extract_text`` for the file.\"\"\"\n",
"        # Inside a method body the bare name resolves to the module-level\n",
"        # function imported from pdfminer.high_level, not to this method.\n",
"        return extract_text(self.file_path)\n",
"\n",
"    def extract_table(self, pages='all', **read_pdf_kwargs):\n",
"        \"\"\"Return the result of ``tabula.read_pdf`` for the file.\n",
"\n",
"        Args:\n",
"            pages: Page selection forwarded to tabula. Defaults to 'all',\n",
"                which is what this method always used before the parameter\n",
"                existed.\n",
"            **read_pdf_kwargs: Extra keyword arguments passed through\n",
"                untouched to ``tabula.read_pdf``.\n",
"\n",
"        Returns:\n",
"            Whatever ``tabula.read_pdf`` returns; the notebook iterates over\n",
"            it as a sequence of tables.\n",
"        \"\"\"\n",
"        return tabula.read_pdf(self.file_path, pages=pages, **read_pdf_kwargs)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "202c2c7a-a22f-468d-a67e-e10638fd96a2",
"metadata": {
"noteable": {
"cell_type": "markdown"
}
},
"source": [
"Now, let's test the `PdfParser` class with a sample PDF file. Set `pdf_file_path` in the next cell to the path of your own PDF file."
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "76db2644",
"metadata": {},
"outputs": [],
"source": [
"# Forward slashes are accepted as path separators on Windows and POSIX alike,\n",
"# so the notebook is not tied to one operating system.\n",
"pdf_file_path = \"../data/raw/chris' statements/savings/sc/sc_savings_eStatement_202306.pdf\""
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "e91a1abd-de76-4a12-adc5-fec5fd1e4301",
"metadata": {
"noteable": {
"cell_type": "code"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Consolidated Statement\n",
"\n",
"Statement Date \n",
"\n",
": 17 Jun 2023\n",
"\n",
"Page \n",
"\n",
": 1 of 3\n",
"\n",
"WONG CHI YAN \n",
"UNIT B 3/F BLK 1\n",
"REGENTVILLE\n",
"8 WO MUN ST\n",
"FANLING NT\n",
"HONG KONG\n",
"\n",
"YOUR FINANCIAL STATUS \n",
"\n",
"AS AT STATEMENT DATE (IN HKD EQUIVALENT) \n",
"\n",
"1. DEPOSITS \n",
"\n",
"HKD Deposits \n",
"CNY Deposits \n",
"FX Deposits \n",
"\n",
"2. INVESTMENTS \n",
"\n",
"Securities \n",
"Investment Funds \n",
"Gold \n",
"Currency Trading \n",
"Debt Securities \n",
"Equity Linked Investment \n",
"\n",
"3. PERSONAL LOANS \n",
"\n",
"Instalment Loans \n",
"Overdrafts \n",
"Credit Cards \n",
"4. WEALTHPRO \n",
"\n",
"207 \n",
"\n",
"5. INVESTPOWER \n",
"\n",
"109,214.90\n",
"1,136.87\n",
"778.10\n",
"\n",
"0.00\n",
"0.00\n",
"0.00\n",
"0.00\n",
"0.00 \n",
"0.00 \n",
"\n",
"0.00\n",
"0.00\n",
"0.00\n",
"\n",
"111,129.87\n",
"\n",
"0.00\n",
"\n",
"0.00 \n",
"\n",
"0.00 \n",
"\n",
"0.00 \n",
"\n",
"PREMIUM PROGRAMME \n",
"\n",
"6. NET POSITION \n",
"\n",
" (1+23+4+5)\n",
"\n",
"111,129.87 \n",
"\n",
"7. INSURANCE \n",
"\n",
"8. MORTGAGES \n",
"\n",
"9. MPF/ORSO \n",
"\n",
"0.00 \n",
"\n",
"0.00 \n",
"\n",
"0.00 \n",
"\n",
"YOUR ACCOUNT BALANCES \n",
"\n",
"Account Type\n",
"\n",
"Account Number\n",
"\n",
"Currency Balance\n",
"\n",
"HKD Balance or equivalent\n",
"\n",
"INTEGRATED S/A (Bonus Payroll)\n",
"\n",
"36881189271\n",
"\n",
"Integrated Account Current \n",
"\n",
"36881189270\n",
"\n",
"HKD \n",
"\n",
"CNY \n",
"\n",
"USD \n",
"\n",
"TOTAL\n",
"\n",
"HKD \n",
"\n",
"TOTAL\n",
"\n",
"107,220.22 \n",
"\n",
"1,044.92 \n",
"\n",
"100.00 \n",
"\n",
"1,994.68 \n",
"\n",
"107,220.22 \n",
"\n",
"1,136.87 \n",
"\n",
"778.10 \n",
"\n",
"109,135.19 \n",
"\n",
"1,994.68 \n",
"\n",
"1,994.68 \n",
"\n",
"With immediate effect, Standard Chartered Bank (Hong Kong) Limited will need customer consent in order to access and utilize their depositrelated information for providing investment /\n",
"wealth management services to them. If you would like to make such arrangement, please visit any of our branches to arrange.\n",
"\n",
"With effect from August 2018, the Relationship Balance of your personal account will include the MPF account balance under Manulife Global Select (MPF) Scheme where you authorise and\n",
"consent the Bank to receive your MPF account information. To obtain the request form, please visit sc.com/hk/help/downloadcentre/ after 6 August 2018.\n",
"\n",
"2018\n",
"\n",
"8\n",
"\n",
"8\n",
"\n",
"6\n",
"\n",
"sc.com/hk/help/downloadcentre/\n",
"\n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
"\fConsolidated Statement \n",
"\n",
"PREMIUM PROGRAMME\n",
"\n",
"YOUR ACCOUNT ACTIVITIES \n",
"\n",
"INTEGRATED S/A (BONUS PAYROLL) \n",
"WONG CHI YAN\n",
"\n",
"Statement Date \n",
"\n",
":17 Jun 2023\n",
"\n",
"Page \n",
"\n",
": 2 of 3\n",
"\n",
"WONG CHI YAN\n",
"\n",
": 36881189271\n",
"\n",
"Date \n",
"\n",
"Description \n",
"\n",
"Deposit \n",
"\n",
"Withdrawal \n",
"\n",
"Balance\n",
"\n",
"HKD \n",
"\n",
"17 May\n",
"\n",
"BALANCE FROM PREVIOUS STATEMENT\n",
"\n",
"20 May\n",
"\n",
"25 May\n",
"\n",
"FPS QR MO S** P*** HK2305200062274383\n",
"\n",
"FPS QR HKT TEL / EYE 26404890000118A46\n",
"\n",
"FPS QR (HKT) NETVIGATO 68112310619222A62\n",
"\n",
"SCB ATM QR WDL 0078 1223\n",
"\n",
"29 May\n",
"\n",
"WONG K** L*** 002305282310196700\n",
"(Value Date As of 28 MAY)\n",
"\n",
"31 May\n",
"\n",
"CREDIT INTEREST NINT\n",
"\n",
"CLOUD LIGHT TECHNOLO\n",
"\n",
"01 Jun\n",
"\n",
"WONG PAK WING KATHY 002306010004322000\n",
"\n",
"TRANSFER WITHDRAWAL NTRF\n",
"INTERNET BANKING\n",
"\n",
"TRANSFER WITHDRAWAL NTRF\n",
"INTERNET BANKING\n",
"\n",
"05 Jun\n",
"\n",
"BILL PAYB01 5408047948319007\n",
"INTERNET BANKING\n",
"\n",
"TRANSFER WITHDRAWAL NTRF\n",
"INTERNET BANKING\n",
"\n",
"TRANSFER WITHDRAWAL NTRF\n",
"INTERNET BANKING\n",
"\n",
"06 Jun\n",
"\n",
"YU FUNG SHING 0605PAYC0101085281768\n",
"(Value Date As of 05 JUN)\n",
"\n",
"07 Jun\n",
"\n",
"12 Jun\n",
"\n",
"13 Jun\n",
"\n",
"17 Jun\n",
"\n",
"MAN YUK FAI 0606PAYC0101086133910\n",
"\n",
"PRUDENTIAL HONG KONG\n",
"\n",
"SCB ATM QR WDL 0437 1657\n",
"\n",
"TRANSFER WITHDRAWAL NTRF\n",
"INTERNET BANKING\n",
"\n",
"SCB ATM QR WDL 0093 1954\n",
"\n",
"TRANSFER WITHDRAWAL NTRF\n",
"INTERNET BANKING\n",
"\n",
"SCB ATM QR WDL 0093 1706\n",
"\n",
"17 Jun\n",
"\n",
"CLOSING BALANCE\n",
"\n",
"CNY \n",
"\n",
"17 May\n",
"\n",
"BALANCE FROM PREVIOUS STATEMENT\n",
"\n",
"31 May\n",
"\n",
"17 Jun\n",
"\n",
"CREDIT INTEREST NINT\n",
"\n",
"CLOSING BALANCE\n",
"\n",
"USD \n",
"\n",
"17 May\n",
"\n",
"BALANCE FROM PREVIOUS STATEMENT\n",
"\n",
"17 Jun\n",
"\n",
"CLOSING BALANCE\n",
"\n",
"71.51\n",
"\n",
"46,031.00\n",
"\n",
"12,008.00\n",
"\n",
"275.00\n",
"\n",
"275.00\n",
"\n",
"0.08\n",
"\n",
"285.00\n",
"\n",
"166.00\n",
"\n",
"416.00\n",
"\n",
"900.00\n",
"\n",
"12,000.00\n",
"\n",
"12,000.00\n",
"\n",
"115,572.92\n",
"\n",
"115,287.92\n",
"\n",
"115,121.92\n",
"\n",
"114,705.92\n",
"\n",
"113,805.92\n",
"\n",
"101,805.92\n",
"\n",
"101,877.43\n",
"\n",
"147,908.43\n",
"\n",
"159,916.43\n",
"\n",
"147,916.43\n",
"\n",
"12,008.00\n",
"\n",
"135,908.43\n",
"\n",
"29.86\n",
"\n",
"489.06\n",
"\n",
"135,878.57\n",
"\n",
"135,389.51\n",
"\n",
"24,226.11\n",
"\n",
"111,163.40\n",
"\n",
"793.18\n",
"\n",
"900.00\n",
"\n",
"500.00\n",
"\n",
"900.00\n",
"\n",
"500.00\n",
"\n",
"900.00\n",
"\n",
"111,438.40\n",
"\n",
"111,713.40\n",
"\n",
"110,920.22\n",
"\n",
"110,020.22\n",
"\n",
"109,520.22\n",
"\n",
"108,620.22\n",
"\n",
"108,120.22\n",
"\n",
"107,220.22\n",
"\n",
"107,220.22\n",
"\n",
"1,044.84\n",
"\n",
"1,044.92\n",
"\n",
"1,044.92\n",
"\n",
"100.00\n",
"\n",
"100.00\n",
"\n",
"1,994.68\n",
"\n",
"1,994.68\n",
"\n",
"INTEGRATED ACCOUNT CURRENT \n",
"WONG CHI YAN\n",
"\n",
": 36881189270\n",
"\n",
"Date \n",
"\n",
"Description \n",
"\n",
"Deposit \n",
"\n",
"Withdrawal \n",
"\n",
"Balance\n",
"\n",
"HKD \n",
"\n",
"17 May\n",
"\n",
"BALANCE FROM PREVIOUS STATEMENT\n",
"\n",
"17 Jun\n",
"\n",
"CLOSING BALANCE\n",
"\n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
"\fConsolidated Statement \n",
"\n",
"PREMIUM PROGRAMME\n",
"\n",
"YOUR AVERAGE RELATIONSHIP BALANCE \n",
"\n",
"Below is a summary of your past three months' relationship balance:\n",
"\n",
"Statement Date \n",
"\n",
":17 Jun 2023\n",
"\n",
"Page \n",
"\n",
": 3 of 3\n",
"\n",
"WONG CHI YAN\n",
"\n",
"Month\n",
"\n",
"Average daily relationship balance\n",
"\n",
"March 2023 \n",
"\n",
" 03 \n",
"\n",
"April 2023 \n",
"\n",
" 04 \n",
"\n",
"May 2023 \n",
"\n",
" 05 \n",
"\n",
"HKD \n",
"\n",
"HKD \n",
"\n",
"HKD \n",
"\n",
"115,653.41\n",
"\n",
"137,308.64\n",
"\n",
"151,906.70\n",
"\n",
"Please refer to \"Your Important Statement Information\" on the back of your statement for relationship balance calculation.\n",
"\n",
"For further information on minimum relationship balance requirement, please refer to the Service Charges booklet which can be obtained at any branches or our website at\n",
"sc.com/hk.\n",
"\n",
"sc.com/hk\n",
"\n",
"\fStatement Back Page 月結單背頁\n",
"\n",
"Click here to view the information on the back page of the statement.\n",
"\n",
"請 按 此 參 閱 月 結 單 背 頁 的 資 料 。 \n",
"\n",
"\f\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Got stderr: Jul 06, 2023 12:49:47 AM org.apache.pdfbox.pdmodel.font.FileSystemFontProvider loadDiskCache\n",
"WARNING: New fonts found, font cache will be re-built\n",
"Jul 06, 2023 12:49:47 AM org.apache.pdfbox.pdmodel.font.FileSystemFontProvider <init>\n",
"WARNING: Building on-disk font cache, this may take a while\n",
"Jul 06, 2023 12:49:48 AM org.apache.pdfbox.pdmodel.font.FileSystemFontProvider <init>\n",
"WARNING: Finished building on-disk font cache, found 502 fonts\n",
"Jul 06, 2023 12:49:48 AM org.apache.pdfbox.pdmodel.font.PDCIDFontType0 <init>\n",
"WARNING: Using fallback DFKaiShu-SB-Estd-BF for CID-keyed font MSungStd-Light\n",
"Jul 06, 2023 12:49:50 AM org.apache.pdfbox.pdmodel.font.PDCIDFontType0 <init>\n",
"WARNING: Using fallback DFKaiShu-SB-Estd-BF for CID-keyed font MSungStd-Light\n",
"Jul 06, 2023 12:49:50 AM org.apache.fontbox.cmap.CMap readCode\n",
"WARNING: Invalid character code sequence 0x00 (0000) 0x20 (0040) in CMap ETen-HOST-H\n",
"Jul 06, 2023 12:49:50 AM org.apache.fontbox.cmap.CMap readCode\n",
"WARNING: Invalid character code sequence 0x00 (0000) 0x20 (0040) in CMap ETen-HOST-H\n",
"Jul 06, 2023 12:49:50 AM org.apache.fontbox.cmap.CMap readCode\n",
"WARNING: Invalid character code sequence 0x00 (0000) 0x20 (0040) in CMap ETen-HOST-H\n",
"Jul 06, 2023 12:49:50 AM org.apache.fontbox.cmap.CMap readCode\n",
"WARNING: Invalid character code sequence 0x00 (0000) 0x20 (0040) in CMap ETen-HOST-H\n",
"Jul 06, 2023 12:49:50 AM org.apache.fontbox.cmap.CMap readCode\n",
"WARNING: Invalid character code sequence 0x00 (0000) 0x20 (0040) in CMap ETen-HOST-H\n",
"Jul 06, 2023 12:49:50 AM org.apache.fontbox.cmap.CMap readCode\n",
"WARNING: Invalid character code sequence 0x00 (0000) 0x20 (0040) in CMap ETen-HOST-H\n",
"Jul 06, 2023 12:49:50 AM org.apache.fontbox.cmap.CMap readCode\n",
"WARNING: Invalid character code sequence 0x00 (0000) 0x20 (0040) in CMap ETen-HOST-H\n",
"Jul 06, 2023 12:49:50 AM org.apache.fontbox.cmap.CMap readCode\n",
"WARNING: Invalid character code sequence 0x00 (0000) 0x20 (0040) in CMap ETen-HOST-H\n",
"Jul 06, 2023 12:49:50 AM org.apache.fontbox.cmap.CMap readCode\n",
"WARNING: Invalid character code sequence 0x00 (0000) 0x20 (0040) in CMap ETen-HOST-H\n",
"Jul 06, 2023 12:49:50 AM org.apache.fontbox.cmap.CMap readCode\n",
"WARNING: Invalid character code sequence 0x00 (0000) 0x20 (0040) in CMap ETen-HOST-H\n",
"Jul 06, 2023 12:49:50 AM org.apache.fontbox.cmap.CMap readCode\n",
"WARNING: Invalid character code sequence 0x00 (0000) 0x20 (0040) in CMap ETen-HOST-H\n",
"Jul 06, 2023 12:49:50 AM org.apache.fontbox.cmap.CMap readCode\n",
"WARNING: Invalid character code sequence 0x00 (0000) 0x20 (0040) in CMap ETen-HOST-H\n",
"Jul 06, 2023 12:49:50 AM org.apache.fontbox.cmap.CMap readCode\n",
"WARNING: Invalid character code sequence 0x00 (0000) 0x20 (0040) in CMap ETen-HOST-H\n",
"Jul 06, 2023 12:49:50 AM org.apache.fontbox.cmap.CMap readCode\n",
"WARNING: Invalid character code sequence 0x00 (0000) 0x20 (0040) in CMap ETen-HOST-H\n",
"Jul 06, 2023 12:49:50 AM org.apache.fontbox.cmap.CMap readCode\n",
"WARNING: Invalid character code sequence 0x00 (0000) 0x20 (0040) in CMap ETen-HOST-H\n",
"Jul 06, 2023 12:49:50 AM org.apache.fontbox.cmap.CMap readCode\n",
"WARNING: Invalid character code sequence 0x00 (0000) 0x20 (0040) in CMap ETen-HOST-H\n",
"Jul 06, 2023 12:49:50 AM org.apache.fontbox.cmap.CMap readCode\n",
"WARNING: Invalid character code sequence 0x00 (0000) 0x20 (0040) in CMap ETen-HOST-H\n",
"Jul 06, 2023 12:49:50 AM org.apache.fontbox.cmap.CMap readCode\n",
"WARNING: Invalid character code sequence 0x00 (0000) 0x20 (0040) in CMap ETen-HOST-H\n",
"Jul 06, 2023 12:49:50 AM org.apache.fontbox.cmap.CMap readCode\n",
"WARNING: Invalid character code sequence 0x00 (0000) 0x20 (0040) in CMap ETen-HOST-H\n",
"Jul 06, 2023 12:49:50 AM org.apache.fontbox.cmap.CMap readCode\n",
"WARNING: Invalid character code sequence 0x00 (0000) 0x20 (0040) in CMap ETen-HOST-H\n",
"Jul 06, 2023 12:49:50 AM org.apache.fontbox.cmap.CMap readCode\n",
"WARNING: Invalid character code sequence 0x00 (0000) 0x20 (0040) in CMap ETen-HOST-H\n",
"Jul 06, 2023 12:49:50 AM org.apache.pdfbox.rendering.CIDType0Glyph2D getPathForCharacterCode\n",
"WARNING: No glyph for 32 (CID 0000) in font MSungStd-Light\n",
"Jul 06, 2023 12:49:50 AM org.apache.fontbox.cmap.CMap readCode\n",
"WARNING: Invalid character code sequence 0x00 (0000) 0x20 (0040) in CMap ETen-HOST-H\n",
"Jul 06, 2023 12:49:50 AM org.apache.fontbox.cmap.CMap readCode\n",
"WARNING: Invalid character code sequence 0x00 (0000) 0x20 (0040) in CMap ETen-HOST-H\n",
"Jul 06, 2023 12:49:50 AM org.apache.fontbox.cmap.CMap readCode\n",
"WARNING: Invalid character code sequence 0x00 (0000) 0x20 (0040) in CMap ETen-HOST-H\n",
"Jul 06, 2023 12:49:50 AM org.apache.fontbox.cmap.CMap readCode\n",
"WARNING: Invalid character code sequence 0x00 (0000) 0x20 (0040) in CMap ETen-HOST-H\n",
"Jul 06, 2023 12:49:50 AM org.apache.fontbox.cmap.CMap readCode\n",
"WARNING: Invalid character code sequence 0x00 (0000) 0x20 (0040) in CMap ETen-HOST-H\n",
"Jul 06, 2023 12:49:50 AM org.apache.fontbox.cmap.CMap readCode\n",
"WARNING: Invalid character code sequence 0x00 (0000) 0x20 (0040) in CMap ETen-HOST-H\n",
"Jul 06, 2023 12:49:50 AM org.apache.fontbox.cmap.CMap readCode\n",
"WARNING: Invalid character code sequence 0x00 (0000) 0x20 (0040) in CMap ETen-HOST-H\n",
"Jul 06, 2023 12:49:50 AM org.apache.fontbox.cmap.CMap readCode\n",
"WARNING: Invalid character code sequence 0x00 (0000) 0x20 (0040) in CMap ETen-HOST-H\n",
"Jul 06, 2023 12:49:50 AM org.apache.fontbox.cmap.CMap readCode\n",
"WARNING: Invalid character code sequence 0x00 (0000) 0x20 (0040) in CMap ETen-HOST-H\n",
"Jul 06, 2023 12:49:50 AM org.apache.pdfbox.pdmodel.font.PDCIDFontType0 <init>\n",
"WARNING: Using fallback DFKaiShu-SB-Estd-BF for CID-keyed font MSungStd-Light\n",
"Jul 06, 2023 12:49:50 AM org.apache.fontbox.cmap.CMap readCode\n",
"WARNING: Invalid character code sequence 0x00 (0000) 0x20 (0040) in CMap ETen-HOST-H\n",
"Jul 06, 2023 12:49:50 AM org.apache.fontbox.cmap.CMap readCode\n",
"WARNING: Invalid character code sequence 0x00 (0000) 0x20 (0040) in CMap ETen-HOST-H\n",
"Jul 06, 2023 12:49:50 AM org.apache.fontbox.cmap.CMap readCode\n",
"WARNING: Invalid character code sequence 0x00 (0000) 0x20 (0040) in CMap ETen-HOST-H\n",
"Jul 06, 2023 12:49:50 AM org.apache.fontbox.cmap.CMap readCode\n",
"WARNING: Invalid character code sequence 0x00 (0000) 0x20 (0040) in CMap ETen-HOST-H\n",
"Jul 06, 2023 12:49:50 AM org.apache.fontbox.cmap.CMap readCode\n",
"WARNING: Invalid character code sequence 0x00 (0000) 0x20 (0040) in CMap ETen-HOST-H\n",
"Jul 06, 2023 12:49:50 AM org.apache.pdfbox.rendering.CIDType0Glyph2D getPathForCharacterCode\n",
"WARNING: No glyph for 32 (CID 0000) in font MSungStd-Light\n",
"Jul 06, 2023 12:49:50 AM org.apache.fontbox.cmap.CMap readCode\n",
"WARNING: Invalid character code sequence 0x00 (0000) 0x20 (0040) in CMap ETen-HOST-H\n",
"Jul 06, 2023 12:49:51 AM org.apache.pdfbox.pdmodel.font.PDCIDFontType0 <init>\n",
"WARNING: Using fallback DFKaiShu-SB-Estd-BF for CID-keyed font MSungStd-Light\n",
"Jul 06, 2023 12:49:51 AM org.apache.pdfbox.pdmodel.font.PDCIDFontType2 findFontOrSubstitute\n",
"WARNING: Using fallback font MingLiU for CID-keyed TrueType font MSung-Light\n",
"Jul 06, 2023 12:49:51 AM org.apache.pdfbox.pdmodel.font.PDCIDFontType2 findFontOrSubstitute\n",
"WARNING: Using fallback font MingLiU for CID-keyed TrueType font MSung-Light\n",
"\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Account Type</th>\n",
" <th>Account Number</th>\n",
" <th>Unnamed: 0</th>\n",
" <th>Currency Balance</th>\n",
" <th>HKD Balance or equivalent</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>戶口種類</td>\n",
" <td>戶口號碼</td>\n",
" <td>NaN</td>\n",
" <td>外幣結存</td>\n",
" <td>結存(以港幣為單位)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>INTEGRATED S/A (Bonus Payroll)</td>\n",
" <td>36881189271</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>綜合存款戶口—儲蓄(紅利出糧)</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>HKD</td>\n",
" <td>107,220.22</td>\n",
" <td>107,220.22</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>CNY</td>\n",
" <td>1,044.92</td>\n",
" <td>1,136.87</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>USD</td>\n",
" <td>100.00</td>\n",
" <td>778.10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>TOTAL</td>\n",
" <td>NaN</td>\n",
" <td>109,135.19</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Integrated Account Current 綜合存款戶口—支票</td>\n",
" <td>36881189270</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>HKD</td>\n",
" <td>1,994.68</td>\n",
" <td>1,994.68</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>TOTAL</td>\n",
" <td>NaN</td>\n",
" <td>1,994.68</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Account Type Account Number Unnamed: 0 \\\n",
"0 戶口種類 戶口號碼 NaN \n",
"1 INTEGRATED S/A (Bonus Payroll) 36881189271 NaN \n",
"2 綜合存款戶口—儲蓄(紅利出糧) NaN NaN \n",
"3 NaN NaN HKD \n",
"4 NaN NaN CNY \n",
"5 NaN NaN USD \n",
"6 NaN NaN TOTAL \n",
"7 Integrated Account Current 綜合存款戶口—支票 36881189270 NaN \n",
"8 NaN NaN HKD \n",
"9 NaN NaN TOTAL \n",
"\n",
" Currency Balance HKD Balance or equivalent \n",
"0 外幣結存 結存(以港幣為單位) \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 107,220.22 107,220.22 \n",
"4 1,044.92 1,136.87 \n",
"5 100.00 778.10 \n",
"6 NaN 109,135.19 \n",
"7 NaN NaN \n",
"8 1,994.68 1,994.68 \n",
"9 NaN 1,994.68 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Date 日期</th>\n",
" <th>Unnamed: 0</th>\n",
" <th>Description 進支詳列</th>\n",
" <th>Deposit 存款</th>\n",
" <th>Withdrawal 提款</th>\n",
" <th>Balance 結餘</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>NaN</td>\n",
" <td>HKD</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>17 May</td>\n",
" <td>BALANCE FROM PREVIOUS STATEMENT 戶口之進支結餘</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>115,572.92</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>20 May</td>\n",
" <td>FPS QR MO S** P*** HK2305200062274383</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>285.00</td>\n",
" <td>115,287.92</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>25 May</td>\n",
" <td>FPS QR HKT TEL / EYE 26404890000118A46</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>166.00</td>\n",
" <td>115,121.92</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>NaN</td>\n",
" <td>FPS QR (HKT) NETVIGATO 68112310619222A62</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>416.00</td>\n",
" <td>114,705.92</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>NaN</td>\n",
" <td>SCB ATM QR WDL</td>\n",
" <td>0078 1223</td>\n",
" <td>NaN</td>\n",
" <td>900.00</td>\n",
" <td>113,805.92</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>29 May</td>\n",
" <td>WONG K** L*** 002305282310196700</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>12,000.00</td>\n",
" <td>101,805.92</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>NaN</td>\n",
" <td>(Value Date As of 28 MAY)</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>31 May</td>\n",
" <td>CREDIT INTEREST</td>\n",
" <td>NINT</td>\n",
" <td>71.51</td>\n",
" <td>NaN</td>\n",
" <td>101,877.43</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>NaN</td>\n",
" <td>CLOUD LIGHT TECHNOLO</td>\n",
" <td>NaN</td>\n",
" <td>46,031.00</td>\n",
" <td>NaN</td>\n",
" <td>147,908.43</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>01 Jun</td>\n",
" <td>WONG PAK WING KATHY 002306010004322000</td>\n",
" <td>NaN</td>\n",
" <td>12,008.00</td>\n",
" <td>NaN</td>\n",
" <td>159,916.43</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>NaN</td>\n",
" <td>TRANSFER WITHDRAWAL</td>\n",
" <td>NTRF</td>\n",
" <td>NaN</td>\n",
" <td>12,000.00</td>\n",
" <td>147,916.43</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>NaN</td>\n",
" <td>INTERNET BANKING</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>NaN</td>\n",
" <td>TRANSFER WITHDRAWAL</td>\n",
" <td>NTRF</td>\n",
" <td>NaN</td>\n",
" <td>12,008.00</td>\n",
" <td>135,908.43</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>NaN</td>\n",
" <td>INTERNET BANKING</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>05 Jun</td>\n",
" <td>BILL PAYB01 5408047948319007</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>29.86</td>\n",
" <td>135,878.57</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>NaN</td>\n",
" <td>INTERNET BANKING</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>NaN</td>\n",
" <td>TRANSFER WITHDRAWAL</td>\n",
" <td>NTRF</td>\n",
" <td>NaN</td>\n",
" <td>489.06</td>\n",
" <td>135,389.51</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>NaN</td>\n",
" <td>INTERNET BANKING</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>NaN</td>\n",
" <td>TRANSFER WITHDRAWAL</td>\n",
" <td>NTRF</td>\n",
" <td>NaN</td>\n",
" <td>24,226.11</td>\n",
" <td>111,163.40</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>NaN</td>\n",
" <td>INTERNET BANKING</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>06 Jun</td>\n",
" <td>YU FUNG SHING 0605PAYC0101085281768</td>\n",
" <td>NaN</td>\n",
" <td>275.00</td>\n",
" <td>NaN</td>\n",
" <td>111,438.40</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>NaN</td>\n",
" <td>(Value Date As of 05 JUN)</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>NaN</td>\n",
" <td>MAN YUK FAI 0606PAYC0101086133910</td>\n",
" <td>NaN</td>\n",
" <td>275.00</td>\n",
" <td>NaN</td>\n",
" <td>111,713.40</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>NaN</td>\n",
" <td>PRUDENTIAL HONG KONG</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>793.18</td>\n",
" <td>110,920.22</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>07 Jun</td>\n",
" <td>SCB ATM QR WDL</td>\n",
" <td>0437 1657</td>\n",
" <td>NaN</td>\n",
" <td>900.00</td>\n",
" <td>110,020.22</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>12 Jun</td>\n",
" <td>TRANSFER WITHDRAWAL</td>\n",
" <td>NTRF</td>\n",
" <td>NaN</td>\n",
" <td>500.00</td>\n",
" <td>109,520.22</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>NaN</td>\n",
" <td>INTERNET BANKING</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>13 Jun</td>\n",
" <td>SCB ATM QR WDL</td>\n",
" <td>0093 1954</td>\n",
" <td>NaN</td>\n",
" <td>900.00</td>\n",
" <td>108,620.22</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>17 Jun</td>\n",
" <td>TRANSFER WITHDRAWAL</td>\n",
" <td>NTRF</td>\n",
" <td>NaN</td>\n",
" <td>500.00</td>\n",
" <td>108,120.22</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30</th>\n",
" <td>NaN</td>\n",
" <td>INTERNET BANKING</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31</th>\n",
" <td>NaN</td>\n",
" <td>SCB ATM QR WDL</td>\n",
" <td>0093 1706</td>\n",
" <td>NaN</td>\n",
" <td>900.00</td>\n",
" <td>107,220.22</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32</th>\n",
" <td>17 Jun</td>\n",
" <td>CLOSING BALANCE 截數結餘</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>107,220.22</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33</th>\n",
" <td>NaN</td>\n",
" <td>CNY</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>34</th>\n",
" <td>17 May</td>\n",
" <td>BALANCE FROM PREVIOUS STATEMENT 戶口之進支結餘</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1,044.84</td>\n",
" </tr>\n",
" <tr>\n",
" <th>35</th>\n",
" <td>31 May</td>\n",
" <td>CREDIT INTEREST</td>\n",
" <td>NINT</td>\n",
" <td>0.08</td>\n",
" <td>NaN</td>\n",
" <td>1,044.92</td>\n",
" </tr>\n",
" <tr>\n",
" <th>36</th>\n",
" <td>17 Jun</td>\n",
" <td>CLOSING BALANCE 截數結餘</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1,044.92</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37</th>\n",
" <td>NaN</td>\n",
" <td>USD</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>38</th>\n",
" <td>17 May</td>\n",
" <td>BALANCE FROM PREVIOUS STATEMENT 戶口之進支結餘</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>100.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>39</th>\n",
" <td>17 Jun</td>\n",
" <td>CLOSING BALANCE 截數結餘</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>100.00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Date 日期 Unnamed: 0 Description 進支詳列 \\\n",
"0 NaN HKD NaN \n",
"1 17 May BALANCE FROM PREVIOUS STATEMENT 戶口之進支結餘 NaN \n",
"2 20 May FPS QR MO S** P*** HK2305200062274383 NaN \n",
"3 25 May FPS QR HKT TEL / EYE 26404890000118A46 NaN \n",
"4 NaN FPS QR (HKT) NETVIGATO 68112310619222A62 NaN \n",
"5 NaN SCB ATM QR WDL 0078 1223 \n",
"6 29 May WONG K** L*** 002305282310196700 NaN \n",
"7 NaN (Value Date As of 28 MAY) NaN \n",
"8 31 May CREDIT INTEREST NINT \n",
"9 NaN CLOUD LIGHT TECHNOLO NaN \n",
"10 01 Jun WONG PAK WING KATHY 002306010004322000 NaN \n",
"11 NaN TRANSFER WITHDRAWAL NTRF \n",
"12 NaN INTERNET BANKING NaN \n",
"13 NaN TRANSFER WITHDRAWAL NTRF \n",
"14 NaN INTERNET BANKING NaN \n",
"15 05 Jun BILL PAYB01 5408047948319007 NaN \n",
"16 NaN INTERNET BANKING NaN \n",
"17 NaN TRANSFER WITHDRAWAL NTRF \n",
"18 NaN INTERNET BANKING NaN \n",
"19 NaN TRANSFER WITHDRAWAL NTRF \n",
"20 NaN INTERNET BANKING NaN \n",
"21 06 Jun YU FUNG SHING 0605PAYC0101085281768 NaN \n",
"22 NaN (Value Date As of 05 JUN) NaN \n",
"23 NaN MAN YUK FAI 0606PAYC0101086133910 NaN \n",
"24 NaN PRUDENTIAL HONG KONG NaN \n",
"25 07 Jun SCB ATM QR WDL 0437 1657 \n",
"26 12 Jun TRANSFER WITHDRAWAL NTRF \n",
"27 NaN INTERNET BANKING NaN \n",
"28 13 Jun SCB ATM QR WDL 0093 1954 \n",
"29 17 Jun TRANSFER WITHDRAWAL NTRF \n",
"30 NaN INTERNET BANKING NaN \n",
"31 NaN SCB ATM QR WDL 0093 1706 \n",
"32 17 Jun CLOSING BALANCE 截數結餘 NaN \n",
"33 NaN CNY NaN \n",
"34 17 May BALANCE FROM PREVIOUS STATEMENT 戶口之進支結餘 NaN \n",
"35 31 May CREDIT INTEREST NINT \n",
"36 17 Jun CLOSING BALANCE 截數結餘 NaN \n",
"37 NaN USD NaN \n",
"38 17 May BALANCE FROM PREVIOUS STATEMENT 戶口之進支結餘 NaN \n",
"39 17 Jun CLOSING BALANCE 截數結餘 NaN \n",
"\n",
" Deposit 存款 Withdrawal 提款 Balance 結餘 \n",
"0 NaN NaN NaN \n",
"1 NaN NaN 115,572.92 \n",
"2 NaN 285.00 115,287.92 \n",
"3 NaN 166.00 115,121.92 \n",
"4 NaN 416.00 114,705.92 \n",
"5 NaN 900.00 113,805.92 \n",
"6 NaN 12,000.00 101,805.92 \n",
"7 NaN NaN NaN \n",
"8 71.51 NaN 101,877.43 \n",
"9 46,031.00 NaN 147,908.43 \n",
"10 12,008.00 NaN 159,916.43 \n",
"11 NaN 12,000.00 147,916.43 \n",
"12 NaN NaN NaN \n",
"13 NaN 12,008.00 135,908.43 \n",
"14 NaN NaN NaN \n",
"15 NaN 29.86 135,878.57 \n",
"16 NaN NaN NaN \n",
"17 NaN 489.06 135,389.51 \n",
"18 NaN NaN NaN \n",
"19 NaN 24,226.11 111,163.40 \n",
"20 NaN NaN NaN \n",
"21 275.00 NaN 111,438.40 \n",
"22 NaN NaN NaN \n",
"23 275.00 NaN 111,713.40 \n",
"24 NaN 793.18 110,920.22 \n",
"25 NaN 900.00 110,020.22 \n",
"26 NaN 500.00 109,520.22 \n",
"27 NaN NaN NaN \n",
"28 NaN 900.00 108,620.22 \n",
"29 NaN 500.00 108,120.22 \n",
"30 NaN NaN NaN \n",
"31 NaN 900.00 107,220.22 \n",
"32 NaN NaN 107,220.22 \n",
"33 NaN NaN NaN \n",
"34 NaN NaN 1,044.84 \n",
"35 0.08 NaN 1,044.92 \n",
"36 NaN NaN 1,044.92 \n",
"37 NaN NaN NaN \n",
"38 NaN NaN 100.00 \n",
"39 NaN NaN 100.00 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Date 日期</th>\n",
" <th>Unnamed: 0</th>\n",
" <th>Description 進支詳列</th>\n",
" <th>Deposit 存款</th>\n",
" <th>Withdrawal 提款</th>\n",
" <th>Balance 結餘</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>NaN</td>\n",
" <td>HKD</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>17 May</td>\n",
" <td>BALANCE FROM PREVIOUS STATEMENT 戶口之進支結餘</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1,994.68</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>17 Jun</td>\n",
" <td>CLOSING BALANCE 截數結餘</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1,994.68</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Date 日期 Unnamed: 0 Description 進支詳列 \\\n",
"0 NaN HKD NaN \n",
"1 17 May BALANCE FROM PREVIOUS STATEMENT 戶口之進支結餘 NaN \n",
"2 17 Jun CLOSING BALANCE 截數結餘 NaN \n",
"\n",
" Deposit 存款 Withdrawal 提款 Balance 結餘 \n",
"0 NaN NaN NaN \n",
"1 NaN NaN 1,994.68 \n",
"2 NaN NaN 1,994.68 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Month</th>\n",
" <th>Average daily relationship balance</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>月份</td>\n",
" <td>每日平均總結餘</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>March 2023 年 03 月</td>\n",
" <td>HKD 港元 115,653.41</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>April 2023 年 04 月</td>\n",
" <td>HKD 港元 137,308.64</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>May 2023 年 05 月</td>\n",
" <td>HKD 港元 151,906.70</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Month Average daily relationship balance\n",
"0 月份 每日平均總結餘\n",
"1 March 2023 年 03 月 HKD 港元 115,653.41\n",
"2 April 2023 年 04 月 HKD 港元 137,308.64\n",
"3 May 2023 年 05 月 HKD 港元 151,906.70"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Build the parser for the statement selected via `pdf_file_path`.\n",
"pdf_parser = PdfParser(pdf_file_path)\n",
"\n",
"# Dump the raw text so it can be compared with the tabular output below.\n",
"text = pdf_parser.extract_text()\n",
"print(text)\n",
"\n",
"# Show each extracted table with the notebook's rich display.\n",
"tables = pdf_parser.extract_table()\n",
"for table in tables:\n",
"    display(table)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "finance",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
},
"noteable": {
"last_transaction_id": "73ebba71-e2fd-4f44-9b68-dbc9778c8fc8"
},
"noteable-chatgpt": {
"create_notebook": {
"openai_conversation_id": "179ca6e3-0377-5e6e-8f81-719779d73690",
"openai_ephemeral_user_id": "ace69c27-3d03-5a21-855f-72ec4b037401",
"openai_subdivision1_iso_code": "TW-TPE"
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}