budget-bear/capital_one.py
Sara Montecino d986461019 Add a bunch of fixes, categories
- Go through and update database
2022-10-31 00:57:57 -07:00

104 lines
3.1 KiB
Python
Executable File

import PyPDF2 as pdf
import re
import sys
import math
from datetime import datetime
import model
class Regex:
    """Base class for the regex-driven extractors defined in this module."""

    def error(self, msg):
        """Report a failure for this extractor and terminate the process.

        Prints a message tagged with the concrete class name, then exits
        with status -1.
        """
        extractor_name = type(self).__name__
        print(f"ERROR: {extractor_name} failed: {msg}")
        sys.exit(-1)

    def extract(self, text):
        """Default implementation: extracts nothing and returns ``None``."""
        return None
class Balance(Regex):
    """Extracts the "New Balance" dollar amount from the statement text."""

    # A line starting with "New Balance", anything, then "$" followed by the
    # digits/commas/dots that make up the amount (captured).
    pattern = r'\nNew\sBalance.*\$([0-9,\.]*)'

    def extract(self, text):
        """Return the new balance found in ``text`` as a float.

        The pattern must match exactly once; zero or multiple matches are
        reported through ``self.error``, which exits the process.
        """
        matches = re.findall(self.pattern, text)
        match_count = len(matches)
        if not match_count:
            self.error("No matches.")
        if match_count > 1:
            self.error(f"Too many matches {match_count}")
        # Thousands separators must go before float() can parse the amount.
        amount_text = matches[0].replace(',', '')
        return float(amount_text)
class Year(Regex):
    """Extracts the year from the "Available Credit (as of ...)" line."""

    # Captures the date text inside "(as of <date>)".
    pattern = r'\nAvailable\sCredit\s+\(as\sof\s(.*)\)'

    def extract(self, text):
        """Return the year (int) of the "as of" date found in ``text``.

        The pattern must match exactly once; zero or multiple matches are
        reported through ``self.error``, which exits the process. The
        captured date is expected in ``'%b %d, %Y'`` form.
        """
        found = re.findall(self.pattern, text)
        if not found:
            self.error("No matches.")
        if len(found) > 1:
            self.error(f"Too many matches {len(found)}")
        return datetime.strptime(found[0], '%b %d, %Y').year
class Transactions(Regex):
    """Extracts the individual transaction rows from the statement text."""

    # Groups: (1) first "Mon D" date on the line, (2) description ending in a
    # letter, (3) optional "-" sign, (4) amount after "$". A second "Mon D"
    # date follows the first but is not captured.
    pattern = r'\n([a-zA-Z]{3}\s[0-9]{1,2})\s[a-zA-Z]{3}\s[0-9]{1,2}(.*[a-zA-Z])\s*(\-?)\s\$([0-9,\.]*)'

    def extract(self, text):
        """Return a list of ``[date, description, amount]`` entries.

        ``date`` is the captured date text (no year), ``description`` has its
        whitespace runs collapsed to single spaces, and ``amount`` is a float
        that is negated for rows carrying a "-" sign. Signed rows whose
        description contains "AUTOPAY" are reported and skipped.
        """
        parsed = []
        for fields in re.findall(self.pattern, text):
            if len(fields) != 4:
                self.error("ERROR: Invalid result.")
            when, raw_description, sign, raw_amount = fields
            description = " ".join(raw_description.split())
            amount = float(raw_amount.replace(',', ''))
            if '-' in sign:
                if "AUTOPAY" in description:
                    print(f"Skipping payment: {amount}")
                    continue
                amount = -amount
            parsed.append([when, description, amount])
        return parsed
class Parser(model.BaseParser):
    """Parses a Capital One PDF statement into a list of transactions."""

    def __init__(self):
        # One extractor per piece of information pulled from the statement.
        self.balance = Balance()
        self.year = Year()
        self.transactions = Transactions()

    @property
    def source(self):
        """The transaction source this parser handles."""
        return model.TransactionSource.CAPITAL_ONE

    def parse(self, file_name):
        """Read the PDF at ``file_name`` and return its transactions.

        Returns a list of ``[date, description, amount]`` entries where
        ``date`` is a ``datetime`` carrying the statement year.

        Prints an error and exits with status -1 when the sum of the
        extracted amounts does not match the statement's new balance.
        """
        with open(file_name, 'rb') as pdf_file:
            reader = pdf.PdfReader(pdf_file)
            text = ''.join(page.extract_text() for page in reader.pages)

        balance = self.balance.extract(text)
        year = self.year.extract(text)
        transactions = self.transactions.extract(text)

        # Validate that the transactions add up to the extracted balance.
        total = sum(amount for _, _, amount in transactions)
        if not math.isclose(total, balance, abs_tol=0.001):
            print(f"ERROR: Actual {total} != Expected {balance}")
            sys.exit(-1)

        # Add the year to all the parsed dates. The year is parsed together
        # with the month/day: parsing "%b %d" alone uses the default year
        # 1900, which rejects "Feb 29" even when the statement year is a
        # leap year.
        # NOTE(review): every transaction receives the statement's "as of"
        # year; a statement spanning New Year would misdate the December
        # rows -- confirm whether that can occur.
        for transaction in transactions:
            transaction[0] = datetime.strptime(
                f"{transaction[0]} {year:04d}", "%b %d %Y"
            )
        return transactions