class Regex:
    """Base class for the regex-driven extractors applied to statement text.

    Subclasses override :meth:`extract`; :meth:`error` is the shared way to
    abort the program when the text does not have the expected shape.
    """

    def error(self, msg):
        """Report a fatal extraction failure and terminate the process.

        Args:
            msg: Description of what went wrong.

        Raises:
            SystemExit: Always, with exit status -1.
        """
        # Diagnostics go to stderr so they never mix with regular stdout
        # output. The message names the concrete subclass that failed.
        print(
            f"ERROR: {self.__class__.__name__} failed: {msg}",
            file=sys.stderr,
        )
        sys.exit(-1)

    def extract(self, text):
        """Extract a value from ``text``.

        The base implementation extracts nothing and returns ``None``;
        subclasses provide the real behaviour.
        """
        return None
class Transactions(Regex):
    """Extract purchase rows from the text of a Capital One statement."""

    # Capture groups:
    #   1. transaction date, e.g. "Aug 3" (the second date on the row is
    #      matched but not captured)
    #   2. description (must end in a letter)
    #   3. optional "-" sign, which marks the row as a payment
    #   4. amount digits, possibly with thousands separators
    pattern = r'\n([a-zA-Z]{3}\s[0-9]{1,2})\s[a-zA-Z]{3}\s[0-9]{1,2}(.*[a-zA-Z])\s*(\-?)\s\$([0-9,\.]*)'

    def extract(self, text):
        """Return every non-payment transaction found in ``text``.

        Args:
            text: Full statement text.

        Returns:
            A list of ``[date, description, amount]`` lists, where ``date``
            is the raw "Mon D" string, ``description`` has its whitespace
            collapsed to single spaces and ``amount`` is a float. The rows
            are mutable lists, not tuples.

        Raises:
            SystemExit: Via ``self.error`` when a match is malformed.
        """
        transactions = []
        for result in re.findall(self.pattern, text):
            if len(result) != 4:
                # self.error already prefixes the message with "ERROR:".
                self.error("Invalid result.")

            date = result[0]
            description = " ".join(result[1].split())
            is_payment = '-' in result[2]

            # The amount group also matches text with no digits (e.g. "," or
            # "."), which float() rejects. Route that through the normal
            # error path instead of crashing with a traceback.
            try:
                amount = float(result[3].replace(',', ''))
            except ValueError:
                self.error(f"Invalid amount {result[3]!r} for {description!r}")

            if is_payment:
                print(f"Skipping payment: {amount}")
                continue

            transactions.append([date, description, amount])

        return transactions
def add_default_categories():
    """Seed the TransactionCategory table with the default category tree.

    Each key becomes a top-level category and each string in its list
    becomes a child of that category. All rows are inserted in a single
    transaction, so a failure leaves the table unchanged.

    Not idempotent: category names are unique, so running this against a
    database that already holds these rows fails.
    """
    # A mapping (not a list of one-key dicts) so that .items() yields
    # (parent, children) pairs.
    categories = {
        "food": ["snacks", "fast_food", "groceries", "restaurant"],
        "clothing": [],
        "event": ["event_food", "birthday", "movie_theater"],
        "finance": ["interest"],
        "hobby": ["gym", "game_development", "projects", "education"],
        "home_improvement": [],
        # TransactionCategory.name is unique across the whole table, so a
        # child may not reuse the name of the top-level "health" category.
        "pet": ["pet_health", "dog_food"],
        "media": ["book", "music", "television", "video_game"],
        "health": ["medicine"],
        "transit": ["car_insurance", "car_registration", "gas", "parking", "taxi", "car_maintenance"],
        # "gas" is already taken by the transit subcategory above.
        "utilities": ["electricity", "natural_gas", "laundry", "cell_phone", "trash", "water"],
        "rent": [],
    }

    # Using the database as a context manager opens the connection if
    # needed, wraps the body in a transaction and closes the connection
    # afterwards, so nothing is left open.
    with instance:
        for parent, children in categories.items():
            parent_db = TransactionCategory.create(name=parent)
            for child in children:
                TransactionCategory.create(name=child, parent=parent_db)
class Source(BaseModel):
    """A file that transactions were imported from."""

    # Name of the imported file; doubles as the primary key, so each file
    # name can be recorded only once.
    filename = CharField(unique=True, primary_key=True)
    # Integer code identifying the kind of source.
    # NOTE(review): presumably a model.TransactionSource value - confirm.
    type = IntegerField()
    # When this row was created (local time at insert).
    created_date = DateTimeField(default=datetime.datetime.now)
    # Owner of the source. The backref must differ from the 'transactions'
    # backref that Transaction.user registers on User; with the same name
    # the two reverse accessors collide on User.
    user = ForeignKeyField(User, backref='sources')
class BaseParser:
    """Interface shared by statement parsers.

    Concrete parsers override :meth:`parse` and :attr:`source`. The base
    implementation parses nothing.
    """

    def __init__(self):
        # NOTE(review): placeholder attribute that nothing visible reads;
        # kept only so existing attribute access keeps working.
        self.butt = None

    def parse(self, file_name=None):
        """Parse the statement at ``file_name`` into transactions.

        Args:
            file_name: Path of the file to parse. Optional so existing
                zero-argument calls keep working.

        Returns:
            ``None`` in the base class, meaning nothing was parsed.
        """
        return None

    @property
    def source(self):
        """Integer identifying the transaction source; -1 means unknown."""
        return -1