Initial commit

This commit is contained in:
Sara Montecino 2022-10-30 02:20:51 -07:00
commit ac6763df1b
9 changed files with 265 additions and 0 deletions

2
.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
venv/
__pycache__/

17
.vscode/launch.json vendored Executable file
View File

@ -0,0 +1,17 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Main",
"type": "python",
"request": "launch",
"program": "main.py",
"console": "integratedTerminal",
"justMyCode": true,
"args": ["C:\\Users\\saram\\Downloads\\Statement_082022_4653.pdf"]
}
]
}

BIN
budget.db Executable file

Binary file not shown.

101
capital_one.py Executable file
View File

@ -0,0 +1,101 @@
import PyPDF2 as pdf
import re
import sys
import math
from datetime import datetime
import model
class Regex:
    """Base class for extractors that pull one kind of value out of statement text.

    Subclasses set ``pattern`` and override :meth:`extract`.
    """

    # Regular expression applied by the helpers below; set by each subclass.
    pattern = None

    def error(self, msg):
        """Report a fatal extraction failure and terminate the process.

        Args:
            msg: Description of what went wrong.

        Raises:
            SystemExit: Always, with exit status -1.
        """
        print(f"ERROR: {self.__class__.__name__} failed: {msg}", file=sys.stderr)
        sys.exit(-1)

    def extract(self, text):
        """Return the value found in *text*; the base implementation finds nothing."""
        return None

    def _single_match(self, text):
        """Return the only match of ``self.pattern`` in *text*.

        Exits through :meth:`error` when there are zero matches or more than one.
        """
        results = re.findall(self.pattern, text)
        if not results:
            self.error("No matches.")
        if len(results) > 1:
            self.error(f"Too many matches {len(results)}")
        return results[0]

    def _to_amount(self, raw):
        """Convert a captured figure such as ``1,234.56`` to a float.

        The amount patterns use ``[0-9,\\.]*``, which can capture an empty or
        malformed string; that case is reported through :meth:`error` instead
        of surfacing as an unhandled ``ValueError``.
        """
        try:
            return float(raw.replace(',', ''))
        except ValueError:
            self.error(f"Invalid amount {raw!r}")


class Balance(Regex):
    """Extract the dollar figure from the statement's "New Balance" line."""

    # Captures the digits, commas and periods after the last "$" on a line
    # that starts with "New Balance" (the ".*" before "\$" is greedy).
    pattern = r'\nNew\sBalance.*\$([0-9,\.]*)'

    def extract(self, text):
        """Return the new balance as a float.

        Exits through :meth:`error` unless exactly one parsable balance is found.
        """
        return self._to_amount(self._single_match(text))


class Year(Regex):
    """Extract the year from the "Available Credit (as of ...)" line."""

    # Captures the text between "(as of " and the last ")" on the line.
    pattern = r'\nAvailable\sCredit\s+\(as\sof\s(.*)\)'

    def extract(self, text):
        """Return the year (int) of the "as of" date.

        Exits through :meth:`error` unless exactly one date in the form
        ``Aug 20, 2022`` is found.
        """
        raw = self._single_match(text)
        try:
            return datetime.strptime(raw, '%b %d, %Y').year
        except ValueError:
            self.error(f"Invalid date {raw!r}")


class Transactions(Regex):
    """Extract the purchase rows from statement text."""

    # A row starts on a new line with two "Mon D" stamps (only the first is
    # captured; presumably transaction date then posting date -- confirm
    # against a real statement), followed by a description ending in a letter,
    # an optional "-" sign, and a "$" amount.
    pattern = r'\n([a-zA-Z]{3}\s[0-9]{1,2})\s[a-zA-Z]{3}\s[0-9]{1,2}(.*[a-zA-Z])\s*(\-?)\s\$([0-9,\.]*)'

    def extract(self, text):
        """Return a list of ``[date, description, amount]`` rows found in *text*.

        ``date`` is the raw ``Mon D`` string, ``description`` has its runs of
        whitespace collapsed to single spaces, and ``amount`` is a float.
        Rows carrying a "-" sign are treated as payments and skipped.
        """
        transactions = []
        for date, raw_description, sign, raw_amount in re.findall(self.pattern, text):
            amount = self._to_amount(raw_amount)
            if '-' in sign:
                print(f"Skipping payment: {amount}")
                continue
            description = " ".join(raw_description.split())
            transactions.append([date, description, amount])
        return transactions
class Parser(model.BaseParser):
    """Turn a statement PDF into ``[datetime, description, amount]`` rows."""

    def __init__(self):
        super().__init__()
        self.balance = Balance()
        self.year = Year()
        self.transactions = Transactions()

    @property
    def source(self):
        """Identifier recorded for rows produced by this parser."""
        return model.TransactionSource.CAPITAL_ONE

    def parse(self, file_name):
        """Extract and validate the transactions in the PDF at *file_name*.

        Args:
            file_name: Path to the statement PDF.

        Returns:
            A list of ``[datetime, description, amount]`` lists.

        Raises:
            SystemExit: With status -1 when the extracted amounts do not add
                up to the statement's new balance (the extractors also exit
                on their own failures).
        """
        text = self._read_text(file_name)
        balance = self.balance.extract(text)
        year = self.year.extract(text)
        transactions = self.transactions.extract(text)

        # Validate transactions match extracted budget
        total = sum(amount for _, _, amount in transactions)
        if not math.isclose(total, balance, abs_tol=0.001):
            print(f"ERROR: Actual {total} != Expected {balance}", file=sys.stderr)
            sys.exit(-1)

        # Add year to all the parsed dates. The year is parsed together with
        # the month and day: parsing "Feb 29" on its own defaults to 1900,
        # which is not a leap year, and raises ValueError.
        # NOTE(review): every row gets the statement's year, so a statement
        # that spans New Year would mis-date the older rows -- confirm
        # whether that can occur.
        for transaction in transactions:
            transaction[0] = datetime.strptime(f"{transaction[0]} {year}", "%b %d %Y")
        return transactions

    @staticmethod
    def _read_text(file_name):
        """Return the concatenated extracted text of every page of the PDF."""
        with open(file_name, 'rb') as pdf_file:
            reader = pdf.PdfReader(pdf_file)
            return ''.join(page.extract_text() for page in reader.pages)

69
database.py Executable file
View File

@ -0,0 +1,69 @@
import datetime
from peewee import *
DATABASE = 'budget.db'
instance = SqliteDatabase(DATABASE, pragmas=[('foreign_keys', 'on')])
def create_tables():
    """Helper function to create database tables. Should be called manually.

    Also makes sure the default user row exists. Safe to run more than once:
    the user is only inserted when it is not already present.
    """
    # One managed block for both steps, so the connection is closed even if
    # the user insert fails.
    with instance:
        instance.create_tables([User, TransactionCategory, Transaction, Source])
        # Make my user.
        User.get_or_create(username='ciphercules')
def add_default_categories():
    """Insert the default parent categories and their subcategories.

    All rows are written inside one ``with instance:`` block, so a failure
    part-way through leaves no partial set behind and the connection is
    closed afterwards.
    """
    # Maps each parent category to its subcategories. A single dict, so that
    # ``.items()`` yields (parent, children) pairs.
    # NOTE(review): TransactionCategory.name is unique, so a name may appear
    # only once across parents and children. The pet subcategory "health"
    # (which clashed with the top-level "health") and the utilities
    # subcategory "gas" (which clashed with transit's "gas") were renamed to
    # "pet_health" and "natural_gas" -- adjust if other names are preferred.
    categories = {
        "food": ["snacks", "fast_food", "groceries", "restaurant"],
        "clothing": [],
        "event": ["event_food", "birthday", "movie_theater"],
        "finance": ["interest"],
        "hobby": ["gym", "game_development", "projects", "education"],
        "home_improvement": [],
        "pet": ["pet_health", "dog_food"],
        "media": ["book", "music", "television", "video_game"],
        "health": ["medicine"],
        "transit": ["car_insurance", "car_registration", "gas", "parking", "taxi", "car_maintenance"],
        "utilities": ["electricity", "natural_gas", "laundry", "cell_phone", "trash", "water"],
        "rent": [],
    }
    with instance:
        for parent, children in categories.items():
            parent_db = TransactionCategory.create(name=parent)
            for child in children:
                TransactionCategory.create(name=child, parent=parent_db)
class BaseModel(Model):
    """Common base class that binds every model to the module-level database."""

    class Meta:
        # All subclasses read and write through ``instance``.
        database = instance
class User(BaseModel):
    """A user, identified solely by username."""

    # The username doubles as the primary key.
    username = CharField(unique=True, primary_key=True)
class TransactionCategory(BaseModel):
    """A uniquely named category that may be nested under a parent category."""

    primary_key = AutoField(primary_key=True)
    # Names are unique across the whole table, parents and children alike.
    name = CharField(unique=True)
    # Self-reference; NULL is allowed, and a category's subcategories are
    # reachable through the ``children`` backref.
    parent = ForeignKeyField('self', null=True, backref='children')
class Source(BaseModel):
    """A file that transactions were imported from."""

    # The file name is the primary key, so each name can be stored only once.
    filename = CharField(unique=True, primary_key=True)
    # Integer identifying which kind of source produced the file.
    type = IntegerField()
    # The callable is passed uncalled, so it is evaluated per row.
    created_date = DateTimeField(default=datetime.datetime.now)
    # Backref is ``sources``: it used to be ``transactions``, the same name
    # Transaction.user registers on User, so the two accessors collided.
    user = ForeignKeyField(User, backref='sources')
class Transaction(BaseModel):
    """A single transaction row tied to a source and a user."""

    # Metadata
    primary_key = AutoField(primary_key=True)
    source = ForeignKeyField(Source, backref='transactions')
    # The callable is passed uncalled, so it is evaluated per row.
    created_date = DateTimeField(default=datetime.datetime.now)
    user = ForeignKeyField(User, backref='transactions')
    # Real data
    transaction_date = DateTimeField()
    description = CharField()
    amount = FloatField()
    # Optional category; backref='+' asks peewee not to add a reverse
    # accessor on TransactionCategory.
    subcategory = ForeignKeyField(TransactionCategory, backref='+', null=True)

45
main.py Executable file
View File

@ -0,0 +1,45 @@
import sys
import argparse
import os
import capital_one
import database
# User that every imported source and transaction is attributed to.
USERNAME = 'ciphercules'


def _already_imported(source_name):
    """Return True when a Source row with this file name already exists."""
    with database.instance.connection_context():
        return database.Source.get_or_none(filename=source_name) is not None


def _store(source_name, source_type, transactions):
    """Write one source row and its transactions as a single unit.

    ``with database.instance`` opens the connection, wraps the writes in a
    transaction, and closes the connection again even when a write fails.
    """
    with database.instance:
        # Add source file first.
        source = database.Source.create(
            filename=source_name,
            type=source_type,
            user=USERNAME
        )
        # Add each transaction
        for date, description, amount in transactions:
            database.Transaction.create(
                user=USERNAME,
                transaction_date=date,
                description=description,
                amount=amount,
                source=source
            )


def main():
    """Parse each file named on the command line and store its transactions."""
    parser = argparse.ArgumentParser(prog="BudgetBear", description="Calculate a budget fit for a bear.")
    parser.add_argument('files', type=str, nargs='+', help='File to parse transactions from.')
    args = parser.parse_args()

    file_parsers = [capital_one.Parser()]
    for f in args.files:
        source_name = os.path.basename(f)
        # Source.filename is a primary key; skip a file that was stored
        # before instead of failing on the constraint.
        if _already_imported(source_name):
            print(f"Skipping {source_name}: already imported.", file=sys.stderr)
            continue
        for file_parser in file_parsers:
            # Use the first successful parser.
            transactions = file_parser.parse(f)
            if not transactions:
                continue
            _store(source_name, file_parser.source, transactions)
            # We successfully updated database with this parser, exit loop.
            break
        else:
            print(f"WARNING: No parser produced transactions for {f}", file=sys.stderr)


if __name__ == "__main__":
    main()

14
model.py Executable file
View File

@ -0,0 +1,14 @@
class BaseParser:
    """Interface shared by statement parsers.

    Subclasses override :meth:`parse` and :attr:`source`.
    """

    def __init__(self):
        # Placeholder attribute; nothing visible here reads it.
        self.butt = None

    def parse(self, file_name=None):
        """Return the transactions found in *file_name*.

        The base implementation parses nothing and returns ``None``.
        ``file_name`` is accepted (and optional) so the signature matches how
        parsers are invoked, with the path of the file to read.
        """
        return None

    @property
    def source(self):
        """Identifier of the transaction source; -1 in the base class."""
        return -1
class TransactionSource:
    """Integer constants naming where a transaction came from."""

    # Plain class attributes rather than an Enum, so each value is a bare int.
    CAPITAL_ONE = 1

3
requirements.txt Executable file
View File

@ -0,0 +1,3 @@
peewee==3.15.3
PyPDF2==2.11.1
typing_extensions==4.4.0

14
todo.txt Executable file
View File

@ -0,0 +1,14 @@
# Goal: Command line tool for easily doing my budget
Every week, I need to:
1. Categorize my purchases
2. Update SplitWise for shared purchases
# Tasks
## Parse Capital One statement
x Given a PDF bank statement from Capital One, extract the transaction date, description, and amount from each transaction
Store the data in a SQLite database
x Create database schema
x Automatically add Capital One transactions to database
Add web page to add categories
Add web page to categorize transactions