Initial commit
This commit is contained in:
commit
ac6763df1b
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
venv/
|
||||||
|
__pycache__/
|
17
.vscode/launch.json
vendored
Executable file
17
.vscode/launch.json
vendored
Executable file
@ -0,0 +1,17 @@
|
|||||||
|
{
|
||||||
|
// Use IntelliSense to learn about possible attributes.
|
||||||
|
// Hover to view descriptions of existing attributes.
|
||||||
|
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||||
|
"version": "0.2.0",
|
||||||
|
"configurations": [
|
||||||
|
{
|
||||||
|
"name": "Main",
|
||||||
|
"type": "python",
|
||||||
|
"request": "launch",
|
||||||
|
"program": "main.py",
|
||||||
|
"console": "integratedTerminal",
|
||||||
|
"justMyCode": true,
|
||||||
|
"args": ["C:\\Users\\saram\\Downloads\\Statement_082022_4653.pdf"]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
101
capital_one.py
Executable file
101
capital_one.py
Executable file
@ -0,0 +1,101 @@
|
|||||||
|
import PyPDF2 as pdf
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import math
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
import model
|
||||||
|
|
||||||
|
class Regex:
    """Base class for extractors that pull one kind of value out of statement text.

    Subclasses define a ``pattern`` class attribute and override ``extract``.
    """

    def error(self, msg):
        """Report a fatal extraction failure and terminate the process.

        Args:
            msg: Human-readable description of what went wrong.

        Raises:
            SystemExit: Always, with exit code -1.
        """
        # Diagnostics go to stderr so they never mix with regular output.
        print(f"ERROR: {self.__class__.__name__} failed: {msg}", file=sys.stderr)
        sys.exit(-1)

    def extract(self, text):
        """Return the value parsed from ``text``; the base class extracts nothing."""
        return None

    def _single_match(self, text):
        """Return the only match of ``self.pattern`` in ``text``; abort otherwise."""
        results = re.findall(self.pattern, text)
        if not results:
            self.error("No matches.")
        if len(results) > 1:
            self.error(f"Too many matches {len(results)}")
        return results[0]

    def _to_amount(self, raw):
        """Convert a captured amount such as ``'1,234.56'`` to a float; abort if malformed."""
        try:
            return float(raw.replace(',', ''))
        except ValueError:
            # The amount group may match an empty or malformed string.
            self.error(f"Invalid amount {raw!r}")


class Balance(Regex):
    """Extracts the statement's "New Balance" amount."""

    pattern = r'\nNew\sBalance.*\$([0-9,\.]*)'

    def extract(self, text):
        """Return the balance as a float.

        Exits the process when the balance line is missing, appears more than
        once, or carries an amount that cannot be parsed.
        """
        return self._to_amount(self._single_match(text))


class Year(Regex):
    """Extracts the year from the "Available Credit (as of ...)" line."""

    pattern = r'\nAvailable\sCredit\s+\(as\sof\s(.*)\)'

    def extract(self, text):
        """Return the year of the "as of" date as an int.

        Exits the process when the line is missing, appears more than once, or
        the date is not in ``'%b %d, %Y'`` form.
        """
        raw_date = self._single_match(text)
        try:
            return datetime.strptime(raw_date, '%b %d, %Y').year
        except ValueError:
            self.error(f"Unrecognized date {raw_date!r}")


class Transactions(Regex):
    """Extracts the individual transaction lines of a statement."""

    # Groups: first date, description, optional '-' sign, amount.
    pattern = r'\n([a-zA-Z]{3}\s[0-9]{1,2})\s[a-zA-Z]{3}\s[0-9]{1,2}(.*[a-zA-Z])\s*(\-?)\s\$([0-9,\.]*)'

    def extract(self, text):
        """Return every non-payment transaction found in ``text``.

        Lines whose amount is preceded by ``-`` are treated as payments: they
        are reported on stdout and left out of the result.

        Returns:
            A list of ``[date, description, amount]`` lists, where ``date`` is
            the matched ``'Mon D'`` string, ``description`` has its whitespace
            collapsed, and ``amount`` is a float.
        """
        transactions = []
        for result in re.findall(self.pattern, text):
            if len(result) != 4:
                self.error("Invalid result.")

            date, raw_description, sign, raw_amount = result
            description = " ".join(raw_description.split())
            amount = self._to_amount(raw_amount)

            if '-' in sign:
                print(f"Skipping payment: {amount}")
                continue

            transactions.append([date, description, amount])

        return transactions
|
||||||
|
|
||||||
|
class Parser(model.BaseParser):
    """Parser for Capital One PDF statements.

    Pulls the balance, the statement year and the transaction lines out of the
    PDF text, checks the transactions against the balance, and returns the
    transactions with complete dates.
    """

    def __init__(self):
        self.balance = Balance()
        self.year = Year()
        self.transactions = Transactions()

    @property
    def source(self):
        """Identifier of the statement provider handled by this parser."""
        return model.TransactionSource.CAPITAL_ONE

    def parse(self, file_name):
        """Parse the statement at ``file_name``.

        Args:
            file_name: Path of the PDF statement to read.

        Returns:
            A list of ``[datetime, description, amount]`` lists.

        Raises:
            SystemExit: When an extractor fails or the transaction total does
                not match the extracted balance.
        """
        with open(file_name, 'rb') as pdf_file:
            reader = pdf.PdfReader(pdf_file)
            text = ''.join(page.extract_text() for page in reader.pages)

        balance = self.balance.extract(text)
        year = self.year.extract(text)
        transactions = self.transactions.extract(text)

        # Validate that the transactions add up to the extracted balance.
        total = sum(amount for _, _, amount in transactions)
        if not math.isclose(total, balance, abs_tol=0.001):
            print(f"ERROR: Actual {total} != Expected {balance}", file=sys.stderr)
            sys.exit(-1)

        # Add year to all the parsed dates.
        # NOTE(review): every transaction gets the statement year, so a December
        # purchase on a January statement presumably lands in the wrong year -
        # confirm against a real year-end statement.
        for transaction in transactions:
            # Parse with the year included: without one, strptime assumes 1900
            # and rejects "Feb 29" even when the statement year is a leap year.
            transaction[0] = datetime.strptime(f"{transaction[0]} {year}", "%b %d %Y")

        return transactions
|
69
database.py
Executable file
69
database.py
Executable file
@ -0,0 +1,69 @@
|
|||||||
|
import datetime
|
||||||
|
from peewee import *
|
||||||
|
|
||||||
|
# File name of the SQLite database.
DATABASE = 'budget.db'
# Shared database handle used by every model below; the foreign_keys pragma is
# passed so that it is switched on for connections made through this handle.
instance = SqliteDatabase(DATABASE, pragmas=[('foreign_keys', 'on')])
|
||||||
|
|
||||||
|
def create_tables():
    """Helper function to create database tables. Should be called manually.

    Also makes sure the default user row exists. Everything runs inside one
    ``with instance:`` block, so the connection is closed again even when a
    statement fails.
    """
    with instance:
        instance.create_tables([User, TransactionCategory, Transaction, Source])

        # Make my user. get_or_create keeps a second run from failing on the
        # already-existing primary key.
        User.get_or_create(username='ciphercules')
|
||||||
|
|
||||||
|
def add_default_categories():
    """Insert the default two-level category tree.

    Each top-level category is created first, then its children are created
    with ``parent`` pointing at it. All inserts share one transaction, and the
    connection is closed afterwards so a later ``connect()`` does not find it
    still open.
    """
    # Maps each top-level category to its subcategories.
    # NOTE(review): "gas" is listed under both transit and utilities, and
    # "health" is both a top-level category and a child of pet, while
    # TransactionCategory.name is declared unique - these inserts are expected
    # to be rejected until the duplicates are renamed.
    categories = {
        "food": ["snacks", "fast_food", "groceries", "restaurant"],
        "clothing": [],
        "event": ["event_food", "birthday", "movie_theater"],
        "finance": ["interest"],
        "hobby": ["gym", "game_development", "projects", "education"],
        "home_improvement": [],
        "pet": ["health", "dog_food"],
        "media": ["book", "music", "television", "video_game"],
        "health": ["medicine"],
        "transit": ["car_insurance", "car_registration", "gas", "parking", "taxi", "car_maintenance"],
        "utilities": ["electricity", "gas", "laundry", "cell_phone", "trash", "water"],
        "rent": [],
    }

    instance.connect()
    try:
        with instance.atomic():
            for parent, children in categories.items():
                parent_db = TransactionCategory.create(name=parent)
                for child in children:
                    TransactionCategory.create(name=child, parent=parent_db)
    finally:
        instance.close()
|
||||||
|
|
||||||
|
class BaseModel(Model):
    """Common base class that binds every model in this file to ``instance``."""

    class Meta:
        # All subclasses use the module-level database handle.
        database = instance
|
||||||
|
|
||||||
|
class User(BaseModel):
    """A user of the budget tool, identified solely by username."""

    # The username doubles as the primary key.
    username = CharField(unique=True, primary_key=True)
|
||||||
|
|
||||||
|
class TransactionCategory(BaseModel):
    """A category a transaction can be filed under, optionally nested under a parent."""

    primary_key = AutoField(primary_key=True)
    # Category names are unique across the whole table, not just per parent.
    name = CharField(unique=True)
    # Self-reference to the parent category; null for top-level categories.
    # A parent's subcategories are reachable through the ``children`` backref.
    parent = ForeignKeyField('self', null=True, backref='children')
|
||||||
|
|
||||||
|
class Source(BaseModel):
    """A statement file that transactions were imported from."""

    # The file name is the primary key, so each file name can be stored once.
    filename = CharField(unique=True, primary_key=True)
    # Integer identifier of the statement provider (main.py passes the
    # parser's ``source`` value here).
    type = IntegerField()
    created_date = DateTimeField(default=datetime.datetime.now)
    # The backref is named 'sources' because 'transactions' is also the backref
    # that Transaction.user declares on User; two accessors cannot share a name.
    user = ForeignKeyField(User, backref='sources')
|
||||||
|
|
||||||
|
class Transaction(BaseModel):
    """A single transaction imported from a statement."""

    # Metadata
    primary_key = AutoField(primary_key=True)
    # Statement file this row was imported from.
    source = ForeignKeyField(Source, backref='transactions')
    # Time the row was inserted (defaults to "now" at creation).
    created_date = DateTimeField(default=datetime.datetime.now)
    user = ForeignKeyField(User, backref='transactions')

    # Real data
    transaction_date = DateTimeField()
    description = CharField()
    amount = FloatField()
    # Optional category; backref '+' asks peewee not to add a reverse accessor.
    subcategory = ForeignKeyField(TransactionCategory, backref='+', null=True)
|
45
main.py
Executable file
45
main.py
Executable file
@ -0,0 +1,45 @@
|
|||||||
|
import sys
|
||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
|
||||||
|
import capital_one
|
||||||
|
import database
|
||||||
|
|
||||||
|
# Command-line entry point: parse each statement file given on the command
# line and store its transactions in the database.
parser = argparse.ArgumentParser(prog="BudgetBear", description="Calculate a budget fit for a bear.")
parser.add_argument('files', type=str, nargs='+', help='File to parse transactions from.')
args = parser.parse_args()

# Every imported row is attributed to this user; it matches the user row that
# database.create_tables() inserts.
username = 'ciphercules'

file_parsers = [capital_one.Parser()]
for f in args.files:
    for file_parser in file_parsers:
        # Use the first successful parser.
        transactions = file_parser.parse(f)
        if not transactions:
            continue

        # Add to database
        database.instance.connect()
        try:
            # One transaction per file: the source row and all of its
            # transactions are stored together or not at all.
            with database.instance.atomic():
                # Add source file first.
                source = database.Source.create(
                    filename=os.path.basename(f),
                    type=file_parser.source,
                    user=username,
                )

                # Add each transaction
                for date, description, amount in transactions:
                    database.Transaction.create(
                        user=username,
                        transaction_date=date,
                        description=description,
                        amount=amount,
                        source=source,
                    )
        finally:
            # Close even when an insert fails, so the connection is not left open.
            database.instance.close()

        # We successfully updated database with this parser, exit loop.
        break
    else:
        # No parser produced any transactions; say so instead of skipping silently.
        print(f"WARNING: No transactions imported from {f}", file=sys.stderr)
|
14
model.py
Executable file
14
model.py
Executable file
@ -0,0 +1,14 @@
|
|||||||
|
class BaseParser:
    """Interface shared by statement parsers.

    The base implementation parses nothing: ``parse`` returns None and
    ``source`` is -1.
    """

    def __init__(self):
        # Placeholder attribute; nothing in the visible code reads it.
        self.butt = None

    def parse(self, file_name=None):
        """Parse the statement at ``file_name`` and return its transactions.

        ``file_name`` is optional only so that existing zero-argument calls keep
        working; callers in main.py pass the file path.

        Returns:
            None in the base class.
        """
        return None

    @property
    def source(self):
        """Integer identifier of the statement provider; -1 in the base class."""
        return -1
|
||||||
|
|
||||||
|
class TransactionSource:
    """Enum of possible transaction sources.

    Implemented as plain integer class attributes rather than ``enum.Enum``.
    """

    CAPITAL_ONE = 1
|
3
requirements.txt
Executable file
3
requirements.txt
Executable file
@ -0,0 +1,3 @@
|
|||||||
|
peewee==3.15.3
|
||||||
|
PyPDF2==2.11.1
|
||||||
|
typing_extensions==4.4.0
|
14
todo.txt
Executable file
14
todo.txt
Executable file
@ -0,0 +1,14 @@
|
|||||||
|
# Goal: Command line tool for easily doing my budget
|
||||||
|
Every week, I need to:
|
||||||
|
1. Categorize my purchases
|
||||||
|
2. Update SplitWise for shared purchases
|
||||||
|
|
||||||
|
# Tasks
|
||||||
|
|
||||||
|
## Parse Capital One statement
|
||||||
|
x Given a PDF bank statement from Capital One, extract the transaction date, description, and amount from each transaction
|
||||||
|
Store the data in a SQLite database
|
||||||
|
x Create database schema
|
||||||
|
x Automatically add Capital One transactions to database
|
||||||
|
Add web page to add categories
|
||||||
|
Add web page to categorize transactions
|
Loading…
Reference in New Issue
Block a user