Initial commit
This commit is contained in:
commit
ac6763df1b
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
venv/
|
||||
__pycache__/
|
17
.vscode/launch.json
vendored
Executable file
17
.vscode/launch.json
vendored
Executable file
@ -0,0 +1,17 @@
|
||||
{
|
||||
// Use IntelliSense to learn about possible attributes.
|
||||
// Hover to view descriptions of existing attributes.
|
||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Main",
|
||||
"type": "python",
|
||||
"request": "launch",
|
||||
"program": "main.py",
|
||||
"console": "integratedTerminal",
|
||||
"justMyCode": true,
|
||||
"args": ["C:\\Users\\saram\\Downloads\\Statement_082022_4653.pdf"]
|
||||
}
|
||||
]
|
||||
}
|
101
capital_one.py
Executable file
101
capital_one.py
Executable file
@ -0,0 +1,101 @@
|
||||
import PyPDF2 as pdf
|
||||
import re
|
||||
import sys
|
||||
import math
|
||||
from datetime import datetime
|
||||
|
||||
import model
|
||||
|
||||
class Regex:
    """Common base for the statement-text extractors defined in this module.

    Subclasses override ``extract`` and report unusable input through
    ``error``.
    """

    def error(self, msg):
        """Print a failure message tagged with the class name, then exit.

        The process is terminated with exit status -1, so this method never
        returns to its caller.
        """
        extractor_name = type(self).__name__
        print(f"ERROR: {extractor_name} failed: {msg}")
        sys.exit(-1)

    def extract(self, text):
        """Placeholder extractor; the base implementation returns None."""
        return None
|
||||
|
||||
class Balance(Regex):
    """Extract the "New Balance" dollar amount from statement text."""

    # Anchored on a line that begins with "New Balance"; captures the digits,
    # commas and dots that follow the last "$" on that line.
    pattern = r'\nNew\sBalance.*\$([0-9,\.]*)'

    def extract(self, text):
        """Return the single "New Balance" amount in *text* as a float.

        Calls ``error`` (which exits the process) unless exactly one match
        is found.
        """
        matches = re.findall(self.pattern, text)
        match_count = len(matches)

        if match_count == 0:
            self.error("No matches.")
        if match_count > 1:
            self.error(f"Too many matches {match_count}")

        # Thousands separators must go before float() can parse the amount.
        amount_text = matches[0]
        return float(amount_text.replace(',', ''))
|
||||
|
||||
class Year(Regex):
    """Extract the statement year from the "Available Credit (as of ...)" line."""

    # Captures the text between "(as of " and the closing parenthesis.
    pattern = r'\nAvailable\sCredit\s+\(as\sof\s(.*)\)'

    def extract(self, text):
        """Return the year of the single "as of" date found in *text*.

        Calls ``error`` (which exits the process) unless exactly one match
        is found. The captured text is parsed as e.g. "Aug 20, 2022".
        """
        matches = re.findall(self.pattern, text)
        match_count = len(matches)

        if match_count == 0:
            self.error("No matches.")
        if match_count > 1:
            self.error(f"Too many matches {match_count}")

        as_of = datetime.strptime(matches[0], '%b %d, %Y')
        return as_of.year
|
||||
|
||||
class Transactions(Regex):
    """Extract purchase rows from statement text."""

    # Groups: (1) the first of two "Mon D" dates on the row, (2) the
    # description, (3) an optional "-" sign, (4) the amount after "$".
    pattern = r'\n([a-zA-Z]{3}\s[0-9]{1,2})\s[a-zA-Z]{3}\s[0-9]{1,2}(.*[a-zA-Z])\s*(\-?)\s\$([0-9,\.]*)'

    def extract(self, text):
        """Return ``[date, description, amount]`` lists for each non-payment row.

        ``date`` is the raw "Mon D" string, ``description`` has its runs of
        whitespace collapsed to single spaces, and ``amount`` is a float.
        Rows carrying a "-" sign are reported on stdout and skipped.
        """
        purchases = []

        for row in re.findall(self.pattern, text):
            if len(row) != 4:
                self.error("ERROR: Invalid result.")

            date, raw_description, sign, raw_amount = row
            description = " ".join(raw_description.split())
            amount = float(raw_amount.replace(',', ''))

            if '-' in sign:
                print(f"Skipping payment: {amount}")
            else:
                purchases.append([date, description, amount])

        return purchases
|
||||
|
||||
class Parser(model.BaseParser):
    """Parse a Capital One PDF statement into a list of dated transactions."""

    def __init__(self):
        super().__init__()
        self.balance = Balance()
        self.year = Year()
        self.transactions = Transactions()

    @property
    def source(self):
        """Identifier recorded for files handled by this parser."""
        return model.TransactionSource.CAPITAL_ONE

    def parse(self, file_name):
        """Read the PDF at *file_name* and return its transactions.

        Returns a list of ``[datetime, description, amount]`` lists. The
        process exits with status -1 when the extracted amounts do not add
        up to the extracted "New Balance" (or when an extractor fails).
        """
        with open(file_name, 'rb') as pdf_file:
            reader = pdf.PdfReader(pdf_file)
            text = ''.join(page.extract_text() for page in reader.pages)

        balance = self.balance.extract(text)
        year = self.year.extract(text)
        transactions = self.transactions.extract(text)

        # Validate transactions match extracted budget
        total = sum(entry[2] for entry in transactions)
        if not math.isclose(total, balance, abs_tol=0.001):
            print(f"ERROR: Actual {total} != Expected {balance}")
            sys.exit(-1)

        # Add year to all the parsed dates. The year is parsed together with
        # the month/day: parsing "%b %d" alone falls back to year 1900, which
        # is not a leap year, so a "Feb 29" row would raise ValueError.
        # NOTE(review): every row gets the year of the "as of" date; a
        # statement spanning December-January would mis-date the December
        # rows -- confirm against real statements.
        for transaction in transactions:
            transaction[0] = datetime.strptime(
                f"{transaction[0]} {year}", "%b %d %Y"
            )

        return transactions
|
69
database.py
Executable file
69
database.py
Executable file
@ -0,0 +1,69 @@
|
||||
import datetime
|
||||
from peewee import *
|
||||
|
||||
# File name of the SQLite database that stores the budget data.
DATABASE = 'budget.db'

# Shared peewee database handle; the pragma switches on SQLite foreign-key
# enforcement for every connection.
instance = SqliteDatabase(DATABASE, pragmas={'foreign_keys': 'on'})
|
||||
|
||||
def create_tables():
    """Helper function to create database tables. Should be called manually.

    Creates the tables for all models and then inserts the default user.
    """
    with instance:
        instance.create_tables([User, TransactionCategory, Transaction, Source])

    # Make my user. The context manager closes the connection even when the
    # insert raises (for example if the user already exists).
    with instance.connection_context():
        User.create(username='ciphercules')
|
||||
|
||||
def add_default_categories():
    """Insert the default parent categories and their subcategories.

    Each key becomes a top-level ``TransactionCategory``; each name in its
    list becomes a child of that category. All inserts run in a single
    transaction, so a failure leaves the table unchanged.
    """
    # A single mapping (rather than a list of one-entry dicts) so that
    # ``.items()`` yields (parent, children) pairs.
    # NOTE(review): TransactionCategory.name is unique, so the original
    # duplicates could never be inserted: "health" (child of "pet" and a
    # parent) and "gas" (under both "transit" and "utilities"). They are
    # renamed here to "pet_health" and "natural_gas" -- confirm the names.
    categories = {
        "food": ["snacks", "fast_food", "groceries", "restaurant"],
        "clothing": [],
        "event": ["event_food", "birthday", "movie_theater"],
        "finance": ["interest"],
        "hobby": ["gym", "game_development", "projects", "education"],
        "home_improvement": [],
        "pet": ["pet_health", "dog_food"],
        "media": ["book", "music", "television", "video_game"],
        "health": ["medicine"],
        "transit": ["car_insurance", "car_registration", "gas", "parking", "taxi", "car_maintenance"],
        "utilities": ["electricity", "natural_gas", "laundry", "cell_phone", "trash", "water"],
        "rent": [],
    }

    # connection_context() closes the connection on exit, including on error.
    with instance.connection_context(), instance.atomic():
        for parent, children in categories.items():
            parent_db = TransactionCategory.create(name=parent)
            for child in children:
                TransactionCategory.create(name=child, parent=parent_db)
|
||||
|
||||
class BaseModel(Model):
    """Base model that binds every subclass to the module-level database."""

    class Meta:
        # All models in this module share the one ``instance`` handle.
        database = instance
|
||||
|
||||
class User(BaseModel):
    """A user of the budget tool, identified by username."""

    # The username itself serves as the primary key.
    username = CharField(primary_key=True, unique=True)
|
||||
|
||||
class TransactionCategory(BaseModel):
    """A uniquely named category that may hang under a parent category."""

    primary_key = AutoField(primary_key=True)

    # Category names are unique across the whole table, parents and children alike.
    name = CharField(unique=True)

    # Self-reference; nullable so a category can exist without a parent.
    # The parent's subcategories are reachable through ``children``.
    parent = ForeignKeyField('self', backref='children', null=True)
|
||||
|
||||
class Source(BaseModel):
    """A statement file that transactions were imported from."""

    # File name of the imported statement; unique, and used as the primary key.
    filename = CharField(unique=True, primary_key=True)

    # Integer identifier of the kind of source the file came from.
    type = IntegerField()

    # Set to the current local time when the row is created.
    created_date = DateTimeField(default=datetime.datetime.now)

    # Owner of the file. The backref is 'sources' rather than 'transactions'
    # so that it does not collide with the backref Transaction.user places
    # on User under that same name.
    user = ForeignKeyField(User, backref='sources')
|
||||
|
||||
class Transaction(BaseModel):
    """One transaction imported from a source file."""

    # --- Metadata ---
    primary_key = AutoField(primary_key=True)
    # File the row was imported from.
    source = ForeignKeyField(Source, backref='transactions')
    # Set to the current local time when the row is created.
    created_date = DateTimeField(default=datetime.datetime.now)
    user = ForeignKeyField(User, backref='transactions')

    # --- Real data ---
    transaction_date = DateTimeField()
    description = CharField()
    amount = FloatField()
    # Optional category; backref '+' means no reverse accessor is added to
    # TransactionCategory.
    subcategory = ForeignKeyField(TransactionCategory, null=True, backref='+')
|
45
main.py
Executable file
45
main.py
Executable file
@ -0,0 +1,45 @@
|
||||
import sys
|
||||
import argparse
|
||||
import os
|
||||
|
||||
import capital_one
|
||||
import database
|
||||
|
||||
def main():
    """Parse each statement file given on the command line and store it.

    For every file, the parsers are tried in order and the first one that
    returns transactions is used. The source file and all of its
    transactions are inserted in a single database transaction.
    """
    parser = argparse.ArgumentParser(prog="BudgetBear", description="Calculate a budget fit for a bear.")
    parser.add_argument('files', type=str, nargs='+', help='File to parse transactions from.')
    args = parser.parse_args()

    username = 'ciphercules'

    file_parsers = [capital_one.Parser()]
    for f in args.files:
        for file_parser in file_parsers:
            # Use the first successful parser.
            transactions = file_parser.parse(f)
            if not transactions:
                continue

            # Add to database. connection_context() closes the connection
            # even if an insert raises, and atomic() rolls back the partial
            # import in that case.
            with database.instance.connection_context(), database.instance.atomic():
                # Add source file first.
                source = database.Source.create(
                    filename=os.path.basename(f),
                    type=file_parser.source,
                    user=username,
                )

                # Add each transaction
                for date, description, amount in transactions:
                    database.Transaction.create(
                        user=username,
                        transaction_date=date,
                        description=description,
                        amount=amount,
                        source=source,
                    )

            # We successfully updated database with this parser, exit loop.
            break


if __name__ == "__main__":
    main()
|
14
model.py
Executable file
14
model.py
Executable file
@ -0,0 +1,14 @@
|
||||
class BaseParser:
    """Base class for statement parsers.

    Subclasses override ``parse`` to return the transactions found in a
    file and ``source`` to identify where those transactions came from.
    """

    def __init__(self):
        # Placeholder attribute kept for compatibility; nothing in the
        # visible code reads it.
        self.butt = None

    def parse(self, file_name=None):
        """Return the transactions parsed from *file_name*.

        The base implementation parses nothing and returns None. The
        parameter is optional so existing no-argument calls keep working
        while the signature matches how subclasses are called.
        """
        return None

    @property
    def source(self):
        """Source identifier for this parser; -1 in the base class."""
        return -1
|
||||
|
||||
class TransactionSource:
    """Integer constants naming the places transactions can come from."""

    # Statements handled by the Capital One parser.
    CAPITAL_ONE = 1
|
3
requirements.txt
Executable file
3
requirements.txt
Executable file
@ -0,0 +1,3 @@
|
||||
peewee==3.15.3
|
||||
PyPDF2==2.11.1
|
||||
typing_extensions==4.4.0
|
14
todo.txt
Executable file
14
todo.txt
Executable file
@ -0,0 +1,14 @@
|
||||
# Goal: Command line tool for easily doing my budget
|
||||
Every week, I need to:
|
||||
1. Categorize my purchases
|
||||
2. Update SplitWise for shared purchases
|
||||
|
||||
# Tasks
|
||||
|
||||
## Parse capital one statement
|
||||
x Given a PDF bank statement from Capital One, extract the transaction date, description, and amount from each transaction
|
||||
Store the data in a SQLite database
|
||||
x Create database schema
|
||||
x Automatically add capital one transactions to database
|
||||
Add web page to add categories
|
||||
Add web page to categorize transactions
|
Loading…
Reference in New Issue
Block a user