From ac6763df1b6987f7b6b39a4d0ddf379f0633919a Mon Sep 17 00:00:00 2001 From: Sara Montecino Date: Sun, 30 Oct 2022 02:20:51 -0700 Subject: [PATCH] Initial commit --- .gitignore | 2 + .vscode/launch.json | 17 ++++++++ budget.db | Bin 0 -> 57344 bytes capital_one.py | 101 ++++++++++++++++++++++++++++++++++++++++++++ database.py | 69 ++++++++++++++++++++++++++++++ main.py | 45 ++++++++++++++++++++ model.py | 14 ++++++ requirements.txt | 3 ++ todo.txt | 14 ++++++ 9 files changed, 265 insertions(+) create mode 100644 .gitignore create mode 100755 .vscode/launch.json create mode 100755 budget.db create mode 100755 capital_one.py create mode 100755 database.py create mode 100755 main.py create mode 100755 model.py create mode 100755 requirements.txt create mode 100755 todo.txt diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..93526df --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +venv/ +__pycache__/ diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100755 index 0000000..9c519d0 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,17 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Main", + "type": "python", + "request": "launch", + "program": "main.py", + "console": "integratedTerminal", + "justMyCode": true, + "args": ["C:\\Users\\saram\\Downloads\\Statement_082022_4653.pdf"] + } + ] +} \ No newline at end of file diff --git a/budget.db b/budget.db new file mode 100755 index 0000000000000000000000000000000000000000..1eeed465a839dc1751e8dc0099f43cd194faad8f GIT binary patch literal 57344 zcmeI5Yi!%r6@W#{wyXzv?Ie!kJQ*p8t+aATQO{(pP0O^TMkZy6j_WRiq1tlm`WZjk zCTY;A>o5$%HWdA{cG!;&!-io)hyK|*bU&5>!>~tD1nAHX!!~3^uwv^lq(!@7-GJRI zMlZ&s%XG!+>=4$mjt0%b|EY%eMsMx5{xD0Uf7sm4Q#MpUzhthUf00e-*b|LV%+u;@cRJnI`c$X5VfT-__MO zcU!Xt9&$Ro0`};>$>YXaX|>s(%h=zIl{fWPwFREs(c$p={nY2?*NgWrlp0tyymj7i zqKZRV=_X(+t8&ANZrFsX8|e?jR!vEZ)~J=#j*ns0tgmqk zHDto>@OF1o<*o_>Qjf{2wYiYwBT}kbHQ&6IHFgn!-qv>Sscyf00e-* z_9d|2Z2Yf%|9{f00e*l5C8%|00;m9AOHk_fQ|r7wfLv!Rvs*^ zPH!xf*0k^cVf?S71S${!0zd!=00AHX1b_e#00KY&2mpcYO@OicoekrE^8G(f`UeXT z00KY&2mk>f00e*l5C8%|00;m9AaENKAm9JP`2RLOz)&-n9h z;J)Ubb^Y13uHXe=e>hb37|ii5t{%2K8dYQ9f@1io`(PGs*<(sFNs^@n1is6a}_CxWp^ zEP`V=_2SrHK6~)RM-ONrq$q>p^NB>*Qt?Bz;^#zJ6;t?Gt!f%SN;ofNl>#fuDJd%z zlA^0@->i*M z`2>e$T1<-*YFf$5B#JxU{mGZl_Ptl8c56`>bpyEfuoy+G7DY~+%;ASFA1~w-H8V0f zUJx^3E|d@=v6vvplY=MzN!r21a(lNH!?@8wh;dxp5{YOnhVi_bW|QK`2!3$zy=>nN z1R-7s#uBk$I3k2efAs1F5=q&2xKYS zcWi+m!ts{+WUlT#C#gw=jYPt1RETi+e?lL{r0|;Su9a^*)};j@8X*XCyoII_3kvmC zi62U?u>Np}=c9V>*~^t~`LWI|ybBy}p*nt0@15F7@iFw#c~`}JN>Xu^#GM)^#FeBZ z%7x4@=?{|E$r(9cwlSLfj1hW#sbQfBG!?$mD`iyNB@QV=B3pTIe6W6H;@2*Z%B{2( z!q|T;1g_Q!aU(3S!iLC@)RuPYHv)+Httx(V{qQbVA~}kcBe?22qb9C*?<^ zq*^^AiE)9$r={BENyi2E7QQ(y6#d{GqBY;H|C#U3OU@fd-hg>Hf$Qxpmir8@=SoT( zmDqSR8jOacK|T?S;NDkerEGRm&SYB_IyCnujNHdLA#{uGb7_SwC@f#A?gz^bhvsg= zpq_`Vx(n4$d*bMQGCO)&&9UQ!@mP3=OKtAnHDx)WcGTH2eIQYqvl@su#b}f*k zQ6Mo+u+Znnf}wD&!R5tlE{)GQ93PF2igH?1RZ*^-b;QBhv1jn|Y}wYP`8Vow!dCtB z^(!HAFQI`YW=$TyC>(5l4gzTk3u-#?euYqh*UY!d%!=?`JWN z&KslDc1nxG=+ZDOaA8YV0u6B_)0u1rpNw$FtBPzgE9Nt**red|VqqDdmFPp{`~Lua z4(ILr&0ioQ-?rLWMJ=@;nd=%?s1-X$zR00;m9AOHk_01yBIKmZ5;f&Xs;$nK{aZnXq- zdY?}3)#=A{`ca*JM5p_8x=*Jc*6FNHKcv$S>huFTeZNlMr_=Z9^d6nQN2hyr`fi=x zt762yY?(>mRw)7?7VrPG}{y+fxvbb7l^Z`0|mI^C|*TXgyV zD&v2*U_)=9pQ3M}&!c%XicX*|<{jqO%nz7nn2XG(m;pS-x6{|?pV8l@zd$e23eD3y zJ@0y6^Zd~BRnH~Qlt=XVJ#Fqkxqt5dj{A%5W%n4~B`iPy2mk>f00e*l5C8%|;C3Kz zz&=13{$Mp=YV9+%_L^FcnOcvUT924o{iar*sr9g_l{K{r7HQ|k^>YnQ3D)6|Mgt&FLaHnn<8t!`7R%hc*LwRV_V z9j4ZHQ)`>4wbj&WH?_9xw+}cC`>V?MABAn`U+C}XZS+_4CcYE!d-NOhDtZO|0^bw( zF?tDoAO9-gyZG+F*U{I|SJ2bwOZYy)ljw1D0d1fae8*rG|5o5Enm~Dc?;wqm=x&t2 zyMzS@00AHX1b_e#00KY&2mk>f00e-*wkFVOw^0Ei`iSTy;usM}i8w-pp9mijhlyZ` zI7GxjA`TF-pNM@#>?NXyh&@DjiP%jYXeXkLh*l!(M6}rLHYa)h-yuAKA3ehSjd=j(|A#_T;b=tYUzwTRB=-Q!|8JPx z1M~muGo6fC!2ExG@(=4X@9VSPVE%tK4GPTv4>ruehxz{;%>Re^|1kgGB=-Q!|A+bi z&1uLi&%cNH|1kf*X>@2gAHv6I{{PuNoGOcq|6Q(YHaz3Mg}F%on?CP(&$Hxy*FBFv zgarry0U!VbfB+Bx0zd!=Y)1lRdqB&!WtbHiCq=f9^$X{tu4h5pI{9aE3FkykJzsY8 zX;B#E#f!#q;=YEt;f6ojt$j09k3!3fH^06@^5UgN<6=CVx=#=nFD&&>FE5_Be{Nv` z=f(R(q>>!>kzOqj$siw~^+4+BV5_-theR3YejUx#6XPbuwsPAs&9hNDx+qRwYb_;n z-7`*mCQAjWniv}=;}rAybaZ>Q>RYxQ)#5P9%omL&;ug};;R;dD40w-%!(h`2PQFMI z;8t?;RW+P=ZR$j2;8muMXdxISDE=@Z;N0Li0GtwXSi&iaagxGlh-*xeoPSzNN9)%@ zFiJ-ojU?jMDrB9)s5mT5-~^N;OLQnMW!Zw18pnxwF@e%68i7`y=H4hD>W6Wk%NQSlKGb4`_T(5q#NzI0fN!YD~(G@OWA$dK5StKKY0Wbvg+V&Y5e7Vd>a S+**sMr^+{T-&e^Deepkw|9Xl5 literal 0 HcmV?d00001 diff --git a/capital_one.py b/capital_one.py new file mode 100755 index 0000000..513b28d --- /dev/null +++ b/capital_one.py @@ -0,0 +1,101 @@ +import PyPDF2 as pdf +import re +import sys +import math +from datetime import datetime + +import model + +class Regex: + def error(self, msg): + print(f"ERROR: {self.__class__.__name__} failed: {msg}") + sys.exit(-1) + + def extract(self, text): + return None + +class Balance(Regex): + pattern = r'\nNew\sBalance.*\$([0-9,\.]*)' + + def extract(self, text): + results = re.findall(self.pattern, text) + if len(results) == 0: + self.error("No matches.") + + if len(results) > 1: + self.error(f"Too many matches {len(results)}") + + return float(results[0].replace(',','')) + +class Year(Regex): + pattern = r'\nAvailable\sCredit\s+\(as\sof\s(.*)\)' + + def extract(self, text): + results = re.findall(self.pattern, text) + if len(results) == 0: + self.error("No matches.") + + if len(results) > 1: + self.error(f"Too many matches {len(results)}") + + date = datetime.strptime(results[0], '%b %d, %Y') + return date.year + +class Transactions(Regex): + pattern = r'\n([a-zA-Z]{3}\s[0-9]{1,2})\s[a-zA-Z]{3}\s[0-9]{1,2}(.*[a-zA-Z])\s*(\-?)\s\$([0-9,\.]*)' + + def extract(self, text): + results = re.findall(self.pattern, text) + transactions = [] + for result in results: + if len(result) != 4: + self.error("ERROR: Invalid result.") + + date = result[0] + description = " ".join(result[1].split()) + is_payment = '-' in result[2] + amount = float(result[3].replace(',', '')) + + if is_payment: + print(f"Skipping payment: {amount}") + continue + + transactions.append([date, description, amount]) + + return transactions + +class Parser(model.BaseParser): + def __init__(self): + self.balance = Balance() + self.year = Year() + self.transactions = Transactions() + + @property + def source(self): + return model.TransactionSource.CAPITAL_ONE + + def parse(self, file_name): + text = '' + with open(file_name, 'rb') as pdf_file: + reader = pdf.PdfReader(pdf_file) + for page in reader.pages: + text += page.extract_text() + + balance = self.balance.extract(text) + year = self.year.extract(text) + transactions = self.transactions.extract(text) + + # Validate transactions match extracted budget + total = sum(x[2] for x in transactions) + if not math.isclose(total, balance, abs_tol=0.001): + print(f"ERROR: Actual {total} != Expected {balance}") + sys.exit(-1) + + # Add year to all the parsed dates. + for transaction in transactions: + orig_date = transaction[0] + orig_date = datetime.strptime(orig_date, "%b %d") + new_date = orig_date.replace(year = year) + transaction[0] = new_date + + return transactions \ No newline at end of file diff --git a/database.py b/database.py new file mode 100755 index 0000000..8496df9 --- /dev/null +++ b/database.py @@ -0,0 +1,69 @@ +import datetime +from peewee import * + +DATABASE = 'budget.db' +instance = SqliteDatabase(DATABASE, pragmas=[('foreign_keys', 'on')]) + +def create_tables(): + """Helper function to create database tables. Should be called manually.""" + with instance: + instance.create_tables([User, TransactionCategory, Transaction, Source]) + + # Make my user. + instance.connect() + User.create(username='ciphercules') + instance.close() + +def add_default_categories(): + categories = [ + {"food": ["snacks", "fast_food", "groceries", "restaurant"]}, + {"clothing": []}, + {"event": ["event_food", "birthday", "movie_theater"]}, + {"finance": ["interest"]}, + {"hobby": ["gym", "game_development", "projects", "education"]}, + {"home_improvement": []}, + {"pet": ["health", "dog_food"]}, + {"media": ["book", "music", "television", "video_game"]}, + {"health": ["medicine"]}, + {"transit": ["car_insurance", "car_registration", "gas", "parking", "taxi", "car_maintenance"]}, + {"utilities": ["electricity", "gas", "laundry", "cell_phone", "trash", "water"]}, + {"rent": []}, + ] + + instance.connect() + with instance.atomic(): + for parent, children in categories.items(): + parent_db = TransactionCategory.create(name=parent) + for child in children: + TransactionCategory.create(name=child, parent=parent_db) + +class BaseModel(Model): + class Meta: + database = instance + +class User(BaseModel): + username = CharField(unique=True, primary_key=True) + +class TransactionCategory(BaseModel): + primary_key = AutoField(primary_key=True) + name = CharField(unique=True) + parent = ForeignKeyField('self', null=True, backref='children') + +class Source(BaseModel): + filename=CharField(unique=True, primary_key=True) + type = IntegerField() + created_date = DateTimeField(default=datetime.datetime.now) + user = ForeignKeyField(User, backref='transactions') + +class Transaction(BaseModel): + # Metadata + primary_key = AutoField(primary_key=True) + source = ForeignKeyField(Source, backref='transactions') + created_date = DateTimeField(default=datetime.datetime.now) + user = ForeignKeyField(User, backref='transactions') + + # Real data + transaction_date = DateTimeField() + description = CharField() + amount = FloatField() + subcategory = ForeignKeyField(TransactionCategory, backref='+', null=True) \ No newline at end of file diff --git a/main.py b/main.py new file mode 100755 index 0000000..9b28ebf --- /dev/null +++ b/main.py @@ -0,0 +1,45 @@ +import sys +import argparse +import os + +import capital_one +import database + +parser = argparse.ArgumentParser(prog="BudgetBear", description="Calculate a budget fit for a bear.") +parser.add_argument('files', type=str, nargs='+', help='File to parse transactions from.') +args = parser.parse_args() + +username = 'ciphercules' + +file_parsers = [capital_one.Parser()] +for f in args.files: + for file_parser in file_parsers: + # Use the first successful parser. + transactions = file_parser.parse(f) + if not transactions: + continue + + # Add to database + database.instance.connect() + with database.instance.atomic(): + # Add source file first. + source = database.Source.create( + filename=os.path.basename(f), + type=file_parser.source, + user=username + ) + + # Add each transaction + for transaction in transactions: + date, description, amount = transaction + database.Transaction.create( + user=username, + transaction_date=date, + description=description, + amount=amount, + source=source + ) + database.instance.close() + + # We successfully updated database with this parser, exit loop. + break \ No newline at end of file diff --git a/model.py b/model.py new file mode 100755 index 0000000..fbf73f5 --- /dev/null +++ b/model.py @@ -0,0 +1,14 @@ +class BaseParser: + def __init__(self): + self.butt = None + + def parse(self): + return None + + @property + def source(self): + return -1 + +class TransactionSource: + """Enum of possible transaction sources""" + CAPITAL_ONE = 1 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100755 index 0000000..b982e94 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +peewee==3.15.3 +PyPDF2==2.11.1 +typing_extensions==4.4.0 \ No newline at end of file diff --git a/todo.txt b/todo.txt new file mode 100755 index 0000000..940509e --- /dev/null +++ b/todo.txt @@ -0,0 +1,14 @@ +# Goal: Command line tool for easily doing my budget +Every week, I need to: + 1. Categorize my purchases + 2. Update SplitWise for shared purchases + +# Tasks + +## Parse capital one statement +x Given a PDF bank statement from capital one, extract the transaction date, description, and amount from each transaction +Store the data in a SQL lite database + x Create database schema + x Automatically add capital one transactions to database +Add web page to add categories +Add web page to categorize transactions \ No newline at end of file