88 lines
2.1 KiB
Python
88 lines
2.1 KiB
Python
import csv as pcsv
|
|
import dataclasses
|
|
from typing import Optional, Tuple
|
|
import re
|
|
import datetime
|
|
|
|
import click
|
|
|
|
|
|
@dataclasses.dataclass
class Entry:
    """One parsed statement row.

    Field order is significant: rows are written out with
    ``dataclasses.astuple``, so output columns follow the declaration order.
    """

    # First date of the row, already rendered as text via ``str(date)``.
    date: str
    # First memo line, without the part after its last comma.
    memo: str
    # Text after the last comma of the first memo line, or ''.
    location: str
    # Second line of the raw memo, or '' when the memo has a single line.
    kind: str
    # Signed amount; negative when the source value ends in ' -'.
    amount: float
|
|
|
|
|
|
def parse_date(text: str) -> datetime.date:
    """Convert a ``DD.MM.YY`` (or ``DD.MM.YYYY``) string into a date.

    Args:
        text: Day, month and year separated by dots, e.g. ``'15.07.22'``.

    Returns:
        The corresponding ``datetime.date``. Years below 100 are taken to be
        in the 2000s; any other year is used unchanged.

    Raises:
        ValueError: If ``text`` is not exactly three dot-separated integers,
            or they do not form a valid calendar date.
    """
    day, month, year = (int(part) for part in text.split('.'))
    # Only expand abbreviated years: blindly adding 2000 would turn
    # '15.07.2022' into the year 4022.
    if year < 100:
        year += 2000
    return datetime.date(year=year, month=month, day=day)
|
|
|
|
|
|
def parse_memo(memo: str) -> Tuple[str, str, str]:
    """Break a raw memo into ``(text, location, kind)``.

    The first line of ``memo`` holds the text, optionally followed by a comma
    and a location. The second line, when present, is returned as ``kind``;
    any further lines are ignored.

    Returns:
        ``(text, location, kind)``. ``location`` is whitespace-stripped and
        missing pieces are empty strings; ``text`` and ``kind`` are returned
        as found.
    """
    lines = memo.split('\n')
    first_line = lines[0]
    kind = lines[1] if len(lines) > 1 else ''

    # Only the last comma separates the location; earlier commas stay in
    # the text.
    text, comma, tail = first_line.rpartition(',')
    if not comma:
        # No comma at all: rpartition leaves the whole line in ``tail``.
        text, tail = first_line, ''
    return text, tail.strip(), kind
|
|
|
|
|
|
def parse(line: tuple) -> Optional[Entry]:
    """Turn one CSV row into an ``Entry``, or ``None`` if it is not a booking.

    A row is accepted when it has at least four cells, a non-empty last cell
    (the amount) and a first cell that starts with a dotted date followed by
    a space and a digit. Everything else is skipped by returning ``None``.

    Args:
        line: The cells of one CSV row (any indexable sequence of strings).

    Returns:
        The parsed ``Entry``, or ``None`` for rows that do not look like a
        booking.

    Raises:
        ValueError: If the date or amount cannot be converted, or the amount
            is followed by anything other than a single ``-``.
    """
    # Example row:
    # ['15.07.22 18.07.22 NYA*Arena Cinemas AG, 433050453 CH\nLebensmittel, Spezialgeschäfte', '', '', '', '', '', '4.00']
    if len(line) < 4 or not line[-1]:
        return None
    if not re.match(r'\d+\.\d+\.\d+ \d+', line[0]):
        return None

    # First cell is '<date> <second token> <memo...>'. When the memo is not
    # part of that cell, it is taken from the third cell instead.
    head = line[0].split(maxsplit=2)
    date = parse_date(head[0])
    raw_memo = head[2] if len(head) == 3 else line[2]
    memo, location, kind = parse_memo(raw_memo)

    # Amount looks like "1'234.50" or "4.00 -"; the apostrophes are
    # presumably thousands separators and are dropped before conversion.
    amount_parts = line[-1].split(' ', maxsplit=1)
    amount = float(amount_parts[0].replace("'", ''))
    if len(amount_parts) == 2:
        # Raise explicitly instead of asserting: asserts vanish under
        # ``python -O`` and any trailing text would then silently negate
        # the amount.
        if amount_parts[1] != '-':
            raise ValueError(f'unexpected amount suffix in {line[-1]!r}')
        amount = -amount

    return Entry(
        date=str(date),
        memo=memo,
        location=location,
        kind=kind,
        amount=amount,
    )
|
|
|
|
|
|
@click.command()
@click.option('--out', required=True, type=str)
@click.option('--csv', required=True, type=str)
def extract(out: str, csv: str):
    """Read the statement export CSV and write the recognised rows to OUT.

    Each output row has the columns date, memo, location, kind, amount.
    Input rows that are not recognised as bookings are skipped.
    """
    # The whole input is read before OUT is opened.
    # NOTE(review): the csv docs recommend newline='' for readers too. It is
    # left at the default here because that would keep '\r\n' inside quoted
    # memo cells, while parse_memo splits on '\n' only.
    with open(csv) as f:
        parsed = (parse(row) for row in pcsv.reader(f))
        entries = [entry for entry in parsed if entry is not None]

    # newline='' stops the text layer from translating the '\r\n' that the
    # csv writer emits; without it Windows output contains '\r\r\n'.
    with open(out, 'w', newline='') as f:
        pcsv.writer(f).writerows(
            dataclasses.astuple(entry) for entry in entries
        )
|
|
|
|
|
|
# Run the click command only when executed as a script, not on import.
if __name__ == '__main__':
    extract()
|