"""Extract dated entries from a CSV export and write them as flat CSV rows.

Each input row whose first column starts with two dotted dates is turned into
an :class:`Entry` (date, memo, location, kind, amount); all other rows are
skipped.  The resulting entries are written to the output file, one per row.
"""
import csv as pcsv
import dataclasses
import datetime
import re
from typing import Optional, Sequence, Tuple

import click

# A usable row starts with a dotted date, a space and the start of a second
# date, e.g. "15.07.22 18.07.22 ...".  Compiled once instead of per row.
_DATED_ROW = re.compile(r'\d+\.\d+\.\d+ \d+')


@dataclasses.dataclass
class Entry:
    """One parsed row of the input file."""

    date: str  # str() of the parsed datetime.date, i.e. 'YYYY-MM-DD'
    memo: str  # first memo line, up to its last comma
    location: str  # text after the last comma of the first memo line, stripped
    kind: str  # second memo line, or '' when the memo has a single line
    amount: float  # negative when the amount column carries a trailing ' -'


def parse_date(text: str) -> datetime.date:
    """Parse a ``DD.MM.YY`` string into a date.

    The year component is added to 2000, so ``'15.07.22'`` is 2022-07-15.

    Raises:
        ValueError: if *text* does not consist of exactly three dot-separated
            integers, or if they do not form a valid date.
    """
    day, month, year = (int(piece) for piece in text.split('.'))
    return datetime.date(year=2000 + year, month=month, day=day)


def parse_memo(memo: str) -> Tuple[str, str, str]:
    """Split a raw memo into ``(memo, location, kind)``.

    The first line of *memo* is split at its last comma into the memo text and
    a location (whitespace-stripped); the second line, when present, is the
    kind.  Missing parts are returned as empty strings and any lines beyond
    the second are ignored.
    """
    lines = memo.split('\n')
    first_line = lines[0]
    kind = lines[1] if len(lines) > 1 else ''

    head, comma, tail = first_line.rpartition(',')
    if comma:
        text, location = head, tail
    else:
        # No comma at all: the whole line is the memo, there is no location.
        text, location = first_line, ''
    return text, location.strip(), kind


def parse(line: Sequence[str]) -> Optional[Entry]:
    """Convert one CSV row into an :class:`Entry`, or ``None`` if it is not one.

    Example row::

        ['15.07.22 18.07.22 NYA*Arena Cinemas AG, 433050453 CH\\nLebensmittel, Spezialgeschäfte', '', '', '', '', '', '4.00']

    Args:
        line: the columns of one row as produced by ``csv.reader``.

    Returns:
        The parsed entry, or ``None`` when the row has fewer than four
        columns, an empty last column, or a first column that does not start
        with two dotted dates.

    Raises:
        ValueError: if the amount column has a trailing marker other than
            ``'-'``, or if the date or amount cannot be converted.
    """
    if len(line) < 4 or not line[-1]:
        return None
    if not _DATED_ROW.match(line[0]):
        return None

    # First column is "<date> <date> <memo>"; only the first date is used.
    # When the memo is not embedded there, it is taken from the third column.
    head = line[0].split(maxsplit=2)
    date = parse_date(head[0])
    raw_memo = head[2] if len(head) == 3 else line[2]
    memo, location, kind = parse_memo(raw_memo)

    # Amount looks like "1'234.50" or "4.00 -"; apostrophes are removed
    # (presumably thousands separators) and a trailing "-" marks a negative.
    number, *marker = line[-1].split(' ', maxsplit=1)
    amount = float(number.replace("'", ''))
    if marker:
        # Explicit check instead of `assert`, which is stripped under -O and
        # would then negate the amount for any trailing token.
        if marker[0] != '-':
            raise ValueError(
                f"unexpected amount suffix {marker[0]!r} in {line[-1]!r}")
        amount = -amount

    return Entry(date=str(date), memo=memo, location=location, kind=kind,
                 amount=amount)


@click.command()
@click.option('--out', required=True, type=str)
@click.option('--csv', required=True, type=str)
def extract(out: str, csv: str):
    """Read the CSV file at *csv* and write its parsed entries to *out*.

    All rows are parsed before the output file is opened, so a parsing error
    does not leave a truncated output file behind.
    """
    # Default newline translation is kept on input: it turns '\r\n' inside
    # quoted fields into '\n', which is what parse_memo splits on.
    with open(csv) as source:
        parsed = (parse(row) for row in pcsv.reader(source))
        entries = [entry for entry in parsed if entry is not None]

    # newline='' lets the csv writer control line endings itself; without it
    # an extra '\r' is emitted per row on platforms that translate '\n'.
    with open(out, 'w', newline='') as target:
        pcsv.writer(target).writerows(
            dataclasses.astuple(entry) for entry in entries)


if __name__ == '__main__':
    extract()