"""Extract dated rows from a CSV export into a simplified CSV file.

Each input row whose first column looks like a ``D.M.Y`` date and whose last
column is non-empty is converted to an :class:`Entry` and written to the
output file as ``date, memo, location, kind, amount``.  All other rows are
skipped.
"""
import csv as pcsv
import dataclasses
import datetime
import re
from typing import Optional, Sequence, Tuple

import click

# A date column is three dot-separated digit groups.  Trailing whitespace is
# tolerated because ``int()`` ignores it when the groups are converted.
_DATE_RE = re.compile(r'\d+\.\d+\.\d+\s*')

# A memo ending in a space followed by exactly three capital letters
# (e.g. the trailing "DEU" in the sample row shown in ``parse``).
_MEMO_RE = re.compile(r'(.+) ([A-Z]{3})$')


@dataclasses.dataclass
class Entry:
    """One extracted row; field order is the column order of the output."""

    date: str  # ISO text (YYYY-MM-DD), i.e. str() of a datetime.date
    memo: str
    location: str  # three-capital-letter memo suffix, or '' when absent
    kind: str  # always '' at present; nothing in this file fills it in
    amount: float


def parse_date(text: str) -> datetime.date:
    """Convert a ``D.M.Y`` string such as ``19.10.2021`` into a date.

    Raises:
        ValueError: if *text* does not consist of exactly three
            dot-separated integers, or they do not form a valid date.
    """
    day, month, year = (int(part) for part in text.split('.'))
    return datetime.date(year=year, month=month, day=day)


def parse_memo(memo: str) -> Tuple[str, str, str]:
    """Split a memo into ``(memo, location, kind)``.

    Only the first line of *memo* is kept.  If that line ends in a space
    followed by three capital letters, those letters are returned as the
    location and removed from the memo; otherwise the location is ``''``.
    ``kind`` is currently always ``''``.
    """
    # Drop a stray carriage return so a CRLF line break inside the memo
    # cannot hide the three-letter suffix from the ``$``-anchored pattern.
    first_line = memo.split('\n')[0].rstrip('\r')
    kind = ''
    match = _MEMO_RE.match(first_line)
    if match is None:
        return first_line, '', kind
    return match.group(1), match.group(2), kind


def parse(line: Sequence[str]) -> Optional[Entry]:
    """Convert one CSV row into an :class:`Entry`.

    Returns ``None`` for rows that are not data rows: fewer than four
    columns, an empty last column, or a first column that is not a
    ``D.M.Y`` date.

    Raises:
        ValueError: if the date is not a valid calendar date or the last
            column cannot be converted to ``float``.
    """
    # 19.10.2021,19.10.2021,Nintendo CD598510225 Frankfurt am DEU,,5.60
    if len(line) < 4 or not line[-1]:
        return None
    # The whole field must be a date; a mere date-like prefix would make
    # ``parse_date`` raise instead of the row being skipped.
    if not _DATE_RE.fullmatch(line[0]):
        return None
    date = parse_date(line[0])
    memo, location, kind = parse_memo(line[2])
    return Entry(
        date=str(date),
        memo=memo,
        location=location,
        kind=kind,
        amount=float(line[-1]),
    )


@click.command()
@click.option('--out', required=True, type=str,
              help='Path of the CSV file to write.')
@click.option('--csv', required=True, type=str,
              help='Path of the CSV file to read.')
def extract(out: str, csv: str) -> None:
    """Copy the dated rows of the --csv file to --out in simplified form."""
    # The csv module expects files opened with newline='' so that it can
    # handle line endings (including those inside quoted fields) itself.
    # The input is read completely before the output file is opened.
    with open(csv, newline='') as source:
        parsed = map(parse, pcsv.reader(source))
        entries = [entry for entry in parsed if entry is not None]

    with open(out, 'w', newline='') as target:
        pcsv.writer(target).writerows(
            dataclasses.astuple(entry) for entry in entries
        )


if __name__ == '__main__':
    extract()