
| Current Path : /usr/local/lib/python3.8/dist-packages/konto/parser/ |
Linux ift1.ift-informatik.de 5.4.0-216-generic #236-Ubuntu SMP Fri Apr 11 19:53:21 UTC 2025 x86_64 |
| Current File : //usr/local/lib/python3.8/dist-packages/konto/parser/unicredit.py |
# coding: utf8
import os,sys,re,glob,time
try:
from konto_custom import config
except:
from konto.base import config
#*********************************************************************************
class Unicredit (object):
    """Convert OCR text of Unicredit account statements into CSV files.

    For every ``*.pdf`` in ``dir`` a sibling ``.ocr`` text file with the same
    base name is read, its booking lines are extracted, and a ``.csv`` file
    is written to the current working directory.  The PDF and OCR files are
    then moved next to the CSV under the same, newly derived base name.
    """

    # Statement-number header.  The character classes accept several
    # characters in place of the last two letters of the keyword
    # (presumably OCR misreadings -- confirm against real scans).
    _STATEMENT_NO_RE = re.compile(r"IZVOD BR[DO][J\)\]\!].*?(\d+)")

    # Statement date "Na dan DD.MM.YYYY"; blanks inside the numbers are
    # tolerated and the last digit of the year may be a "?".
    _STATEMENT_DATE_RE = re.compile(
        r"Na dan.*?(\d *\d) *\. *(\d *\d) *\. *(\d\d\d[\d\?])"
    )

    # Booking line: <reference> <DD.MM.YYYY> <description> <amount> <amount>.
    # A lower-case "o" is accepted wherever a digit is expected.
    _BOOKING_RE = re.compile(
        r"^(.*?)(\d\d\.\d\d\.\d\d\d\d)(.*?)"
        r" +([0123456789o,]+\.[\do][\do])"
        r" +([0123456789o,]+\.[\do][\do])"
    )

    # Placeholders used in the output file name when the OCR text does not
    # contain the corresponding header line.
    _UNKNOWN_DATE = "99999999"
    _UNKNOWN_NUMBER = "999"

    def __init__ (self,dir="."):
        """Remember the directory that is searched for ``*.pdf`` statements.

        Args:
            dir: Directory containing the PDF files and their ``.ocr``
                companions.  Defaults to the current directory.
        """
        self.dir = dir

#*********************************************************************************

    def mark (self,remark=""):
        """Print the milliseconds elapsed since the previous call.

        The first call only stores the current time and prints nothing.

        Args:
            remark: Label appended to the printed timing line.
        """
        now = time.perf_counter()
        if "t0" in vars(self):
            elapsed_ms = (now - self.t0) * 1000
            print("%9.2f ms for: %s" % (elapsed_ms, remark))
        self.t0 = now

#********************************************************************************

    def parse_unicredit (self,*pars):
        """Process every ``*.pdf`` statement found in ``self.dir``.

        For each PDF the ``.ocr`` file with the same base name is parsed,
        ``<fileroot>.csv`` is written to the current working directory, and
        the PDF and OCR files are moved to ``<fileroot>.pdf`` and
        ``<fileroot>.ocr`` there.

        Args:
            *pars: Accepted for call compatibility; not used.

        Raises:
            OSError: If an ``.ocr`` file is missing or a file cannot be
                written or moved.
        """
        for statement_pdf in glob.glob(self.dir + "/*.pdf"):
            print("xxxxxx",statement_pdf)   # progress output, kept as is
            base = statement_pdf[:-4]       # path without the ".pdf" suffix

            # NOTE(review): files are read and written with the platform
            # default encoding, as before -- confirm the OCR tool's encoding.
            with open(base + ".ocr") as ocr_file:
                ocr_text = ocr_file.read()

            fileroot, csv_text = self._parse_statement(ocr_text)

            with open(fileroot + ".csv", "w") as csv_file:
                csv_file.write(csv_text)

            self._move(base + ".pdf", fileroot + ".pdf")
            self._move(base + ".ocr", fileroot + ".ocr")

#********************************************************************************

    def _parse_statement (self, ocr_text):
        """Extract the target file root and the CSV text from one OCR dump.

        Returns:
            A ``(fileroot, csv_text)`` tuple.  ``fileroot`` has the form
            ``<prefix>_<YYYYMMDD>_<NNN>``; ``csv_text`` has one line per
            booking, ``<date>;<amount>;<description> <reference>``.
        """
        prefix = "IzvodPLReport"
        # Reset for every statement so that no value leaks over from a
        # previously processed file.
        statement_no = ""
        statement_date = ""
        reference = ""
        current = ""        # booking being assembled, may span several lines
        bookings = []

        for line in ocr_text.split("\n"):
            # Repair one specific, recurring OCR misreading.
            line = line.replace(" 0.380 ", " 0.80 ")

            if "VALUTI" in line:
                prefix = "IzvodDevPLReport"

            found = self._STATEMENT_NO_RE.search(line)
            if found:
                statement_no = found.group(1)
                continue

            found = self._STATEMENT_DATE_RE.search(line)
            if found:
                year_month_day = found.group(3) + found.group(2) + found.group(1)
                # A "?" in the year is replaced by "3" (hard-coded choice
                # taken over unchanged).
                statement_date = year_month_day.replace(" ", "").replace("?", "3")
                continue

            if "UKUPNO" in line:
                # Nothing after this line is parsed.
                break

            found = self._BOOKING_RE.search(line)
            if found:
                # A new booking starts: flush the one collected so far.
                if current:
                    bookings.append(current + " " + reference)
                reference = found.group(1).strip()
                amount = self._format_amount(found.group(4), found.group(5))
                current = found.group(2) + ";" + amount + ";" + found.group(3).strip()
                continue

            # Any other non-blank line continues the current booking text.
            if line.strip() and current:
                current = current + " " + line.strip()

        if current:
            bookings.append(current + " " + reference)

        text = "".join(entry + "\n" for entry in bookings)
        # Collapse the runs of blanks left over from the column layout.
        text = re.sub(r" {2,}", " ", text)
        text = text.replace("¬"," ")

        if statement_no:
            number = "%03d" % int(statement_no)
        else:
            number = self._UNKNOWN_NUMBER
        fileroot = prefix + "_" + (statement_date or self._UNKNOWN_DATE) + "_" + number
        return fileroot, text.strip() + "\n"

#********************************************************************************

    @staticmethod
    def _format_amount (first, second):
        """Build the signed CSV amount from the two amount columns.

        The second column is used as is unless it is zero, in which case the
        first column is used with a minus sign.  Thousands separators are
        dropped and the decimal point becomes a comma.
        """
        # Map the OCR "o" to "0" before the zero test so that a second
        # column read as "o.oo" is recognised as zero as well.
        first = first.replace("o", "0")
        second = second.replace("o", "0")
        amount = "-" + first if second == "0.00" else second
        return amount.replace(",", "").replace(".", ",")

#********************************************************************************

    @staticmethod
    def _move (source, target):
        """Move ``source`` to ``target`` without going through a shell."""
        import shutil   # local import: the module header does not provide it

        # Re-running over already renamed files must not fail.
        if os.path.abspath(source) == os.path.abspath(target):
            return
        shutil.move(source, target)
#******************************************************************************
if __name__ == "__main__":
    # Script entry point: process the PDF statements of the current
    # directory (the constructor's default).
    statement_parser = Unicredit()
    statement_parser.parse_unicredit()