Preprocessing and Postprocessing examples

Some recipes for preprocessing (and postprocessing) EDI files.
Plugin 'demo_preprocessing' at the bots sourceforge site demonstrates preprocessing.

Example 1: Discard input files that are too small

import os
import bots.preprocess as preprocess
from bots.botsconfig import *

def postincommunication(routedict,*args,**kwargs):
    ''' Hook that is called after the communication in the route.
        Hands the route over to preprocess.preprocess, with discard_file as the function to apply.
    '''
    preprocess.preprocess(function=discard_file,routedict=routedict)

def discard_file(ta_from,endstatus,*args,**kwargs):
    ''' Discard files that are too small (e.g. zero-length files); pass all other files on.

        ta_from:   transaction of the incoming file.
        endstatus: status given to the new transaction for files that are kept.
    '''
    MIN_BYTES = 100     #files smaller than this (filesize in bytes) are discarded
    ta_from.synall()
    if ta_from.filesize >= MIN_BYTES:
        #big enough: make a new transaction for the bots database that keeps the (same) filename
        kept = ta_from.copyta(status=endstatus)
        kept.update(statust=OK,filename=ta_from.filename)
        return
    #too small; statust=DONE: bots discards file, gives no errors.
    ta_from.update(statust=DONE)

Example 2: Extract data from PDF file (to csv)

# Extract data from PDF file (to csv)
# x_group: group text closer than this as one field (default 10)
# y_group: group lines closer than this as one line (default 5)
# password: if required

import bots.preprocess as preprocess

def postincommunication(routedict,*args,**kwargs):
    ''' Run preprocess.extractpdf on the route, with custom grouping distances and a password.'''
    pdf_options = {
        'x_group': 12,          #group text closer than this as one field
        'y_group': 3,           #group lines closer than this as one line
        'password': 'secret',   #only needed if the PDF requires one
        }
    preprocess.preprocess(routedict,preprocess.extractpdf,**pdf_options)

Example 3: Manipulate records without BOTSID

import bots.preprocess as preprocess
import bots.botslib as botslib
import bots.botsglobal as botsglobal
from bots.botsconfig import *

def postincommunication(routedict,*args,**kwargs):
    ''' Hook: let preprocess.preprocess apply custom_preprocess for this route.'''
    preprocess.preprocess(routedict=routedict,function=custom_preprocess)

def custom_preprocess(ta_from,endstatus,*args,**kwargs):
    ''' Prefix every data line with a record type (HDR or LIN) so it can serve as BOTSID.

        ta_from:   transaction of the file to preprocess.
        endstatus: status for the new transaction that receives the result.
        Raises botslib.InMessageError (after logging the traceback text) if anything fails.
    '''
    try:
        # copy ta for preprocessing
        ta_to = ta_from.copyta(status=endstatus)
        ta_to_filename = str(ta_to.idta)

        # open the files; 'with' closes them again, also when reading/writing fails halfway
        with botslib.opendata(ta_from.filename,'r') as infile:
            with botslib.opendata(ta_to_filename,'wb') as tofile:
                # preprocessing: read infile, write tofile
                # This file has headers and lines, but no field that can be used for BOTSID!
                # Determine the line type from the data, and add HDR or LIN in first column
                # Text heading lines and blank lines match neither test and are omitted
                for line in infile:
                    if '\tAU' in line:
                        tofile.write('HDR\t' + line)
                    elif any(marker in line for marker in ('\tWAIT','\tFULL','\tEMPTY')):
                        tofile.write('LIN\t' + line)

        ta_to.update(statust=OK,filename=ta_to_filename) #update outmessage transaction with ta_info;
    except:
        txt=botslib.txtexc()
        botsglobal.logger.error(u'Custom preprocess failed. Error:\n%s',txt)
        raise botslib.InMessageError(u'Custom preprocess failed. Error:\n$error',error=txt)

Example 4: Sort input file

import bots.preprocess as preprocess
import bots.botslib as botslib
import bots.botsglobal as botsglobal
from bots.botsconfig import *

def postincommunication(routedict,*args,**kwargs):
    ''' Hook: let preprocess.preprocess apply sort_file for this route.'''
    preprocess.preprocess(routedict=routedict,function=sort_file)

def sort_file(ta_from,endstatus,*args,**kwargs):
    ''' Write a copy of the incoming file with its lines sorted.

        ta_from:   transaction of the file to sort.
        endstatus: status for the new transaction that receives the sorted file.
        Raises botslib.InMessageError (after logging the traceback text) if anything fails.
    '''
    try:
        # copy ta for preprocessing
        ta_to = ta_from.copyta(status=endstatus)
        ta_to_filename = str(ta_to.idta)

        # read all lines; 'with' closes the file, also when reading fails
        with botslib.opendata(ta_from.filename,'r') as infile:
            lines = infile.readlines()

        # a last line without newline would be glued to the next line once it is sorted elsewhere
        if lines and not lines[-1].endswith('\n'):
            lines[-1] += '\n'

        # sort output
        lines.sort()
        with botslib.opendata(ta_to_filename,'wb') as tofile:
            tofile.writelines(lines)

        ta_to.update(statust=OK,filename=ta_to_filename) #update outmessage transaction with ta_info;
    except:
        txt=botslib.txtexc()
        botsglobal.logger.error(u'Sort preprocess failed. Error:\n%s',txt)
        raise botslib.InMessageError(u'Sort preprocess failed. Error:\n$error',error=txt)

Example 5: Postprocessing

Postprocessing works the same way as preprocessing, except that it is done just before the out-communication.

import bots.preprocess as preprocess
import bots.botslib as botslib
import bots.botsglobal as botsglobal
from bots.botsconfig import *

def preoutcommunication(routedict,*args,**kwargs):
    ''' Hook: let preprocess.postprocess apply split_lines for this route.'''
    postprocess_function = split_lines
    preprocess.postprocess(routedict,postprocess_function)

def split_lines(ta_from,endstatus,*args,**kwargs):
    ''' Split every line at the first space and write the two parts on separate lines.

        ta_from:   transaction of the file to postprocess.
        endstatus: status for the new transaction that receives the result.
        Lines without a space are written unchanged.
        Raises botslib.OutMessageError (after logging the traceback text) if anything fails.
    '''
    try:
        # copy ta for postprocessing
        ta_to = ta_from.copyta(status=endstatus)
        ta_to_filename = str(ta_to.idta)

        # open the files; 'with' closes them again, also when reading/writing fails halfway
        with botslib.opendata(ta_from.filename,'r') as infile:
            with botslib.opendata(ta_to_filename,'wb') as tofile:
                # split every line at the first separator (space)
                # output the two parts on separate lines
                for line in infile:
                    head,separator,tail = line.partition(' ')
                    if separator:
                        tofile.write(head + '\n' + tail)
                    else:
                        # no separator: nothing to split, avoid adding an empty line
                        tofile.write(line)

        # update outmessage transaction with ta_info
        ta_to.update(statust=OK,filename=ta_to_filename)

    except:
        txt=botslib.txtexc()
        botsglobal.logger.error(_(u'split_lines postprocess failed. Error:\n%s'),txt)
        raise botslib.OutMessageError(_(u'split_lines postprocess failed. Error:\n$error'),error=txt)

Example 6: Preprocessing an encrypted file

This example uses gnupg to decrypt a file before processing it in Bots.

import gnupg

import bots.preprocess as preprocess
import bots.botslib as botslib
import bots.botsglobal as botsglobal
import bots.transform as transform
from bots.botsconfig import *

# Preprocessing - Decrypt infile using GPG
# Dependencies: python-gnupg-0.3.0
#   botssys/gnupghome directory, containing:
#     gpg binary files (gpg.exe and iconv.dll)
#     keys (pubring.gpg, secring.gpg, trustdb.gpg)
#     passphrase.txt

def postincommunication(routedict,*args,**kwargs):
    ''' Decrypt the incoming files of the route, then pass them through (no translation).'''
    # step 1: preprocess to decrypt
    preprocess.preprocess(routedict=routedict,function=decrypt_GPG)
    # step 2: passthrough; set this route's transactions that have status FILEIN to status MERGED
    route_files = {'status':FILEIN,'idroute':routedict['idroute']}
    transform.addinfo(change={'status':MERGED},where=route_files)

def decrypt_GPG(ta_from,endstatus,*args,**kwargs):
    ''' Decrypt the incoming file with GPG into the file of a new transaction.

        ta_from:   transaction of the encrypted file.
        endstatus: status for the new transaction that receives the decrypted file.
        Raises PreprocessError when gpg reports that decryption did not succeed.
    '''
    # copy ta for preprocessing
    ta_to = ta_from.copyta(status=endstatus)
    ta_to_filename = str(ta_to.idta)

    # gnupghome contains the gpg binary files, public/private keys, and passphrase
    gnupghome = botslib.join(botsglobal.ini.get('directories','botssys'),'gnupghome')
    gpgbinary = botslib.join(gnupghome,'gpg.exe')
    # 'with' closes the passphrase file; the line ending is stripped because it is not part of
    # the passphrase (NOTE(review): newer python-gnupg versions appear to reject newlines in passphrases - confirm)
    with open(botslib.join(gnupghome,'passphrase.txt'),'r') as passphrase_file:
        passphrase = passphrase_file.read().rstrip('\r\n')

    # Here is where we do the actual decryption
    gpg = gnupg.GPG(gnupghome=gnupghome,gpgbinary=gpgbinary)
    with botslib.opendata(ta_from.filename,'rb') as encrypted:
        status = gpg.decrypt_file(encrypted,passphrase=passphrase,output=botslib.abspathdata(ta_to_filename))

    # log the results and finish
    botsglobal.logger.debug(status.stderr)
    if status.ok:
        botsglobal.logger.info(status.status)
        ta_to.update(statust=OK,filename=ta_to_filename)
    else:
        botsglobal.logger.error(status.status)
        ta_to.update(statust=ERROR,filename=ta_to_filename)
        raise PreprocessError(status.status + '\n' + status.stderr)

class PreprocessError(botslib.BotsError):
    ''' Raised by decrypt_GPG when gpg could not decrypt the incoming file.'''

Example 7: Preprocessing to ignore/remove XML namespaces

This example changes the default namespace to a namespace prefix (so it is ignored). It also removes a namespace prefix (ENV). You may need to use either or both of these methods, depending on the content of your XML file.

#-------------------------------------------------------------------------------
# preprocess - Remove XML namespaces to simplify grammar and mapping
# Generally Bots does not need to use the xmlns for incoming files
# This example handles both default and prefix namespaces

def postincommunication(routedict,*args,**kwargs):
    ''' Strip XML namespaces from the incoming files of the route before they are parsed.

        NOTE: botslib, preprocess and OK must be imported in the script that contains this function.
    '''

    def _preprocess(ta_from,endstatus,**argv):
        ''' Copy the file line by line, rewriting the namespace declarations/prefixes.'''

        # copy ta for preprocessing
        ta_to = ta_from.copyta(status=endstatus)
        ta_to_filename = str(ta_to.idta)

        # open the files; 'with' closes them again, also when reading/writing fails halfway
        with botslib.opendata(ta_from.filename,'r') as infile:
            with botslib.opendata(ta_to_filename,'wb') as tofile:
                for line in infile:
                    # default namespace becomes an (unused) prefixed one; the ENV: prefix is removed from tags
                    tofile.write(line.replace('xmlns=','xmlns:NOTUSED=').replace('<ENV:','<').replace('</ENV:','</'))

        # update outmessage transaction
        ta_to.update(statust=OK,filename=ta_to_filename)

    preprocess.preprocess(routedict,_preprocess)