#!/bin/env python
#
# mkbackup.py
#
# (c) Alain Spineux alain.spineux@gmail.com
#
# mkbackup is a front-end for popular and free linux and windows archiver tools.
#
# Supported archivers:
# - ntbackup
# - wbadmin (next to come)
# - tar (next to come)

# Features:
# - job definitions are stored in a configuration file
# - the destination of the archive can be set to a different location depending
#   on the day, the week or the month, to allow very complex backup schemes
# - send an email at the end of the job
# - the email contains all the information needed to evaluate the reliability of the backup jobs
#
#
# mkbackup is released under the GNU GPL license
#

import sys, os, subprocess, time, smtplib, calendar, re, socket, urllib2

from optparse import OptionParser
import ConfigParser

import email
from email.Utils import parseaddr, formataddr, formatdate
from email.MIMEMultipart import MIMEMultipart
from email.MIMEText import MIMEText
from email.MIMEBase import MIMEBase

from datetime import datetime, timedelta

import cron

# Version of this script; update_check() compares it with the first line
# downloaded from last_version_url.
__version__='0.1.2'
last_version_url='http://www.magikmon.com/download/mkbackup_last.txt'

# Regular expression fragments, assembled by string concatenation into the
# e-mail address pattern compiled below.
# NOTE(review): the names suggest the RFC 2822 grammar (atom, dot-atom,
# addr-spec) in the restricted form accepted by postfix; conformance has not
# been verified here.
atom=r"[a-zA-Z0-9_#\$&'*+/=?\^`{}~|\-]+"
dot_atom=atom  +  r"(?:\."  +  atom  +  ")*"
quoted=r'"(?:\\[^\r\n]|[^\\"])*"'
local="(?:"  +  dot_atom  +  "|"  +  quoted  +  ")"
domain_lit=r"\[(?:\\\S|[\x21-\x5a\x5e-\x7e])*\]"
domain="(?:"  +  dot_atom  +  "|"  +  domain_lit  +  ")"
addr_spec=local  +  "\@"  +  domain
postfix_restricted_rfc2822_address_name=local
postfix_restricted_rfc2822_email_address=addr_spec
# Not referenced in the visible part of this file.
cyrus_mailbox_name=r"[a-zA-Z0-9_#$'=`{}~|-]+(?:\.[a-zA-Z0-9_#$'=`{}~|-]+)*"

# Anchored, compiled validators.
# domain_nameRE is not referenced in the visible part of this file.
domain_nameRE=re.compile('^'+dot_atom+'$')
# Used by check_mail_config() for the sender and every recipient.
email_addressRE=re.compile('^'+postfix_restricted_rfc2822_email_address+'$')
# Dotted quad, each of the four numbers restricted to 0..255.
valid_ipRE=re.compile('^([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\.([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\.([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\.([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$')
# Dot separated labels made of letters, digits and inner hyphens; the last
# label must start with a letter.
valid_hostnameRE=re.compile('^(([a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9\\-]*[a-zA-Z0-9])\\.)*([A-Za-z]|[A-Za-z][A-Za-z0-9\\-]*[A-Za-z0-9])$')

# Accepted spellings of a boolean option (lower case text or the integers
# 0/1) mapped to the matching bool; any other value is looked up as None.
boolean={ 'on':True, 'yes':True, 'true':True, '1':True, 1:True, 'off':False, 'no':False, 'false':False, '0':False, 0:False}

# ---------------------------------------------------------------------------
def update_check():
    """Tell whether a newer version of this script has been published.

    Downloads ``last_version_url``.  The first line of the reply must hold
    the latest version as dot separated integers; the remaining lines are a
    free-form message.

    Returns a ``(up2date, msg)`` tuple:
      up2date -- 'yes' when the running version is not older than the
                 published one, 'no' when it is older, 'unk' when the check
                 could not be done (download or parsing failure)
      msg     -- the message following the version line, only when
                 up2date is 'no', else ''
    """
    up2date, msg='yes', ''
    try:
        response=urllib2.urlopen(last_version_url)
        try:
            data=response.read()
        finally:
            response.close()

        lines=data.split('\n')
        current=[ int(part) for part in __version__.split('.') ]
        last=[ int(part) for part in lines[0].strip().split('.') ]
        # Sequence comparison stops at the first differing component, so a
        # newer local major/minor number is never overruled by a smaller
        # trailing component (1.0.0 is not older than 0.9.5).  When one
        # version is a prefix of the other, the shorter one is the older.
        if current<last:
            up2date='no'
            msg='\n'.join(lines[1:])
    except Exception:
        # best effort: network and parsing errors must never stop a backup
        up2date='unk'

    return up2date, msg

# ---------------------------------------------------------------------------
def _to_boolean(raw):
    """Return the bool matching *raw* in the ``boolean`` table, or None."""
    if hasattr(raw, 'lower'):
        # textual values are matched case-insensitively
        raw=raw.lower()
    return boolean.get(raw, None)

def check_boolean(job, errors, options):
    """Convert boolean options of *job* to real booleans, in place.

    job     -- dict of options; every option listed in *options* is replaced
               by True or False
    errors  -- dict receiving one message per option holding an invalid value
    options -- iterable of (option name, default value) pairs; the default is
               used when the option is missing from *job*

    Values that are already True/False (or 0/1) are accepted unchanged.  An
    invalid value is reported in *errors* and replaced by the boolean meaning
    of the default, so the option always ends up usable as a boolean.
    """
    for option, default in options:
        value=_to_boolean(job.get(option, default))
        if value is None:
            errors[option]='boolean must have value in (on, yes, true, 1, off, no, false and 0)'
            value=_to_boolean(default)
            if value is None:
                # the default itself is not a known boolean: keep it verbatim
                value=default
        job[option]=value

# ---------------------------------------------------------------------------
def check_mail_config(job, errors):
    """Validate the mail related options of *job*.

    Checked options: smtp_host (ip address or resolvable host name),
    smtp_port (integer in 1..65535), sender (one email address) and
    recipients (white space separated email addresses).

    job    -- dict of options; when every recipient is valid,
              job['recipients'] is replaced by the list of addresses
    errors -- dict updated with the messages found here

    Returns a dict holding only the errors found by this function; it is
    empty when the mail configuration is usable.
    """
    new_errors={}

    smtp_host=job.get('smtp_host')
    if not smtp_host:
        new_errors['smtp_host']='option mandatory'
    elif not valid_ipRE.match(smtp_host):
        if not valid_hostnameRE.match(smtp_host):
            new_errors['smtp_host']='invalid hostname or ip address'
        else:
            try:
                socket.gethostbyname(smtp_host)
            except socket.error:
                # socket.gaierror and socket.herror both derive from it
                new_errors['smtp_host']='cannot resolve address'

    try:
        port=int(job.get('smtp_port'))
    except (TypeError, ValueError):
        new_errors['smtp_port']='must be an integer'
    else:
        # 65535 is a valid port, as stated by the message
        if not (0<port and port<=65535):
            new_errors['smtp_port']='must be an integer between 1 and 65535'

    sender=job.get('sender', None)
    if not sender:
        new_errors['sender']='option mandatory'
    elif not email_addressRE.match(sender):
        new_errors['sender']='invalid email address'

    recipients=job.get('recipients')
    if not recipients:
        new_errors['recipients']='option mandatory'
    else:
        if hasattr(recipients, 'split'):
            # still the raw string of the configuration file
            recipients=recipients.split()
        bad=[ recipient for recipient in recipients if not email_addressRE.match(recipient) ]
        if bad:
            new_errors['recipients']='invalid email addresses: %s' % ' '.join(bad)
        else:
            job['recipients']=recipients

    errors.update(new_errors)
    return new_errors

# ---------------------------------------------------------------------------
def sendmail(sender, recipients, subject, text, attachements, smtp_host, smtp_port):
    """Send a plain text mail with optional attachments through SMTP.

    sender       -- email address used as envelope sender and From header
    recipients   -- list of email addresses
    subject      -- subject of the mail
    text         -- body of the mail
    attachements -- list of (filename, target, data) tuples; *data* is the
                    content to attach, and when it is empty the content is
                    read from the file *target* if that file exists.
                    Entries without any content are skipped.
    smtp_host, smtp_port -- SMTP server to connect to

    Errors raised by smtplib or by the socket layer are not caught.
    """
    msg=MIMEMultipart()
    msg.preamble='' # This line is not visible on mime enable MUA
    msg.epilogue=''

    msg['From'] = formataddr((sender, sender)) # Display name, email address
    msg['To'] =  ', '.join([ formataddr((recipient, recipient)) for recipient in recipients ])
    msg['Date'] = formatdate(localtime=True)
    msg['Subject'] = subject

    msg.attach(MIMEText(text, 'plain'))

    for filename, target, data in attachements:

        if not data and target and os.path.exists(target):
            source=open(target, 'rb')
            try:
                data=source.read()
            finally:
                source.close()

        if not data:
            # nothing to attach: without this guard the previous attachment
            # would be attached again (or a NameError raised on the first one)
            continue

        attachement=MIMEBase('application', 'octet-stream')
        attachement.set_payload(data)
        email.Encoders.encode_base64(attachement)
        attachement.add_header('Content-Disposition', 'attachment', filename=filename)
        msg.attach(attachement)

    body=msg.as_string()

    smtp=smtplib.SMTP(smtp_host, smtp_port)
    try:
        smtp.sendmail(sender, recipients, body)
    finally:
        # always release the connection; a failing QUIT must not hide the
        # outcome of the delivery itself
        try:
            smtp.quit()
        except (smtplib.SMTPException, socket.error):
            smtp.close()

# ===========================================================================
class NtBackup:
    """Archiver driving the Windows ``ntbackup.exe`` command line tool.

    Class attributes:
      name       -- archiver name, also the job option giving the executable
      exe        -- default executable
      ev_logtype -- event log read after a backup
      ev_source  -- event source looked for in that log
      boolean    -- maps True/False to the 'yes'/'no' wanted by ntbackup
      types      -- maps the backup types accepted in the configuration
                    (with their aliases) to the ntbackup type names
    """
    # http://support.microsoft.com/kb/814583
    # http://support.microsoft.com/default.aspx?scid=kb;en-us;233427
    #
    # Normal - selected files, marking their archive attributes
    # Copy - selected files without marking their archive attributes. This is good for making tape copies that do not interfere with archive backups, since it does not set the archive attribute.
    # Incremental - selected files, marking their archive attributes, but only backs up the ones that have changed since the last backup.
    # Differential - selected files, NOT marking their archive attributes, but only backs up the ones that have changed since the last backup.
    # Daily - only backs up files that have changed that day, does not mark their archive attributes.
    #
    # ntbackup stuff
    #   http://www.fishbrains.com/2007/11/12/utilizing-the-built-in-windows-backup-ntbackupexe-for-windows/
    #   http://episteme.arstechnica.com/eve/forums/a/tpc/f/12009443/m/165002540831

    name='ntbackup'
    exe='ntbackup.exe'
    ev_logtype='Application'
    ev_source='NTBackup'

    boolean={ True: 'yes', False:'no'}

    types=dict(normal='normal', full='normal', incremental='incremental', inc='incremental', differential='differential', diff='differential', copy='copy', daily='daily')

    # -----------------------------------------------------------------------
    def run(self, command, job):
        """Check the configuration of *job* and run *command* on it.

        command -- 'check' (validate and print), 'checkmail' (validate,
                   print and mail the result) or 'backup' (validate, then
                   run ntbackup and mail a report)
        job     -- dict of the options of the job, plus 'name' (the job
                   name) and 'config' (path of the configuration file).
                   Some options are normalised in place during validation.

        Returns None.  When the configuration is invalid the errors are
        printed, mailed if the mail settings are usable and the command is
        not 'check', and nothing else is done.
        """
        errors={}

        #
        # check job config
        #

        selection=job.get('selection', None)
        if not selection:
            errors['selection']='option mandatory'
        elif selection.startswith('@'):
            if not os.path.isfile(selection[1:]):
                errors['selection']='file not found: %s' % (selection[1:],)
        elif os.path.isfile(selection) and selection[-4:].lower()=='.bks':
            # compare the whole extension, not only one character
            errors['selection']='looks like a ".bks" file and need to be prefixed by a @'
        elif not os.path.isdir(selection) and not os.path.isfile(selection):
            errors['selection']='file or directory not found'

        check_boolean(job, errors, [('verify', 'no'), ('restricted', 'no'), ])

        # USERPROFILE may be unset; an empty base keeps os.path.join() working
        # and the missing directory is then reported as a normal error
        logdir_default=os.path.join(os.environ.get('USERPROFILE', ''), r'Local Settings\Application Data\Microsoft\Windows NT\NTBackup\data')
        print('logdir (default) %s' % (logdir_default, ))
        logdir=job.setdefault('logdir', logdir_default)
        if not os.path.isdir(logdir):
            errors['logdir']='directory not found'

        # an executable given with a path must exist; a bare name is left
        # to the system to find
        program_exe=job.setdefault(self.name, self.exe)
        if os.path.basename(program_exe)!=program_exe and not os.path.isfile(program_exe):
            errors[self.name]='file not found'
        # TODO: search in %PATH%

        destination=job.get('destination', None)
        if not destination:
            errors['destination']='option mandatory'
        else:
            destination=destination.replace('\n','')
            try:
                destination=Destinations(destination, self)
            except (DestinationSyntaxError, cron.CronException):
                # sys.exc_info() is used instead of the "except X, e" form so
                # that the syntax is also accepted by newer interpreters
                errors['destination']='syntax error: %s' % (sys.exc_info()[1], )

        # True when the mail settings are valid, i.e. a mail can be sent
        mail_config=not check_mail_config(job, errors)

        msg_body=''
        if errors:
            for k, v in errors.items():
                msg_body+='%s=%r\n    %s\n\n' % (k, job.get(k,''), v)
            print('Error in section: %s\n%s' % (job['name'], msg_body))
            if mail_config and command!='check':
                subject='MKBACKUP: CONFIG ERR %s ' % (job['name'], )
                sendmail(job['sender'], job['recipients'], subject, msg_body, [], job['smtp_host'], job['smtp_port'])

            # --------->  RETURN <-----------
            return

        print('No error in section: %s' % job['name'])

        #
        # build the command line
        #

        # C:\WINDOWS\system32\ntbackup.exe backup "@m:\asx\src\magik\job1.bks" /a /d "Set created 14/11/2009 at 2:29" /v:no /r:no /rs:no /hc:off /m normal /j "backup" /l:s /f "s:\Backup.bkf"

        now=datetime.now()
        if command in ('check', 'checkmail'):
            # list the destination of every day of a full cycle of weeks
            msg_body+='Destinations by date:\n'
            for i in range(destination.max_weekdivisor*7):
                today=now+timedelta(days=i)
                typ, target=destination.match(today)
                msg_body+='    %s %-12s %s\n' % (today.strftime('%a %d %b %Y'), typ, target)
                if typ!='exempted' and not os.path.isdir(os.path.dirname(target)):
                    msg_body+='        directory not found !!!\n'

        print(msg_body)

        # NOTE(review): match() returns (None, None) when no destination
        # applies to the day; that case is not handled here
        typ, target=destination.match(now)

        if command not in ('check', 'checkmail') and typ=='exempted':
            print('EXEMPTED today')
            print('bye')
            # NOTE(review): this ends the whole script, including any job the
            # caller has not run yet - confirm this is intended
            sys.exit(0)

        # the job name comes from the job itself, not from a global of the caller
        args=[ program_exe, 'backup', job.get('selection'), '/J', job['name'], '/M', typ, '/F', target, '/rs:no', ]
        if job.get('description', None): args.extend(['/d', job.get('description')])
        args.append('/v:'+self.boolean[job.get('verify')])
        args.append('/r:'+self.boolean[job.get('restricted')])
        args.append('/l:s') # logging [s]ummary
        # '/hc:off'

        cmdline=' '.join(args)
        msg_body+='\n%s\n' % cmdline

        print(cmdline)

        if command=='checkmail':
            subject='MKBACKUP: CONFIG OK %s ' % (job['name'], )
            sendmail(job['sender'], job['recipients'], subject, msg_body, [], job['smtp_host'], job['smtp_port'])

        if command in ('check', 'checkmail'):
            return

        #
        # run the backup
        #
        start=int(time.time())
        process=subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        process.communicate()

        # the log of this run is taken to be the most recently modified
        # entry of the log directory
        logfile, last=None, 0
        for filename in os.listdir(logdir):
            full_filename=os.path.join(logdir, filename)
            current=os.stat(full_filename).st_mtime
            if last<current:
                logfile, last=full_filename, current

        print('logfile %s' % (logfile, ))

        # events written since the backup started give the final status
        status, ev_out=ReadEvLog(self.ev_logtype, self.ev_source, start)

        # HKEY_CURRENT_USER\Software\Microsoft\Ntbackup\Log Files

        #
        # dir of target directory
        #
        args=[ os.environ['comspec'], '/c', 'dir', os.path.dirname(target) ]
        print(' '.join(args))
        process=subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        dir_out, err=process.communicate()

        attachements=[ ('log.txt', logfile, None),
                       ('dir.txt', None, dir_out),
                       ('evlog.txt', None, ev_out),
                       ('config.ini', job['config'], None)
                    ]
        if job['selection'].startswith('@'):
            attachements.append(('selection.bks', job['selection'][1:], None))

        subject='MKBACKUP BACKUP %s %s' % (status, job['name'])
        sendmail(job['sender'], job['recipients'], subject, cmdline, attachements, job['smtp_host'], job['smtp_port'])
    

# Registry of the supported archivers: archiver name -> archiver class.
archivers=dict(map(lambda archiver_class: (archiver_class.name, archiver_class), (NtBackup, )))


#======================================================================
#
# Event Viewer logging
#
#======================================================================
import win32evtlog
import win32evtlogutil
import winerror
import win32con

# Short label used in reports for each event type constant of win32con.
evt_dict=dict([
        (win32con.EVENTLOG_AUDIT_FAILURE, 'AUDIT_FAILURE'),
        (win32con.EVENTLOG_AUDIT_SUCCESS, 'AUDIT_SUCCESS'),
        (win32con.EVENTLOG_INFORMATION_TYPE, 'INF'),
        (win32con.EVENTLOG_WARNING_TYPE, 'WAR'),
        (win32con.EVENTLOG_ERROR_TYPE, 'ERR'),
        ])

def FormatEv(ev_obj, logtype):
    """Return a printable text for one record of the event log.

    ev_obj  -- event record, as returned by win32evtlog.ReadEventLog()
    logtype -- name of the log the record comes from (e.g. 'Application')

    The text is a header line holding the event id, the type label and the
    generation time, followed by the formatted message of the event.
    """
    # An event type missing from evt_dict (e.g. EVENTLOG_SUCCESS, which is 0)
    # is shown by its number instead of raising KeyError.
    evt_type=str(evt_dict.get(ev_obj.EventType, ev_obj.EventType))
    # NOTE(review): str() may fail if the message holds non-ASCII characters
    # - confirm what SafeFormatMessage returns on localized systems
    msg=str(win32evtlogutil.SafeFormatMessage(ev_obj, logtype))
    return '======== %d %s %s =====\n%s' % ( ev_obj.EventID, evt_type, ev_obj.TimeGenerated, msg)

def ReadEvLog(logtype, source, start, end=None):
    """Collect the events written by *source* since *start*.

    logtype -- name of the event log to read (e.g. 'Application')
    source  -- event source name; records of other sources are skipped
    start   -- epoch time in seconds; reading stops at the first record
               generated before it
    end     -- optional epoch time in seconds; matching records generated
               after it are ignored

    The log of the local computer is read backwards, newest record first.

    Returns a ``(status, output)`` tuple: *status* is 'OK' when the most
    recent matching record has event id 8019 and the information type
    (presumably the record ntbackup writes when an operation ends - confirm),
    else 'ERR'; *output* is the concatenation of the formatted records.
    """
    flags = win32evtlog.EVENTLOG_BACKWARDS_READ|win32evtlog.EVENTLOG_SEQUENTIAL_READ
    hand=win32evtlog.OpenEventLog('', logtype) # '' for localhost
    output=''
    status='ERR'
    first=True
    done=False
    try:
        while not done:
            events=win32evtlog.ReadEventLog(hand,flags,0)
            if not events:
                # end of the log
                break
            for ev_obj in events:
                generated=int(ev_obj.TimeGenerated)
                if generated<start:
                    # records come newest first: everything left is older
                    done=True
                    break
                if str(ev_obj.SourceName)!=source:
                    # a foreign record must not hide the rest of the batch
                    continue
                if end is not None and generated>end:
                    continue
                if first:
                    # the newest matching record decides the status
                    print('first %r %r %r %r' % (ev_obj.EventID=='8001', ev_obj.EventType==win32con.EVENTLOG_INFORMATION_TYPE, ev_obj.EventID, ev_obj.EventType))
                    first=False
                    if ev_obj.EventID==8019 and ev_obj.EventType==win32con.EVENTLOG_INFORMATION_TYPE:
                        status='OK'

                output=output+'\n'+FormatEv(ev_obj, logtype)
    finally:
        # release the handle even when reading or formatting fails
        win32evtlog.CloseEventLog(hand)
    return status, output


#======================================================================
#
# Destinations
#
#======================================================================
class DestinationSyntaxError(Exception):
    """Raised when the text of a "destination" option cannot be parsed."""

class Destinations:
    """Targets of a backup job, selected by date.

    The raw text is either a bare target, or a sequence of
    ``<type=period>target`` groups where *type* is a backup type known by
    the archiver (or ``exempted``) and *period* is parsed by cron.Cron.

    Attributes:
      destinations    -- list of (type, period, target) tuples, in the order
                         of the raw text; period is None for a bare target
      max_weekdivisor -- biggest ``weekdivisor`` of all the periods, at least 1
    """
    def __init__(self, raw, archiver):
        """Parse *raw*.

        raw      -- text of the destination option
        archiver -- object whose ``types`` dict maps the accepted type names
                    to the names understood by the archiver

        Raises DestinationSyntaxError on a malformed selector or an unknown
        type; errors raised by cron.Cron are not caught.
        """
        self.destinations=[]
        self.max_weekdivisor=1
        st=raw

        while st:
            if st[0]=='<':
                try:
                    pos=st.index('>')
                except ValueError:
                    raise DestinationSyntaxError('a ">" is missing')

                selector=st[1:pos]
                st=st[pos+1:]

                # the target runs up to the next selector; it may be empty,
                # in which case the next selector starts right away
                pos=st.find('<')
                if pos>=0:
                    target=st[:pos]
                    st=st[pos:]
                else:
                    target=st
                    st=''

                if '=' not in selector:
                    raise DestinationSyntaxError('a "=" is missing in selector "%s"' % (selector, ))
                typ, period=selector.split('=', 1)

                if typ.lower()=='exempted':
                    typ='exempted'
                else:
                    try:
                        typ=archiver.types[typ.lower()]
                    except KeyError:
                        raise DestinationSyntaxError('type "%s" unknow' % (typ, ))

                #print "SELECTOR<%s=%s>%s" % (typ,period,target )
                #print "CARRY=%s" % st
                period=cron.Cron(period)
                if period.weekdivisor is not None:
                    self.max_weekdivisor=max(self.max_weekdivisor, period.weekdivisor)
            else:
                # bare target: always a full backup, expressed with the
                # type name of the archiver like any other type
                typ=archiver.types.get('full', 'full')
                period=None
                target=st
                st=''

            self.destinations.append((typ, period, target))


    def match(self, day):
        """Return the ``(type, target)`` of the first destination matching *day*.

        day -- datetime of the backup

        In the target every ``${expression}`` is replaced by the value of the
        Python expression, evaluated with the names day, month, year,
        weekday, yearday and, when the period defines a first day of week,
        epoch and week; then the strftime() directives are expanded.

        Returns ``(None, None)`` when no destination matches.
        """
        # NOTE(review): weekday is the bound method day.weekday, not its
        # value, so an expression must call it: ${weekday()} - confirm intent
        namespace=dict(day=day.day, month=day.month, year=day.year, weekday=day.weekday, yearday=day.timetuple().tm_yday)
        for typ, period, target in self.destinations:
            if period:
                # the sets of the period are 0 based
                if period.months and not day.month-1 in period.months:
                    continue
                if period.daysofweek and not day.isoweekday()-1 in period.daysofweek:
                    continue
                if period.daysofmonth and not day.day-1 in period.daysofmonth:
                    continue
                if period.firstdayofweek is not None:
                    epoch=calendar.timegm(day.timetuple())
                    # NOTE(review): +3 presumably aligns day 0 of the epoch
                    # (a Thursday) on the week boundaries - confirm
                    nday=epoch//86400-period.firstdayofweek+3
                    week=(nday//7)%period.weekdivisor
                    if period.weekselector is not None and week!=period.weekselector:
                        continue
                    namespace['epoch']=epoch
                    namespace['week']=week

            # Only ${...} is an expression; braces without a leading "$" are
            # left alone.
            # SECURITY: eval() runs text of the configuration file; that file
            # must be writable by trusted users only.
            for exp in re.findall(r'\$\{([^}]*)\}', target):
                target=target.replace('${%s}' % (exp,), str(eval(exp, namespace, {})))
            target=day.strftime(target)

            return typ, target

        return None, None

#======================================================================
#
# Main
#
#======================================================================

#
# Parse the command line
#
parser=OptionParser(version='%%prog %s' % __version__ )
# set_usage() is a method: assigning to it would leave the default usage
parser.set_usage('%prog [options] command [job..]\n'+
                 '\tcommand in "backup", "check", "checkmail"')

parser.add_option("-c", "--config", dest="config", default='mkbackup.ini', help="use another ini file", metavar="config.ini")
parser.add_option("-d", "--debug", dest="debug", action="store_true", default=False, help="switch to debug level")
parser.add_option("-v", "--verbose", dest="verbose", action="store_true", default=False, help="write log to the terminal")

# the whole sys.argv is parsed, so cmd_args[0] is the name of the script
cmd_options, cmd_args=parser.parse_args(sys.argv)

if len(cmd_args)<2:
    parser.error('no "command" set')

command=cmd_args[1]
job_list=cmd_args[2:]

if not job_list:
    parser.error('no "job"')

if command not in ('backup', 'check', 'checkmail'):
    parser.error('invalid command "%s"' % command)
#
# load the configuration file
#
config_default=dict(verify='no',
                    update_check='yes',
                    smtp_port='25',
                    smtp_host='127.0.0.1',
                    )

config=ConfigParser.RawConfigParser(config_default)
try:
    config_file=open(cmd_options.config)
    try:
        config.readfp(config_file)
    finally:
        config_file.close()
except IOError:
    print("error reading configuration file %s" % cmd_options.config)
    raise

# the update check is done unless explicitly switched off
check_for_update=config.get('DEFAULT', 'update_check')
if boolean.get(check_for_update.lower(), None) is not False:
    up2date, msg=update_check()
    if up2date=='no':
        print('%s \n' % ('!'*72, ))
        print(msg)
        print('!'*72)

#
# every job must exist and use a known archiver before any job is run
#
err=0
for job_name in job_list:
    try:
        program=config.get(job_name, 'program')
    except ConfigParser.NoSectionError:
        print('job "%s" not found in configuration file "%s"' % (job_name, cmd_options.config))
        err+=1
    except ConfigParser.NoOptionError:
        print('no "program" set for job "%s"' % job_name)
        err+=1
    else:
        if not program in archivers:
            print('program "%s" unknow in job "%s"' % (program, job_name))
            err+=1

if err>0:
    sys.exit(3)

#
# run the command on every job, in the order of the command line
#
for job_name in job_list:

    job=dict(config.items(job_name))
    program=job.get('program')
    archiver=archivers[program]()
    job['name']=job_name
    job['config']=cmd_options.config

    print('%s job="%s" archiver="%s"' % (command, job_name, program))
    archiver.run(command, job)




#<full,first su>S:\monthly\data-full-month-${month%2}.bkf
#<full,su>S:\week${week%4}\data-full-%a.bkf
#<inc,mo-sa>S:\week${week%4}\data-inc-%a.bkf



#<full,week%4=0>S:\data-full.bkf
#<inc,week%4=1>S:\data-inc.bkf

#
#w1/4
#jan/2,su
#feb-dec/2


# when start a week
# firstday=sat
#last day OF THE MONTH
#1st mon,2nd tue,3th wed,4th thu,last sat  OF THE MONTH
#1st week,2nd week,3th week,4th week,last week OF THE MONTH
#<> => empty selector to allow "no backup"
# empty target => no backup




# windows unicode file http://www.helpware.net/FAR/help/Unicode2.htm
# Universal Encoding Detector http://chardet.feedparser.org/docs/supported-encodings.html
