Thursday, December 09, 2010

Data conversion

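A short Python script for some data conversion: it reads a raw call-log CSV, derives extra columns (weekday/weekend, daytime/evening, number type, missing contact names) and writes the result to a new CSV.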
import csv
import time

# Short day names keyed by struct_time.tm_wday (0 = Monday ... 6 = Sunday).
dayNames = dict(enumerate(('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')))
# Short month names keyed by struct_time.tm_mon (1 = January ... 12 = December).
monthNames = dict(zip(
    range(1, 13),
    ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
     'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'),
))

# Read the raw call-log export and build one enriched row per call.
#
# nameToNumber maps every non-empty contact name to the set of numbers that
# were logged under that name.
nameToNumber = {}
# One list per call: [folder, number, name, raw time, duration in minutes,
# Weekday/Weekend, Daytime/Evening, day name, month name, day, year, hour, minute].
lines = []
# Raw string literal: "\R" is not a recognised escape sequence, so the plain
# literal only produced the right path by accident.
with open(r"C:\RawCallLogs.csv", "r") as f:
    # csv.reader removes the quoting around fields and, unlike a bare
    # split(','), does not let a comma inside a quoted field (e.g. a name
    # such as "Smith, John") shift every later column.
    for fields in csv.reader(f):
        if not fields:
            continue  # blank line in the export
        folder = fields[0]
        if folder == 'Folder':
            continue  # header row
        number = fields[1]
        name = fields[2]
        rawtime = fields[3]
        structTime = time.strptime(rawtime, '%m/%d/%Y %H:%M:%S')
        # tm_wday is 0 for Monday, so 5 and 6 are Saturday and Sunday.
        weekday = "Weekend" if structTime.tm_wday >= 5 else "Weekday"
        dayName = dayNames[structTime.tm_wday]
        month = monthNames[structTime.tm_mon]
        day = structTime.tm_mday
        year = structTime.tm_year
        # Strictly greater than 18: calls from 19:00 onwards count as evening.
        evening = "Evening" if structTime.tm_hour > 18 else "Daytime"
        # The integer in the fifth column is divided by 60 (seconds -> minutes,
        # presumably; the export format is not shown here).
        duration = int(fields[4]) / 60.0
        if number == '-1':
            name = 'Unknown'

        if name != "":
            nameToNumber.setdefault(name, set()).add(number)
        lines.append([folder, number, name, rawtime, duration, weekday, evening,
                      dayName, month, day, year,
                      structTime.tm_hour, structTime.tm_min])

# Report how many calls were read. The parenthesised form is valid both as a
# Python 2 print statement and as a Python 3 function call.
print(len(lines))

def is08Number(number):
    """Return True when ``number`` begins with '08' or '0448'."""
    prefixes = ('08', '0448')
    return number.startswith(prefixes)

def isLandlineNumber(number):
    """Return True when ``number`` begins with '02' or '0442'."""
    prefixes = ('02', '0442')
    return number.startswith(prefixes)

def isMobile(number):
    """Return True when ``number`` is at least eight characters long."""
    digitCount = len(number)
    return digitCount >= 8

def isInternational(number):
    """Return True when ``number`` begins with '0091' or '+91'."""
    prefixes = ('0091', '+91')
    return number.startswith(prefixes)

# Write the enriched call log: fill in missing names, classify each number and
# emit one CSV row per call.
#
# Index the known contacts by the last six characters of each of their numbers.
# Building this once replaces a scan over every contact and number for every
# unnamed call. setdefault keeps the first contact seen for a given suffix, so
# a later contact can no longer overwrite an earlier match.
suffixToName = {}
for name, numbers in nameToNumber.items():
    for number in numbers:
        suffixToName.setdefault(number[-6:], name)

# Raw string literal: "\R" is not a recognised escape sequence, so the plain
# literal only produced the right path by accident.
with open(r"C:\RawCallLogsOut.csv", "w") as f:
    # csv.writer quotes any field that contains a comma or a quote, so a
    # contact name cannot corrupt the column layout. lineterminator='\n'
    # keeps the same line endings the hand-written '\n' produced.
    writer = csv.writer(f, lineterminator='\n')
    writer.writerow(['Folder', 'Number', 'Name', 'Raw time', 'Duration',
                     'Weekday(s)', 'DayTime', 'DayofWeek', 'Month', 'Day',
                     'Year', 'Hour', 'Min', 'NumberType'])

    for line in lines:
        number = line[1]
        if line[2] == '':  # No name: try to match a known contact by suffix
            line[2] = suffixToName.get(number[-6:], '')
        if line[2] == '':
            # Still unnamed: fall back to a label derived from the number itself.
            if is08Number(number):
                line[2] = "Unknown 08 Number"
            elif isLandlineNumber(number):
                line[2] = 'Unknown landline'
            elif isMobile(number):
                line[2] = 'Unknown mobile'
            else:
                line[2] = 'Unknown'
        # International is tested before Mobile because isMobile only looks at
        # the length and would otherwise claim those numbers too.
        numberType = 'Unknown'
        if is08Number(number):
            numberType = "08 Number"
        elif isLandlineNumber(number):
            numberType = "Landline"
        elif isInternational(number):
            numberType = "International"
        elif isMobile(number):
            numberType = "Mobile"
        # Each entry in lines holds the first 13 columns; NumberType is the 14th.
        writer.writerow(line + [numberType])




No comments: