Thursday, December 09, 2010

Data conversion

Some data conversion
import time

# Lookup tables turning time.struct_time fields into short labels
# (tm_wday is 0 for Monday, tm_mon is 1 for January).
dayNames = {0:'Mon',1:'Tue',2:'Wed',3:'Thu', 4:'Fri',5:'Sat',6:'Sun'}
monthNames = {1:'Jan',2:'Feb',3:'Mar',4:'Apr',5:'May',6:'Jun',7:'Jul',8:'Aug',9:'Sep',10:'Oct',11:'Nov',12:'Dec'}

# name -> set of every number seen with that name in the raw log.
nameToNumber = {}
# One list per call, in the column order of the output CSV header:
# Folder, Number, Name, Raw time, Duration, Weekday(s), DayTime,
# DayofWeek, Month, Day, Year, Hour, Min.
lines = []
with open(r"C:\RawCallLogs.csv","r") as f:
    for line in f:
        # Drop the line ending and the quotes wrapped around each value.
        fields = [field.replace('"','') for field in line.rstrip('\r\n').split(',')]
        if len(fields) < 5:
            continue  # blank or truncated line, nothing to convert
        folder = fields[0]
        if folder == 'Folder':
            continue  # header row of the raw export
        number = fields[1]
        name = fields[2]
        rawtime = fields[3]
        structTime = time.strptime(rawtime,'%m/%d/%Y %H:%M:%S')
        weekday = "Weekday"
        if structTime.tm_wday >= 5:
            weekday = "Weekend"
        dayName = dayNames[structTime.tm_wday]
        month = monthNames[structTime.tm_mon]
        day = structTime.tm_mday
        year = structTime.tm_year
        evening = "Daytime"
        if structTime.tm_hour > 18:
            evening = "Evening"
        # The raw duration is an integer; dividing by 60.0 keeps fractions.
        duration = int(fields[4].strip())/60.0
        if number == '-1':
            name = 'Unknown'

        if name != "":
            # Remember every number used by this name, not just the first.
            nameToNumber.setdefault(name, set()).add(number)

        lines.append([folder, number, name, rawtime, duration, weekday,
                      evening, dayName, month, day, year,
                      structTime.tm_hour, structTime.tm_min])
print(len(lines))

def is08Number(number):
    """Return True when *number* begins with '08' or '0448'."""
    prefixes = ('08', '0448')
    return number.startswith(prefixes)

def isLandlineNumber(number):
    """Return True when *number* begins with '02' or '0442'."""
    prefixes = ('02', '0442')
    return number.startswith(prefixes)

def isMobile(number):
    """Return True when *number* is more than seven characters long."""
    characterCount = len(number)
    return characterCount > 7

def isInternational(number):
    """Return True when *number* begins with '0091' or '+91'."""
    prefixes = ('0091', '+91')
    return number.startswith(prefixes)

# Write the enriched call log: fill in missing names, classify each number
# and emit one CSV row per call.
with open(r"C:\RawCallLogsOut.csv","w") as f:
    f.write('Folder,Number,Name,Raw time,Duration,Weekday(s),DayTime,DayofWeek,Month,Day,Year,Hour,Min,NumberType\n')

    for line in lines:
        number = line[1]
        if line[2] == '':  # No name
            # Borrow the name of any known number sharing the last six
            # characters, so differently prefixed forms of one number match.
            numberInLine = number[-6:]
            for name, numbers in nameToNumber.items():
                for knownNumber in numbers:
                    if knownNumber[-6:] == numberInLine:
                        line[2] = name
        if line[2] == '':
            # Still unnamed: fall back to a label derived from the prefix.
            if is08Number(number):
                line[2] = "Unknown 08 Number"
            elif isLandlineNumber(number):
                line[2] = 'Unknown landline'
            elif isMobile(number):
                line[2] = 'Unknown mobile'
            else:
                line[2] = 'Unknown'
        numberType = 'Unknown'
        if is08Number(number):
            numberType = "08 Number"
        elif isLandlineNumber(number):
            numberType = "Landline"
        elif isInternational(number):
            numberType = "International"
        elif isMobile(number):
            numberType = "Mobile"
        # Thirteen row values plus the number type give the fourteen
        # columns announced in the header line.
        columns = ['%s' % value for value in line[:13]] + [numberType]
        f.write(','.join(columns) + '\n')

Wednesday, October 20, 2010

Python: Accessing https pages from behind an authenticating proxy server

from urllib2 import ProxyHandler, build_opener

# Send https requests through an authenticating proxy. Replace the
# parenthesised placeholders with real credentials, host and port.
proxyMap = {'https': 'http://(user):(pass)@(proxyhost):(proxyport)/'}
proxy = ProxyHandler(proxyMap)
opener = build_opener(proxy)
u = opener.open('https://somehttpspage')
try:
    data = u.read()
    # The charset comes from the Content-Type response header.
    charset = u.info().getparam('charset') # Python 2
finally:
    u.close()
print('CharSet %s' % charset)
print('------------------------------------------------------------------------')
print(data)

Friday, April 16, 2010

Python script for creating video using images and adding audio

I am using a Python script to convert images into a video (using Java JMF), then add the audio to the video and convert it to MP4 format so that I can play it on my phone.
Created on 15 Apr 2010

@author: viswanav
import glob, subprocess, os

# Raw strings keep the Windows backslashes literal without relying on
# Python leaving unrecognised escapes alone.
vlcLocation = r"C:\Development\Apps\PortableApps\VLCPortable\App\vlc\vlc.exe"

# For every matching folder: build a .mov from its images with the Java
# JMF tool, mux it with the folder's .flv audio into an .mp4 through VLC,
# then remove the intermediate .mov.
for g in glob.glob(r"C:\Development\Docs\Books\VideoCDs\*\PK*"):
    audioFile = ""
    for flvGlob in glob.glob(g + r"\*\media\*.flv"):
        audioFile = flvGlob  # the last match wins
    convertToMov = ["java","-cp",r"C:\Development\workspaces\Trials\JavaTrials\JMF\out\production\JMFTrial","JpegImagesToMovie", "-w", "400","-h","300","-f","15",g]
    print("Convert to Mov File")
    subprocess.call(convertToMov)
    movFileLocation = ""
    for movFile in glob.glob(g + r"\*.mov"):
        movFileLocation = movFile
    if not movFileLocation:
        # Without a .mov there is nothing to transcode or delete.
        print("No mov file found in %s" % g)
        continue
    stdoutCommand = "--sout=#transcode{vcodec=mp4v,vb=800,acodec=mp4a,ab=128,scale=1,channels=2,audio-sync}:std{access=file,mux=mp4,dst=\"%s.mp4\"}" % movFileLocation
    print(stdoutCommand)
    addAudioAndConverToMp4Command = [vlcLocation,"-I","dummy",movFileLocation,"--input-slave=\"%s\"" % audioFile,stdoutCommand,"vlc://quit"]
    print(addAudioAndConverToMp4Command)
    print("Creating mp4 file")
    exitStatus = subprocess.call(addAudioAndConverToMp4Command)
    if exitStatus == 0:
        print("Deleting mov file")
        # os.remove avoids going through a shell for the built-in "del".
        os.remove(movFileLocation)
    else:
        # Keep the .mov so the conversion can be retried.
        print("VLC exited with status %s, keeping %s" % (exitStatus, movFileLocation))

Python application for downloading a book and converting to pdf

A similar application, this time written in Python, for downloading book images from the net and converting them to a PDF file.

Created on 25 Mar 2010

import httplib
import os
import sys
from optparse import OptionParser

import time, thread
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter, A4
from reportlab.lib.units import cm, mm, inch, pica

# Shorthand for writing progress markers without a trailing newline.
write = sys.stdout.write

# Upper bound on the number of pages requested per book.
maxPages = 10000
size = 1800
books = ('isbn2','isbn1')

# Request path template, filled with (isbn, page number).
urltemplate = '/%s/pages/%i.jpg?width=1800'
# Headers sent with every page request; the cookie value goes here.
headers = {'Cookie':''}
h = httplib.HTTPConnection('webproxy',8080)

def downloadBookImages(folderLocation,isbn):
    """Download the page images of book *isbn* into a folder of its own.

    Pages are requested one by one, starting at page 0, and saved as
    ``<folderLocation>\\<isbn>\\<page>.jpg``. Pages already on disk are
    skipped, so an interrupted download can be resumed. The loop stops on
    a 401 response or when a response is under 1000 bytes, which is taken
    as the end of the book.

    Args:
        folderLocation: existing folder that receives the book folder.
        isbn: book identifier, used in the request path and folder name.
    """
    bookLocation = folderLocation + "\\%s" % isbn
    print('Downloading book to %s' % bookLocation)
    # Create the book folder on first use.
    if not os.path.exists(bookLocation):
        os.mkdir(bookLocation)
    connection = httplib.HTTPConnection('webproxy',8080)
    imageFilePathTemplate = bookLocation + "\\%i.jpg"
    try:
        for page in range(maxPages):
            imageFilePath = imageFilePathTemplate % page
            if os.path.exists(imageFilePath):
                write('E%i ' % page)  # already downloaded
                continue
            url = urltemplate % (isbn,page)
            connection.request('GET', url,headers=headers)
            responce = connection.getresponse()
            # Always drain the body so the connection can be reused.
            data = responce.read()
            if responce.status == 401:
                print('Not Authorized Book %s, the cookie has expired' % isbn)
                break
            if len(data) < 1000:
                print('End of book %s since the page size is too small : %i' % (isbn,len(data)))
                break
            with open(imageFilePath,'wb') as f:
                f.write(data)
    finally:
        connection.close()

def pdfDirectory(outputPDFName, imageDirectory ):
    """Combine numbered page images into a single A4 PDF.

    Every directory reached by os.walk below *imageDirectory* is scanned
    for images named ``0.jpg``, ``1.jpg``, ... up to the number of files
    in that directory. Each image found is stretched over a page of its
    own; numbers with no matching file are skipped.

    Args:
        outputPDFName: path of the PDF file to create.
        imageDirectory: folder holding the numbered ``.jpg`` pages.
    """
    dirim = str(imageDirectory)
    output = str(outputPDFName)
    print('Converting to pdf %s, images %s' % (output,dirim))
    pageWidth, pageHeight = A4
    c = canvas.Canvas(output, pagesize=A4)
    for root, dirs, files in os.walk(dirim):
        for pageNumber in range(len(files)):
            filepath = os.path.join(root, str(pageNumber) + ".jpg")
            if not os.path.exists(filepath):
                continue  # the folder also holds files that are not pages
            c.drawImage(filepath, mm * 0.001 , mm * 0.001, pageWidth, pageHeight, preserveAspectRatio=False)
            # Finish this page so the next image starts on a new one.
            c.showPage()
    # Nothing reaches the disk until the canvas is saved.
    c.save()
    print("PDF of Image directory created %s" % outputPDFName)

def threadExe(location,isbn):
    """Print a start banner, then one line per page number below maxPages."""
    print('Executing Thread for location %s book %s\n' % (location,isbn))
    pageNumber = 0
    while pageNumber < maxPages:
        print('%s Page - %i \n' % (isbn,pageNumber))
        pageNumber += 1

def createOptionParser():
 parser = OptionParser(usage="Usage: %prog [options]", version="%prog 1.0")
 parser.add_option("-i", "--isbn", dest="isbn", action='store', type='string')
 parser.add_option("-d", action="store_true", dest="download", help="Download the book images", default=False)
 parser.add_option("-c", action="store_true", dest="converToPdf", help="Convert images to pdf file", default=False)
 parser.add_option("-f", "--imageFolder", type="string", help="Image folder to download or to read from.")
 parser.add_option("-p", "--pdfOutputFolder", type="string", help="Pdf output folder.")
 return parser

# Command-line entry point: download a book's page images, convert them
# to a PDF, or both, depending on the -d and -c flags.
if __name__ == '__main__':
    parser = createOptionParser()
    (options, args) = parser.parse_args()
    if not options.isbn:
        parser.error("You have to specify the isbn book number")
    if options.download or options.converToPdf:
        if not options.imageFolder:
            parser.error("Specify the image folder")
        # Check the pdf folder before a long download starts.
        if options.converToPdf and not options.pdfOutputFolder:
            parser.error("Specify the pdf folder")
        if options.download:
            downloadBookImages(options.imageFolder, options.isbn)
        if options.converToPdf:
            print('Converting to pdf %s, images %s' % (options.pdfOutputFolder,options.imageFolder))
            # os.path.join works with or without a trailing separator on
            # the output folder.
            # NOTE(review): the downloader saves pages under
            # <imageFolder>\<isbn>, while the whole image folder is walked
            # here -- confirm that is intended.
            pdfDirectory(os.path.join(options.pdfOutputFolder, options.isbn + ".pdf") , options.imageFolder)
    else:
        print('Specify if you would like to download or convert the book or both?')
# for book in books:
#  downloadBookImages('c:\\tmp',book)
# for book in books:
#  pdfDirectory('w:\\%s' % book , 'c:\\tmp\\%s.pdf' % book)

Sunday, February 07, 2010

Extracting the audio from an flv file and creating a new flv with only audio

As a part of my studies I wanted to extract out only the audio from the flv file into a new flv file.

Here is what I did
$ffmpeg -i input.flv -f flv -vn -acodec copy output.flv

I also wanted to convert all the files while taking a copy of the original file
$find . -name *.flv -exec cp '{}' '{}'.`date +%d.%m.%y`.original \; -exec ffmpeg -i '{}' -y -f flv -vn -acodec copy '{}' \;

The above command didn't quite work, since it overwrote the input file while it was still being converted, so I used this one instead:

# The pattern is quoted so that find, not the shell, expands *.flv.
$find . -name '*.flv' -exec cp '{}' '{}'.original \; -exec ffmpeg -i '{}'.original -y -f flv -vn -acodec copy '{}' \;

ffmpeg man pages: