This is a Python script that concatenates files network by network. It treats the first file as the destination file and appends the content of the second file to it, and so on, until the size of the destination file exceeds a configurable value; it then moves the destination file to another location and deletes the source files.
#!/usr/bin/env python
"""Concatenate CDR delegator files, one worker thread per network prefix.

The names found in the directory given on the command line are ordered by
modification time and grouped with the ``filters`` patterns.  Within a group
the first file is the destination: each following file is appended to it and
then deleted together with its marker in ``Success_Directory``.  Once the
destination holds ``maxsize`` lines or more it is moved to
``destination_directory``, an empty marker is created for it in
``Success_Directory_Perf``, and the next file becomes the new destination.

Usage: ./thread_merge_exception.py "Directory Name"
"""
import fnmatch
import os
import re
import subprocess
import sys
import time
from threading import BoundedSemaphore
from threading import Thread

# At most this many worker threads merge files at the same time.
pool_sema = BoundedSemaphore(value=20)
# A destination file is rolled over once it holds this many lines.
maxsize = 200
source_directory = "/ranger/RangerRoot/RangerData/CDRDelegatorData/"
destination_directory = "/ranger/RangerRoot/RangerData/CDRDelegatorDataPerf/"
log_directory = "/ranger/RangerRoot/LOG/"
Success_Directory = "/ranger/RangerRoot/RangerData/CDRDelegatorData/success/"
Success_Directory_Perf = "/ranger/RangerRoot/RangerData/CDRDelegatorDataPerf/success/"
# Every input file is copied here before it is modified or deleted.
backup_directory = "/ranger/ravi/subscriber/"

# One filename pattern per network prefix: "01*" .. "23*", plus "90*".
filters = ["%02d*" % i for i in range(1, 24)]
filters.append("90*")


def fnumlines(filename):
    """Return the number of newline characters in the file *filename*."""
    # Binary mode: the count does not depend on the text encoding, and the
    # ``with`` block closes the handle the original left open.
    with open(filename, "rb") as handle:
        return handle.read().count(b"\n")


def _run(*argv):
    """Run an external command without a shell and return its exit status.

    Passing the arguments as a list keeps spaces and shell metacharacters in
    file names from being interpreted.  A failing command only reports on
    stderr, exactly like the ``os.system`` calls this replaces.
    """
    return subprocess.call(list(argv))


def _timestamp():
    """Return the current local time laid out like the ``date`` command."""
    # Generated in-process: the previous ``date > <network_id>date.log``
    # round trip redirected into a file name containing a literal ``*``.
    return time.strftime("%a %b %d %H:%M:%S %Z %Y")


def _publish(data_path, marker_name, success_marker):
    """Deliver a finished destination file.

    Moves *data_path* into ``destination_directory``, creates an empty file
    named *marker_name* in ``Success_Directory_Perf``, prints that marker's
    path and removes *success_marker*.
    """
    _run("mv", data_path, destination_directory)
    perf_marker = os.path.join(Success_Directory_Perf, marker_name)
    with open(perf_marker, "w"):
        pass
    print(perf_marker)
    _run("rm", success_marker)


class PROCESS(Thread):
    """Worker thread that merges the files of one network prefix.

    Attributes:
        list: file names (relative to ``source_directory``) in merge order.
        pool_sema: semaphore limiting how many workers run at once.
        network_id: the filter pattern (for example ``"01*"``) handled here.
    """

    def __init__(self, filelist, pool_sema, network_id):
        Thread.__init__(self)
        self.list = filelist
        self.pool_sema = pool_sema
        self.network_id = network_id

    def run(self):
        """Merge this worker's files while holding one semaphore slot."""
        # ``with`` gives the slot back even when merging raises; the explicit
        # acquire()/release() pair leaked it on every error path.
        with self.pool_sema:
            if self.list:
                self._merge()

    def _merge(self):
        """Append the files in ``self.list`` onto rolling destination files.

        Raises:
            IOError/OSError: when the log file or a destination file cannot
                be opened or read.  A destination that is open at that point
                is still delivered before the error propagates.
        """
        log_name = re.sub("[*]", "", self.network_id) + "CDRDelegatorConcatenation.log"
        first = self.list[0]
        target = os.path.join(source_directory, first)
        target_marker = os.path.join(Success_Directory, first)
        marker_name = first
        out = None  # open destination handle; None while there is none
        log = open(os.path.join(log_directory, log_name), "a")
        try:
            _run("cp", target, backup_directory)
            numlines = fnumlines(target)
            out = open(target, "ab")
            for name in self.list[1:]:
                source = os.path.join(source_directory, name)
                _run("cp", source, backup_directory)
                source_marker = os.path.join(Success_Directory, name)

                if numlines >= maxsize:
                    # Destination is full: deliver it, then continue with the
                    # current file as the new destination.
                    out.close()
                    out = None
                    _publish(target, marker_name, target_marker)
                    target, marker_name, target_marker = source, name, source_marker
                    numlines = fnumlines(target)
                    out = open(target, "ab")
                    continue

                try:
                    log.write(_timestamp() + "\n")
                    log.write("%s is concatenating to %s \n " % (source, target))
                except (IOError, OSError):
                    log.write("Error: can't find file or write the data \n")

                try:
                    # Read the whole source before writing so a read failure
                    # never leaves a partial copy in the destination.
                    with open(source, "rb") as src:
                        data = src.read()
                    out.write(data)
                except (IOError, OSError):
                    log.write(_timestamp() + "\n")
                    log.write("Error occured while reading the file %s \n" % (source))
                else:
                    log.write("%s has been concatenated to %s ... DONE\n" % (source, target))
                    numlines += data.count(b"\n")
                    _run("rm", source)
                    _run("rm", source_marker)
        finally:
            # Deliver only a destination that was really opened; otherwise a
            # marker would be published for a file that was never moved.
            if out is not None:
                out.close()
                _publish(target, marker_name, target_marker)
            log.close()


def derectory_listing(directory):
    """Return the entry names of *directory*, oldest modification time first.

    Entries with the same modification time are ordered by name.  Each entry
    is stat'ed inside *directory* itself; the previous version looked it up
    under ``Success_Directory`` whatever directory had been listed.
    """
    stamped = [
        (os.stat(os.path.join(directory, name)).st_mtime, name)
        for name in os.listdir(directory)
    ]
    stamped.sort()
    return [name for _, name in stamped]


def main(argv=None):
    """Start one worker per non-empty network group and wait for all of them.

    Args:
        argv: command-line arguments; defaults to ``sys.argv``.

    Returns:
        Process exit status: 0 on completion, 2 when the directory argument
        is missing.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.stderr.write('Usage: %s "Directory Name"\n' % argv[0])
        return 2

    os.system('date')
    print(filters)
    allfiles = derectory_listing(argv[1])
    threadlist = []
    for pattern in filters:
        files = fnmatch.filter(allfiles, pattern)
        if not files:
            continue
        worker = PROCESS(files, pool_sema, pattern)
        threadlist.append(worker)
        worker.start()
    for worker in threadlist:
        worker.join()
    os.system('date')
    return 0


if __name__ == "__main__":
    sys.exit(main())