This Python script concatenates files per network. It treats the first file as the destination file and appends the content of the second file to it, and so on, until the size (line count) of the destination file reaches a configurable value; it then moves the destination file to another location and deletes the source files.
#!/usr/bin/env python
import sys, os, fnmatch, re
from threading import Thread
from threading import BoundedSemaphore
# Shared semaphore handed to every worker thread; it holds 20 permits.
pool_sema = BoundedSemaphore(value=20)

# Line-count threshold: a destination file keeps receiving data while it
# holds fewer than this many lines.
maxsize = 200

# Directories used by the merge workers.
source_directory = "/ranger/RangerRoot/RangerData/CDRDelegatorData/"
destination_directory = "/ranger/RangerRoot/RangerData/CDRDelegatorDataPerf/"
log_directory = "/ranger/RangerRoot/LOG/"
Success_Directory = "/ranger/RangerRoot/RangerData/CDRDelegatorData/success/"
Success_Directory_Perf = "/ranger/RangerRoot/RangerData/CDRDelegatorDataPerf/success/"

# Print the start time of the run.
os.system('date')

# One fnmatch pattern per two-digit prefix "01*" .. "23*", plus "90*".
filters = ["%02d*" % i for i in range(1, 24)]
filters.append("90*")
print(filters)
def fnumlines(filename):
    """Return the number of newline characters in the file ``filename``.

    The file is opened in text mode and read in one go.  A final line that
    is not terminated by a newline is therefore not counted.

    Raises IOError if the file cannot be opened or read.
    """
    # Close the handle deterministically; the previous version leaked one
    # file descriptor per call.
    with open(filename) as f:
        return f.read().count("\n")
class PROCESS(Thread):
    """Worker thread that merges the files of one network filter.

    The first name in the file list is the initial destination file.  Each
    following file is appended to the destination while the destination
    holds fewer than ``maxsize`` lines.  Once that threshold is reached the
    destination is moved to ``destination_directory`` and the file currently
    being processed becomes the new destination.
    """

    def __init__(self, filelist, pool_sema, network_id):
        """Remember the work list, the shared semaphore and the filter.

        filelist   -- file names relative to ``source_directory``, in the
                      order in which they are to be merged
        pool_sema  -- semaphore acquired for the whole duration of run()
        network_id -- the fnmatch pattern (for example "01*") that selected
                      ``filelist``; also used to build the log file names
        """
        Thread.__init__(self)
        self.list = filelist
        self.pool_sema = pool_sema
        self.network_id = network_id

    def run(self):
        """Merge the files while holding one permit of the semaphore."""
        self.pool_sema.acquire()
        try:
            self._merge_files()
        finally:
            # Always hand the permit back; previously an exception anywhere
            # in the merge left the semaphore permanently acquired.
            self.pool_sema.release()

    def _log_timestamp(self, log, stamp_path):
        """Write the output of the ``date`` command to ``log``."""
        os.system('date > %s' % (stamp_path))
        with open(stamp_path) as stamp:
            log.write(stamp.read())

    def _deliver_destination(self, dest_path, perf_success_name, success_marker):
        """Move a finished destination file and update the marker files."""
        os.system('mv %s %s' % (dest_path, destination_directory))
        touch_perf = os.path.join(Success_Directory_Perf, perf_success_name)
        # Create (or truncate) the empty marker in the Perf success directory.
        open(touch_perf, "w").close()
        print(touch_perf)
        os.system('rm %s' % (success_marker))

    def _merge_files(self):
        """Append the listed files to rolling destination files."""
        first_name = self.list[0]
        perf_success_name = first_name
        log_name = re.sub("[*]", "", self.network_id) + "CDRDelegatorConcatenation.log"
        log = open(os.path.join(log_directory, log_name), "a")
        try:
            dest_path = os.path.join(source_directory, first_name)
            os.system('cp %s /ranger/ravi/subscriber/' % (dest_path))
            dest_marker = os.path.join(Success_Directory, first_name)
            numlines = fnumlines(dest_path)
            # Opened before the inner try so that a failure here propagates
            # as-is instead of hitting an unbound handle during cleanup.
            dest = open(dest_path, "a")
            try:
                for name in self.list[1:]:
                    src_path = os.path.join(source_directory, name)
                    os.system('cp %s /ranger/ravi/subscriber/' % (src_path))
                    src_marker = os.path.join(Success_Directory, name)
                    # NOTE(review): unlike the main log name, this scratch
                    # file name keeps the "*" of the filter -- confirm that
                    # this is intended.
                    stamp_path = os.path.join(log_directory,
                                              self.network_id + "date.log")
                    if numlines < maxsize:
                        try:
                            self._log_timestamp(log, stamp_path)
                            log.write("%s is concatenating to %s \n " % (src_path, dest_path))
                        except IOError:
                            log.write("Error: can\'t find file or write the data \n")
                        try:
                            with open(src_path) as src:
                                dest.write(src.read())
                        except IOError:
                            self._log_timestamp(log, stamp_path)
                            log.write("Error occured while reading the file %s \n" % (src_path))
                        else:
                            # NOTE(review): the source file and its success
                            # marker are removed only after a successful
                            # append; the flattened original did not make
                            # the nesting explicit -- confirm.
                            log.write("%s has been concatenated to %s ... DONE\n" % (src_path, dest_path))
                            numlines += fnumlines(src_path)
                            os.system('rm %s' % (src_path))
                            os.system('rm %s' % (src_marker))
                    else:
                        # Threshold reached: ship the current destination and
                        # continue with the current file as new destination.
                        dest.close()
                        self._deliver_destination(dest_path, perf_success_name,
                                                  dest_marker)
                        perf_success_name = name
                        dest_path = src_path
                        dest_marker = src_marker
                        numlines = fnumlines(src_path)
                        dest = open(src_path, "a")
            finally:
                # NOTE(review): the last destination is delivered even when
                # the loop raised, on the reading that every statement after
                # the original ``finally:`` belonged to it -- confirm.
                dest.close()
                self._deliver_destination(dest_path, perf_success_name,
                                          dest_marker)
        finally:
            log.close()
def derectory_listing(directory):
    """Return the entry names of ``directory`` sorted by modification time.

    Names are ordered from the oldest to the newest ``st_mtime``; entries
    with equal timestamps are ordered by name.  An empty directory yields an
    empty list.

    Raises OSError if ``directory`` cannot be listed or an entry cannot be
    stat'ed.
    """
    def mtime_then_name(name):
        # Stat the entry inside the directory that was actually listed.  The
        # previous version joined the name with the global Success_Directory,
        # which is only correct when that very directory is passed in.
        return os.stat(os.path.join(directory, name)).st_mtime, name

    return sorted(os.listdir(directory), key=mtime_then_name)
# Usage: ./thread_merge_exception.py "Directory Name"
if len(sys.argv) < 2:
    # Fail with a readable message instead of a bare IndexError traceback.
    sys.stderr.write('Usage: %s "Directory Name"\n' % sys.argv[0])
    sys.exit(1)

# Names found in the directory given on the command line, sorted by
# derectory_listing().
allfiles = derectory_listing(sys.argv[1])

# Start one worker per filter that matches at least one file.
threadlist = []
for pattern in filters:
    files = fnmatch.filter(allfiles, pattern)
    if not files:
        continue
    thread = PROCESS(files, pool_sema, pattern)
    threadlist.append(thread)
    thread.start()

# Wait for every worker before printing the end time of the run.
for thread in threadlist:
    thread.join()
os.system('date')


