bobbycar/logdata_visualization/logfix.py

201 lines
6.3 KiB
Python

import numpy as np
from datetime import datetime
import time
import argparse
import os.path
# Command-line interface.
# NOTE: argparse.FileType opens the named files while parsing; the rest of the
# script only reads the `.name` attribute of these objects and re-opens the
# paths itself.
parser = argparse.ArgumentParser(
    description='Copys, renames and fixes logfiles written by bobbycar sd logger.',
)
parser.add_argument(
    '-i', '--input',
    required=True,
    nargs='+',
    type=argparse.FileType('r'),
    help="list of input log files",
)
parser.add_argument(
    '-o', '--output',
    nargs='?',
    type=argparse.FileType('w'),
    help="output filename",
)
parser.add_argument(
    '-c', '--consecutive',
    action="store_true",
    help="add consecutive files to input. If the input file ends with a number the following logfiles will be added.",
)
args = parser.parse_args()

# Gate for the final write step; it is only ever set here.
ok = True
def getTimestamp(plines):
    """Return the creation timestamp recorded in a log file.

    Scans ``plines`` in order and parses the integer that follows the
    marker ``TIMESTAMP:`` on the first line containing it.

    Args:
        plines: Sequence of log lines (strings).

    Returns:
        int: The value following ``TIMESTAMP:`` on the first matching line.

    Raises:
        SystemExit: After printing an error message, if no line contains
            the marker (including when ``plines`` is empty).
        ValueError: If the text after the marker is not an integer.
    """
    for pline in plines:
        if 'TIMESTAMP:' in pline:
            return int(pline.split('TIMESTAMP:')[1])  # timestamp when file was created
    # Reached only when no line carried the marker.
    print("Error: Timestamp not found!")
    raise SystemExit(1)
def filterLines(plines, plinesStarttime=None):
    """Split raw log lines into comments, header, valid and broken data lines.

    A line starting with ``#`` is a comment. The first non-comment line is
    taken as the CSV header; a data line is "OK" when it has as many
    comma-separated fields as the header. Repetitions of the header line
    (e.g. from concatenated files) are dropped from the OK data lines.

    Args:
        plines: Sequence of raw lines (strings); trailing newlines are removed.
        plinesStarttime: Optional sequence with one entry per element of
            ``plines``. When given, it is filtered in lockstep so that the
            result has one entry per element of the returned OK data lines.

    Returns:
        tuple: ``(plines, pheader, pcommentlinesMask, pdatalines,
        pdatalinesFail, pdatalinesOK, pheaderSize, plinesOK, plinesStarttime)``

        - plines: array of all lines without trailing newline
        - pheader: the header line
        - pcommentlinesMask: boolean array, True where a line is a comment
        - pdatalines: array of all non-comment lines (header included)
        - pdatalinesFail: array of non-comment lines with a wrong field count
        - pdatalinesOK: list of non-comment, non-header lines with the
          header's field count
        - pheaderSize: number of fields in the header
        - plinesOK: boolean array over all lines, True where the field count
          matches the header (only meaningful for data lines)
        - plinesStarttime: list matching pdatalinesOK, or None if not given

    Raises:
        IndexError: If ``plines`` contains no non-comment line.
    """
    pstripped = [x.rstrip("\n") for x in plines]  # remove \n
    # Explicit dtypes keep the arrays well-typed even for empty input.
    pcommentlinesMask = np.array([x.startswith('#') for x in pstripped], dtype=bool)
    plines = np.array(pstripped, dtype=str)
    pisData = ~pcommentlinesMask

    pdatalines = plines[pisData]  # lines with data (header included)
    if pdatalines.size == 0:
        raise IndexError("no data lines found: input is empty or contains only comments")

    pheader = pdatalines[0]  # header is the first non comment line
    pheaderSize = len(pheader.split(','))  # how many elements are expected per line

    # Field count per line, computed once and reused for every mask below.
    plinesSize = np.array([len(x.split(',')) for x in plines], dtype=int)
    plinesOK = plinesSize == pheaderSize  # mask for okay lines (valid for data lines)
    pdatalinesSizeOK = plinesOK[pisData]  # same mask restricted to data lines

    pdatalinesFail = pdatalines[~pdatalinesSizeOK]
    psizedlines = pdatalines[pdatalinesSizeOK]
    pdatalinesOK = [x for x in psizedlines if x != pheader]  # exclude header from data lines

    if plinesStarttime is not None:
        # Apply the same three filters (comment, field count, header) so the
        # start times stay aligned with pdatalinesOK.
        psizedStarttime = np.asarray(plinesStarttime)[pisData][pdatalinesSizeOK]
        plinesStarttime = [t for t, x in zip(psizedStarttime, psizedlines) if x != pheader]

    return plines, pheader, pcommentlinesMask, pdatalines, pdatalinesFail, pdatalinesOK, pheaderSize, plinesOK, plinesStarttime
def _nextConsecutiveFilename(filename):
    """Return filename with its last run of digits incremented by one.

    The incremented number is zero-padded to the width of the original run
    (it grows when the increment adds a digit). Returns None when the
    filename contains no digit, i.e. there is no consecutive file to look for.
    """
    digits = "0123456789"
    # Find the end of the last digit run.
    digitEndpos = len(filename)
    while digitEndpos > 0 and filename[digitEndpos-1] not in digits:
        digitEndpos -= 1
    if digitEndpos == 0:
        return None
    # Extend to the start of that run; a run may begin at index 0.
    digitStartpos = digitEndpos
    while digitStartpos > 0 and filename[digitStartpos-1] in digits:
        digitStartpos -= 1
    number = int(filename[digitStartpos:digitEndpos]) + 1
    return filename[0:digitStartpos] + str(number).zfill(digitEndpos-digitStartpos) + filename[digitEndpos:]


# Resolve the list of input file names.
inputFilenames = []
if args.consecutive:
    if len(args.input) != 1:
        # parser.error prints the usage message and terminates the program.
        parser.error("in consequtive mode exactly one input file is required")
    # Starting from the given file, keep counting the number in the name up
    # for as long as a file with that name exists.
    nextFilename = args.input[0].name
    while nextFilename is not None and os.path.isfile(nextFilename):
        print(nextFilename+" exists")
        inputFilenames.append(nextFilename)
        nextFilename = _nextConsecutiveFilename(nextFilename)
    print("")
    print(inputFilenames)
else:
    inputFilenames = [x.name for x in args.input]

# Explicit output name if given; otherwise it is chosen later by the script.
outputFilename = None
if args.output is not None:
    outputFilename = args.output.name
# Accumulators over all input files.
lines = []
linesStarttime = []  # offset for every line with timestamp. will be combined to new column
header = ""

for inputFilename in inputFilenames:
    print("Reading "+str(inputFilename))
    with open(inputFilename, 'r') as logfile:
        inputlines = logfile.readlines()
    lines.extend(inputlines)

    # Check Headers: every file must carry the same header as the first one.
    _lines, _header = filterLines(inputlines)[:2]
    if not header:  # is first header
        header = _header
    assert header==_header, "Header is different!"

    _timestamp = getTimestamp(_lines)
    print("Timestamp="+str(_timestamp))

    # One copy of this file's start timestamp per line of the file.
    _linesStarttime = [_timestamp] * len(inputlines)
    linesStarttime.extend(_linesStarttime)
    print("Line in file="+str(len(inputlines)))

assert len(lines)==len(linesStarttime), "Length of lines and linesStarttime does not match"
linesStarttime = np.array(linesStarttime)
# Filter the combined lines of all input files; linesStarttime is reduced in
# lockstep so it ends up with one entry per element of datalinesOK.
lines,header,commentlinesMask,datalines,datalinesFail,datalinesOK,headerSize,linesOK,linesStarttime = filterLines(lines, linesStarttime)

print("Found "+str(len(lines))+" lines")
print(str(np.sum(commentlinesMask))+" comments")
print(str(len(datalinesFail))+" Datalines Failed")
print(str(len(datalinesOK))+" Datalines OK")
print("Header Size is "+str(headerSize))

# The first timestamp found in the combined lines names the output file.
timestamp = getTimestamp(lines)
filetime = time.strftime('%Y%m%d_%H%M%S', time.localtime(timestamp))
if outputFilename is None:
    outputFilename = filetime+".csv"

print("Timestamp:"+str(timestamp)+" -> "+str(filetime))
# time.gmtime replaces datetime.utcfromtimestamp, which is deprecated since
# Python 3.12; the formatted text is the same.
print("UTC: "+ time.strftime('%A, %Y-%m-%d %H:%M:%S', time.gmtime(timestamp)))
print("Local Time:"+time.strftime('%A, %Y-%m-%d %H:%M:%S', time.localtime(timestamp)))
print("Writing to: "+str(outputFilename))

print("Size lines="+str(len(lines)))
print("Size commentlinesMask="+str(len(commentlinesMask)))
print("Size datalines="+str(len(datalines)))
print("Size linesOK="+str(len(linesOK)))

header = "timestamp,"+header  # add timestamp column
# add file timestamp to line time and add column to data
writelines = [str(linesStarttime[i]+float(x.split(',')[0]))+","+x for i,x in enumerate(datalinesOK)]

linesWritten = 0
if ok:
    with open(outputFilename, 'w') as writer:
        writer.write(header+"\n")  # write header
        writer.writelines(line+"\n" for line in writelines)
    linesWritten = len(writelines)
    print(str(linesWritten)+" lines written to "+str(outputFilename))
else:
    print("Failed!")