#!/cms/sw/python/2.5/bin/python
#
# Makes plots of jobs running vs time.
#
# Uses data from Will Maier's tsar server.
#
# Michael Anderson
# Original: Oct 21, 2009
# Updated:  Apr 13, 2010 (to use tsar data)

########################################
# Make matplotlib tempdir.
# MPLCONFIGDIR must point somewhere writable and must be set BEFORE
# matplotlib is imported, so this block has to run first.
import os        # For checking dirs, setting env vars
import shutil    # For removing the tempdir when done
from tempfile import mkdtemp

base = "/scratch/cmsprod"   # BUGFIX: was misspelled "/scrach/cmsprod",
                            # which always forced the /tmp fallback
if not os.path.isdir(base):
    base = "/tmp"
tmpdir = mkdtemp("-matplotlib", dir=base)
os.environ["MPLCONFIGDIR"] = tmpdir
########################################

########################################
import sys
import datetime
import matplotlib                  # Do this before importing pylab or pyplot
matplotlib.use('Agg')              # Anti-Grain Geometry (raster images)
import matplotlib.pyplot as plt
import matplotlib.dates as mdates  # To format/locate timestamps on the axis
import csv                         # Comma-separated values (currently unused)
import pytz                        # For timezone info
########################################

########################################
# Configuration
outImageDirectory = '/cms/www/comp/cmsprod/productionJobMonitor/plots/'
outputImageSize   = (7, 3)          # Plot size (inches)
xAxisTimeFormat   = '%a\n%H:%M'     # X-axis (time) format on plot
xAxisDaysDuration = 2               # Days of history to request from tsar
timeZone          = pytz.timezone("US/Central")
xAxisMajorTics    = mdates.HourLocator(interval=6, tz=timeZone)     # Every few hours
xAxisMinorTics    = mdates.MinuteLocator(interval=60, tz=timeZone)  # Every hour
routePlotYAxisMax = 10000           # Y-axis cap for processing plots
mergePlotYAxisMax = 200             # Y-axis cap for merge plots

list_servers = ['stevia.hep.wisc.edu',
                'cassia.hep.wisc.edu',
                'caraway.hep.wisc.edu']

# A plot will be made for every combination of jobType and jobStatus
list_jobTypes    = ['processing', 'merge']
list_jobStatuses = ['running', 'idle', 'held']

# site name : (gatekeeper, a school color (rgb code))
list_sites = {"Caltech"      : ("cit-gatekeeper.ultralight.org",  "#FF7F00"),
              "Caltech1"     : ("cit-gatekeeper2.ultralight.org", "#9900dd"),
              "Florida"      : ("pg.ihepa.ufl.edu",               "#0033FF"),
              "Florida1"     : ("osg.hpc.ufl.edu",                "#FFCC00"),
              "Florida2"     : ("uscms1.fltech-grid3.fit.edu",    "#66FFFF"),
              "MIT_CMS"      : ("ce01.cmsaf.mit.edu",             "#808080"),
              "Nebraska"     : ("red.unl.edu",                    "#FF2400"),
              "Omaha"        : ("ff-grid.unl.edu",                "#FFFDD0"),
              "Omaha1"       : ("ff-grid2.unl.edu",               "#99FF33"),
              "Omaha2"       : ("ff-grid3.unl.edu",               "#99CCCC"),
              "Purdue"       : ("osg.rcac.purdue.edu",            "#CFB53B"),
              "Purdue1"      : ("lepton.rcac.purdue.edu",         "#000000"),
              "UCSD"         : ("osg-gw-2.t2.ucsd.edu",           "#75B2DD"),
              "UCSD1"        : ("osg-gw-4.t2.ucsd.edu",           "#000080"),
              "UWMadisonCMS" : ("cmsgrid02.hep.wisc.edu",         "#C41E3A"),
              "UWMadisonCMS1": ("cmsgrid01.hep.wisc.edu",         "#008000")}
########################################


########################################
def fill_default(dictionary):
    '''Give every inner dictionary the full (union) set of inner keys,
    filling in missing entries with 0.

    Input example:
      {"Caltech": {date0: v0, date1: v1},
       "UCSD"   : {date0: v0, date2: v1}}
    Returns (list_of_all_inner_keys, filled_dictionary):
      {"Caltech": {date0: v0, date1: v1, date2: 0},
       "UCSD"   : {date0: v0, date1: 0,  date2: v1}}

    NOTE: the input dictionary is modified in place.'''
    default = 0
    # Collect the union of all inner keys (timestamps)
    list_keys = []
    for keyA in dictionary:
        for keyB in dictionary[keyA]:
            if keyB not in list_keys:
                list_keys.append(keyB)
    # Fill in whatever is missing
    for keyA in dictionary:
        for key in list_keys:
            if key not in dictionary[keyA]:
                dictionary[keyA][key] = default
    return list_keys, dictionary


def add_dictionaries(*list_dicts):
    '''Element-wise sum of several dictionaries.

    Input example:  {"a": v0, "b": v1}, {"a": v0, "c": v1}, ...
    Returns one dict whose value for each key is the sum of that key's
    truthy values across all inputs; keys whose values are all falsy
    (e.g. None from tsar) come out as 0.'''
    total = {}
    for d in list_dicts:
        for key in d:
            if d[key]:
                if key in total:
                    total[key] += d[key]
                else:
                    total[key] = d[key]
            else:
                # Value is None/0: make sure the key at least exists
                if key not in total:
                    total[key] = 0
    return total


def sum_dict_values(dictionaries):
    '''Sum per-site time series across servers.

    Input: dictionary of dictionaries, e.g.
      {"stevia.hep.wisc.edu" : {"Caltech": {date: value, ...}, ...},
       "caraway.hep.wisc.edu": {"Caltech": {date: value, ...}, ...},
       "cassia.hep.wisc.edu" : {...}}
    Output: (list_of_dates, per-site dict) where each site's series is
    the sum across all servers (so Caltech is summed over stevia,
    caraway, cassia), with missing dates filled as 0 by fill_default.'''
    total = {}
    for keyA in dictionaries:            # keyA = server
        for keyB in dictionaries[keyA]:  # keyB = site
            if not dictionaries[keyA][keyB]:
                continue                 # skip empty dictionaries
            if keyB in total:
                total[keyB] = add_dictionaries(total[keyB],
                                               dictionaries[keyA][keyB])
            else:
                total[keyB] = dictionaries[keyA][keyB]
    return fill_default(total)
########################################


########################################
# Get monitoring data from Tsar
from tsar.client import Tsar
tsar = Tsar()

def getMonitoringData(jobType, jobStatus):
    '''Return a dictionary with two keys:
       xValues: sorted list of timestamps,
       yValues: dict like {"site0": [v0, v1, ...], "site1": [...], ...}
                where each list is CUMULATIVE (this site plus every site
                after it alphabetically), so fill_between plots stack.'''
    ####
    # Step 1: Collect data from tsar.
    # tsar_data[server][site] = {timestamp: value, ...}
    tsar_data = {}
    for server_name in list_servers:
        tsar_data[server_name] = {}
        for site_name in list_sites:
            # e.g. 'prod_cit-gatekeeper.ultralight.org_processing_running_jobs'
            attributeQuery = '_'.join(("prod", list_sites[site_name][0],
                                       jobType, jobStatus, "jobs"))
            # BUGFIX: window was -2*84600; a day is 86400 seconds, and
            # the duration now honors the xAxisDaysDuration setting.
            tsarRawData = tsar.query(subject=server_name,
                                     attribute=attributeQuery,
                                     cf="max",
                                     start=-xAxisDaysDuration * 86400,
                                     interval=60,
                                     filters=["skipnull"])
            # tsar.query returns a list of tuples; fields 3:5 are
            # (timestamp, value) — presumably; verify against tsar docs.
            current_data = dict([x[3:5] for x in tsarRawData])
            if current_data:
                tsar_data[server_name][site_name] = current_data

    ####
    # Step 2: Sum across the servers
    list_timestamps, dict_all_site_data = sum_dict_values(tsar_data)

    ####
    # Step 3: To make a stacked plot of site data, add to each site the
    # data from every site "below" it (later alphabetically).
    dictJobsAtSites = {}
    for site in dict_all_site_data:
        dictJobsAtSites[site] = []
    for date in list_timestamps:
        runningTotal = 0
        # Reverse order so that the first site, alphabetically, is on top
        for site in sorted(dict_all_site_data, reverse=True):
            if date in dict_all_site_data[site]:
                runningTotal += dict_all_site_data[site][date]
            dictJobsAtSites[site].append(runningTotal)
    return {"xValues": sorted(list_timestamps),
            "yValues": dictJobsAtSites}
########################################


########################################
# Plot jobs vs time from monitoring data
def plotStackedJobsVsTime(jobType, jobStatus, outputImageName, plotTitle):
    '''Make a stacked fill plot of job counts vs time for all sites and
    save it to outputImageName.'''
    dictRawData = getMonitoringData(jobType, jobStatus)
    yValues = dictRawData["yValues"]  # dict of {site: [cumulative values]}
    xValues = dictRawData["xValues"]  # list of x-values (timestamps)

    # Set up plot
    fig = plt.figure(figsize=outputImageSize)
    plt.ylabel("Number of Jobs")
    plt.title(plotTitle)
    ax = fig.add_subplot(111)
    # Adjust location of edges of plot (in % of image size)
    plt.subplots_adjust(bottom=0.14, right=0.82)

    ###############
    # Plot the data. The series are already cumulative, so drawing the
    # sites in alphabetical order paints each fill over the ones below.
    listOfProxyArtists = []
    listOfLegendNames = []
    for column in sorted(yValues):
        ax.fill_between(xValues, yValues[column],
                        color=list_sites[column][1])
        # legend() doesn't support fill_between: make zero-size proxy
        # rectangles to serve as legend handles (matplotlib workaround).
        p = plt.Rectangle((0, 0), 1, 1, fc=list_sites[column][1])
        listOfProxyArtists.append(p)
        listOfLegendNames.append(column)
    ###############

    import matplotlib.font_manager  # Legend
    prop = matplotlib.font_manager.FontProperties(size=7)  # Font Size
    legend = plt.legend(listOfProxyArtists, listOfLegendNames,
                        bbox_to_anchor=(1.25, 1), shadow=True,
                        fancybox=True, prop=prop)

    ax.xaxis.set_major_locator(xAxisMajorTics)  # Set major tick marks
    ax.xaxis.set_minor_locator(xAxisMinorTics)  # Set minor tick marks
    # Format x-axis labels
    ax.xaxis.set_major_formatter(mdates.DateFormatter(xAxisTimeFormat))
    if "merge" in plotTitle:
        ax.set_ylim(0, mergePlotYAxisMax)
    else:
        ax.set_ylim(0, routePlotYAxisMax)
    ax.grid(True)  # Turn on x&y grid

    fig.savefig(outputImageName)
    plt.clf()
########################################


########################################
# Make plots for all job types and job statuses
if __name__ == '__main__':
    try:
        for jobType in list_jobTypes:
            for jobStatus in list_jobStatuses:
                outputFile = (outImageDirectory +
                              '_'.join((jobType, jobStatus)) + ".png")
                outputTitle = (' '.join((jobType, jobStatus)) +
                               datetime.datetime.today().strftime(" (%b %e %Y)"))
                plotStackedJobsVsTime(jobType, jobStatus,
                                      outputFile, outputTitle)
    finally:
        # Always remove the matplotlib tempdir, even if a plot fails
        # partway through (previously it leaked on any exception).
        shutil.rmtree(tmpdir)
########################################
# To profile instead of just running, use:
#   import cProfile, pstats
#   cProfile.run('main-equivalent code here', 'fooprof')
#   pstats.Stats('fooprof').sort_stats('cumulative').print_stats(15)
