#!/cms/sw/python/2.5/bin/python
#
# Makes plots of jobs running vs time.
#
# Uses data from Will Maier's tsar server.
#
# Michael Anderson
# Original: Oct 21, 2009
# Updated:  Apr 13, 2010 (to use tsar data)

########################################
# Make matplotlib tempdir.
# MPLCONFIGDIR must point somewhere writable and must be set BEFORE
# matplotlib is imported, so this block has to run first.
import os        # For checking dirs, setting env vars
import shutil    # For removing the tempdir when done
from tempfile import mkdtemp

base = "/scratch/cmsprod"   # BUGFIX: was misspelled "/scrach/cmsprod",
                            # which always forced the /tmp fallback
if not os.path.isdir(base):
    base = "/tmp"
tmpdir = mkdtemp("-matplotlib", dir=base)
os.environ["MPLCONFIGDIR"] = tmpdir
########################################

########################################
import sys
import datetime
import matplotlib                  # Do this before importing pylab or pyplot
matplotlib.use('Agg')              # Anti-Grain Geometry (raster images)
import matplotlib.pyplot as plt
import matplotlib.dates as mdates  # To format/locate timestamps on the axis
import csv                         # Comma-separated values (currently unused)
import pytz                        # For timezone info
########################################

########################################
# Configuration
outImageDirectory = '/cms/www/comp/cmsprod/productionJobMonitor/plots/'
outputImageSize   = (7, 3)          # Plot size (inches)
xAxisTimeFormat   = '%a\n%H:%M'     # X-axis (time) format on plot
xAxisDaysDuration = 2               # Days of history to request from tsar
timeZone          = pytz.timezone("US/Central")
xAxisMajorTics    = mdates.HourLocator(interval=6, tz=timeZone)     # Every few hours
xAxisMinorTics    = mdates.MinuteLocator(interval=60, tz=timeZone)  # Every hour
routePlotYAxisMax = 10000           # Y-axis cap for processing plots
mergePlotYAxisMax = 200             # Y-axis cap for merge plots

list_servers = ['stevia.hep.wisc.edu',
                'cassia.hep.wisc.edu',
                'caraway.hep.wisc.edu']

# A plot will be made for every combination of jobType and jobStatus
list_jobTypes    = ['processing', 'merge']
list_jobStatuses = ['running', 'idle', 'held']

# site name : (gatekeeper, a school color (rgb code))
list_sites = {"Caltech"      : ("cit-gatekeeper.ultralight.org",  "#FF7F00"),
              "Caltech1"     : ("cit-gatekeeper2.ultralight.org", "#9900dd"),
              "Florida"      : ("pg.ihepa.ufl.edu",               "#0033FF"),
              "Florida1"     : ("osg.hpc.ufl.edu",                "#FFCC00"),
              "Florida2"     : ("uscms1.fltech-grid3.fit.edu",    "#66FFFF"),
              "MIT_CMS"      : ("ce01.cmsaf.mit.edu",             "#808080"),
              "Nebraska"     : ("red.unl.edu",                    "#FF2400"),
              "Omaha"        : ("ff-grid.unl.edu",                "#FFFDD0"),
              "Omaha1"       : ("ff-grid2.unl.edu",               "#99FF33"),
              "Omaha2"       : ("ff-grid3.unl.edu",               "#99CCCC"),
              "Purdue"       : ("osg.rcac.purdue.edu",            "#CFB53B"),
              "Purdue1"      : ("lepton.rcac.purdue.edu",         "#000000"),
              "UCSD"         : ("osg-gw-2.t2.ucsd.edu",           "#75B2DD"),
              "UCSD1"        : ("osg-gw-4.t2.ucsd.edu",           "#000080"),
              "UWMadisonCMS" : ("cmsgrid02.hep.wisc.edu",         "#C41E3A"),
              "UWMadisonCMS1": ("cmsgrid01.hep.wisc.edu",         "#008000")}
########################################


########################################
def fill_default(dictionary):
    '''Give every inner dictionary the full (union) set of inner keys,
    filling in missing entries with 0.

    Input example:
      {"Caltech": {date0: v0, date1: v1},
       "UCSD"   : {date0: v0, date2: v1}}
    Returns (list_of_all_inner_keys, filled_dictionary):
      {"Caltech": {date0: v0, date1: v1, date2: 0},
       "UCSD"   : {date0: v0, date1: 0,  date2: v1}}

    NOTE: the input dictionary is modified in place.'''
    default = 0
    # Collect the union of all inner keys (timestamps)
    list_keys = []
    for keyA in dictionary:
        for keyB in dictionary[keyA]:
            if keyB not in list_keys:
                list_keys.append(keyB)
    # Fill in whatever is missing
    for keyA in dictionary:
        for key in list_keys:
            if key not in dictionary[keyA]:
                dictionary[keyA][key] = default
    return list_keys, dictionary


def add_dictionaries(*list_dicts):
    '''Element-wise sum of several dictionaries.

    Input example:  {"a": v0, "b": v1}, {"a": v0, "c": v1}, ...
    Returns one dict whose value for each key is the sum of that key's
    truthy values across all inputs; keys whose values are all falsy
    (e.g. None from tsar) come out as 0.'''
    total = {}
    for d in list_dicts:
        for key in d:
            if d[key]:
                if key in total:
                    total[key] += d[key]
                else:
                    total[key] = d[key]
            else:
                # Value is None/0: make sure the key at least exists
                if key not in total:
                    total[key] = 0
    return total


def sum_dict_values(dictionaries):
    '''Sum per-site time series across servers.

    Input: dictionary of dictionaries, e.g.
      {"stevia.hep.wisc.edu" : {"Caltech": {date: value, ...}, ...},
       "caraway.hep.wisc.edu": {"Caltech": {date: value, ...}, ...},
       "cassia.hep.wisc.edu" : {...}}
    Output: (list_of_dates, per-site dict) where each site's series is
    the sum across all servers (so Caltech is summed over stevia,
    caraway, cassia), with missing dates filled as 0 by fill_default.'''
    total = {}
    for keyA in dictionaries:            # keyA = server
        for keyB in dictionaries[keyA]:  # keyB = site
            if not dictionaries[keyA][keyB]:
                continue                 # skip empty dictionaries
            if keyB in total:
                total[keyB] = add_dictionaries(total[keyB],
                                               dictionaries[keyA][keyB])
            else:
                total[keyB] = dictionaries[keyA][keyB]
    return fill_default(total)
########################################


########################################
# Get monitoring data from Tsar
from tsar.client import Tsar
tsar = Tsar()

def getMonitoringData(jobType, jobStatus):
    '''Return a dictionary with two keys:
       xValues: sorted list of timestamps,
       yValues: dict like {"site0": [v0, v1, ...], "site1": [...], ...}
                where each list is CUMULATIVE (this site plus every site
                after it alphabetically), so fill_between plots stack.'''
    ####
    # Step 1: Collect data from tsar.
    # tsar_data[server][site] = {timestamp: value, ...}
    tsar_data = {}
    for server_name in list_servers:
        tsar_data[server_name] = {}
        for site_name in list_sites:
            # e.g. 'prod_cit-gatekeeper.ultralight.org_processing_running_jobs'
            attributeQuery = '_'.join(("prod", list_sites[site_name][0],
                                       jobType, jobStatus, "jobs"))
            # BUGFIX: window was -2*84600; a day is 86400 seconds, and
            # the duration now honors the xAxisDaysDuration setting.
            tsarRawData = tsar.query(subject=server_name,
                                     attribute=attributeQuery,
                                     cf="max",
                                     start=-xAxisDaysDuration * 86400,
                                     interval=60,
                                     filters=["skipnull"])
            # tsar.query returns a list of tuples; fields 3:5 are
            # (timestamp, value) — presumably; verify against tsar docs.
            current_data = dict([x[3:5] for x in tsarRawData])
            if current_data:
                tsar_data[server_name][site_name] = current_data

    ####
    # Step 2: Sum across the servers
    list_timestamps, dict_all_site_data = sum_dict_values(tsar_data)

    ####
    # Step 3: To make a stacked plot of site data, add to each site the
    # data from every site "below" it (later alphabetically).
    dictJobsAtSites = {}
    for site in dict_all_site_data:
        dictJobsAtSites[site] = []
    for date in list_timestamps:
        runningTotal = 0
        # Reverse order so that the first site, alphabetically, is on top
        for site in sorted(dict_all_site_data, reverse=True):
            if date in dict_all_site_data[site]:
                runningTotal += dict_all_site_data[site][date]
            dictJobsAtSites[site].append(runningTotal)
    return {"xValues": sorted(list_timestamps),
            "yValues": dictJobsAtSites}
########################################


########################################
# Plot jobs vs time from monitoring data
def plotStackedJobsVsTime(jobType, jobStatus, outputImageName, plotTitle):
    '''Make a stacked fill plot of job counts vs time for all sites and
    save it to outputImageName.'''
    dictRawData = getMonitoringData(jobType, jobStatus)
    yValues = dictRawData["yValues"]  # dict of {site: [cumulative values]}
    xValues = dictRawData["xValues"]  # list of x-values (timestamps)

    # Set up plot
    fig = plt.figure(figsize=outputImageSize)
    plt.ylabel("Number of Jobs")
    plt.title(plotTitle)
    ax = fig.add_subplot(111)
    # Adjust location of edges of plot (in % of image size)
    plt.subplots_adjust(bottom=0.14, right=0.82)

    ###############
    # Plot the data. The series are already cumulative, so drawing the
    # sites in alphabetical order paints each fill over the ones below.
    listOfProxyArtists = []
    listOfLegendNames = []
    for column in sorted(yValues):
        ax.fill_between(xValues, yValues[column],
                        color=list_sites[column][1])
        # legend() doesn't support fill_between: make zero-size proxy
        # rectangles to serve as legend handles (matplotlib workaround).
        p = plt.Rectangle((0, 0), 1, 1, fc=list_sites[column][1])
        listOfProxyArtists.append(p)
        listOfLegendNames.append(column)
    ###############

    import matplotlib.font_manager  # Legend
    prop = matplotlib.font_manager.FontProperties(size=7)  # Font Size
    legend = plt.legend(listOfProxyArtists, listOfLegendNames,
                        bbox_to_anchor=(1.25, 1), shadow=True,
                        fancybox=True, prop=prop)

    ax.xaxis.set_major_locator(xAxisMajorTics)  # Set major tick marks
    ax.xaxis.set_minor_locator(xAxisMinorTics)  # Set minor tick marks
    # Format x-axis labels
    ax.xaxis.set_major_formatter(mdates.DateFormatter(xAxisTimeFormat))
    if "merge" in plotTitle:
        ax.set_ylim(0, mergePlotYAxisMax)
    else:
        ax.set_ylim(0, routePlotYAxisMax)
    ax.grid(True)  # Turn on x&y grid

    fig.savefig(outputImageName)
    plt.clf()
########################################


########################################
# Make plots for all job types and job statuses
if __name__ == '__main__':
    try:
        for jobType in list_jobTypes:
            for jobStatus in list_jobStatuses:
                outputFile = (outImageDirectory +
                              '_'.join((jobType, jobStatus)) + ".png")
                outputTitle = (' '.join((jobType, jobStatus)) +
                               datetime.datetime.today().strftime(" (%b %e %Y)"))
                plotStackedJobsVsTime(jobType, jobStatus,
                                      outputFile, outputTitle)
    finally:
        # Always remove the matplotlib tempdir, even if a plot fails
        # partway through (previously it leaked on any exception).
        shutil.rmtree(tmpdir)
########################################
# To profile instead of just running, use:
#   import cProfile, pstats
#   cProfile.run('main-equivalent code here', 'fooprof')
#   pstats.Stats('fooprof').sort_stats('cumulative').print_stats(15)
