# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import argparse
import json
import os
import sys
import time
import urllib.request
from urllib.error import URLError

import yaml

import sfconfig.utils
#############################
# InfluxDB query management #
#############################
CONTINUOUS_QUERY = {
'zuul.all_jobs': {
'measurement': '/zuul.tenant.*.pipeline.*.all_jobs/',
},
}
NODE_STATUS = ("building", "ready", "in-use", "used",
"deleting", "hold", "failed")
for node_status in NODE_STATUS:
    CONTINUOUS_QUERY['nodepool.all_providers.%s' % node_status] = {
'measurement': '/nodepool.provider.*.nodes.%s/' % node_status}
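# For illustration, update_cq_action below expands each CONTINUOUS_QUERY
# entry into an InfluxQL statement of the following form (with the default
# 30s resample time and sum() function):
#   CREATE CONTINUOUS QUERY "qc_zuul.all_jobs" ON telegraf
#   RESAMPLE FOR 30s
#   BEGIN SELECT sum(value) AS value INTO "zuul.all_jobs"
#   FROM /zuul.tenant.*.pipeline.*.all_jobs/
#   GROUP BY time(30s) fill(0) END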
def update_cq_action(args):
    '''Update InfluxDB continuous queries; return 4 on change'''
if args.influxdb_host is None or args.influxdb_password is None:
sfconfig.utils.fail("Host and password is required")
influx_cmd = ['influx', '-ssl', '-host', args.influxdb_host,
'-port', '8086', '-username', 'admin', '-password',
args.influxdb_password, '-database', 'telegraf', '-execute']
if sfconfig.utils.pread(["podman", "container", "list", "-a", "--format",
"'{{ .Names}}'", "--filter", "name=^influxdb$"]):
influx_cmd = ["podman", "exec", "-t", "influxdb"] + influx_cmd
queries = sfconfig.utils.pread(
influx_cmd + ["SHOW CONTINUOUS QUERIES"]).split('\n')
changed = False
    for cq, cq_data in CONTINUOUS_QUERY.items():
cmd = 'CREATE CONTINUOUS QUERY "qc_%s" ON telegraf' % cq
cmd += ' RESAMPLE FOR %s' % cq_data.get('time', '30s')
cmd += ' BEGIN SELECT %s(value) AS value' % cq_data.get('function',
'sum')
cmd += ' INTO "%s"' % cq
cmd += ' FROM %s' % cq_data['measurement']
cmd += ' GROUP BY time(%s)' % cq_data.get('time', '30s')
cmd += ' fill(%s)' % cq_data.get('resample_fill', '0')
cmd += ' END'
to_create = True
for query in queries:
if query.startswith("qc_%s " % cq):
# Query name already exists, check if it needs update
query = query.split(None, 1)[1]
# Fix query creation command to match show command output
qmd = cmd.replace("INTO ", "INTO telegraf.autogen.")
qmd = qmd.replace("FROM ", "FROM telegraf.autogen.")
if query == qmd:
to_create = False
else:
                    # ALTER doesn't exist; we need to drop the previous one
print("Removing old %s" % cq)
sfconfig.utils.execute(influx_cmd + [
'DROP CONTINUOUS QUERY "qc_%s" ON "telegraf"' % cq])
break
if to_create:
changed = True
print("Adding %s" % cq)
sfconfig.utils.execute(influx_cmd + [cmd])
return 4 if changed else 0
#######################
# Grafyaml generation #
#######################
def dashboard(name, description):
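    '''Return a grafyaml dashboard skeleton with a single description row'''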
desc = "**%s**\n\n%s" % (
description,
'This dashboard is automatically generated by sfconfig.'
' If you would like to make changes, check the graph_render module'
)
return {
'title': name,
'rows': [{
'title': 'Description',
'height': '100px',
'panels': [{
'title': 'Description',
'type': 'text',
'content': desc,
}]
}
]}
def row(title, height='150px', show_title=True):
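    '''Return an empty grafyaml row to be filled with panels'''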
return {
'title': title.capitalize(),
'height': height,
'showTitle': show_title,
'panels': []}
def single_stat(title, measurement, span=2, tags=True):
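    '''Return a singlestat panel showing the current value of a measurement'''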
stat = {
'title': title.capitalize(),
'span': span,
'sparkline': {'full': True, 'show': True},
'targets': [{
'groupBy': [{'type': 'time', 'params': ['$__interval']},
{'type': 'fill', 'params': ['0']}],
'measurement': measurement,
'orderByTime': 'ASC',
'resultFormat': 'time_series',
'select': [[{'type': 'field', 'params': ['value']},
{'type': 'distinct', 'params': []}]],
}],
'type': 'singlestat',
'valueName': 'current',
}
if tags:
for target in stat['targets']:
target['tags'] = [{'key': 'metric_type', 'operator': '=',
'value': 'gauge'}]
return stat
def graph(title, metrics, span=4, interval='$__interval', scale=None,
select_fct='count', field='value', stack=False, tooltip=None,
seriesOverrides=None, legend=None, yaxes_format=None):
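    '''Return a graph panel with one InfluxDB target per metric.

    Each metric is a dict holding the measurement under 'm' and optional
    'alias' and 'type' (the metric_type tag value, defaulting to 'gauge').
    '''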
targets = []
for metric in metrics:
if not metric.get('alias'):
metric['alias'] = metric['m'].split('.')[-1].replace(
'/', '').replace('_', ' ').capitalize()
select = [
{'type': 'field', 'params': [field]},
{'type': select_fct, 'params': []},
]
if scale:
select.append({'type': 'math', 'params': [scale]})
targets.append({
'groupBy': [{'type': 'time', 'params': [interval]},
{'type': 'fill', 'params': ['0']}],
'measurement': metric['m'],
'alias': metric['alias'],
'orderByTime': 'ASC',
'resultFormat': 'time_series',
'select': [select],
'tags': [{'key': 'metric_type',
'operator': '=',
'value': metric.get('type', 'gauge')}]
})
g = {
'title': title.capitalize(),
'span': span,
'targets': targets,
'type': 'graph',
'yaxes': [{"min": 0}, {"min": 0}]
}
if legend:
g['legend'] = legend
if yaxes_format:
g['yaxes'] = [yaxes_format, yaxes_format]
if stack:
g['stack'] = True
if seriesOverrides:
g['seriesOverrides'] = [{'alias': seriesOverrides, 'stack': False}]
if tooltip:
g['tooltip'] = {'value_type': tooltip}
return g
def zuul_dashboard(args):
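    '''Build the Zuul Status dashboard: per-tenant pipeline stats plus
    global, executor and merger rows'''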
d = dashboard("Zuul Status", "The Zuul service metrics")
for tenant in args.tenants:
tenant_row = row("%s's pipelines" % tenant['name'].capitalize())
for pipeline in tenant['pipelines']:
span = 3 if pipeline in ("check", "gate") else 2
tenant_row['panels'].append(single_stat(
pipeline,
'zuul.tenant.%s.pipeline.%s.current_changes' % (
tenant['name'], pipeline),
span))
d['rows'].append(tenant_row)
global_row = row("Global stats", height="250px")
global_row["panels"].append(graph(
'Jobs Launched (per Hour)',
[{'m': '/zuul.tenant.*.pipeline.*.all_jobs/', 'alias': '$5',
'type': 'counter'}], select_fct='distinct', interval='1h'))
global_row["panels"].append(graph(
'Node Requests',
[{'m': 'zuul.nodepool.current_requests', 'alias': 'Requests'}],
select_fct='distinct'))
global_row["panels"].append(graph(
'Job Queue',
[{'m': 'zuul.geard.queue.running'},
{'m': 'zuul.geard.queue.waiting'},
{'m': 'zuul.geard.queue.total'}],
select_fct='distinct'))
global_row["panels"].append(graph(
'Gerrit Events (per Hour)',
[{'m': 'zuul.event.gerrit.comment-added', 'type': 'counter'},
{'m': 'zuul.event.gerrit.patchset-created', 'type': 'counter'},
{'m': 'zuul.event.gerrit.change-merged', 'type': 'counter'}],
interval='1h', select_fct='distinct'))
global_row["panels"].append(graph(
'Test Nodes',
[{'m': 'nodepool.nodes.building'},
{'m': 'nodepool.nodes.ready'},
{'m': 'nodepool.nodes.in-use'},
{'m': 'nodepool.nodes.used'},
{'m': 'nodepool.nodes.deleting'}, ],
select_fct='distinct',
stack=True,
seriesOverrides='Max',
tooltip='individual'))
d["rows"].append(global_row)
executors_row = row("Executors", height="250px")
executors_row["panels"].append(graph(
"Executors",
[{"m": "zuul.executors.online"}, {"m": "zuul.executors.accepting"}],
select_fct='distinct',
span=6))
executors_row["panels"].append(graph(
"Executors Queue",
[{"m": "zuul.executors.jobs_queued"},
{"m": "zuul.executors.jobs_running"}],
select_fct='distinct',
span=6))
executors_row["panels"].append(graph(
"Starting Builds",
[{"m": "/zuul.executor.*.starting_builds/", 'alias': '$2'}],
select_fct='distinct',
span=6))
executors_row["panels"].append(graph(
"Running Builds",
[{"m": "/zuul.executor.*.running_builds/", 'alias': '$2'}],
select_fct='distinct',
span=6))
executors_row["panels"].append(graph(
"Load Average",
[{"m": "/zuul.executor.*.load_average/", 'alias': '$2'}],
scale="/ 100",
select_fct='distinct',
span=6))
executors_row["panels"].append(graph(
"Used HDD",
[{"m": "/zuul.executor.*.pct_used_hdd/", 'alias': '$2'}],
scale="/ 100",
select_fct='distinct',
span=6))
executors_row["panels"].append(graph(
"Used RAM",
[{"m": "/zuul.executor.*.pct_used_ram/", 'alias': '$2'}],
scale="/ 100",
select_fct='distinct',
span=6))
d["rows"].append(executors_row)
mergers_row = row("Mergers", height="250px")
mergers_row["panels"].append(graph(
"Mergers", [{"m": "zuul.mergers.online"}], span=6,
select_fct='distinct'))
mergers_row["panels"].append(graph(
"Merger Queue", [
{"m": "zuul.mergers.jobs_queued"},
{"m": "zuul.mergers.jobs_running"}], span=6,
select_fct='distinct'))
d["rows"].append(mergers_row)
return d
def nodepool_dashboard(args):
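    '''Build the Nodepool Status dashboard: node status counts, per-label
    node graphs and launch statistics'''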
d = dashboard("Nodepool Status", "The Nodepool service metrics")
nodes_row = row("Nodes", height="150px")
for status in NODE_STATUS:
nodes_row["panels"].append(single_stat(
status,
'/nodepool.all_providers.%s/' % status,
tags=False
))
d["rows"].append(nodes_row)
images_row = row("Images", height="320px")
for status in ("building", "ready", "in-use", "deleting"):
images_row["panels"].append(graph(
status,
[{"m": "/nodepool.label.*.nodes.%s/" % status, 'alias': '$2'}],
span=3))
d["rows"].append(images_row)
launch_row = row("Launches", height="250px")
launch_row["panels"].append(graph(
'Ready Node Launch Attempts',
[{'m': '/nodepool.launch.provider.*.ready/', 'alias': '$3',
'type': 'counter'}], yaxes_format={'label': 'events/min'},
interval='1m'))
launch_row["panels"].append(graph(
'Error Node Launch Attempts',
[{'m': '/nodepool.launch.provider.*.error/', 'alias': '$5',
'type': 'counter'}], yaxes_format={'label': 'events/min'},
interval='1m'))
launch_row["panels"].append(graph(
'Time to Ready',
[{'m': '/nodepool.launch.provider.*.ready/', 'alias': '$3',
'type': 'timing'}], select_fct='distinct', field='mean',
yaxes_format={'label': 'time', 'format': 'ms'}))
d["rows"].append(launch_row)
return d
def provider_dashboard(args, provider):
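    '''Build a per-provider dashboard; OpenStack providers also get a row
    of API operation timings'''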
d = dashboard("Provider - %s" % provider["name"],
"The Nodepool provider %s metrics" % provider["name"])
nodes_row = row("Nodes", height="150px")
for status in NODE_STATUS:
nodes_row["panels"].append(single_stat(
status,
'/nodepool.provider.%s.nodes.%s/' % (provider["name"], status)))
d["rows"].append(nodes_row)
if provider["driver"] == "openstack":
api_row = row("API Operations", height="250px")
for name, metric in (("Create Server", "ComputePostServers"),
("Get Server", "ComputeGetServersDetail"),
("Delete Server", "ComputeDeleteServers"),
("List Server", "ComputeGetServers"),
("Get Limits", "ComputeGetLimits")):
api_row["panels"].append(graph(
name, [{"m": "nodepool.task.%s.%s" % (
provider["name"], metric), 'type': 'timing',
'alias': provider["name"]}], select_fct='distinct',
field='mean', yaxes_format={'label': 'time', 'format': 'ms'},
span=4))
d["rows"].append(api_row)
launch_row = row("Node Launches", height="250px")
launch_row["panels"].append(graph(
'Ready Node Launch Attempts',
[{'m': '/nodepool.launch.provider.%s.ready/' % provider["name"],
'alias': '$3', 'type': 'counter'}], interval='1m',
yaxes_format={'label': 'events/min'}))
launch_row["panels"].append(graph(
'Error Node Launch Attempts',
[{'m': '/nodepool.launch.provider.%s.error/' % provider["name"],
'alias': '$5', 'type': 'counter'}], interval='1m',
yaxes_format={'label': 'events/min'}))
launch_row["panels"].append(graph(
'Time to Ready',
[{'m': '/nodepool.launch.provider.%s.ready/' % provider["name"],
'alias': '$3', 'type': 'timing'}], select_fct='distinct',
field='mean', yaxes_format={'label': 'time', 'format': 'ms'}))
launch_row["panels"].append(graph(
'Test Nodes',
[{'m': 'nodepool.provider.%s.nodes.building' % provider["name"]},
{'m': 'nodepool.provider.%s.nodes.ready' % provider["name"]},
{'m': 'nodepool.provider.%s.nodes.in-use' % provider["name"]},
{'m': 'nodepool.provider.%s.nodes.used' % provider["name"]},
{'m': 'nodepool.provider.%s.nodes.deleting' % provider["name"]}, ],
select_fct='distinct',
stack=True,
seriesOverrides='Max',
yaxes_format={'label': 'node'},
tooltip='individual'))
d["rows"].append(launch_row)
return d
def update_grafyaml_action(args):
    '''Update the grafyaml files; return 4 on change'''
if args.zuul_url is None or args.config_dir is None or \
args.output_dir is None:
sfconfig.utils.fail("Zuul url and config, output dir are required")
args.tenants = get_tenants(args.zuul_url)
args.providers = get_providers(args.config_dir)
data = {
"_zuul.yaml": zuul_dashboard(args),
"_nodepool.yaml": nodepool_dashboard(args)
}
for provider in args.providers:
data["_nodepool-%s.yaml" % provider["name"]] = provider_dashboard(
args, provider)
changed = False
    for name, dashboard_data in data.items():
        content = yaml.safe_dump({'dashboard': dashboard_data},
                                 default_flow_style=False)
        graf_file = os.path.join(args.output_dir, name)
        if os.path.exists(graf_file):
            with open(graf_file) as existing:
                if existing.read() == content:
                    continue
        changed = True
        with open(graf_file, "w") as out:
            out.write(content)
print("%s: updated content" % graf_file)
return 4 if changed else 0
############################
# Deployment configuration #
############################
def get(url):
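    '''GET the url, retrying every 10 seconds, up to 12 attempts'''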
for _ in range(12):
try:
return urllib.request.urlopen(url)
except URLError as err:
print("Unable to fetch %s due to: %s (will retry in 10s)" % (
url, err))
time.sleep(10)
    raise RuntimeError("Unable to fetch %s (after 12 attempts)" % url)
def get_tenants(zuul_url):
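    '''Fetch the tenants and their pipeline names from the Zuul API'''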
result = []
tenants = json.loads(get("%s/api/tenants" % zuul_url).read())
for tenant in tenants:
tenant_data = {"name": tenant["name"], "pipelines": []}
pipelines = json.loads(get(
"%s/api/tenant/%s/pipelines" % (zuul_url, tenant["name"])).read())
for pipeline in pipelines:
tenant_data["pipelines"].append(pipeline["name"])
result.append(tenant_data)
return result
def get_providers(config_dir):
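    '''Read provider names and drivers from the nodepool configuration'''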
result = []
nodepool_conf = sfconfig.utils.yaml_merge_load(os.path.join(config_dir,
"nodepool"))
for provider in nodepool_conf.get("providers", []):
result.append(
{"name": provider["name"],
"driver": provider.get("driver", "openstack")})
return result
#########
# Usage #
#########
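# Example invocations (the script name, hostnames and paths below are
# placeholders):
#   python graph_render.py --influxdb-host influxdb.example.com \
#       --influxdb-password secret update-cq
#   python graph_render.py --zuul-url https://zuul.example.com \
#       --config-dir /etc/software-factory/config \
#       --output-dir /var/lib/grafana/dashboards update-grafyaml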
def main():
p = argparse.ArgumentParser(
description="Generate dynamic grafyaml dashboard")
p.add_argument("--influxdb-host")
p.add_argument("--influxdb-password")
p.add_argument("--zuul-url")
p.add_argument("--config-dir")
p.add_argument("--output-dir")
p.add_argument("action", choices=("update-cq", "update-grafyaml"))
args = p.parse_args()
if args.action == "update-cq":
ret = update_cq_action(args)
elif args.action == "update-grafyaml":
ret = update_grafyaml_action(args)
    sys.exit(ret)
if __name__ == "__main__":
main()