# SPDX-License-Identifier: BSD-3-Clause
# Copyright (C) 2018 Intel Corporation.
# All rights reserved.

import os
import re
import json
import logging
from subprocess import check_output
from collections import OrderedDict
from json.decoder import JSONDecodeError


def read_json_stats(file):
    # Parse a single fio JSON result file and return a flat list of read and
    # write IOPS, bandwidth and latency statistics (latencies in microseconds).
    with open(file, "r") as json_data:
        data = json.load(json_data)
        job_pos = 0  # job_pos = 0 because using aggregated results

        # Check if latency is in nanoseconds or microseconds to choose the correct dict key
        def get_lat_unit(key_prefix, dict_section):
            # key_prefix - lat, clat or slat.
            # dict_section - portion of the json containing the latency bucket in question.
            # Return the dict key used to access the bucket and the unit as a string.
            for k, _ in dict_section.items():
                if k.startswith(key_prefix):
                    return k, k.split("_")[1]

        def get_clat_percentiles(clat_dict_leaf):
            if "percentile" in clat_dict_leaf:
                p99_lat = float(clat_dict_leaf["percentile"]["99.000000"])
                p99_9_lat = float(clat_dict_leaf["percentile"]["99.900000"])
                p99_99_lat = float(clat_dict_leaf["percentile"]["99.990000"])
                p99_999_lat = float(clat_dict_leaf["percentile"]["99.999000"])

                return [p99_lat, p99_9_lat, p99_99_lat, p99_999_lat]
            else:
                # Latest fio versions do not provide "percentile" results if no
                # measurements were done, so just return zeroes
                return [0, 0, 0, 0]

        read_iops = float(data["jobs"][job_pos]["read"]["iops"])
        read_bw = float(data["jobs"][job_pos]["read"]["bw"])
        lat_key, lat_unit = get_lat_unit("lat", data["jobs"][job_pos]["read"])
        read_avg_lat = float(data["jobs"][job_pos]["read"][lat_key]["mean"])
        read_min_lat = float(data["jobs"][job_pos]["read"][lat_key]["min"])
        read_max_lat = float(data["jobs"][job_pos]["read"][lat_key]["max"])
        clat_key, clat_unit = get_lat_unit("clat", data["jobs"][job_pos]["read"])
        read_p99_lat, read_p99_9_lat, read_p99_99_lat, read_p99_999_lat = get_clat_percentiles(
            data["jobs"][job_pos]["read"][clat_key])

        if "ns" in lat_unit:
            read_avg_lat, read_min_lat, read_max_lat = [x / 1000 for x in [read_avg_lat, read_min_lat, read_max_lat]]
        if "ns" in clat_unit:
            read_p99_lat = read_p99_lat / 1000
            read_p99_9_lat = read_p99_9_lat / 1000
            read_p99_99_lat = read_p99_99_lat / 1000
            read_p99_999_lat = read_p99_999_lat / 1000

        write_iops = float(data["jobs"][job_pos]["write"]["iops"])
        write_bw = float(data["jobs"][job_pos]["write"]["bw"])
        lat_key, lat_unit = get_lat_unit("lat", data["jobs"][job_pos]["write"])
        write_avg_lat = float(data["jobs"][job_pos]["write"][lat_key]["mean"])
        write_min_lat = float(data["jobs"][job_pos]["write"][lat_key]["min"])
        write_max_lat = float(data["jobs"][job_pos]["write"][lat_key]["max"])
        clat_key, clat_unit = get_lat_unit("clat", data["jobs"][job_pos]["write"])
        write_p99_lat, write_p99_9_lat, write_p99_99_lat, write_p99_999_lat = get_clat_percentiles(
            data["jobs"][job_pos]["write"][clat_key])

        if "ns" in lat_unit:
            write_avg_lat, write_min_lat, write_max_lat = [x / 1000 for x in [write_avg_lat, write_min_lat, write_max_lat]]
        if "ns" in clat_unit:
            write_p99_lat = write_p99_lat / 1000
            write_p99_9_lat = write_p99_9_lat / 1000
            write_p99_99_lat = write_p99_99_lat / 1000
            write_p99_999_lat = write_p99_999_lat / 1000

    return [read_iops, read_bw, read_avg_lat, read_min_lat, read_max_lat,
            read_p99_lat, read_p99_9_lat, read_p99_99_lat, read_p99_999_lat,
            write_iops, write_bw, write_avg_lat, write_min_lat, write_max_lat,
            write_p99_lat, write_p99_9_lat, write_p99_99_lat, write_p99_999_lat]
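
# Illustrative (abridged, hypothetical values) shape of the fio JSON output that
# read_json_stats() above consumes; exact key names depend on the fio version,
# e.g. latencies may be reported under "lat_ns"/"clat_ns" or "lat_us"/"clat_us":
#
# {
#   "jobs": [
#     {
#       "read":  {"iops": ..., "bw": ...,
#                 "lat_ns":  {"min": ..., "max": ..., "mean": ...},
#                 "clat_ns": {"percentile": {"99.000000": ..., "99.900000": ...,
#                                            "99.990000": ..., "99.999000": ...}}},
#       "write": { ...same layout as "read"... }
#     }
#   ]
# }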


def read_target_stats(measurement_name, results_file_list, results_dir):
    # Read additional metrics measured on the target side and calculate
    # the average across all workload iterations.
    # Currently this only works for SAR CPU utilization and power draw
    # measurements. Others (bwm-ng, pcm, dpdk memory) need to be refactored
    # to provide more structured result files instead of an output dump.
    total_util = 0
    for result_file in results_file_list:
        with open(os.path.join(results_dir, result_file), "r") as result_file_fh:
            total_util += float(result_file_fh.read())
    avg_util = total_util / len(results_file_list)

    return {measurement_name: "{0:.3f}".format(avg_util)}


def parse_results(results_dir, csv_file):
    # Parse all fio JSON, SAR and power measurement files found in results_dir
    # and write an aggregated per-job summary to csv_file.
    files = os.listdir(results_dir)
    fio_files = [x for x in files if ".fio" in x]
    json_files = [x for x in files if ".json" in x]
    sar_files = [x for x in files if "sar" in x and "util" in x]
    pm_files = [x for x in files if "pm" in x and "avg" in x]

    headers = ["read_iops", "read_bw", "read_avg_lat_us", "read_min_lat_us", "read_max_lat_us",
               "read_p99_lat_us", "read_p99.9_lat_us", "read_p99.99_lat_us", "read_p99.999_lat_us",
               "write_iops", "write_bw", "write_avg_lat_us", "write_min_lat_us", "write_max_lat_us",
               "write_p99_lat_us", "write_p99.9_lat_us", "write_p99.99_lat_us", "write_p99.999_lat_us"]

    header_line = ",".join(["Name", *headers])
    rows = set()

    for fio_config in fio_files:
        logging.info("Getting FIO stats for %s" % fio_config)
        job_name, _ = os.path.splitext(fio_config)
        aggr_headers = ["iops", "bw", "avg_lat_us", "min_lat_us", "max_lat_us",
                        "p99_lat_us", "p99.9_lat_us", "p99.99_lat_us", "p99.999_lat_us"]

        # Look in the filename for the rwmixread value; the function arguments
        # do not carry that information.
        # TODO: Improve this by using the workload parameters directly instead
        # of regexing through filenames.
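        # For example (hypothetical file name): a config called "randrw_m_70_4k.fio"
        # would yield rw_mixread = 0.70, i.e. 70% reads and 30% writes.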
        if "read" in job_name:
            rw_mixread = 1
        elif "write" in job_name:
            rw_mixread = 0
        else:
            rw_mixread = float(re.search(r"m_(\d+)", job_name).group(1)) / 100

        # If "_CPU" exists in the name - ignore it.
        # Initiators for the same job could have a different num_cores parameter.
        job_name = re.sub(r"_\d+CPU", "", job_name)
        job_result_files = [x for x in json_files if x.startswith(job_name)]
        sar_result_files = [x for x in sar_files if x.startswith(job_name)]
        pm_result_files = [x for x in pm_files if x.startswith(job_name)]

        logging.info("Matching result files for current fio config %s:" % job_name)
        for j in job_result_files:
            logging.info("\t %s" % j)

        # There may have been more than one initiator used in the test, so check for that.
        # Result files are named so that the string after the last "_" separator is the server name.
        inits_names = set([os.path.splitext(x)[0].split("_")[-1] for x in job_result_files])
        inits_avg_results = []
        for i in inits_names:
            logging.info("\tGetting stats for initiator %s" % i)
            # There may have been more than one test run for this job;
            # calculate average results for the initiator.
            i_results = [x for x in job_result_files if i in x]
            i_results_filename = re.sub(r"run_\d+_", "", i_results[0].replace("json", "csv"))

            separate_stats = []
            for r in i_results:
                try:
                    stats = read_json_stats(os.path.join(results_dir, r))
                    separate_stats.append(stats)
                    logging.info(stats)
                except JSONDecodeError:
                    logging.error("ERROR: Failed to parse %s results! Results might be incomplete!" % r)

            init_results = [sum(x) for x in zip(*separate_stats)]
            init_results = [x / len(separate_stats) for x in init_results]
            inits_avg_results.append(init_results)

            logging.info("\tAverage results for initiator %s" % i)
            logging.info(init_results)
            with open(os.path.join(results_dir, i_results_filename), "w") as fh:
                fh.write(header_line + "\n")
                fh.write(",".join([job_name, *["{0:.3f}".format(x) for x in init_results]]) + "\n")

        # Sum results of all initiators running this FIO job.
        # Latency results are an average of latencies from across all initiators.
        inits_avg_results = [sum(x) for x in zip(*inits_avg_results)]
        inits_avg_results = OrderedDict(zip(headers, inits_avg_results))
        for key in inits_avg_results:
            if "lat" in key:
                inits_avg_results[key] /= len(inits_names)

        # Aggregate separate read/write values into common labels.
        # Take rw_mixread into consideration for mixed read/write workloads.
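        # Illustrative aggregation rule applied below: with rw_mixread = 0.70, the
        # aggregated latency is 0.70 * read_lat + 0.30 * write_lat, while IOPS and
        # bandwidth are simply summed across the read and write halves.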
        aggregate_results = OrderedDict()
        for h in aggr_headers:
            read_stat, write_stat = [float(value) for key, value in inits_avg_results.items() if h in key]
            if "lat" in h:
                aggr_stat = rw_mixread * read_stat + (1 - rw_mixread) * write_stat
            else:
                aggr_stat = read_stat + write_stat
            aggregate_results[h] = "{0:.3f}".format(aggr_stat)

        if sar_result_files:
            aggr_headers.append("target_avg_cpu_util")
            aggregate_results.update(read_target_stats("target_avg_cpu_util", sar_result_files, results_dir))

        if pm_result_files:
            aggr_headers.append("target_avg_power")
            aggregate_results.update(read_target_stats("target_avg_power", pm_result_files, results_dir))

        rows.add(",".join([job_name, *aggregate_results.values()]))

    # Save the aggregated results: write the header line first, then one row per fio job.
    aggr_header_line = ",".join(["Name", *aggr_headers])
    with open(os.path.join(results_dir, csv_file), "w") as fh:
        fh.write(aggr_header_line + "\n")
        for row in rows:
            fh.write(row + "\n")
    logging.info("You can find the test results in the file %s" % os.path.join(results_dir, csv_file))
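

# Minimal usage sketch (hypothetical paths; this module is normally imported by
# the surrounding test scripts rather than run directly):
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    parse_results("/tmp/nvmf_perf_results", "nvmf_results.csv")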