import os
import re
import json
import logging
from subprocess import check_output
from collections import OrderedDict
from json.decoder import JSONDecodeError


def read_json_stats(file):
    # Parse a single fio JSON output file and return read/write IOPS, bandwidth
    # and latency statistics (average, min, max and percentiles) in microseconds.
    with open(file, "r") as json_data:
        data = json.load(json_data)
        job_pos = 0  # 0 because using aggregated results

        # Check if latency is in nanoseconds or microseconds to choose the correct dict key
        def get_lat_unit(key_prefix, dict_section):
            # key_prefix - "lat", "clat" or "slat"
            # dict_section - portion of the JSON containing the latency bucket in question
            # Returns the dict key used to access the bucket and the unit as a string
            for k, _ in dict_section.items():
                if k.startswith(key_prefix):
                    return k, k.split("_")[1]

        def get_clat_percentiles(clat_dict_leaf):
            if "percentile" in clat_dict_leaf:
                p99_lat = float(clat_dict_leaf["percentile"]["99.000000"])
                p99_9_lat = float(clat_dict_leaf["percentile"]["99.900000"])
                p99_99_lat = float(clat_dict_leaf["percentile"]["99.990000"])
                p99_999_lat = float(clat_dict_leaf["percentile"]["99.999000"])

                return [p99_lat, p99_9_lat, p99_99_lat, p99_999_lat]
            else:
                # Latest fio versions do not provide "percentile" results if no
                # measurements were done, so just return zeroes
                return [0, 0, 0, 0]

        read_iops = float(data["jobs"][job_pos]["read"]["iops"])
        read_bw = float(data["jobs"][job_pos]["read"]["bw"])
        lat_key, lat_unit = get_lat_unit("lat", data["jobs"][job_pos]["read"])
        read_avg_lat = float(data["jobs"][job_pos]["read"][lat_key]["mean"])
        read_min_lat = float(data["jobs"][job_pos]["read"][lat_key]["min"])
        read_max_lat = float(data["jobs"][job_pos]["read"][lat_key]["max"])
        clat_key, clat_unit = get_lat_unit("clat", data["jobs"][job_pos]["read"])
        read_p99_lat, read_p99_9_lat, read_p99_99_lat, read_p99_999_lat = get_clat_percentiles(
            data["jobs"][job_pos]["read"][clat_key])

        if "ns" in lat_unit:
            read_avg_lat, read_min_lat, read_max_lat = [x / 1000 for x in [read_avg_lat, read_min_lat, read_max_lat]]
        if "ns" in clat_unit:
            read_p99_lat = read_p99_lat / 1000
            read_p99_9_lat = read_p99_9_lat / 1000
            read_p99_99_lat = read_p99_99_lat / 1000
            read_p99_999_lat = read_p99_999_lat / 1000

        write_iops = float(data["jobs"][job_pos]["write"]["iops"])
        write_bw = float(data["jobs"][job_pos]["write"]["bw"])
        lat_key, lat_unit = get_lat_unit("lat", data["jobs"][job_pos]["write"])
        write_avg_lat = float(data["jobs"][job_pos]["write"][lat_key]["mean"])
        write_min_lat = float(data["jobs"][job_pos]["write"][lat_key]["min"])
        write_max_lat = float(data["jobs"][job_pos]["write"][lat_key]["max"])
        clat_key, clat_unit = get_lat_unit("clat", data["jobs"][job_pos]["write"])
        write_p99_lat, write_p99_9_lat, write_p99_99_lat, write_p99_999_lat = get_clat_percentiles(
            data["jobs"][job_pos]["write"][clat_key])

        if "ns" in lat_unit:
            write_avg_lat, write_min_lat, write_max_lat = [x / 1000 for x in [write_avg_lat, write_min_lat, write_max_lat]]
        if "ns" in clat_unit:
            write_p99_lat = write_p99_lat / 1000
            write_p99_9_lat = write_p99_9_lat / 1000
            write_p99_99_lat = write_p99_99_lat / 1000
            write_p99_999_lat = write_p99_999_lat / 1000

    return [read_iops, read_bw, read_avg_lat, read_min_lat, read_max_lat,
            read_p99_lat, read_p99_9_lat, read_p99_99_lat, read_p99_999_lat,
            write_iops, write_bw, write_avg_lat, write_min_lat, write_max_lat,
            write_p99_lat, write_p99_9_lat, write_p99_99_lat, write_p99_999_lat]


def parse_results(results_dir, csv_file):
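    # Walk results_dir, pair each .fio job config with its per-initiator .json
    # result files, average repeated runs, write per-initiator CSV files and
    # append one aggregated row per job to csv_file.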
    files = os.listdir(results_dir)
    fio_files = [x for x in files if ".fio" in x]
    json_files = [x for x in files if ".json" in x]

    headers = ["read_iops", "read_bw", "read_avg_lat_us", "read_min_lat_us", "read_max_lat_us",
               "read_p99_lat_us", "read_p99.9_lat_us", "read_p99.99_lat_us", "read_p99.999_lat_us",
               "write_iops", "write_bw", "write_avg_lat_us", "write_min_lat_us", "write_max_lat_us",
               "write_p99_lat_us", "write_p99.9_lat_us", "write_p99.99_lat_us", "write_p99.999_lat_us"]

    aggr_headers = ["iops", "bw", "avg_lat_us", "min_lat_us", "max_lat_us",
                    "p99_lat_us", "p99.9_lat_us", "p99.99_lat_us", "p99.999_lat_us"]

    header_line = ",".join(["Name", *headers])
    aggr_header_line = ",".join(["Name", *aggr_headers])

    # Create empty results file
    with open(os.path.join(results_dir, csv_file), "w") as fh:
        fh.write(aggr_header_line + "\n")
    rows = set()

    for fio_config in fio_files:
        logging.info("Getting FIO stats for %s" % fio_config)
        job_name, _ = os.path.splitext(fio_config)

        # Look in the filename for the rwmixread value; the function arguments
        # do not carry that information.
        # TODO: Improve this function by directly using workload params instead
        # of regexing through filenames.
        if "read" in job_name:
            rw_mixread = 1
        elif "write" in job_name:
            rw_mixread = 0
        else:
            rw_mixread = float(re.search(r"m_(\d+)", job_name).group(1)) / 100

        # If "_CPU" exists in the name, ignore it.
        # Initiators for the same job could have a different num_cores parameter.
        job_name = re.sub(r"_\d+CPU", "", job_name)
        job_result_files = [x for x in json_files if x.startswith(job_name)]
        logging.info("Matching result files for current fio config:")
        for j in job_result_files:
            logging.info("\t %s" % j)

        # More than one initiator may have been used in the test, so check for that.
        # Result files are named so that the string after the last "_" separator is the initiator (server) name.
        inits_names = set(os.path.splitext(x)[0].split("_")[-1] for x in job_result_files)
        inits_avg_results = []
        for i in inits_names:
            logging.info("\tGetting stats for initiator %s" % i)
            # There may have been more than one test run for this job;
            # calculate average results for this initiator.
            i_results = [x for x in job_result_files if i in x]
            i_results_filename = re.sub(r"run_\d+_", "", i_results[0].replace("json", "csv"))

            separate_stats = []
            for r in i_results:
                try:
                    stats = read_json_stats(os.path.join(results_dir, r))
                    separate_stats.append(stats)
                    logging.info(stats)
                except JSONDecodeError:
                    logging.error("ERROR: Failed to parse %s results! Results might be incomplete!" % r)

            init_results = [sum(x) for x in zip(*separate_stats)]
            init_results = [x / len(separate_stats) for x in init_results]
            inits_avg_results.append(init_results)

            logging.info("\tAverage results for initiator %s" % i)
            logging.info(init_results)
            with open(os.path.join(results_dir, i_results_filename), "w") as fh:
                fh.write(header_line + "\n")
                fh.write(",".join([job_name, *["{0:.3f}".format(x) for x in init_results]]) + "\n")

        # Sum results of all initiators running this FIO job.
        # Latency results are an average of latencies across all initiators.
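        # At this point inits_avg_results holds one entry per initiator, each a
        # list of stats in the same order as "headers".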
        inits_avg_results = [sum(x) for x in zip(*inits_avg_results)]
        inits_avg_results = OrderedDict(zip(headers, inits_avg_results))
        for key in inits_avg_results:
            if "lat" in key:
                inits_avg_results[key] /= len(inits_names)

        # Aggregate separate read/write values into common labels.
        # Take rw_mixread into consideration for mixed read/write workloads.
        aggregate_results = OrderedDict()
        for h in aggr_headers:
            read_stat, write_stat = [float(value) for key, value in inits_avg_results.items() if h in key]
            if "lat" in h:
                aggr_value = rw_mixread * read_stat + (1 - rw_mixread) * write_stat
            else:
                aggr_value = read_stat + write_stat
            aggregate_results[h] = "{0:.3f}".format(aggr_value)

        rows.add(",".join([job_name, *aggregate_results.values()]))

    # Save results to file
    with open(os.path.join(results_dir, csv_file), "a") as fh:
        for row in rows:
            fh.write(row + "\n")
    logging.info("You can find the test results in the file %s" % os.path.join(results_dir, csv_file))
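
# Illustrative usage sketch (not part of the original script): assuming the fio
# JSON result files for a test have already been collected in a results
# directory (the path and CSV name below are hypothetical), the aggregated CSV
# could be produced with:
#
#     logging.basicConfig(level=logging.INFO)
#     parse_results("/tmp/nvmf_perf_results", "nvmf_results.csv")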