# /spdk/scripts/perf/nvmf/common.py (revision c85df53551dd911ff9dbccfe5d24bf82f0a3d9bf)
import os
import re
import json
import logging
from subprocess import check_output
from collections import OrderedDict
from json.decoder import JSONDecodeError


def read_json_stats(file):
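    """Parse a single fio JSON result file into a flat list of statistics.

    Returns read IOPS, bandwidth and latency statistics, followed by the
    same set for writes; all latencies are normalized to microseconds.
    """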
    with open(file, "r") as json_data:
        data = json.load(json_data)
        job_pos = 0  # job_pos = 0 because we use aggregated results

        # Check whether latency is reported in nano- or microseconds to choose the correct dict key
        def get_lat_unit(key_prefix, dict_section):
            # key_prefix - "lat", "clat" or "slat"
            # dict_section - portion of the JSON containing the latency bucket in question
            # Returns the dict key used to access the bucket and the unit as a string
            for k in dict_section:
                if k.startswith(key_prefix):
                    return k, k.split("_")[1]
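            # e.g. a section with a "lat_ns" key yields ("lat_ns", "ns"),
            # while one with "lat_us" would yield ("lat_us", "us")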

        def get_clat_percentiles(clat_dict_leaf):
            if "percentile" in clat_dict_leaf:
                p99_lat = float(clat_dict_leaf["percentile"]["99.000000"])
                p99_9_lat = float(clat_dict_leaf["percentile"]["99.900000"])
                p99_99_lat = float(clat_dict_leaf["percentile"]["99.990000"])
                p99_999_lat = float(clat_dict_leaf["percentile"]["99.999000"])

                return [p99_lat, p99_9_lat, p99_99_lat, p99_999_lat]
            else:
                # Latest fio versions do not provide "percentile" results if no
                # measurements were done, so just return zeroes
                return [0, 0, 0, 0]

        read_iops = float(data["jobs"][job_pos]["read"]["iops"])
        read_bw = float(data["jobs"][job_pos]["read"]["bw"])
        lat_key, lat_unit = get_lat_unit("lat", data["jobs"][job_pos]["read"])
        read_avg_lat = float(data["jobs"][job_pos]["read"][lat_key]["mean"])
        read_min_lat = float(data["jobs"][job_pos]["read"][lat_key]["min"])
        read_max_lat = float(data["jobs"][job_pos]["read"][lat_key]["max"])
        clat_key, clat_unit = get_lat_unit("clat", data["jobs"][job_pos]["read"])
        read_p99_lat, read_p99_9_lat, read_p99_99_lat, read_p99_999_lat = get_clat_percentiles(
            data["jobs"][job_pos]["read"][clat_key])
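            # The "percentile" leaf maps percentile strings to latency values,
            # e.g. {"99.000000": 1234, "99.900000": 2345, ...} (values illustrative)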

        if "ns" in lat_unit:
            read_avg_lat, read_min_lat, read_max_lat = [x / 1000 for x in [read_avg_lat, read_min_lat, read_max_lat]]
        if "ns" in clat_unit:
            read_p99_lat = read_p99_lat / 1000
            read_p99_9_lat = read_p99_9_lat / 1000
            read_p99_99_lat = read_p99_99_lat / 1000
            read_p99_999_lat = read_p99_999_lat / 1000

        write_iops = float(data["jobs"][job_pos]["write"]["iops"])
        write_bw = float(data["jobs"][job_pos]["write"]["bw"])
        lat_key, lat_unit = get_lat_unit("lat", data["jobs"][job_pos]["write"])
        write_avg_lat = float(data["jobs"][job_pos]["write"][lat_key]["mean"])
        write_min_lat = float(data["jobs"][job_pos]["write"][lat_key]["min"])
        write_max_lat = float(data["jobs"][job_pos]["write"][lat_key]["max"])
        clat_key, clat_unit = get_lat_unit("clat", data["jobs"][job_pos]["write"])
        write_p99_lat, write_p99_9_lat, write_p99_99_lat, write_p99_999_lat = get_clat_percentiles(
            data["jobs"][job_pos]["write"][clat_key])

        if "ns" in lat_unit:
            write_avg_lat, write_min_lat, write_max_lat = [x / 1000 for x in [write_avg_lat, write_min_lat, write_max_lat]]
        if "ns" in clat_unit:
            write_p99_lat = write_p99_lat / 1000
            write_p99_9_lat = write_p99_9_lat / 1000
            write_p99_99_lat = write_p99_99_lat / 1000
            write_p99_999_lat = write_p99_999_lat / 1000
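        # fio may report latency in nanoseconds; normalize everything to microseconds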

    return [read_iops, read_bw, read_avg_lat, read_min_lat, read_max_lat,
            read_p99_lat, read_p99_9_lat, read_p99_99_lat, read_p99_999_lat,
            write_iops, write_bw, write_avg_lat, write_min_lat, write_max_lat,
            write_p99_lat, write_p99_9_lat, write_p99_99_lat, write_p99_999_lat]


def parse_results(results_dir, csv_file):
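# A sketch of the fio JSON layout read_json_stats() expects (key names taken
# from the lookups above; the "_ns"/"_us" suffix depends on the fio version):
# {"jobs": [{"read":  {"iops": ..., "bw": ...,
#                      "lat_ns": {"mean": ..., "min": ..., "max": ...},
#                      "clat_ns": {"percentile": {"99.000000": ..., ...}}},
#            "write": {... same keys as "read" ...}}]}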
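    """Aggregate fio JSON results from results_dir into CSV files.

    Writes one CSV per initiator and one aggregated CSV (csv_file) with a
    single row per fio job.
    """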
    files = os.listdir(results_dir)
    fio_files = [x for x in files if ".fio" in x]
    json_files = [x for x in files if ".json" in x]

    headers = ["read_iops", "read_bw", "read_avg_lat_us", "read_min_lat_us", "read_max_lat_us",
               "read_p99_lat_us", "read_p99.9_lat_us", "read_p99.99_lat_us", "read_p99.999_lat_us",
               "write_iops", "write_bw", "write_avg_lat_us", "write_min_lat_us", "write_max_lat_us",
               "write_p99_lat_us", "write_p99.9_lat_us", "write_p99.99_lat_us", "write_p99.999_lat_us"]

    aggr_headers = ["iops", "bw", "avg_lat_us", "min_lat_us", "max_lat_us",
                    "p99_lat_us", "p99.9_lat_us", "p99.99_lat_us", "p99.999_lat_us"]

    header_line = ",".join(["Name", *headers])
    aggr_header_line = ",".join(["Name", *aggr_headers])

    # Create the results file and write the aggregate header
    with open(os.path.join(results_dir, csv_file), "w") as fh:
        fh.write(aggr_header_line + "\n")
    rows = set()

    for fio_config in fio_files:
        logging.info("Getting FIO stats for %s", fio_config)
        job_name, _ = os.path.splitext(fio_config)

        # Look in the filename for the rwmixread value; the function arguments
        # do not carry that information.
        # TODO: Improve this function by using workload params directly instead
        # of regexing through filenames.
        if "read" in job_name:
            rw_mixread = 1
        elif "write" in job_name:
            rw_mixread = 0
        else:
            rw_mixread = float(re.search(r"m_(\d+)", job_name).group(1)) / 100
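        # Example: a job name containing "m_70" yields rw_mixread = 0.7.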

        # Strip the "_<N>CPU" part of the name, if present.
        # Initiators for the same job could have a different num_cores parameter.
        job_name = re.sub(r"_\d+CPU", "", job_name)
        job_result_files = [x for x in json_files if x.startswith(job_name)]
        logging.info("Matching result files for current fio config:")
        for j in job_result_files:
            logging.info("\t %s", j)

        # More than one initiator may have been used in the test, so check for that.
        # Result files are named so that the string after the last "_" separator is the initiator name.
        inits_names = set([os.path.splitext(x)[0].split("_")[-1] for x in job_result_files])
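        # e.g. "jobname_serverA.json" -> initiator name "serverA" (name is illustrative)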
        inits_avg_results = []
        for i in inits_names:
            logging.info("\tGetting stats for initiator %s", i)
            # There may have been more than one run of this job; average the per-run results for this initiator
            i_results = [x for x in job_result_files if i in x]
            i_results_filename = re.sub(r"run_\d+_", "", i_results[0].replace("json", "csv"))
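            # e.g. "run_1_jobname_serverA.json" -> "jobname_serverA.csv" (name is illustrative)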

            separate_stats = []
            for r in i_results:
                try:
                    stats = read_json_stats(os.path.join(results_dir, r))
                    separate_stats.append(stats)
                    logging.info(stats)
                except JSONDecodeError:
                    logging.error("ERROR: Failed to parse %s results! Results might be incomplete!", r)

            init_results = [sum(x) for x in zip(*separate_stats)]
            init_results = [x / len(separate_stats) for x in init_results]
            inits_avg_results.append(init_results)
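            # Element-wise sum across runs, then divide by the number of runs to get the average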

            logging.info("\tAverage results for initiator %s", i)
            logging.info(init_results)
            with open(os.path.join(results_dir, i_results_filename), "w") as fh:
                fh.write(header_line + "\n")
                fh.write(",".join([job_name, *["{0:.3f}".format(x) for x in init_results]]) + "\n")

        # Sum the results of all initiators running this FIO job.
        # Latency results are an average of the latencies from across all initiators.
        inits_avg_results = [sum(x) for x in zip(*inits_avg_results)]
        inits_avg_results = OrderedDict(zip(headers, inits_avg_results))
        for key in inits_avg_results:
            if "lat" in key:
                inits_avg_results[key] /= len(inits_names)

        # Aggregate separate read/write values into common labels.
        # Take rw_mixread into consideration for mixed read/write workloads.
        aggregate_results = OrderedDict()
        for h in aggr_headers:
            read_stat, write_stat = [float(value) for key, value in inits_avg_results.items() if h in key]
            if "lat" in h:
                aggr_stat = rw_mixread * read_stat + (1 - rw_mixread) * write_stat
            else:
                aggr_stat = read_stat + write_stat
            aggregate_results[h] = "{0:.3f}".format(aggr_stat)
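        # e.g. with rw_mixread = 0.7: lat = 0.7 * read_lat + 0.3 * write_lat,
        # while iops and bw are simply summed.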

        rows.add(",".join([job_name, *aggregate_results.values()]))

    # Save results to file
    with open(os.path.join(results_dir, csv_file), "a") as fh:
        for row in rows:
            fh.write(row + "\n")
    logging.info("You can find the test results in the file %s", os.path.join(results_dir, csv_file))
178