#!/usr/bin/env python3
# SPDX-License-Identifier: BSD-3-Clause
# Copyright (C) 2018 Intel Corporation
# All rights reserved.
#

# Note: we use setattr
# pylint: disable=no-member

import os
import re
import sys
import argparse
import json
import inspect
import logging
import zipfile
import threading
import subprocess
import itertools
import configparser
import time
import uuid

import paramiko
import pandas as pd
from common import *
from subprocess import CalledProcessError
from abc import abstractmethod, ABC
from dataclasses import dataclass
from typing import Any

sys.path.append(os.path.dirname(__file__) + '/../../../python')

import spdk.rpc as rpc  # noqa
import spdk.rpc.client as rpc_client  # noqa


@dataclass
class ConfigField:
    """Description of a single configuration key consumed by Server.read_config()."""

    # Key looked up in the config dict; also the attribute name set on the server.
    name: str = None
    # Value used when the key is absent from the config dict.
    default: Any = None
    # When True, a missing key (value None) makes read_config() raise ValueError.
    required: bool = False


class Server(ABC):
    """Base class holding configuration and system-tuning helpers for a test machine.

    All system interaction goes through exec_cmd(); the implementation in this
    class is a stub returning an empty string, so subclasses are presumably
    expected to override it (verify against the subclasses).
    """

    RDMA_PROTOCOL_IWARP = 0
    RDMA_PROTOCOL_ROCE = 1
    RDMA_PROTOCOL_UNKNOWN = -1

    def __init__(self, name, general_config, server_config):
        """Read general and per-server configuration and initialize restore state.

        Args:
            name: server name; may contain only letters, digits and dashes.
            general_config: dict providing 'username', 'password', 'transport'
                and optional 'skip_spdk_install', 'irdma_roce_enable', 'pause_frames'.
            server_config: dict providing 'nic_ips', 'mode' and optional
                'irq_scripts_dir', 'enable_arfs', 'tuned_profile', 'adq_enable',
                'irq_settings'.

        Raises:
            ValueError: a required configuration key is missing.
            SystemExit: the name contains characters other than letters, digits or dashes.
        """
        self.name = name

        self.log = logging.getLogger(self.name)

        config_fields = [
            ConfigField(name='username', required=True),
            ConfigField(name='password', required=True),
            ConfigField(name='transport', required=True),
            ConfigField(name='skip_spdk_install', default=False),
            ConfigField(name='irdma_roce_enable', default=False),
            ConfigField(name='pause_frames', default=None)
        ]

        self.read_config(config_fields, general_config)
        self.transport = self.transport.lower()

        config_fields = [
            ConfigField(name='nic_ips', required=True),
            ConfigField(name='mode', required=True),
            ConfigField(name='irq_scripts_dir', default='/usr/src/local/mlnx-tools/ofed_scripts'),
            ConfigField(name='enable_arfs', default=False),
            ConfigField(name='tuned_profile', default='')
        ]
        self.read_config(config_fields, server_config)

        self.local_nic_info = []
        self._nics_json_obj = {}
        # State captured by the configure_* methods so restore_* can undo them.
        self.svc_restore_dict = {}
        self.sysctl_restore_dict = {}
        self.tuned_restore_dict = {}
        self.governor_restore = ""

        self.enable_adq = server_config.get('adq_enable', False)
        self.adq_priority = 1 if self.enable_adq else None
        # User-provided irq_settings override the 'default' mode.
        self.irq_settings = {'mode': 'default', **server_config.get('irq_settings', {})}

        if not re.match(r'^[A-Za-z0-9\-]+$', name):
            # Fatal condition: report it at error level before exiting.
            self.log.error("Please use a name which contains only letters, numbers or dashes")
            sys.exit(1)

    def read_config(self, config_fields, config):
        """Copy each described key from config onto self as an attribute.

        Args:
            config_fields: iterable of ConfigField objects.
            config: dict to read values from.

        Raises:
            ValueError: a required field resolves to None.
        """
        for config_field in config_fields:
            value = config.get(config_field.name, config_field.default)
            if config_field.required and value is None:
                raise ValueError('%s config key is required' % config_field.name)

            setattr(self, config_field.name, value)

    @staticmethod
    def get_uncommented_lines(lines):
        """Return the non-empty lines that do not start with '#'."""
        return [line for line in lines if line and not line.startswith('#')]

    @staticmethod
    def get_core_list_from_mask(core_mask):
        """Expand a core mask string into a list of individual core numbers.

        Accepted inputs:
          - hex core mask (e.g. '0xffff')
          - list of individual cores (e.g. '1, 2, 3')
          - core ranges (e.g. '0-3')
          - mix of both (e.g. '0, 1-4, 9, 11-13')
        List forms may be wrapped in square brackets.
        """
        if "0x" in core_mask:
            core_mask_int = int(core_mask, 16)
            # Bit i set in the mask means core i is selected.
            return [i for i in range(core_mask_int.bit_length()) if (1 << i) & core_mask_int]

        # Core list can be provided in .json config with square brackets
        # remove them first
        core_mask = core_mask.replace("[", "").replace("]", "")

        core_list = []
        for item in core_mask.split(","):
            if "-" in item:
                start, end = item.split("-")
                core_list.extend(range(int(start), int(end) + 1))
            else:
                core_list.append(int(item))
        return core_list

    def get_nic_name_by_ip(self, ip):
        """Return the interface name that has the given IP assigned, or None.

        The output of `ip -j address show` is fetched once and cached in
        self._nics_json_obj (interfaces without addresses are filtered out).
        """
        if not self._nics_json_obj:
            nics_json_obj = self.exec_cmd(["ip", "-j", "address", "show"])
            self._nics_json_obj = [nic for nic in json.loads(nics_json_obj) if nic["addr_info"]]

        for nic in self._nics_json_obj:
            for addr in nic["addr_info"]:
                # Exact comparison: a substring test would let "10.0.0.1"
                # match an interface configured with "10.0.0.10".
                if addr.get("local") == ip:
                    return nic["ifname"]
        return None

    @abstractmethod
    def set_local_nic_info_helper(self):
        """Abstract hook; implemented by subclasses."""
        pass

    def set_local_nic_info(self, pci_info):
        """Store in self.local_nic_info every element of pci_info whose 'class' contains 'network'.

        pci_info is walked recursively through lists and through the
        'children' key of dicts.
        """
        def extract_network_elements(json_obj):
            nic_list = []
            if isinstance(json_obj, list):
                for x in json_obj:
                    nic_list.extend(extract_network_elements(x))
            elif isinstance(json_obj, dict):
                if "children" in json_obj:
                    nic_list.extend(extract_network_elements(json_obj["children"]))
                if "class" in json_obj and "network" in json_obj["class"]:
                    nic_list.append(json_obj)
            return nic_list

        self.local_nic_info = extract_network_elements(pci_info)

    def get_nic_numa_node(self, nic_name):
        """Return the NUMA node of a NIC as read from sysfs."""
        return int(self.exec_cmd(["cat", "/sys/class/net/%s/device/numa_node" % nic_name]))

    def get_numa_cpu_map(self):
        """Return a dict mapping NUMA node number to the list of CPU numbers reported by lscpu."""
        numa_cpu_json_obj = json.loads(self.exec_cmd(["lscpu", "-b", "-e=NODE,CPU", "-J"]))
        numa_cpu_json_map = {}

        for cpu in numa_cpu_json_obj["cpus"]:
            cpu_num = int(cpu["cpu"])
            numa_node = int(cpu["node"])
            numa_cpu_json_map.setdefault(numa_node, []).append(cpu_num)

        return numa_cpu_json_map

    # pylint: disable=R0201
    def exec_cmd(self, cmd, stderr_redirect=False, change_dir=None):
        """Run a command and return its output; this base implementation is a stub returning ''."""
        return ""

    def configure_system(self):
        """Apply all system tuning steps in order."""
        self.load_drivers()
        self.configure_services()
        self.configure_sysctl()
        self.configure_arfs()
        self.configure_tuned()
        self.configure_cpu_governor()
        self.configure_irq_affinity(**self.irq_settings)
        self.configure_pause_frames()

    def check_rdma_protocol(self):
        """Return the RDMA_PROTOCOL_* constant matching the irdma 'roce_ena' module parameter.

        Returns RDMA_PROTOCOL_UNKNOWN when reading the parameter fails.
        """
        try:
            roce_ena = self.exec_cmd(["cat", "/sys/module/irdma/parameters/roce_ena"]).strip()
            return self.RDMA_PROTOCOL_IWARP if roce_ena == "0" else self.RDMA_PROTOCOL_ROCE
        except CalledProcessError as e:
            self.log.error("ERROR: failed to check RDMA protocol!")
            self.log.error("%s resulted in error: %s" % (e.cmd, e.output))
            return self.RDMA_PROTOCOL_UNKNOWN

    def load_drivers(self):
        """Load nvme/nvmet modules for the transport and, for RDMA, select the irdma protocol."""
        self.log.info("Loading drivers")
        self.exec_cmd(["sudo", "modprobe", "-a", f"nvme-{self.transport}", f"nvmet-{self.transport}"])

        if self.transport != "rdma":
            return

        current_mode = self.check_rdma_protocol()
        if current_mode == self.RDMA_PROTOCOL_UNKNOWN:
            self.log.error("ERROR: failed to check RDMA protocol mode")
            return

        if self.irdma_roce_enable:
            if current_mode == self.RDMA_PROTOCOL_IWARP:
                self.reload_driver("irdma", "roce_ena=1")
                self.log.info("Loaded irdma driver with RoCE enabled")
            else:
                self.log.info("Leaving irdma driver with RoCE enabled")
        else:
            self.reload_driver("irdma", "roce_ena=0")
            self.log.info("Loaded irdma driver with iWARP enabled")

    def set_pause_frames(self, rx_state, tx_state):
        """Set rx/tx pause frame state ("on"/"off") with ethtool on every configured NIC."""
        for nic_ip in self.nic_ips:
            nic_name = self.get_nic_name_by_ip(nic_ip)
            self.exec_cmd(["sudo", "ethtool", "-A", nic_name, "rx", rx_state, "tx", tx_state])

    def configure_pause_frames(self):
        """Apply the 'pause_frames' setting: None keeps NIC defaults, falsy disables, truthy enables."""
        if self.pause_frames is None:
            self.log.info("Keeping NIC's default pause frames setting")
            return
        elif not self.pause_frames:
            self.log.info("Turning off pause frames")
            self.set_pause_frames("off", "off")
        else:
            self.log.info("Turning on pause frames")
            self.set_pause_frames("on", "on")

    def configure_arfs(self):
        """Set rps_flow_cnt on all rx queues of each NIC (512 when aRFS is enabled, else 0) and verify it."""
        rps_flow_cnt = 512 if self.enable_arfs else 0

        nic_names = [self.get_nic_name_by_ip(n) for n in self.nic_ips]
        for nic_name in nic_names:
            rps_wildcard_path = f"/sys/class/net/{nic_name}/queues/rx-*/rps_flow_cnt"
            self.exec_cmd(["sudo", "ethtool", "-K", nic_name, "ntuple", "on"])
            self.log.info(f"Setting rps_flow_cnt={rps_flow_cnt} for {nic_name} rx queues")

            self.exec_cmd(["bash", "-c", f"echo {rps_flow_cnt} | sudo tee {rps_wildcard_path}"])

            # Read the values back; a single distinct value equal to the
            # requested one means every queue was configured.
            readback = self.exec_cmd(["sudo", "bash", "-c", f'cat {rps_wildcard_path}'])
            set_values = sorted(set(readback.strip().split("\n")))
            if len(set_values) > 1:
                self.log.error(f"""Not all NIC {nic_name} queues had rps_flow_cnt set properly. Found rps_flow_cnt values: {set_values}""")
            elif set_values[0] != str(rps_flow_cnt):
                self.log.error(f"""Wrong rps_flow_cnt set on {nic_name} queues. Found {set_values[0]}, should be {rps_flow_cnt}""")
            else:
                self.log.info(f"Configuration of {nic_name} completed with confirmed rps_flow_cnt settings.")

        self.log.info("ARFS configuration completed.")

    def configure_adq(self):
        """Load ADQ kernel modules and configure NIC ports for ADQ."""
        self.adq_load_modules()
        self.adq_configure_nic()

    def adq_load_modules(self):
        """Modprobe the kernel modules ADQ depends on; failures are logged, not raised."""
        self.log.info("Modprobing ADQ-related Linux modules...")
        adq_module_deps = ["sch_mqprio", "act_mirred", "cls_flower"]
        for module in adq_module_deps:
            try:
                self.exec_cmd(["sudo", "modprobe", module])
                self.log.info("%s loaded!" % module)
            except CalledProcessError as e:
                self.log.error("ERROR: failed to load module %s" % module)
                self.log.error("%s resulted in error: %s" % (e.cmd, e.output))

    def adq_configure_tc(self):
        """Configure ADQ traffic classes, ingress filters, coalescing and XPS for each NIC.

        NOTE(review): reads self.num_cores, self.spdk_dir and
        self.subsystem_info_list, which are not set in Server.__init__ -
        presumably provided by subclasses; verify.
        """
        self.log.info("Configuring ADQ Traffic classes and filters...")

        num_queues_tc0 = 2  # 2 is minimum number of queues for TC0
        num_queues_tc1 = self.num_cores
        port_param = "dst_port" if isinstance(self, Target) else "src_port"
        xps_script_path = os.path.join(self.spdk_dir, "scripts", "perf", "nvmf", "set_xps_rxqs")

        for nic_ip in self.nic_ips:
            nic_name = self.get_nic_name_by_ip(nic_ip)
            nic_ports = [x[0] for x in self.subsystem_info_list if nic_ip in x[2]]

            tc_qdisc_map_cmd = ["sudo", "tc", "qdisc", "add", "dev", nic_name,
                                "root", "mqprio", "num_tc", "2", "map", "0", "1",
                                "queues", "%s@0" % num_queues_tc0,
                                "%s@%s" % (num_queues_tc1, num_queues_tc0),
                                "hw", "1", "mode", "channel"]
            self.log.info(" ".join(tc_qdisc_map_cmd))
            self.exec_cmd(tc_qdisc_map_cmd)

            time.sleep(5)
            tc_qdisc_ingress_cmd = ["sudo", "tc", "qdisc", "add", "dev", nic_name, "ingress"]
            self.log.info(" ".join(tc_qdisc_ingress_cmd))
            self.exec_cmd(tc_qdisc_ingress_cmd)

            # Resolve the NIC's PCI address from its sysfs device link.
            nic_bdf = os.path.basename(self.exec_cmd(["readlink", "-f", "/sys/class/net/%s/device" % nic_name]).strip())
            self.exec_cmd(["sudo", "devlink", "dev", "param", "set", "pci/%s" % nic_bdf,
                           "name", "tc1_inline_fd", "value", "true", "cmode", "runtime"])

            for port in nic_ports:
                tc_filter_cmd = ["sudo", "tc", "filter", "add", "dev", nic_name,
                                 "protocol", "ip", "ingress", "prio", "1", "flower",
                                 "dst_ip", "%s/32" % nic_ip, "ip_proto", "tcp", port_param, port,
                                 "skip_sw", "hw_tc", "1"]
                self.log.info(" ".join(tc_filter_cmd))
                self.exec_cmd(tc_filter_cmd)

            # show tc configuration
            self.log.info("Show tc configuration for %s NIC..." % nic_name)
            tc_disk_out = self.exec_cmd(["sudo", "tc", "qdisc", "show", "dev", nic_name])
            tc_filter_out = self.exec_cmd(["sudo", "tc", "filter", "show", "dev", nic_name, "ingress"])
            self.log.info("%s" % tc_disk_out)
            self.log.info("%s" % tc_filter_out)

            # Ethtool coalesce settings must be applied after configuring traffic classes
            self.exec_cmd(["sudo", "ethtool", "--coalesce", nic_name, "adaptive-rx", "off", "rx-usecs", "0"])
            self.exec_cmd(["sudo", "ethtool", "--coalesce", nic_name, "adaptive-tx", "off", "tx-usecs", "500"])

            self.log.info("Running set_xps_rxqs script for %s NIC..." % nic_name)
            xps_cmd = ["sudo", xps_script_path, nic_name]
            self.log.info(xps_cmd)
            self.exec_cmd(xps_cmd)

    def reload_driver(self, driver, *modprobe_args):
        """Remove and re-insert a kernel module with optional modprobe arguments; failures are logged."""
        try:
            self.exec_cmd(["sudo", "rmmod", driver])
            self.exec_cmd(["sudo", "modprobe", driver, *modprobe_args])
        except CalledProcessError as e:
            self.log.error("ERROR: failed to reload %s module!" % driver)
            self.log.error("%s resulted in error: %s" % (e.cmd, e.output))

    def adq_configure_nic(self):
        """Reload the ice driver and set the ethtool features / private flags used for ADQ on each NIC."""
        self.log.info("Configuring NIC port settings for ADQ testing...")

        # Reload the driver first, to make sure any previous settings are re-set.
        self.reload_driver("ice")

        nic_names = [self.get_nic_name_by_ip(n) for n in self.nic_ips]
        for nic in nic_names:
            self.log.info(nic)
            try:
                self.exec_cmd(["sudo", "ethtool", "-K", nic,
                               "hw-tc-offload", "on"])  # Enable hardware TC offload
                self.exec_cmd(["sudo", "ethtool", "--set-priv-flags", nic, "fw-lldp-agent", "off"])  # Disable LLDP
                self.exec_cmd(["sudo", "ethtool", "--set-priv-flags", nic, "channel-pkt-inspect-optimize", "off"])
                # Following are suggested in ADQ Configuration Guide to be enabled for large block sizes,
                # but can be used with 4k block sizes as well
                self.exec_cmd(["sudo", "ethtool", "--set-priv-flags", nic, "channel-pkt-clean-bp-stop", "on"])
                self.exec_cmd(["sudo", "ethtool", "--set-priv-flags", nic, "channel-pkt-clean-bp-stop-cfg", "on"])
            except CalledProcessError as e:
                self.log.error("ERROR: failed to configure NIC port using ethtool!")
                self.log.error("%s resulted in error: %s" % (e.cmd, e.output))
                self.log.info("Please update your NIC driver and firmware versions and try again.")
            # Log the resulting feature and private-flag state regardless of outcome.
            self.log.info(self.exec_cmd(["sudo", "ethtool", "-k", nic]))
            self.log.info(self.exec_cmd(["sudo", "ethtool", "--show-priv-flags", nic]))

    def configure_services(self):
        """Stop services listed in svc_target_state, recording their prior state for restore_services()."""
        self.log.info("Configuring active services...")
        svc_config = configparser.ConfigParser(strict=False)

        # Below list is valid only for RHEL / Fedora systems and might not
        # contain valid names for other distributions.
        svc_target_state = {
            "firewalld": "inactive",
            "irqbalance": "inactive",
            "lldpad.service": "inactive",
            "lldpad.socket": "inactive"
        }

        for service in svc_target_state:
            out = self.exec_cmd(["sudo", "systemctl", "show", "--no-page", service])
            # Prepend a section header so the key=value output parses as INI.
            out = "\n".join(["[%s]" % service, out])
            svc_config.read_string(out)

            if "LoadError" in svc_config[service] and "not found" in svc_config[service]["LoadError"]:
                continue

            service_state = svc_config[service]["ActiveState"]
            self.log.info("Current state of %s service is %s" % (service, service_state))
            self.svc_restore_dict.update({service: service_state})
            if service_state != "inactive":
                self.log.info("Disabling %s. It will be restored after the test has finished." % service)
                self.exec_cmd(["sudo", "systemctl", "stop", service])

    def configure_sysctl(self):
        """Write tuned sysctl values, saving each previous value in self.sysctl_restore_dict."""
        self.log.info("Tuning sysctl settings...")

        busy_read = 0
        busy_poll = 0
        if self.enable_adq and self.mode == "spdk":
            busy_read = 1
            busy_poll = 1

        sysctl_opts = {
            "net.core.busy_poll": busy_poll,
            "net.core.busy_read": busy_read,
            "net.core.somaxconn": 4096,
            "net.core.netdev_max_backlog": 8192,
            "net.ipv4.tcp_max_syn_backlog": 16384,
            "net.core.rmem_max": 268435456,
            "net.core.wmem_max": 268435456,
            "net.ipv4.tcp_mem": "268435456 268435456 268435456",
            "net.ipv4.tcp_rmem": "8192 1048576 33554432",
            "net.ipv4.tcp_wmem": "8192 1048576 33554432",
            "net.ipv4.route.flush": 1,
            "vm.overcommit_memory": 1,
            "net.core.rps_sock_flow_entries": 0
        }

        if self.enable_adq:
            # NOTE(review): adq_set_busy_read is not defined in this class in the
            # visible source - presumably provided by a subclass; verify.
            sysctl_opts.update(self.adq_set_busy_read(1))

        if self.enable_arfs:
            sysctl_opts.update({"net.core.rps_sock_flow_entries": 32768})

        for opt, value in sysctl_opts.items():
            self.sysctl_restore_dict.update({opt: self.exec_cmd(["sysctl", "-n", opt]).strip()})
            self.log.info(self.exec_cmd(["sudo", "sysctl", "-w", "%s=%s" % (opt, value)]).strip())

    def configure_tuned(self):
        """Start tuned and switch to self.tuned_profile, saving the previous profile for restore_tuned()."""
        if not self.tuned_profile:
            self.log.warning("WARNING: Tuned profile not set in configuration file. Skipping configuration.")
            return

        self.log.info("Configuring tuned-adm profile to %s." % self.tuned_profile)
        service = "tuned"
        tuned_config = configparser.ConfigParser(strict=False)

        out = self.exec_cmd(["sudo", "systemctl", "show", "--no-page", service])
        out = "\n".join(["[%s]" % service, out])
        tuned_config.read_string(out)
        tuned_state = tuned_config[service]["ActiveState"]
        self.svc_restore_dict.update({service: tuned_state})

        if tuned_state != "inactive":
            profile = self.exec_cmd(["cat", "/etc/tuned/active_profile"]).strip()
            profile_mode = self.exec_cmd(["cat", "/etc/tuned/profile_mode"]).strip()

            self.tuned_restore_dict = {
                "profile": profile,
                "mode": profile_mode
            }

        self.exec_cmd(["sudo", "systemctl", "start", service])
        self.exec_cmd(["sudo", "tuned-adm", "profile", self.tuned_profile])
        # Strip the trailing newline of the file content before logging it.
        active_profile = self.exec_cmd(["cat", "/etc/tuned/active_profile"]).strip()
        self.log.info("Tuned profile set to %s." % active_profile)

    def configure_cpu_governor(self):
        """Switch the CPU frequency governor to 'performance', saving the previous one."""
        self.log.info("Setting CPU governor to performance...")

        # This assumes that there is the same CPU scaling governor on each CPU
        self.governor_restore = self.exec_cmd(["cat", "/sys/devices/system/cpu/cpu0/cpufreq/scaling_governor"]).strip()
        self.exec_cmd(["sudo", "cpupower", "frequency-set", "-g", "performance"])

    def configure_irq_affinity(self, mode="default", cpulist=None, exclude_cpulist=False):
        """Run the IRQ affinity script for every configured NIC.

        Args:
            mode: "default" (script picks CPUs), "bynode" (CPUs of the NIC's
                NUMA node) or "cpulist" (explicit CPU list).
            cpulist: core mask / list string understood by get_core_list_from_mask().
            exclude_cpulist: when True, cpulist names the CPUs to exclude
                instead of the CPUs to use.

        Raises:
            ValueError: unsupported mode, or "cpulist" mode without cpulist.
            Exception: no CPUs are left after applying the exclusion list.
        """
        self.log.info("Setting NIC irq affinity for NICs. Using %s mode" % mode)

        if mode not in ["default", "bynode", "cpulist"]:
            raise ValueError("%s irq affinity setting not supported" % mode)

        if mode == "cpulist" and not cpulist:
            raise ValueError("%s irq affinity setting set, but no cpulist provided" % mode)

        affinity_script = "set_irq_affinity.sh"
        if mode != "default":
            affinity_script = "set_irq_affinity_cpulist.sh"
            system_cpu_map = self.get_numa_cpu_map()
        irq_script_path = os.path.join(self.irq_scripts_dir, affinity_script)

        def cpu_list_to_string(cpulist):
            return ",".join(map(str, cpulist))

        nic_names = [self.get_nic_name_by_ip(n) for n in self.nic_ips]
        for nic_name in nic_names:
            irq_cmd = ["sudo", irq_script_path]

            # Use only CPU cores matching NIC NUMA node.
            # Remove any CPU cores if they're on exclusion list.
            if mode == "bynode":
                irq_cpus = system_cpu_map[self.get_nic_numa_node(nic_name)]
                if cpulist and exclude_cpulist:
                    disallowed_cpus = self.get_core_list_from_mask(cpulist)
                    # Sorted so the CPU list handed to the script is deterministic.
                    irq_cpus = sorted(set(irq_cpus) - set(disallowed_cpus))
                    if not irq_cpus:
                        raise Exception("No CPUs left to process IRQs after excluding CPUs!")
                irq_cmd.append(cpu_list_to_string(irq_cpus))

            if mode == "cpulist":
                irq_cpus = self.get_core_list_from_mask(cpulist)
                if exclude_cpulist:
                    # Flatten system CPU list, we don't need NUMA awareness here
                    system_cpu_list = {x for v in system_cpu_map.values() for x in v}
                    irq_cpus = sorted(system_cpu_list - set(irq_cpus))
                    if not irq_cpus:
                        raise Exception("No CPUs left to process IRQs after excluding CPUs!")
                irq_cmd.append(cpu_list_to_string(irq_cpus))

            irq_cmd.append(nic_name)
            self.log.info(irq_cmd)
            self.exec_cmd(irq_cmd, change_dir=self.irq_scripts_dir)

    def restore_services(self):
        """Return each recorded service to its saved state (stop if it was inactive, else start)."""
        self.log.info("Restoring services...")
        for service, state in self.svc_restore_dict.items():
            cmd = "stop" if state == "inactive" else "start"
            self.exec_cmd(["sudo", "systemctl", cmd, service])

    def restore_sysctl(self):
        """Write back the sysctl values saved by configure_sysctl()."""
        self.log.info("Restoring sysctl settings...")
        for opt, value in self.sysctl_restore_dict.items():
            self.log.info(self.exec_cmd(["sudo", "sysctl", "-w", "%s=%s" % (opt, value)]).strip())

    def restore_tuned(self):
        """Restore the tuned-adm profile saved by configure_tuned(), if any."""
        self.log.info("Restoring tuned-adm settings...")

        if not self.tuned_restore_dict:
            return

        if self.tuned_restore_dict["mode"] == "auto":
            self.exec_cmd(["sudo", "tuned-adm", "auto_profile"])
            self.log.info("Reverted tuned-adm to auto_profile.")
        else:
            self.exec_cmd(["sudo", "tuned-adm", "profile", self.tuned_restore_dict["profile"]])
            self.log.info("Reverted tuned-adm to %s profile." % self.tuned_restore_dict["profile"])

    def restore_governor(self):
        """Restore the CPU governor saved by configure_cpu_governor(), if any."""
        self.log.info("Restoring CPU governor setting...")
        if self.governor_restore:
            self.exec_cmd(["sudo", "cpupower", "frequency-set", "-g", self.governor_restore])
            self.log.info("Reverted CPU governor to %s." % self.governor_restore)

    def restore_settings(self):
        """Undo the tuning applied by the configure_* methods."""
        self.restore_governor()
        self.restore_tuned()
        self.restore_services()
        self.restore_sysctl()
        if self.enable_adq:
            self.reload_driver("ice")

    def check_for_throttling(self):
        """Scan dmesg for CPU throttling / NIC temperature messages.

        Returns:
            1 when a matching message is found or dmesg could not be read,
            0 otherwise.
        """
        # Compiled once here instead of being re-parsed for every dmesg line.
        patterns = [re.compile(p) for p in (
            r"CPU\d+: Core temperature is above threshold, cpu clock is throttled",
            r"mlx5_core \S+: poll_health:\d+:\(pid \d+\): device's health compromised",
            r"mlx5_core \S+: print_health_info:\d+:\(pid \d+\): Health issue observed, High temperature",
            r"mlx5_core \S+: temp_warn:\d+:\(pid \d+\): High temperature on sensors"
        )]

        issue_found = False
        try:
            output = self.exec_cmd(["dmesg"])

            for line in output.split("\n"):
                for pattern in patterns:
                    if pattern.search(line):
                        self.log.warning("Throttling or temperature issue found")
                        issue_found = True
            return 1 if issue_found else 0
        except Exception as e:
            # A failed check is reported as an issue rather than silently passing.
            self.log.error("ERROR: failed to execute dmesg command")
            self.log.error(f"Error {e}")
            return 1

    def stop(self):
        """Raise CpuThrottlingError when check_for_throttling() reports an issue.

        NOTE(review): CpuThrottlingError is not defined in the visible source -
        presumably it comes from `from common import *`; verify.
        """
        if self.check_for_throttling():
            err_message = (f"Throttling or temperature issue found on {self.name} system! "
                           "Check system logs!")
            raise CpuThrottlingError(err_message)


class Target(Server):
    def __init__(self, name, general_config, target_config):
        super().__init__(name, general_config, target_config)

        # Defaults
        self._nics_json_obj = json.loads(self.exec_cmd(["ip", "-j", "address", "show"]))
        self.subsystem_info_list = []
        self.initiator_info = []

        config_fields = [
            ConfigField(name='mode', required=True),
            ConfigField(name='results_dir', required=True),
            ConfigField(name='enable_pm', default=True),
            ConfigField(name='enable_sar', default=True),
            ConfigField(name='enable_pcm', default=True),
            ConfigField(name='enable_dpdk_memory', default=True)
        ]
        self.read_config(config_fields, target_config)

        self.null_block = target_config.get('null_block_devices', 0)
        self.scheduler_name = target_config.get('scheduler_settings', 'static')
        self.enable_zcopy = target_config.get('zcopy_settings', False)
        self.enable_bw = target_config.get('enable_bandwidth', True)
        self.nvme_blocklist = target_config.get('blocklist', [])
        self.nvme_allowlist = target_config.get('allowlist', [])

Latecki # Blocklist takes precedence, remove common elements from allowlist 6032e449d41SKarol Latecki self.nvme_allowlist = list(set(self.nvme_allowlist) - set(self.nvme_blocklist)) 604f5d6d594SKarol Latecki 605f5d6d594SKarol Latecki self.log.info("Items now on allowlist: %s" % self.nvme_allowlist) 606f5d6d594SKarol Latecki self.log.info("Items now on blocklist: %s" % self.nvme_blocklist) 607da5c9b32SMaciej Wawryk 608a42dfab1SKarol Latecki self.script_dir = os.path.dirname(os.path.abspath(sys.argv[0])) 609a42dfab1SKarol Latecki self.spdk_dir = os.path.abspath(os.path.join(self.script_dir, "../../../")) 610ddb4d24bSKarol Latecki self.set_local_nic_info(self.set_local_nic_info_helper()) 611ad783359SKarol Latecki 61296474e9eSKamil Godzwon if not self.skip_spdk_install: 613ad783359SKarol Latecki self.zip_spdk_sources(self.spdk_dir, "/tmp/spdk.zip") 614ad783359SKarol Latecki 615a35f2b34SKarol Latecki self.configure_system() 6163c09b2fbSKarol Latecki if self.enable_adq: 6173c09b2fbSKarol Latecki self.configure_adq() 6182b2dc8b3SKarol Latecki self.configure_irq_affinity() 619632703f6SMaciej Wawryk self.sys_config() 620a42dfab1SKarol Latecki 621ddb4d24bSKarol Latecki def set_local_nic_info_helper(self): 62226d7e3d4SKarol Latecki return json.loads(self.exec_cmd(["lshw", "-json"])) 62326d7e3d4SKarol Latecki 624696d55bfSKarol Latecki def exec_cmd(self, cmd, stderr_redirect=False, change_dir=None): 62526d7e3d4SKarol Latecki stderr_opt = None 62626d7e3d4SKarol Latecki if stderr_redirect: 62726d7e3d4SKarol Latecki stderr_opt = subprocess.STDOUT 628696d55bfSKarol Latecki if change_dir: 629696d55bfSKarol Latecki old_cwd = os.getcwd() 630696d55bfSKarol Latecki os.chdir(change_dir) 63180e92723SKarol Latecki self.log.info("Changing directory to %s" % change_dir) 632696d55bfSKarol Latecki 63326d7e3d4SKarol Latecki out = check_output(cmd, stderr=stderr_opt).decode(encoding="utf-8") 634696d55bfSKarol Latecki 635696d55bfSKarol Latecki if change_dir: 636696d55bfSKarol Latecki 
os.chdir(old_cwd) 63780e92723SKarol Latecki self.log.info("Changing directory to %s" % old_cwd) 63826d7e3d4SKarol Latecki return out 639ddb4d24bSKarol Latecki 640a42dfab1SKarol Latecki def zip_spdk_sources(self, spdk_dir, dest_file): 64180e92723SKarol Latecki self.log.info("Zipping SPDK source directory") 642a42dfab1SKarol Latecki fh = zipfile.ZipFile(dest_file, "w", zipfile.ZIP_DEFLATED) 6436d988167SPawel Piatek for root, _directories, files in os.walk(spdk_dir, followlinks=True): 644a42dfab1SKarol Latecki for file in files: 645a42dfab1SKarol Latecki fh.write(os.path.relpath(os.path.join(root, file))) 646a42dfab1SKarol Latecki fh.close() 64780e92723SKarol Latecki self.log.info("Done zipping") 648a42dfab1SKarol Latecki 64920955a49SPawel Piatek @staticmethod 650e05299f1SKarol Latecki def _chunks(input_list, chunks_no): 651e05299f1SKarol Latecki div, rem = divmod(len(input_list), chunks_no) 652e05299f1SKarol Latecki for i in range(chunks_no): 653e05299f1SKarol Latecki si = (div + 1) * (i if i < rem else rem) + div * (0 if i < rem else i - rem) 654e05299f1SKarol Latecki yield input_list[si:si + (div + 1 if i < rem else div)] 655e05299f1SKarol Latecki 656e05299f1SKarol Latecki def spread_bdevs(self, req_disks): 657e05299f1SKarol Latecki # Spread available block devices indexes: 658e05299f1SKarol Latecki # - evenly across available initiator systems 659e05299f1SKarol Latecki # - evenly across available NIC interfaces for 660e05299f1SKarol Latecki # each initiator 661e05299f1SKarol Latecki # Not NUMA aware. 
662e05299f1SKarol Latecki ip_bdev_map = [] 663e05299f1SKarol Latecki initiator_chunks = self._chunks(range(0, req_disks), len(self.initiator_info)) 664e05299f1SKarol Latecki 665e05299f1SKarol Latecki for i, (init, init_chunk) in enumerate(zip(self.initiator_info, initiator_chunks)): 666e05299f1SKarol Latecki self.initiator_info[i]["bdev_range"] = init_chunk 667e05299f1SKarol Latecki init_chunks_list = list(self._chunks(init_chunk, len(init["target_nic_ips"]))) 668e05299f1SKarol Latecki for ip, nic_chunk in zip(self.initiator_info[i]["target_nic_ips"], init_chunks_list): 669e05299f1SKarol Latecki for c in nic_chunk: 670e05299f1SKarol Latecki ip_bdev_map.append((ip, c)) 671e05299f1SKarol Latecki return ip_bdev_map 672e05299f1SKarol Latecki 67390b95e28SKarol Latecki def measure_sar(self, results_dir, sar_file_prefix, ramp_time, run_time): 6745293d380SMaciej Wawryk cpu_number = os.cpu_count() 6755293d380SMaciej Wawryk sar_idle_sum = 0 6765da326eeSKarol Latecki sar_output_file = os.path.join(results_dir, sar_file_prefix + ".txt") 6775da326eeSKarol Latecki sar_cpu_util_file = os.path.join(results_dir, ".".join([sar_file_prefix + "cpu_util", "txt"])) 6786acfe379SKarol Latecki 67990b95e28SKarol Latecki self.log.info("Waiting %d seconds for ramp-up to finish before measuring SAR stats" % ramp_time) 68090b95e28SKarol Latecki time.sleep(ramp_time) 68180e92723SKarol Latecki self.log.info("Starting SAR measurements") 6826acfe379SKarol Latecki 68390b95e28SKarol Latecki out = self.exec_cmd(["sar", "-P", "ALL", "%s" % 1, "%s" % run_time]) 6845da326eeSKarol Latecki with open(os.path.join(results_dir, sar_output_file), "w") as fh: 685a42dfab1SKarol Latecki for line in out.split("\n"): 6865293d380SMaciej Wawryk if "Average" in line: 6875293d380SMaciej Wawryk if "CPU" in line: 68880e92723SKarol Latecki self.log.info("Summary CPU utilization from SAR:") 68980e92723SKarol Latecki self.log.info(line) 6905293d380SMaciej Wawryk elif "all" in line: 69180e92723SKarol Latecki 
self.log.info(line) 6925293d380SMaciej Wawryk else: 693328a75c0SMaciej Wawryk sar_idle_sum += float(line.split()[7]) 694a42dfab1SKarol Latecki fh.write(out) 6955293d380SMaciej Wawryk sar_cpu_usage = cpu_number * 100 - sar_idle_sum 6965da326eeSKarol Latecki 6975da326eeSKarol Latecki with open(os.path.join(results_dir, sar_cpu_util_file), "w") as f: 6985da326eeSKarol Latecki f.write("%0.2f" % sar_cpu_usage) 699a42dfab1SKarol Latecki 700ecbb99e4SKarol Latecki def measure_power(self, results_dir, prefix, script_full_dir, ramp_time, run_time): 701ecbb99e4SKarol Latecki time.sleep(ramp_time) 702241d9c0aSKarol Latecki self.log.info("Starting power measurements") 703eb0159ccSMichal Berger self.exec_cmd(["%s/../pm/collect-bmc-pm" % script_full_dir, 704eb0159ccSMichal Berger "-d", "%s" % results_dir, "-l", "-p", "%s" % prefix, 705ecbb99e4SKarol Latecki "-x", "-c", "%s" % run_time, "-t", "%s" % 1, "-r"]) 70604dd0282SMichal Berger 7073d9b0628SKarol Latecki def measure_pcm(self, results_dir, pcm_file_name, ramp_time, run_time): 7083d9b0628SKarol Latecki time.sleep(ramp_time) 7093d9b0628SKarol Latecki cmd = ["pcm", "1", "-i=%s" % run_time, 7105d82cd2bSKarol Latecki "-csv=%s/%s" % (results_dir, pcm_file_name)] 711739a3e86SKarol Latecki out = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) 71223f78f22SMaciej Wawryk df = pd.read_csv(os.path.join(results_dir, pcm_file_name), header=[0, 1]) 71323f78f22SMaciej Wawryk df = df.rename(columns=lambda x: re.sub(r'Unnamed:[\w\s]*$', '', x)) 71423f78f22SMaciej Wawryk skt = df.loc[:, df.columns.get_level_values(1).isin({'UPI0', 'UPI1', 'UPI2'})] 71523f78f22SMaciej Wawryk skt_pcm_file_name = "_".join(["skt", pcm_file_name]) 71623f78f22SMaciej Wawryk skt.to_csv(os.path.join(results_dir, skt_pcm_file_name), index=False) 717db65d583SMaciej Wawryk 718739a3e86SKarol Latecki if out.returncode: 719739a3e86SKarol Latecki self.log.warning("PCM Power measurement finished with a non-zero return code.") 720739a3e86SKarol Latecki 
self.log.warning(out.stdout.decode(encoding="utf-8")) 721739a3e86SKarol Latecki 722b9403ae5SKarol Latecki def measure_network_bandwidth(self, results_dir, bandwidth_file_name, ramp_time, run_time): 723b9403ae5SKarol Latecki self.log.info("Waiting %d seconds for ramp-up to finish before measuring bandwidth stats" % ramp_time) 724b9403ae5SKarol Latecki time.sleep(ramp_time) 72580e92723SKarol Latecki self.log.info("INFO: starting network bandwidth measure") 7264503500eSMaciej Wawryk self.exec_cmd(["bwm-ng", "-o", "csv", "-F", "%s/%s" % (results_dir, bandwidth_file_name), 727b9403ae5SKarol Latecki "-a", "1", "-t", "1000", "-c", "%s" % run_time]) 728b9403ae5SKarol Latecki # TODO: Above command results in a .csv file containing measurements for all gathered samples. 729b9403ae5SKarol Latecki # Improve this so that additional file containing measurements average is generated too. 730b9403ae5SKarol Latecki # TODO: Monitor only these interfaces which are currently used to run the workload. 7313c8451e6SMaciej Wawryk 7329b3f5e98SKarol Latecki def measure_dpdk_memory(self, results_dir, dump_file_name, ramp_time): 73380e92723SKarol Latecki self.log.info("INFO: waiting to generate DPDK memory usage") 7349b3f5e98SKarol Latecki time.sleep(ramp_time) 73580e92723SKarol Latecki self.log.info("INFO: generating DPDK memory usage") 7369b3f5e98SKarol Latecki tmp_dump_file = rpc.env_dpdk.env_dpdk_get_mem_stats(self.client)["filename"] 7379b3f5e98SKarol Latecki os.rename(tmp_dump_file, "%s/%s" % (results_dir, dump_file_name)) 738d1fc2883SMaciej Wawryk 739632703f6SMaciej Wawryk def sys_config(self): 74080e92723SKarol Latecki self.log.info("====Kernel release:====") 74180e92723SKarol Latecki self.log.info(os.uname().release) 74280e92723SKarol Latecki self.log.info("====Kernel command line:====") 743632703f6SMaciej Wawryk with open('/proc/cmdline') as f: 744632703f6SMaciej Wawryk cmdline = f.readlines() 74580e92723SKarol Latecki self.log.info('\n'.join(self.get_uncommented_lines(cmdline))) 
74680e92723SKarol Latecki self.log.info("====sysctl conf:====") 747632703f6SMaciej Wawryk with open('/etc/sysctl.conf') as f: 748632703f6SMaciej Wawryk sysctl = f.readlines() 74980e92723SKarol Latecki self.log.info('\n'.join(self.get_uncommented_lines(sysctl))) 75080e92723SKarol Latecki self.log.info("====Cpu power info:====") 75180e92723SKarol Latecki self.log.info(self.exec_cmd(["cpupower", "frequency-info"])) 75280e92723SKarol Latecki self.log.info("====zcopy settings:====") 75380e92723SKarol Latecki self.log.info("zcopy enabled: %s" % (self.enable_zcopy)) 75480e92723SKarol Latecki self.log.info("====Scheduler settings:====") 75580e92723SKarol Latecki self.log.info("SPDK scheduler: %s" % (self.scheduler_name)) 756632703f6SMaciej Wawryk 757a42dfab1SKarol Latecki 758a42dfab1SKarol Lateckiclass Initiator(Server): 7599fec8853SKarol Latecki def __init__(self, name, general_config, initiator_config): 7606d988167SPawel Piatek super().__init__(name, general_config, initiator_config) 7619dfe2a4bSKarol Latecki 76274f2d4f7SPawel Piatek # required fields and defaults 7638301b041SPawel Piatek config_fields = [ 76474f2d4f7SPawel Piatek ConfigField(name='mode', required=True), 7658301b041SPawel Piatek ConfigField(name='ip', required=True), 7668301b041SPawel Piatek ConfigField(name='target_nic_ips', required=True), 7678301b041SPawel Piatek ConfigField(name='cpus_allowed', default=None), 7688301b041SPawel Piatek ConfigField(name='cpus_allowed_policy', default='shared'), 7698301b041SPawel Piatek ConfigField(name='spdk_dir', default='/tmp/spdk'), 7708301b041SPawel Piatek ConfigField(name='fio_bin', default='/usr/src/fio/fio'), 7718301b041SPawel Piatek ConfigField(name='nvmecli_bin', default='nvme'), 7728301b041SPawel Piatek ConfigField(name='cpu_frequency', default=None), 7738301b041SPawel Piatek ConfigField(name='allow_cpu_sharing', default=True) 7748301b041SPawel Piatek ] 7759dfe2a4bSKarol Latecki 7768301b041SPawel Piatek self.read_config(config_fields, initiator_config) 
7779fec8853SKarol Latecki 778ae730042SMichal Berger if os.getenv('SPDK_WORKSPACE'): 779ae730042SMichal Berger self.spdk_dir = os.getenv('SPDK_WORKSPACE') 7809fec8853SKarol Latecki 7818301b041SPawel Piatek self.subsystem_info_list = [] 7828301b041SPawel Piatek 783a42dfab1SKarol Latecki self.ssh_connection = paramiko.SSHClient() 784d6013743SJohn Kariuki self.ssh_connection.set_missing_host_key_policy(paramiko.AutoAddPolicy()) 785a42dfab1SKarol Latecki self.ssh_connection.connect(self.ip, username=self.username, password=self.password) 78626d7e3d4SKarol Latecki self.exec_cmd(["sudo", "rm", "-rf", "%s/nvmf_perf" % self.spdk_dir]) 78726d7e3d4SKarol Latecki self.exec_cmd(["mkdir", "-p", "%s" % self.spdk_dir]) 78826d7e3d4SKarol Latecki self._nics_json_obj = json.loads(self.exec_cmd(["ip", "-j", "address", "show"])) 789ad783359SKarol Latecki 79096474e9eSKamil Godzwon if not self.skip_spdk_install: 791ad783359SKarol Latecki self.copy_spdk("/tmp/spdk.zip") 792624133b5SPawel Piatek 793ddb4d24bSKarol Latecki self.set_local_nic_info(self.set_local_nic_info_helper()) 7940eab3515SMaciej Wawryk self.set_cpu_frequency() 795a35f2b34SKarol Latecki self.configure_system() 7963c09b2fbSKarol Latecki if self.enable_adq: 7973c09b2fbSKarol Latecki self.configure_adq() 798632703f6SMaciej Wawryk self.sys_config() 799a42dfab1SKarol Latecki 800ddb4d24bSKarol Latecki def set_local_nic_info_helper(self): 80126d7e3d4SKarol Latecki return json.loads(self.exec_cmd(["lshw", "-json"])) 802ddb4d24bSKarol Latecki 803741b0839SKarol Latecki def stop(self): 804d9027e81SKarol Latecki self.restore_settings() 8054cff5cc2SJaroslaw Chachulski try: 8064cff5cc2SJaroslaw Chachulski super().stop() 8074cff5cc2SJaroslaw Chachulski except CpuThrottlingError as err: 8084cff5cc2SJaroslaw Chachulski raise err 8094cff5cc2SJaroslaw Chachulski finally: 810a42dfab1SKarol Latecki self.ssh_connection.close() 811a42dfab1SKarol Latecki 812696d55bfSKarol Latecki def exec_cmd(self, cmd, stderr_redirect=False, change_dir=None): 
813696d55bfSKarol Latecki if change_dir: 814696d55bfSKarol Latecki cmd = ["cd", change_dir, ";", *cmd] 815696d55bfSKarol Latecki 81681ccc81dSKarol Latecki # In case one of the command elements contains whitespace and is not 81781ccc81dSKarol Latecki # already quoted, # (e.g. when calling sysctl) quote it again to prevent expansion 81881ccc81dSKarol Latecki # when sending to remote system. 81981ccc81dSKarol Latecki for i, c in enumerate(cmd): 82081ccc81dSKarol Latecki if (" " in c or "\t" in c) and not (c.startswith("'") and c.endswith("'")): 82181ccc81dSKarol Latecki cmd[i] = '"%s"' % c 82226d7e3d4SKarol Latecki cmd = " ".join(cmd) 82381ccc81dSKarol Latecki 82481ccc81dSKarol Latecki # Redirect stderr to stdout thanks using get_pty option if needed 82526d7e3d4SKarol Latecki _, stdout, _ = self.ssh_connection.exec_command(cmd, get_pty=stderr_redirect) 82626d7e3d4SKarol Latecki out = stdout.read().decode(encoding="utf-8") 82726d7e3d4SKarol Latecki 82826d7e3d4SKarol Latecki # Check the return code 82926d7e3d4SKarol Latecki rc = stdout.channel.recv_exit_status() 83026d7e3d4SKarol Latecki if rc: 83126d7e3d4SKarol Latecki raise CalledProcessError(int(rc), cmd, out) 83226d7e3d4SKarol Latecki 83326d7e3d4SKarol Latecki return out 83426d7e3d4SKarol Latecki 835a42dfab1SKarol Latecki def put_file(self, local, remote_dest): 836a42dfab1SKarol Latecki ftp = self.ssh_connection.open_sftp() 837a42dfab1SKarol Latecki ftp.put(local, remote_dest) 838a42dfab1SKarol Latecki ftp.close() 839a42dfab1SKarol Latecki 840a42dfab1SKarol Latecki def get_file(self, remote, local_dest): 841a42dfab1SKarol Latecki ftp = self.ssh_connection.open_sftp() 842a42dfab1SKarol Latecki ftp.get(remote, local_dest) 843a42dfab1SKarol Latecki ftp.close() 844a42dfab1SKarol Latecki 845ad783359SKarol Latecki def copy_spdk(self, local_spdk_zip): 84680e92723SKarol Latecki self.log.info("Copying SPDK sources to initiator %s" % self.name) 847ad783359SKarol Latecki self.put_file(local_spdk_zip, "/tmp/spdk_drop.zip") 
84880e92723SKarol Latecki self.log.info("Copied sources zip from target") 849ad783359SKarol Latecki self.exec_cmd(["unzip", "-qo", "/tmp/spdk_drop.zip", "-d", self.spdk_dir]) 85080e92723SKarol Latecki self.log.info("Sources unpacked") 851ad783359SKarol Latecki 852a42dfab1SKarol Latecki def copy_result_files(self, dest_dir): 85380e92723SKarol Latecki self.log.info("Copying results") 854a42dfab1SKarol Latecki 855a42dfab1SKarol Latecki if not os.path.exists(dest_dir): 856a42dfab1SKarol Latecki os.mkdir(dest_dir) 857a42dfab1SKarol Latecki 858a42dfab1SKarol Latecki # Get list of result files from initiator and copy them back to target 85926d7e3d4SKarol Latecki file_list = self.exec_cmd(["ls", "%s/nvmf_perf" % self.spdk_dir]).strip().split("\n") 860a42dfab1SKarol Latecki 861a42dfab1SKarol Latecki for file in file_list: 862a42dfab1SKarol Latecki self.get_file(os.path.join(self.spdk_dir, "nvmf_perf", file), 863a42dfab1SKarol Latecki os.path.join(dest_dir, file)) 86480e92723SKarol Latecki self.log.info("Done copying results") 865a42dfab1SKarol Latecki 8666fb592faSKamil Godzwon def match_subsystems(self, target_subsystems): 8676fb592faSKamil Godzwon subsystems = [subsystem for subsystem in target_subsystems if subsystem[2] in self.target_nic_ips] 868607d2226SKamil Godzwon if not subsystems: 869607d2226SKamil Godzwon raise Exception("No matching subsystems found on target side!") 870a42dfab1SKarol Latecki subsystems.sort(key=lambda x: x[1]) 87180e92723SKarol Latecki self.log.info("Found matching subsystems on target side:") 872a42dfab1SKarol Latecki for s in subsystems: 87380e92723SKarol Latecki self.log.info(s) 874128df535SKarol Latecki self.subsystem_info_list = subsystems 875a42dfab1SKarol Latecki 8767e291332SPawel Piatek @abstractmethod 8777e291332SPawel Piatek def init_connect(self): 8787e291332SPawel Piatek pass 8797e291332SPawel Piatek 8807e291332SPawel Piatek @abstractmethod 8817e291332SPawel Piatek def init_disconnect(self): 8827e291332SPawel Piatek pass 
8837e291332SPawel Piatek 8847e291332SPawel Piatek @abstractmethod 885a8e59148SKarol Latecki def gen_fio_filename_conf(self, *args, **kwargs): 886a8e59148SKarol Latecki # Logic implemented in SPDKInitiator and KernelInitiator classes 887a8e59148SKarol Latecki pass 888a8e59148SKarol Latecki 88947b87952SKarol Latecki def get_route_nic_numa(self, remote_nvme_ip): 89047b87952SKarol Latecki local_nvme_nic = json.loads(self.exec_cmd(["ip", "-j", "route", "get", remote_nvme_ip])) 89147b87952SKarol Latecki local_nvme_nic = local_nvme_nic[0]["dev"] 89247b87952SKarol Latecki return self.get_nic_numa_node(local_nvme_nic) 89347b87952SKarol Latecki 89423e767c4SKarol Latecki @staticmethod 89523e767c4SKarol Latecki def gen_fio_offset_section(offset_inc, num_jobs): 89623e767c4SKarol Latecki offset_inc = 100 // num_jobs if offset_inc == 0 else offset_inc 89747b87952SKarol Latecki return "\n".join(["size=%s%%" % offset_inc, 89847b87952SKarol Latecki "offset=0%", 89947b87952SKarol Latecki "offset_increment=%s%%" % offset_inc]) 90047b87952SKarol Latecki 90187b94e3bSKarol Latecki def gen_fio_numa_section(self, fio_filenames_list, num_jobs): 90247b87952SKarol Latecki numa_stats = {} 90387b94e3bSKarol Latecki allowed_cpus = [] 90447b87952SKarol Latecki for nvme in fio_filenames_list: 90547b87952SKarol Latecki nvme_numa = self.get_nvme_subsystem_numa(os.path.basename(nvme)) 90647b87952SKarol Latecki numa_stats[nvme_numa] = numa_stats.setdefault(nvme_numa, 0) + 1 90747b87952SKarol Latecki 90847b87952SKarol Latecki # Use the most common NUMA node for this chunk to allocate memory and CPUs 90947b87952SKarol Latecki section_local_numa = sorted(numa_stats.items(), key=lambda item: item[1], reverse=True)[0][0] 91047b87952SKarol Latecki 91183c8db49SKarol Latecki # Check if we have enough free CPUs to pop from the list before assigning them 91283c8db49SKarol Latecki if len(self.available_cpus[section_local_numa]) < num_jobs: 91383c8db49SKarol Latecki if self.allow_cpu_sharing: 91483c8db49SKarol 
Latecki self.log.info("Regenerating available CPU list %s" % section_local_numa) 91583c8db49SKarol Latecki # Remove still available CPUs from the regenerated list. We don't want to 91683c8db49SKarol Latecki # regenerate it with duplicates. 91783c8db49SKarol Latecki cpus_regen = set(self.get_numa_cpu_map()[section_local_numa]) - set(self.available_cpus[section_local_numa]) 91883c8db49SKarol Latecki self.available_cpus[section_local_numa].extend(cpus_regen) 91983c8db49SKarol Latecki self.log.info(self.log.info(self.available_cpus[section_local_numa])) 92083c8db49SKarol Latecki else: 92183c8db49SKarol Latecki self.log.error("No more free CPU cores to use from allowed_cpus list!") 92283c8db49SKarol Latecki raise IndexError 92383c8db49SKarol Latecki 92487b94e3bSKarol Latecki for _ in range(num_jobs): 92587b94e3bSKarol Latecki try: 92687b94e3bSKarol Latecki allowed_cpus.append(str(self.available_cpus[section_local_numa].pop(0))) 92787b94e3bSKarol Latecki except IndexError: 92887b94e3bSKarol Latecki self.log.error("No more free CPU cores to use from allowed_cpus list!") 92987b94e3bSKarol Latecki raise 93087b94e3bSKarol Latecki 93187b94e3bSKarol Latecki return "\n".join(["cpus_allowed=%s" % ",".join(allowed_cpus), 93247b87952SKarol Latecki "numa_mem_policy=prefer:%s" % section_local_numa]) 93323e767c4SKarol Latecki 93476eb5372SKamil Godzwon def gen_fio_config(self, rw, block_size, io_depth, subsys_no, fio_settings): 935986fe095SKarol Latecki fio_conf_template = inspect.cleandoc(""" 936a42dfab1SKarol Latecki [global] 937a42dfab1SKarol Latecki ioengine={ioengine} 938a42dfab1SKarol Latecki {spdk_conf} 939a42dfab1SKarol Latecki thread=1 940a42dfab1SKarol Latecki group_reporting=1 941a42dfab1SKarol Latecki direct=1 9426ec89263SMaciej Wawryk percentile_list=50:90:99:99.5:99.9:99.99:99.999 943a42dfab1SKarol Latecki 944a42dfab1SKarol Latecki norandommap=1 945a42dfab1SKarol Latecki rw={rw} 946a42dfab1SKarol Latecki rwmixread={rwmixread} 947a42dfab1SKarol Latecki bs={block_size} 
948a42dfab1SKarol Latecki time_based=1 949a42dfab1SKarol Latecki ramp_time={ramp_time} 950a42dfab1SKarol Latecki runtime={run_time} 9511c71de70STomasz Zawadzki rate_iops={rate_iops} 952986fe095SKarol Latecki """) 953e068db83SMaciej Wawryk 954e068db83SMaciej Wawryk if self.cpus_allowed is not None: 95580e92723SKarol Latecki self.log.info("Limiting FIO workload execution on specific cores %s" % self.cpus_allowed) 9560ebf2509SKarol Latecki self.num_cores = len(self.get_core_list_from_mask(self.cpus_allowed)) 95742d2e588SKarol Latecki threads = range(0, self.num_cores) 958e068db83SMaciej Wawryk elif hasattr(self, 'num_cores'): 95980e92723SKarol Latecki self.log.info("Limiting FIO workload execution to %s cores" % self.num_cores) 960e068db83SMaciej Wawryk threads = range(0, int(self.num_cores)) 961e068db83SMaciej Wawryk else: 962de7c3691SKarol Latecki self.num_cores = len(self.subsystem_info_list) 963de7c3691SKarol Latecki threads = range(0, len(self.subsystem_info_list)) 964e068db83SMaciej Wawryk 96576eb5372SKamil Godzwon filename_section = self.gen_fio_filename_conf(self.subsystem_info_list, threads, io_depth, fio_settings["num_jobs"], 96676eb5372SKamil Godzwon fio_settings["offset"], fio_settings["offset_inc"], fio_settings["numa_align"]) 967a42dfab1SKarol Latecki 968de7c3691SKarol Latecki fio_config = fio_conf_template.format(ioengine=self.ioengine, spdk_conf=self.spdk_conf, 96976eb5372SKamil Godzwon rw=rw, rwmixread=fio_settings["rw_mix_read"], block_size=block_size, 97076eb5372SKamil Godzwon ramp_time=fio_settings["ramp_time"], run_time=fio_settings["run_time"], 97176eb5372SKamil Godzwon rate_iops=fio_settings["rate_iops"]) 9722c8baa46Swawryk 9732c8baa46Swawryk # TODO: hipri disabled for now, as it causes fio errors: 9742c8baa46Swawryk # io_u error on file /dev/nvme2n1: Operation not supported 9754e07343fSKarol Latecki # See comment in KernelInitiator class, init_connect() function 976de7c3691SKarol Latecki if "io_uring" in self.ioengine: 977986fe095SKarol Latecki 
fio_config = "\n".join([fio_config, "fixedbufs=1", "registerfiles=1", "#hipri=1"]) 97876eb5372SKamil Godzwon if fio_settings["num_jobs"]: 979986fe095SKarol Latecki fio_config = "\n".join([fio_config, f"numjobs={fio_settings['num_jobs']}"]) 980e068db83SMaciej Wawryk if self.cpus_allowed is not None: 981986fe095SKarol Latecki fio_config = "\n".join([fio_config, f"cpus_allowed={self.cpus_allowed}", f"cpus_allowed_policy={self.cpus_allowed_policy}"]) 982986fe095SKarol Latecki fio_config = "\n".join([fio_config, filename_section]) 983a42dfab1SKarol Latecki 98476eb5372SKamil Godzwon fio_config_filename = "%s_%s_%s_m_%s" % (block_size, io_depth, rw, fio_settings["rw_mix_read"]) 985a42dfab1SKarol Latecki if hasattr(self, "num_cores"): 986a42dfab1SKarol Latecki fio_config_filename += "_%sCPU" % self.num_cores 987a42dfab1SKarol Latecki fio_config_filename += ".fio" 988a42dfab1SKarol Latecki 98926d7e3d4SKarol Latecki self.exec_cmd(["mkdir", "-p", "%s/nvmf_perf" % self.spdk_dir]) 99026d7e3d4SKarol Latecki self.exec_cmd(["echo", "'%s'" % fio_config, ">", "%s/nvmf_perf/%s" % (self.spdk_dir, fio_config_filename)]) 99180e92723SKarol Latecki self.log.info("Created FIO Config:") 99280e92723SKarol Latecki self.log.info(fio_config) 993a42dfab1SKarol Latecki 994a42dfab1SKarol Latecki return os.path.join(self.spdk_dir, "nvmf_perf", fio_config_filename) 995a42dfab1SKarol Latecki 9960eab3515SMaciej Wawryk def set_cpu_frequency(self): 9970eab3515SMaciej Wawryk if self.cpu_frequency is not None: 9980eab3515SMaciej Wawryk try: 99926d7e3d4SKarol Latecki self.exec_cmd(["sudo", "cpupower", "frequency-set", "-g", "userspace"], True) 100026d7e3d4SKarol Latecki self.exec_cmd(["sudo", "cpupower", "frequency-set", "-f", "%s" % self.cpu_frequency], True) 100180e92723SKarol Latecki self.log.info(self.exec_cmd(["sudo", "cpupower", "frequency-info"])) 10020eab3515SMaciej Wawryk except Exception: 100380e92723SKarol Latecki self.log.error("ERROR: cpu_frequency will not work when intel_pstate is enabled!") 
1004e0659fa8SPawel Piatek sys.exit(1) 10050eab3515SMaciej Wawryk else: 100680e92723SKarol Latecki self.log.warning("WARNING: you have disabled intel_pstate and using default cpu governance.") 10070eab3515SMaciej Wawryk 1008babef5b1SKarol Latecki def run_fio(self, fio_config_file, run_num=1): 1009a42dfab1SKarol Latecki job_name, _ = os.path.splitext(fio_config_file) 101080e92723SKarol Latecki self.log.info("Starting FIO run for job: %s" % job_name) 101180e92723SKarol Latecki self.log.info("Using FIO: %s" % self.fio_bin) 10120eab3515SMaciej Wawryk 1013babef5b1SKarol Latecki output_filename = job_name + "_run_" + str(run_num) + "_" + self.name + ".json" 10140574f36dSKarol Latecki try: 1015ead0cc34SKarol Latecki output = self.exec_cmd(["sudo", self.fio_bin, fio_config_file, "--output-format=json", 1016ead0cc34SKarol Latecki "--output=%s" % output_filename, "--eta=never"], True) 101780e92723SKarol Latecki self.log.info(output) 101880e92723SKarol Latecki self.log.info("FIO run finished. Results in: %s" % output_filename) 1019babef5b1SKarol Latecki except subprocess.CalledProcessError as e: 1020babef5b1SKarol Latecki self.log.error("ERROR: Fio process failed!") 1021babef5b1SKarol Latecki self.log.error(e.stdout) 1022a42dfab1SKarol Latecki 1023632703f6SMaciej Wawryk def sys_config(self): 102480e92723SKarol Latecki self.log.info("====Kernel release:====") 102580e92723SKarol Latecki self.log.info(self.exec_cmd(["uname", "-r"])) 102680e92723SKarol Latecki self.log.info("====Kernel command line:====") 102726d7e3d4SKarol Latecki cmdline = self.exec_cmd(["cat", "/proc/cmdline"]) 102880e92723SKarol Latecki self.log.info('\n'.join(self.get_uncommented_lines(cmdline.splitlines()))) 102980e92723SKarol Latecki self.log.info("====sysctl conf:====") 10303db78959SKarol Latecki sysctl = self.exec_cmd(["sudo", "cat", "/etc/sysctl.conf"]) 103180e92723SKarol Latecki self.log.info('\n'.join(self.get_uncommented_lines(sysctl.splitlines()))) 103280e92723SKarol Latecki self.log.info("====Cpu 
power info:====") 103380e92723SKarol Latecki self.log.info(self.exec_cmd(["cpupower", "frequency-info"])) 1034632703f6SMaciej Wawryk 1035a42dfab1SKarol Latecki 1036a42dfab1SKarol Lateckiclass KernelTarget(Target): 10379fec8853SKarol Latecki def __init__(self, name, general_config, target_config): 10386d988167SPawel Piatek super().__init__(name, general_config, target_config) 10399fec8853SKarol Latecki # Defaults 10408301b041SPawel Piatek self.nvmet_bin = target_config.get('nvmet_bin', 'nvmetcli') 1041a42dfab1SKarol Latecki 1042de7c3691SKarol Latecki def load_drivers(self): 1043de7c3691SKarol Latecki self.log.info("Loading drivers") 1044de7c3691SKarol Latecki super().load_drivers() 1045de7c3691SKarol Latecki if self.null_block: 1046de7c3691SKarol Latecki self.exec_cmd(["sudo", "modprobe", "null_blk", "nr_devices=%s" % self.null_block]) 1047de7c3691SKarol Latecki 1048de7c3691SKarol Latecki def configure_adq(self): 1049de7c3691SKarol Latecki self.log.warning("WARNING: ADQ setup not yet supported for Kernel mode. Skipping configuration.") 1050de7c3691SKarol Latecki 1051de7c3691SKarol Latecki def adq_configure_tc(self): 1052de7c3691SKarol Latecki self.log.warning("WARNING: ADQ setup not yet supported for Kernel mode. Skipping configuration.") 1053de7c3691SKarol Latecki 1054de7c3691SKarol Latecki def adq_set_busy_read(self, busy_read_val): 1055de7c3691SKarol Latecki self.log.warning("WARNING: ADQ setup not yet supported for Kernel mode. 
def get_nvme_devices(self):
    """Return the NVMe block device paths that may be used by the target.

    Devices are listed with ``lsblk``; entries whose name does not contain
    "nvme" are ignored. A device is dropped when its PCI address (as
    reported by ``get_nvme_device_bdf``) is in ``self.nvme_blocklist``.
    When ``self.nvme_allowlist`` is empty every remaining device is kept,
    otherwise only devices whose address is in the allowlist.

    Returns:
        list of device paths in the order reported by ``lsblk``.
    """
    dev_list = self.exec_cmd(["lsblk", "-o", "NAME", "-nlpd"]).split("\n")
    nvme_list = []
    for dev in dev_list:
        if "nvme" not in dev:
            continue
        # Each address lookup runs a command, so do it once per device
        # and reuse the result for both the blocklist and allowlist checks.
        bdf = self.get_nvme_device_bdf(dev)
        if bdf in self.nvme_blocklist:
            continue
        if not self.nvme_allowlist or bdf in self.nvme_allowlist:
            nvme_list.append(dev)
    return nvme_list
self.spread_bdevs(len(nvme_list)): 1094e05299f1SKarol Latecki port = str(4420 + bdev_num) 1095e05299f1SKarol Latecki nqn = "nqn.2018-09.io.spdk:cnode%s" % bdev_num 1096e05299f1SKarol Latecki serial = "SPDK00%s" % bdev_num 1097e05299f1SKarol Latecki bdev_name = nvme_list[bdev_num] 1098a42dfab1SKarol Latecki 1099a42dfab1SKarol Latecki nvmet_cfg["subsystems"].append({ 1100a42dfab1SKarol Latecki "allowed_hosts": [], 1101a42dfab1SKarol Latecki "attr": { 1102a42dfab1SKarol Latecki "allow_any_host": "1", 1103e05299f1SKarol Latecki "serial": serial, 1104a42dfab1SKarol Latecki "version": "1.3" 1105a42dfab1SKarol Latecki }, 1106a42dfab1SKarol Latecki "namespaces": [ 1107a42dfab1SKarol Latecki { 1108a42dfab1SKarol Latecki "device": { 1109e05299f1SKarol Latecki "path": bdev_name, 1110a42dfab1SKarol Latecki "uuid": "%s" % uuid.uuid4() 1111a42dfab1SKarol Latecki }, 1112a42dfab1SKarol Latecki "enable": 1, 1113e05299f1SKarol Latecki "nsid": port 1114a42dfab1SKarol Latecki } 1115a42dfab1SKarol Latecki ], 1116128df535SKarol Latecki "nqn": nqn 1117a42dfab1SKarol Latecki }) 1118a42dfab1SKarol Latecki 1119a42dfab1SKarol Latecki nvmet_cfg["ports"].append({ 1120a42dfab1SKarol Latecki "addr": { 1121a42dfab1SKarol Latecki "adrfam": "ipv4", 1122a42dfab1SKarol Latecki "traddr": ip, 1123e05299f1SKarol Latecki "trsvcid": port, 1124e05299f1SKarol Latecki "trtype": self.transport 1125a42dfab1SKarol Latecki }, 1126e05299f1SKarol Latecki "portid": bdev_num, 1127a42dfab1SKarol Latecki "referrals": [], 1128128df535SKarol Latecki "subsystems": [nqn] 1129a42dfab1SKarol Latecki }) 1130e05299f1SKarol Latecki 11314f74eb12SKarol Latecki self.subsystem_info_list.append((port, nqn, ip)) 1132e05299f1SKarol Latecki self.subsys_no = len(self.subsystem_info_list) 1133a42dfab1SKarol Latecki 1134a42dfab1SKarol Latecki with open("kernel.conf", "w") as fh: 1135a42dfab1SKarol Latecki fh.write(json.dumps(nvmet_cfg, indent=2)) 1136a42dfab1SKarol Latecki 1137a42dfab1SKarol Latecki def tgt_start(self): 
def __init__(self, name, general_config, target_config):
    """Initialize an SPDK NVMe-oF target from its configuration sections.

    Args:
        name: name of this server, forwarded to the parent initializer.
        general_config: general configuration section, forwarded to the parent.
        target_config: target configuration section; must contain "core_mask".
            Optional fields and their defaults are listed in ``config_fields``.

    Raises:
        KeyError: if "core_mask" is missing from ``target_config``.
    """
    # IRQ affinity on SPDK Target side takes Target's core mask into consideration.
    # Method setting IRQ affinity is run as part of parent classes init,
    # so we need to have self.core_mask set before changing IRQ affinity.
    self.core_mask = target_config["core_mask"]
    self.num_cores = len(self.get_core_list_from_mask(self.core_mask))

    super().__init__(name, general_config, target_config)

    # Format: property, default value
    config_fields = [
        ConfigField(name='dif_insert_strip', default=False),
        ConfigField(name='null_block_dif_type', default=0),
        ConfigField(name='num_shared_buffers', default=4096),
        ConfigField(name='max_queue_depth', default=128),
        ConfigField(name='bpf_scripts', default=[]),
        ConfigField(name='scheduler_core_limit', default=None),
        ConfigField(name='dsa_settings', default=False),
        ConfigField(name='iobuf_small_pool_count', default=32767),
        ConfigField(name='iobuf_large_pool_count', default=16383),
        ConfigField(name='num_cqe', default=4096),
        ConfigField(name='sock_impl', default='posix')
    ]

    self.read_config(config_fields, target_config)

    self.bpf_proc = None
    # Honor the "dsa_settings" option. It used to be read from the config and
    # then ignored because enable_dsa was unconditionally set to False.
    # NOTE(review): assumes read_config stores each field as an attribute of
    # the same name; getattr keeps the previous value (False) otherwise.
    self.enable_dsa = bool(getattr(self, "dsa_settings", False))

    self.log.info("====DSA settings:====")
    self.log.info("DSA enabled: %s" % (self.enable_dsa))
def get_nvme_devices(self):
    """Return PCI addresses of local NVMe devices usable by the SPDK target.

    The addresses are taken from the "traddr" parameter of every entry in
    the "config" list of the JSON printed by SPDK's ``scripts/gen_nvme.sh``.
    Addresses in ``self.nvme_blocklist`` are dropped. When
    ``self.nvme_allowlist`` is empty all remaining addresses are returned,
    otherwise only those present in the allowlist.

    Returns:
        list of address strings in the order reported by the script.
    """
    gen_nvme_script = os.path.join(self.spdk_dir, "scripts/gen_nvme.sh")
    bdev_subsys_json_obj = json.loads(self.exec_cmd([gen_nvme_script]))

    bdev_bdfs = []
    for bdev in bdev_subsys_json_obj["config"]:
        bdev_traddr = bdev["params"]["traddr"]
        if bdev_traddr in self.nvme_blocklist:
            continue
        # The two allowlist cases are mutually exclusive, so one condition
        # expresses them and an address can never be appended twice.
        if not self.nvme_allowlist or bdev_traddr in self.nvme_allowlist:
            bdev_bdfs.append(bdev_traddr)
    return bdev_bdfs
def spdk_tgt_add_nullblock(self, null_block_count):
    """Create ``null_block_count`` null bdevs named Nvme<i>n1 via RPC.

    Every bdev is created with a block size of 4096. A metadata size of 128
    is used when ``self.null_block_dif_type`` is non-zero, 0 otherwise.
    The bdev list returned by the target is printed afterwards.
    """
    block_size = 4096
    dif_type = self.null_block_dif_type
    md_size = 128 if dif_type != 0 else 0

    self.log.info("Adding null block bdevices to config via RPC")
    for idx in range(null_block_count):
        self.log.info("Setting bdev protection to :%s" % dif_type)
        bdev_name = "Nvme{}n1".format(idx)
        rpc.bdev.bdev_null_create(self.client, 102400, block_size, bdev_name,
                                  dif_type=dif_type, md_size=md_size)

    self.log.info("SPDK Bdevs configuration:")
    bdevs = rpc.bdev.bdev_get_bdevs(self.client)
    rpc_client.print_dict(bdevs)
def spdk_tgt_add_subsystem_conf(self, ips=None, req_num_disks=None):
    """Create one NVMe-oF subsystem per bdev and expose it over RPC.

    For every (ip, bdev number) pair produced by ``self.spread_bdevs`` a
    subsystem is created, the bdev Nvme<N>n1 is added as its namespace and
    listeners are added for the subsystem and for discovery on port
    4420 + N. Each (port, nqn, ip) tuple is recorded in
    ``self.subsystem_info_list``.

    Args:
        ips: not used by this method.
        req_num_disks: number of bdevs to expose; when falsy the count of
            local NVMe devices is used.
    """
    self.log.info("Adding subsystems to config")
    num_disks = req_num_disks or self.get_nvme_devices_count()

    for ip, bdev_num in self.spread_bdevs(num_disks):
        port = str(4420 + bdev_num)
        nqn = "nqn.2018-09.io.spdk:cnode%s" % bdev_num

        rpc.nvmf.nvmf_create_subsystem(self.client, nqn, "SPDK00%s" % bdev_num,
                                       allow_any_host=True, max_namespaces=8)
        rpc.nvmf.nvmf_subsystem_add_ns(client=self.client, nqn=nqn,
                                       bdev_name="Nvme%sn1" % bdev_num)

        listener_args = {
            "trtype": self.transport,
            "traddr": ip,
            "trsvcid": port,
            "adrfam": "ipv4",
        }
        for listener_nqn in (nqn, "discovery"):
            rpc.nvmf.nvmf_subsystem_add_listener(self.client, nqn=listener_nqn, **listener_args)

        self.subsystem_info_list.append((port, nqn, ip))
    self.subsys_no = len(self.subsystem_info_list)

    self.log.info("SPDK NVMeOF subsystem configuration:")
    subsystems = rpc.nvmf.nvmf_get_subsystems(self.client)
    rpc_client.print_dict(subsystems)
def bpf_start(self):
    """Start SPDK's bpftrace.sh wrapper for the configured trace scripts.

    The PID of the target process is read from the file at ``self.pid``.
    Trace output is directed to bpf_traces.txt in ``self.results_dir`` via
    the BPF_OUTFILE environment variable. The started process is stored in
    ``self.bpf_proc``.
    """
    self.log.info("Starting BPF Trace scripts: %s" % self.bpf_scripts)
    bpf_script = os.path.join(self.spdk_dir, "scripts/bpftrace.sh")
    bpf_traces = [os.path.join(self.spdk_dir, "scripts/bpf", trace) for trace in self.bpf_scripts]
    results_path = os.path.join(self.results_dir, "bpf_traces.txt")

    with open(self.pid, "r") as fh:
        # Strip whitespace so a trailing newline never ends up in the PID argument.
        nvmf_pid = fh.readline().strip()

    cmd = [bpf_script, nvmf_pid, *bpf_traces]
    self.log.info(cmd)
    # NOTE(review): env= replaces the child's whole environment with this one
    # variable; confirm that dropping PATH etc. is intended.
    self.bpf_proc = subprocess.Popen(cmd, env={"BPF_OUTFILE": results_path})


def tgt_start(self):
    """Start the nvmf_tgt process and configure it over JSON-RPC.

    The target is launched with --wait-for-rpc and the configured core mask,
    its PID is written to <spdk_dir>/nvmf.pid (path kept in ``self.pid``) and
    the process object is kept in ``self.nvmf_proc``. Once the RPC socket
    appears, socket, iobuf, optional zero-copy / ADQ / DSA options and the
    scheduler are set before framework initialization is started. BPF traces
    are started when configured and the NVMe-oF configuration is applied last.

    Raises:
        RuntimeError: if the target process exits before its RPC socket
            shows up.
    """
    if self.null_block:
        self.subsys_no = 1
    else:
        self.subsys_no = self.get_nvme_devices_count()
    self.log.info("Starting SPDK NVMeOF Target process")
    nvmf_app_path = os.path.join(self.spdk_dir, "build/bin/nvmf_tgt")
    proc = subprocess.Popen([nvmf_app_path, "--wait-for-rpc", "-m", self.core_mask])
    self.pid = os.path.join(self.spdk_dir, "nvmf.pid")

    with open(self.pid, "w") as fh:
        fh.write(str(proc.pid))
    self.nvmf_proc = proc
    # self.pid holds the pid *file path*; log the actual process id instead.
    self.log.info("SPDK NVMeOF Target PID=%s" % proc.pid)
    self.log.info("Waiting for spdk to initialize...")

    rpc_sock = "/var/tmp/spdk.sock"
    while not os.path.exists(rpc_sock):
        # Without this check a target that died during startup would make
        # this loop spin forever.
        exit_code = proc.poll()
        if exit_code is not None:
            self.log.error("ERROR: SPDK NVMeOF Target exited with code %s before creating %s"
                           % (exit_code, rpc_sock))
            raise RuntimeError("SPDK NVMeOF Target exited prematurely with code %s" % exit_code)
        time.sleep(1)
    self.client = rpc_client.JSONRPCClient(rpc_sock)

    rpc.sock.sock_set_default_impl(self.client, impl_name=self.sock_impl)
    rpc.iobuf.iobuf_set_options(self.client,
                                small_pool_count=self.iobuf_small_pool_count,
                                large_pool_count=self.iobuf_large_pool_count,
                                small_bufsize=None,
                                large_bufsize=None)

    if self.enable_zcopy:
        rpc.sock.sock_impl_set_options(self.client, impl_name=self.sock_impl,
                                       enable_zerocopy_send_server=True)
        self.log.info("Target socket options:")
        rpc_client.print_dict(rpc.sock.sock_impl_get_options(self.client, impl_name=self.sock_impl))

    if self.enable_adq:
        rpc.sock.sock_impl_set_options(self.client, impl_name=self.sock_impl, enable_placement_id=1)
        rpc.bdev.bdev_nvme_set_options(self.client, timeout_us=0, action_on_timeout=None,
                                       nvme_adminq_poll_period_us=100000, retry_count=4)

    if self.enable_dsa:
        rpc.dsa.dsa_scan_accel_module(self.client, config_kernel_mode=None)
        self.log.info("Target DSA accel module enabled")

    rpc.app.framework_set_scheduler(self.client, name=self.scheduler_name, core_limit=self.scheduler_core_limit)
    rpc.framework_start_init(self.client)

    if self.bpf_scripts:
        self.bpf_start()

    self.spdk_tgt_configure()
def stop(self):
    """Stop BPF tracing and the SPDK target process, then restore settings.

    The BPF trace process (if any) is terminated and waited for. The target
    process is asked to terminate and given 30 seconds; on any failure it is
    killed and leftover RPC socket files are removed. Finally
    ``restore_settings`` and the parent class's ``stop`` are called.
    """
    if self.bpf_proc:
        self.log.info("Stopping BPF Trace script")
        self.bpf_proc.terminate()
        self.bpf_proc.wait()

    if hasattr(self, "nvmf_proc"):
        try:
            self.nvmf_proc.terminate()
            self.nvmf_proc.wait(timeout=30)
        except Exception as e:
            self.log.info("Failed to terminate SPDK Target process. Sending SIGKILL.")
            self.log.info(e)
            self.nvmf_proc.kill()
            self.nvmf_proc.communicate()
            # Try to clean up RPC socket files if they were not removed
            # because of using 'kill'. Each file is handled on its own so a
            # missing socket does not prevent removal of the lock file.
            for leftover in ("/var/tmp/spdk.sock", "/var/tmp/spdk.sock.lock"):
                try:
                    os.remove(leftover)
                except FileNotFoundError:
                    pass
    self.restore_settings()
    super().stop()
def get_connected_nvme_list(self):
    """Return device names of connected NVMe-oF namespaces.

    Parses the JSON output of ``nvme list`` and keeps the base name of the
    device path of every entry whose model number contains "SPDK" or "Linux".
    """
    raw_listing = self.exec_cmd(["sudo", "nvme", "list", "-o", "json"])

    connected = []
    for entry in json.loads(raw_listing)["Devices"]:
        model = entry["ModelNumber"]
        if "SPDK" not in model and "Linux" not in model:
            continue
        connected.append(os.path.basename(entry["DevicePath"]))
    return connected
Latecki 14572c8baa46Swawryk if "io_uring" in self.ioengine: 145880e92723SKarol Latecki self.log.info("Setting block layer settings for io_uring.") 14592c8baa46Swawryk 14602c8baa46Swawryk # TODO: io_poll=1 and io_poll_delay=-1 params not set here, because 14612c8baa46Swawryk # apparently it's not possible for connected subsystems. 14622c8baa46Swawryk # Results in "error: Invalid argument" 14632c8baa46Swawryk block_sysfs_settings = { 14642c8baa46Swawryk "iostats": "0", 14652c8baa46Swawryk "rq_affinity": "0", 14662c8baa46Swawryk "nomerges": "2" 14672c8baa46Swawryk } 14682c8baa46Swawryk 14692c8baa46Swawryk for disk in self.get_connected_nvme_list(): 14702c8baa46Swawryk sysfs = os.path.join("/sys/block", disk, "queue") 14712c8baa46Swawryk for k, v in block_sysfs_settings.items(): 14722c8baa46Swawryk sysfs_opt_path = os.path.join(sysfs, k) 14732c8baa46Swawryk try: 14742c8baa46Swawryk self.exec_cmd(["sudo", "bash", "-c", "echo %s > %s" % (v, sysfs_opt_path)], stderr_redirect=True) 1475f67c74a2SKarol Latecki except CalledProcessError as e: 147680e92723SKarol Latecki self.log.warning("Warning: command %s failed due to error %s. %s was not set!" 
def get_nvme_subsystem_numa(self, dev_name):
    """Return the NUMA node of the NIC used to reach a connected NVMe device.

    The controller name is derived from the device name, the remote IPv4
    address is extracted from /sys/class/nvme/<controller>/address and the
    result of ``self.get_route_nic_numa`` for that address is returned.

    Args:
        dev_name: device name or path, e.g. "/dev/nvme0n1".

    Raises:
        ValueError: if no IPv4 address is found in the controller's address.
    """
    base_name = os.path.basename(dev_name)
    # Take the leading "nvme<N>" as the controller name so namespace ids with
    # more than one digit (e.g. nvme0n10) work too. For any other name fall
    # back to dropping the last two characters.
    ctrl_match = re.match(r'nvme\d+', base_name)
    nvme_ctrl = ctrl_match.group(0) if ctrl_match else base_name[:-2]

    address = self.exec_cmd(["cat", "/sys/class/nvme/%s/address" % nvme_ctrl])
    remote_nvme_ip = re.search(r'(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})', address)
    if remote_nvme_ip is None:
        raise ValueError("No IPv4 address found for NVMe controller %s in: %r" % (nvme_ctrl, address))
    return self.get_route_nic_numa(remote_nvme_ip.group(0))
15008dace9e2SKarol Latecki nvme_list = [os.path.join("/dev", nvme) for nvme in self.get_connected_nvme_list()] 150147b87952SKarol Latecki nvme_numas = [self.get_nvme_subsystem_numa(x) for x in nvme_list] 150247b87952SKarol Latecki nvme_list = [x for _, x in sorted(zip(nvme_numas, nvme_list))] 1503a42dfab1SKarol Latecki 1504a42dfab1SKarol Latecki filename_section = "" 15056f702d4eSMaciej Wawryk nvme_per_split = int(len(nvme_list) / len(threads)) 15066f702d4eSMaciej Wawryk remainder = len(nvme_list) % len(threads) 15079de6d03eSMaciej Wawryk iterator = iter(nvme_list) 15080ebd0dceSMaciej Wawryk result = [] 15097c1e69b3SMaciej Wawryk for i in range(len(threads)): 15100ebd0dceSMaciej Wawryk result.append([]) 151199465fbdSPawel Piatek for _ in range(nvme_per_split): 15120ebd0dceSMaciej Wawryk result[i].append(next(iterator)) 15130ebd0dceSMaciej Wawryk if remainder: 15140ebd0dceSMaciej Wawryk result[i].append(next(iterator)) 15150ebd0dceSMaciej Wawryk remainder -= 1 15160ebd0dceSMaciej Wawryk for i, r in enumerate(result): 15170ebd0dceSMaciej Wawryk header = "[filename%s]" % i 15189de6d03eSMaciej Wawryk disks = "\n".join(["filename=%s" % x for x in r]) 1519af0057e6SMaciej Wawryk job_section_qd = round((io_depth * len(r)) / num_jobs) 1520cb94476cSMaciej Wawryk if job_section_qd == 0: 1521cb94476cSMaciej Wawryk job_section_qd = 1 1522af0057e6SMaciej Wawryk iodepth = "iodepth=%s" % job_section_qd 152323e767c4SKarol Latecki 152423e767c4SKarol Latecki offset_section = "" 152523e767c4SKarol Latecki if offset: 152623e767c4SKarol Latecki offset_section = self.gen_fio_offset_section(offset_inc, num_jobs) 152723e767c4SKarol Latecki 1528b27d57dcSKarol Latecki numa_opts = "" 1529b27d57dcSKarol Latecki if numa_align: 153087b94e3bSKarol Latecki numa_opts = self.gen_fio_numa_section(r, num_jobs) 153147b87952SKarol Latecki 153247b87952SKarol Latecki filename_section = "\n".join([filename_section, header, disks, iodepth, numa_opts, offset_section, ""]) 1533a42dfab1SKarol Latecki 
def __init__(self, name, general_config, initiator_config):
    """Initialize an SPDK-based initiator from its configuration sections.

    After the parent initializer runs, SPDK is installed unless
    ``self.skip_spdk_install`` is set. Optional settings are then read from
    ``initiator_config`` and the fio ioengine / JSON config option strings
    are derived from ``self.spdk_dir``.
    """
    super().__init__(name, general_config, initiator_config)

    if not self.skip_spdk_install:
        self.install_spdk()

    # Optional fields and the defaults used when they are absent
    optional_fields = (
        ('enable_data_digest', False),
        ('small_pool_count', 32768),
        ('large_pool_count', 16384),
        ('sock_impl', 'posix'),
    )
    for field_name, default in optional_fields:
        setattr(self, field_name, initiator_config.get(field_name, default))

    try:
        self.num_cores = initiator_config["num_cores"]
    except KeyError:
        # Not configured: leave num_cores untouched.
        pass

    spdk_dir = self.spdk_dir
    self.ioengine = "%s/build/fio/spdk_bdev" % spdk_dir
    self.spdk_conf = "spdk_json_conf=%s/bdev.conf" % spdk_dir
def init_connect(self):
    """Prepare bdev.conf for SPDK's fio bdev plugin.

    This is not a real "connect" like "nvme connect": the fio bdev plugin
    initiates the connection itself just before starting IO traffic. The
    method exists as the equivalent of KernelInitiator's init_connect and
    only writes the generated JSON configuration to <spdk_dir>/bdev.conf.
    """
    conf_path = "%s/bdev.conf" % self.spdk_dir
    quoted_conf = "'%s'" % self.gen_spdk_bdev_conf(self.subsystem_info_list)
    self.exec_cmd(["echo", quoted_conf, ">", conf_path])
def gen_spdk_bdev_conf(self, remote_subsystem_list):
    """Build the SPDK JSON configuration used by the fio bdev plugin.

    One bdev_nvme_attach_controller entry named Nvme<i> is generated for
    every (port, nqn, address) item of ``remote_subsystem_list``. A
    "priority" of "1" is added when ADQ is enabled and "ddgst" when data
    digest is enabled. iobuf pool sizes and the default socket
    implementation are included as separate subsystem sections.

    Returns:
        the configuration serialized as a JSON string indented by 2 spaces.
    """
    attach_calls = []
    for idx, subsys in enumerate(remote_subsystem_list):
        sub_port, sub_nqn, sub_addr = (str(item) for item in subsys)
        params = {
            "name": "Nvme{}".format(idx),
            "trtype": self.transport,
            "traddr": sub_addr,
            "trsvcid": sub_port,
            "subnqn": sub_nqn,
            "adrfam": "IPv4"
        }
        if self.enable_adq:
            params["priority"] = "1"
        if self.enable_data_digest:
            params["ddgst"] = self.enable_data_digest
        attach_calls.append({"method": "bdev_nvme_attach_controller", "params": params})

    iobuf_options = {
        "method": "iobuf_set_options",
        "params": {
            "small_pool_count": self.small_pool_count,
            "large_pool_count": self.large_pool_count
        }
    }
    sock_options = {
        "method": "sock_set_default_impl",
        "params": {
            "impl_name": self.sock_impl
        }
    }

    spdk_cfg_section = {
        "subsystems": [
            {"subsystem": "bdev", "config": attach_calls},
            {"subsystem": "iobuf", "config": [iobuf_options]},
            {"subsystem": "sock", "config": [sock_options]},
        ]
    }
    return json.dumps(spdk_cfg_section, indent=2)
16527c1e69b3SMaciej Wawryk filenames = ["Nvme%sn1" % x for x in range(0, len(subsystems))] 165347b87952SKarol Latecki filename_numas = [self.get_nvme_subsystem_numa(x) for x in filenames] 165447b87952SKarol Latecki filenames = [x for _, x in sorted(zip(filename_numas, filenames))] 165547b87952SKarol Latecki 16567c1e69b3SMaciej Wawryk nvme_per_split = int(len(subsystems) / len(threads)) 16577c1e69b3SMaciej Wawryk remainder = len(subsystems) % len(threads) 1658e068db83SMaciej Wawryk iterator = iter(filenames) 1659e068db83SMaciej Wawryk result = [] 16607c1e69b3SMaciej Wawryk for i in range(len(threads)): 1661e068db83SMaciej Wawryk result.append([]) 166299465fbdSPawel Piatek for _ in range(nvme_per_split): 1663e068db83SMaciej Wawryk result[i].append(next(iterator)) 1664e068db83SMaciej Wawryk if remainder: 1665e068db83SMaciej Wawryk result[i].append(next(iterator)) 1666e068db83SMaciej Wawryk remainder -= 1 1667e068db83SMaciej Wawryk for i, r in enumerate(result): 1668e068db83SMaciej Wawryk header = "[filename%s]" % i 1669e068db83SMaciej Wawryk disks = "\n".join(["filename=%s" % x for x in r]) 1670af0057e6SMaciej Wawryk job_section_qd = round((io_depth * len(r)) / num_jobs) 1671cb94476cSMaciej Wawryk if job_section_qd == 0: 1672cb94476cSMaciej Wawryk job_section_qd = 1 1673af0057e6SMaciej Wawryk iodepth = "iodepth=%s" % job_section_qd 167423e767c4SKarol Latecki 167523e767c4SKarol Latecki offset_section = "" 167623e767c4SKarol Latecki if offset: 167723e767c4SKarol Latecki offset_section = self.gen_fio_offset_section(offset_inc, num_jobs) 167823e767c4SKarol Latecki 1679b27d57dcSKarol Latecki numa_opts = "" 1680b27d57dcSKarol Latecki if numa_align: 168187b94e3bSKarol Latecki numa_opts = self.gen_fio_numa_section(r, num_jobs) 168247b87952SKarol Latecki 168347b87952SKarol Latecki filename_section = "\n".join([filename_section, header, disks, iodepth, numa_opts, offset_section, ""]) 1684a42dfab1SKarol Latecki 1685a42dfab1SKarol Latecki return filename_section 
1686a42dfab1SKarol Latecki 168747b87952SKarol Latecki def get_nvme_subsystem_numa(self, bdev_name): 168847b87952SKarol Latecki bdev_conf_json_obj = json.loads(self.exec_cmd(["cat", "%s/bdev.conf" % self.spdk_dir])) 168947b87952SKarol Latecki bdev_conf_json_obj = bdev_conf_json_obj["subsystems"][0]["config"] 169047b87952SKarol Latecki 169147b87952SKarol Latecki # Remove two last characters to get controller name instead of subsystem name 169247b87952SKarol Latecki nvme_ctrl = bdev_name[:-2] 16938474d7ccSMichal Berger for bdev in bdev_conf_json_obj: 16948474d7ccSMichal Berger if bdev["method"] == "bdev_nvme_attach_controller" and bdev["params"]["name"] == nvme_ctrl: 16958474d7ccSMichal Berger return self.get_route_nic_numa(bdev["params"]["traddr"]) 16968474d7ccSMichal Berger return None 169747b87952SKarol Latecki 1698a42dfab1SKarol Latecki 169925541b28SKamil Godzwondef initiators_match_subsystems(initiators, target_obj): 170025541b28SKamil Godzwon for i in initiators: 170125541b28SKamil Godzwon i.match_subsystems(target_obj.subsystem_info_list) 170225541b28SKamil Godzwon if i.enable_adq: 170325541b28SKamil Godzwon i.adq_configure_tc() 170425541b28SKamil Godzwon 170525541b28SKamil Godzwon 170676eb5372SKamil Godzwondef run_single_fio_test(args, initiators, configs, target_obj, block_size, io_depth, rw, fio_settings): 1707661bb0b7SKamil Godzwon 170876eb5372SKamil Godzwon for run_no in range(1, fio_settings["run_num"]+1): 1709661bb0b7SKamil Godzwon threads = [] 1710661bb0b7SKamil Godzwon power_daemon = None 171176eb5372SKamil Godzwon measurements_prefix = "%s_%s_%s_m_%s_run_%s" % (block_size, io_depth, rw, fio_settings["rw_mix_read"], run_no) 1712661bb0b7SKamil Godzwon 1713661bb0b7SKamil Godzwon for i, cfg in zip(initiators, configs): 1714661bb0b7SKamil Godzwon t = threading.Thread(target=i.run_fio, args=(cfg, run_no)) 1715661bb0b7SKamil Godzwon threads.append(t) 1716661bb0b7SKamil Godzwon if target_obj.enable_sar: 1717661bb0b7SKamil Godzwon sar_file_prefix = 
measurements_prefix + "_sar" 171876eb5372SKamil Godzwon t = threading.Thread(target=target_obj.measure_sar, 171976eb5372SKamil Godzwon args=(args.results, sar_file_prefix, fio_settings["ramp_time"], fio_settings["run_time"])) 1720661bb0b7SKamil Godzwon threads.append(t) 1721661bb0b7SKamil Godzwon 1722661bb0b7SKamil Godzwon if target_obj.enable_pcm: 1723661bb0b7SKamil Godzwon pcm_fnames = ["%s_%s.csv" % (measurements_prefix, x) for x in ["pcm_cpu"]] 1724661bb0b7SKamil Godzwon pcm_cpu_t = threading.Thread(target=target_obj.measure_pcm, 172576eb5372SKamil Godzwon args=(args.results, pcm_fnames[0], fio_settings["ramp_time"], fio_settings["run_time"])) 1726661bb0b7SKamil Godzwon threads.append(pcm_cpu_t) 1727661bb0b7SKamil Godzwon 1728661bb0b7SKamil Godzwon if target_obj.enable_bw: 1729661bb0b7SKamil Godzwon bandwidth_file_name = measurements_prefix + "_bandwidth.csv" 1730661bb0b7SKamil Godzwon t = threading.Thread(target=target_obj.measure_network_bandwidth, 173176eb5372SKamil Godzwon args=(args.results, bandwidth_file_name, fio_settings["ramp_time"], fio_settings["run_time"])) 1732661bb0b7SKamil Godzwon threads.append(t) 1733661bb0b7SKamil Godzwon 1734661bb0b7SKamil Godzwon if target_obj.enable_dpdk_memory: 1735661bb0b7SKamil Godzwon dpdk_mem_file_name = measurements_prefix + "_dpdk_mem.txt" 173676eb5372SKamil Godzwon t = threading.Thread(target=target_obj.measure_dpdk_memory, args=(args.results, dpdk_mem_file_name, fio_settings["ramp_time"])) 1737661bb0b7SKamil Godzwon threads.append(t) 1738661bb0b7SKamil Godzwon 1739661bb0b7SKamil Godzwon if target_obj.enable_pm: 1740661bb0b7SKamil Godzwon power_daemon = threading.Thread(target=target_obj.measure_power, 1741661bb0b7SKamil Godzwon args=(args.results, measurements_prefix, script_full_dir, 174276eb5372SKamil Godzwon fio_settings["ramp_time"], fio_settings["run_time"])) 1743661bb0b7SKamil Godzwon threads.append(power_daemon) 1744661bb0b7SKamil Godzwon 1745661bb0b7SKamil Godzwon for t in threads: 1746661bb0b7SKamil 
Godzwon t.start() 1747661bb0b7SKamil Godzwon for t in threads: 1748661bb0b7SKamil Godzwon t.join() 1749661bb0b7SKamil Godzwon 1750661bb0b7SKamil Godzwon 175176eb5372SKamil Godzwondef run_fio_tests(args, initiators, target_obj, fio_settings): 1752661bb0b7SKamil Godzwon 175376eb5372SKamil Godzwon for block_size, io_depth, rw in fio_settings["workloads"]: 1754661bb0b7SKamil Godzwon configs = [] 1755661bb0b7SKamil Godzwon for i in initiators: 1756661bb0b7SKamil Godzwon i.init_connect() 175776eb5372SKamil Godzwon cfg = i.gen_fio_config(rw, block_size, io_depth, target_obj.subsys_no, fio_settings) 1758661bb0b7SKamil Godzwon configs.append(cfg) 1759661bb0b7SKamil Godzwon 176076eb5372SKamil Godzwon run_single_fio_test(args, initiators, configs, target_obj, block_size, io_depth, rw, fio_settings) 1761661bb0b7SKamil Godzwon 1762661bb0b7SKamil Godzwon for i in initiators: 1763661bb0b7SKamil Godzwon i.init_disconnect() 1764661bb0b7SKamil Godzwon i.copy_result_files(args.results) 1765661bb0b7SKamil Godzwon 1766661bb0b7SKamil Godzwon try: 1767661bb0b7SKamil Godzwon parse_results(args.results, args.csv_filename) 1768661bb0b7SKamil Godzwon except Exception as err: 1769661bb0b7SKamil Godzwon logging.error("There was an error with parsing the results") 17706cc57905SKamil Godzwon raise err 1771661bb0b7SKamil Godzwon 1772661bb0b7SKamil Godzwon 17732eb1b096SKamil Godzwondef validate_allowlist_settings(data, is_forced_used): 17742eb1b096SKamil Godzwon if data["target"].get("null_block_devices"): 17752eb1b096SKamil Godzwon logging.info("Null block devices used for testing. Allow and block lists will not be checked.") 17762eb1b096SKamil Godzwon return True 17772eb1b096SKamil Godzwon 17782eb1b096SKamil Godzwon if is_forced_used: 17792eb1b096SKamil Godzwon logging.warning("""Force mode used. 
All available NVMe drives on your system will be used, 17802eb1b096SKamil Godzwon which may potentially lead to data loss""") 17812eb1b096SKamil Godzwon return True 17822eb1b096SKamil Godzwon 17832eb1b096SKamil Godzwon if not (data["target"].get("allowlist") or data["target"].get("blocklist")): 17842eb1b096SKamil Godzwon logging.error("""This script requires allowlist or blocklist to be defined. 17852eb1b096SKamil Godzwon You can choose to use all available NVMe drives on your system (-f option), 17862eb1b096SKamil Godzwon which may potentially lead to data loss.""") 17872eb1b096SKamil Godzwon return False 17882eb1b096SKamil Godzwon 17892eb1b096SKamil Godzwon return True 17902eb1b096SKamil Godzwon 17912eb1b096SKamil Godzwon 17924cff5cc2SJaroslaw Chachulskiclass CpuThrottlingError(Exception): 17934cff5cc2SJaroslaw Chachulski def __init__(self, message): 17944cff5cc2SJaroslaw Chachulski super().__init__(message) 17954cff5cc2SJaroslaw Chachulski 17964cff5cc2SJaroslaw Chachulski 1797a42dfab1SKarol Lateckiif __name__ == "__main__": 1798e0659fa8SPawel Piatek exit_code = 0 1799e0659fa8SPawel Piatek 1800f240e2bfSLiang Yan script_full_dir = os.path.dirname(os.path.realpath(__file__)) 18012f0fdf0eSPawel Piatek default_config_file_path = os.path.relpath(os.path.join(script_full_dir, "config.json")) 1802f240e2bfSLiang Yan 18032f0fdf0eSPawel Piatek parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 18042f0fdf0eSPawel Piatek parser.add_argument('-c', '--config', type=str, default=default_config_file_path, 18052f0fdf0eSPawel Piatek help='Configuration file.') 18062f0fdf0eSPawel Piatek parser.add_argument('-r', '--results', type=str, default='/tmp/results', 18072f0fdf0eSPawel Piatek help='Results directory.') 18082f0fdf0eSPawel Piatek parser.add_argument('-s', '--csv-filename', type=str, default='nvmf_results.csv', 18092f0fdf0eSPawel Piatek help='CSV results filename.') 1810f5d6d594SKarol Latecki parser.add_argument('-f', '--force', 
default=False, action='store_true', 1811f5d6d594SKarol Latecki dest='force', help="""Force script to continue and try to use all 1812f5d6d594SKarol Latecki available NVMe devices during test. 1813f5d6d594SKarol Latecki WARNING: Might result in data loss on used NVMe drives""") 18142f0fdf0eSPawel Piatek 18152f0fdf0eSPawel Piatek args = parser.parse_args() 18162f0fdf0eSPawel Piatek 181780e92723SKarol Latecki logging.basicConfig(level=logging.INFO, 181880e92723SKarol Latecki format='[%(name)s:%(funcName)s:%(lineno)d] %(message)s') 181980e92723SKarol Latecki 182080e92723SKarol Latecki logging.info("Using config file: %s" % args.config) 18212f0fdf0eSPawel Piatek with open(args.config, "r") as config: 1822a42dfab1SKarol Latecki data = json.load(config) 1823a42dfab1SKarol Latecki 1824a42dfab1SKarol Latecki initiators = [] 18259fec8853SKarol Latecki general_config = data["general"] 18269fec8853SKarol Latecki target_config = data["target"] 18279fec8853SKarol Latecki initiator_configs = [data[x] for x in data.keys() if "initiator" in x] 18289fec8853SKarol Latecki 18292eb1b096SKamil Godzwon if not validate_allowlist_settings(data, args.force): 1830f5d6d594SKarol Latecki exit(1) 1831f5d6d594SKarol Latecki 1832a42dfab1SKarol Latecki for k, v in data.items(): 1833a42dfab1SKarol Latecki if "target" in k: 18344ce17359SKarol Latecki v.update({"results_dir": args.results}) 1835a42dfab1SKarol Latecki if data[k]["mode"] == "spdk": 18369fec8853SKarol Latecki target_obj = SPDKTarget(k, data["general"], v) 1837a42dfab1SKarol Latecki elif data[k]["mode"] == "kernel": 18389fec8853SKarol Latecki target_obj = KernelTarget(k, data["general"], v) 1839a42dfab1SKarol Latecki elif "initiator" in k: 1840a42dfab1SKarol Latecki if data[k]["mode"] == "spdk": 18419fec8853SKarol Latecki init_obj = SPDKInitiator(k, data["general"], v) 1842a42dfab1SKarol Latecki elif data[k]["mode"] == "kernel": 18439fec8853SKarol Latecki init_obj = KernelInitiator(k, data["general"], v) 1844a42dfab1SKarol Latecki 
initiators.append(init_obj) 1845a42dfab1SKarol Latecki elif "fio" in k: 184676eb5372SKamil Godzwon fio_settings = { 184776eb5372SKamil Godzwon "workloads": itertools.product(data[k]["bs"], data[k]["qd"], data[k]["rw"]), 184876eb5372SKamil Godzwon "run_time": data[k].get("run_time", 10), 184976eb5372SKamil Godzwon "ramp_time": data[k].get("ramp_time", 0), 185076eb5372SKamil Godzwon "rw_mix_read": data[k].get("rwmixread", 70), 185176eb5372SKamil Godzwon "run_num": data[k].get("run_num", None), 185276eb5372SKamil Godzwon "num_jobs": data[k].get("num_jobs", None), 185376eb5372SKamil Godzwon "rate_iops": data[k].get("rate_iops", 0), 185476eb5372SKamil Godzwon "offset": data[k].get("offset", False), 185576eb5372SKamil Godzwon "offset_inc": data[k].get("offset_inc", 0), 185676eb5372SKamil Godzwon "numa_align": data[k].get("numa_align", 1) 185776eb5372SKamil Godzwon } 1858a42dfab1SKarol Latecki else: 1859a42dfab1SKarol Latecki continue 1860a42dfab1SKarol Latecki 186179199d41SKarol Latecki try: 18622f0fdf0eSPawel Piatek os.mkdir(args.results) 186379199d41SKarol Latecki except FileExistsError: 186479199d41SKarol Latecki pass 186579199d41SKarol Latecki 1866b6103a58SKarol Latecki for i in initiators: 1867b6103a58SKarol Latecki target_obj.initiator_info.append( 1868b6103a58SKarol Latecki {"name": i.name, "target_nic_ips": i.target_nic_ips, "initiator_nic_ips": i.nic_ips} 1869b6103a58SKarol Latecki ) 1870b6103a58SKarol Latecki 1871741b0839SKarol Latecki try: 18724ce17359SKarol Latecki target_obj.tgt_start() 187325541b28SKamil Godzwon initiators_match_subsystems(initiators, target_obj) 1874128df535SKarol Latecki 1875a42dfab1SKarol Latecki # Poor mans threading 1876a42dfab1SKarol Latecki # Run FIO tests 187776eb5372SKamil Godzwon run_fio_tests(args, initiators, target_obj, fio_settings) 1878a42dfab1SKarol Latecki 1879661bb0b7SKamil Godzwon except Exception as e: 1880661bb0b7SKamil Godzwon logging.error("Exception occurred while running FIO tests") 1881661bb0b7SKamil Godzwon 
logging.error(e) 1882e0659fa8SPawel Piatek exit_code = 1 1883661bb0b7SKamil Godzwon 1884741b0839SKarol Latecki finally: 18854cff5cc2SJaroslaw Chachulski for i in [*initiators, target_obj]: 1886741b0839SKarol Latecki try: 1887741b0839SKarol Latecki i.stop() 18884cff5cc2SJaroslaw Chachulski except CpuThrottlingError as err: 18894cff5cc2SJaroslaw Chachulski logging.error(err) 18904cff5cc2SJaroslaw Chachulski exit_code = 1 1891741b0839SKarol Latecki except Exception as err: 1892741b0839SKarol Latecki pass 1893e0659fa8SPawel Piatek sys.exit(exit_code) 1894