xref: /spdk/test/scheduler/cgroups.sh (revision b22f1b34d9eaedfda6df16d178770048cce4e5dc)
#  SPDX-License-Identifier: BSD-3-Clause
#  Copyright (C) 2021 Intel Corporation.
#  All rights reserved.
4588dfe31SMichal Berger
# Detect which cgroup flavor exposes the cpuset controller under $sysfs_cgroup.
# Globals:   sysfs_cgroup (read)
# Outputs:   "2" when cgroup.controllers exists and lists cpuset, "1" when
#            only the cpuset/tasks interface exists, nothing otherwise.
# Returns:   1 when cgroup.controllers exists but does not list cpuset (or
#            the version could not be printed); 0 in every other case, which
#            includes the case where neither interface is present.
check_cgroup() {
	local v2_probe=$sysfs_cgroup/cgroup.controllers
	local v1_probe=$sysfs_cgroup/cpuset/tasks

	# The cgroup-v2 interface takes precedence - once it exists, the v1
	# interface is not consulted at all.
	if [[ -e $v2_probe ]]; then
		if [[ $(< "$v2_probe") != *cpuset* ]]; then
			return 1
		fi
		echo 2 || return 1
		return 0
	fi

	# cgroup's cpuset subsystem is mounted (cgroup-v1)
	if [[ -e $v1_probe ]]; then
		echo 1 || return 1
	fi
	return 0
}
161fd9dccfSMichal Berger
# Prepare the cpuset cgroup used by the tests.
# Globals:   cgroup_version (read)
# Returns:   status of the last helper called; 0 when cgroup_version is
#            neither 1 nor 2 (nothing is done in that case).
init_cpuset_cgroup() {
	# For cgroup-v2 we need to prepare cpuset subsystem on our own: write
	# "+cpuset" to the root's cgroup.subtree_control, create /cpuset and write
	# "+cpuset" to its cgroup.subtree_control as well.
	if ((cgroup_version == 2)); then
		set_cgroup_attr / cgroup.subtree_control "+cpuset"
		create_cgroup /cpuset
		set_cgroup_attr /cpuset cgroup.subtree_control "+cpuset"
	elif ((cgroup_version == 1)); then
		# For cgroup-v1 the current shell is written into /cpuset's
		# cgroup.procs.
		set_cgroup_attr /cpuset cgroup.procs "$$"
	fi
}
301fd9dccfSMichal Berger
# Check whether the given cgroup reports "threaded" in its cgroup.type file.
# Globals:   sysfs_cgroup (read)
# Arguments: $1 - cgroup, relative to $sysfs_cgroup
# Returns:   0 when cgroup.type exists and reads exactly "threaded",
#            1 otherwise (including when the file does not exist).
is_cgroup_threaded() {
	local type_file=$sysfs_cgroup/$1/cgroup.type

	if [[ ! -e $type_file ]]; then
		return 1
	fi
	[[ $(< "$type_file") == threaded ]]
}
351fd9dccfSMichal Berger
# Move everything listed in one cgroup over to another cgroup.
# Globals:   sysfs_cgroup, cgroup_version (read)
# Arguments: $1 - source cgroup, $2 - destination cgroup (both relative to
#            $sysfs_cgroup)
# Outputs:   a "Moved N processes, failed M" summary on stderr
# Returns:   0 right away when either cgroup does not exist; otherwise the
#            status of the final summary echo.
move_cgroup_procs() {
	local old_cgroup=$1 new_cgroup=$2
	local proc procs moved=0
	local old_proc_interface=cgroup.procs new_proc_interface=cgroup.procs

	# If target cgroups don't exist then there's nothing to do.
	if [[ ! -e $sysfs_cgroup/$old_cgroup || ! -e $sysfs_cgroup/$new_cgroup ]]; then
		return 0
	fi

	# Under cgroup-v2 a threaded cgroup is read and written through
	# cgroup.threads instead of cgroup.procs.
	if ((cgroup_version == 2)); then
		if is_cgroup_threaded "$old_cgroup"; then
			old_proc_interface=cgroup.threads
		fi
		if is_cgroup_threaded "$new_cgroup"; then
			new_proc_interface=cgroup.threads
		fi
	fi

	# The list is deliberately left unquoted so each id becomes a separate
	# argument; the ids end up as the keys of procs.
	fold_list_onto_array procs $(< "$sysfs_cgroup/$old_cgroup/$old_proc_interface")

	for proc in "${!procs[@]}"; do
		# We can't move every kernel thread around and every process can
		# exit at any point so ignore any failures upon writing the
		# processes out but keep count of the successful attempts so the
		# summary below can report how many failed.
		move_proc "$proc" "$new_cgroup" "$old_cgroup" "$new_proc_interface" || continue
		((++moved))
	done

	echo "Moved $moved processes, failed $((${#procs[@]} - moved))" >&2
}
70b78531c7SMichal Berger
# Move a single process (or thread) into a cgroup.
# Globals:   cgroup_version (read, only when $4 is omitted)
# Arguments: $1 - pid/tid to move
#            $2 - destination cgroup, relative to $sysfs_cgroup
#            $3 - cgroup the process comes from, used for logging only;
#                 looked up via get_cgroup when empty or omitted
#            $4 - attribute to write the id to; when empty or omitted it is
#                 picked the same way exec_in_cgroup does it: cgroup.threads
#                 for a threaded cgroup-v2 destination, cgroup.procs otherwise
# Outputs:   progress and failure messages on stderr
# Returns:   0 when the write succeeded, 1 otherwise
move_proc() {
	local proc=$1 new_cgroup=$2 old_cgroup=$3 attr=$4 write_fail

	if [[ -z $old_cgroup ]]; then
		# The process may be gone already - the name is needed just for the
		# log line so a failed lookup is not an error.
		old_cgroup=$(get_cgroup "$proc") || :
	fi

	if [[ -z $attr ]]; then
		# Without an attribute the write would target the cgroup directory
		# itself and always fail.
		attr=cgroup.procs
		if ((cgroup_version == 2)) && is_cgroup_threaded "$new_cgroup"; then
			attr=cgroup.threads
		fi
	fi

	# id_proc reports on stderr, hence the 2>&1 inside the substitution.
	echo "Moving $proc ($(id_proc "$proc" 2>&1)) to $new_cgroup from $old_cgroup" >&2
	if ! write_fail=$(set_cgroup_attr "$new_cgroup" "$attr" "$proc" 2>&1); then
		# Keep only what follows the last ": " of the captured error.
		echo "Moving $proc failed: ${write_fail##*: }" >&2
		return 1
	fi
}
801fd9dccfSMichal Berger
# Write a value to an attribute file of a cgroup.
# Globals:   sysfs_cgroup (read)
# Arguments: $1 - cgroup, relative to $sysfs_cgroup
#            $2 - attribute (file) name
#            $3 - value to write; nothing is written when it is empty
# Returns:   1 when the attribute does not exist, 0 when there was nothing
#            to write, status of the write otherwise.
set_cgroup_attr() {
	local cgroup=$1 attr=$2 val=$3
	local attr_path=$sysfs_cgroup/$cgroup/$attr

	if [[ ! -e $attr_path ]]; then
		return 1
	fi

	if [[ -z $val ]]; then
		return 0
	fi
	echo "$val" > "$attr_path"
}
921fd9dccfSMichal Berger
# Create a cgroup unless it is already there.
# Globals:   sysfs_cgroup, cgroup_version (read)
# Arguments: $1 - cgroup to create, relative to $sysfs_cgroup
# Returns:   0 when the cgroup already exists or cgroup_version is not 2;
#            for cgroup-v2 the status of the cgroup.type write.
create_cgroup() {
	local cgroup_dir=$sysfs_cgroup/$1

	# An existing cgroup is left exactly as it is.
	if [[ -e $cgroup_dir ]]; then
		return 0
	fi

	mkdir "$cgroup_dir"
	if ((cgroup_version == 2)); then
		# Freshly created cgroup-v2 groups get "threaded" written to their
		# cgroup.type.
		echo "threaded" > "$cgroup_dir/cgroup.type"
	fi
}
1001fd9dccfSMichal Berger
# Remove a cgroup together with all of its child cgroups.
# Globals:   sysfs_cgroup (read)
# Arguments: $1 - cgroup to remove; either relative to $sysfs_cgroup or
#            prefixed with it (the prefix is stripped)
# Outputs:   an error on stderr when asked to remove the root of the hierarchy
# Returns:   1 when $1 resolves to the root of the hierarchy, 0 when the
#            cgroup does not exist, status of rmdir otherwise.
remove_cgroup() {
	local cgroup=${1#"$sysfs_cgroup"} root_cgroup leaf_cgroup

	# An empty argument, "/" or $sysfs_cgroup itself all resolve to the top of
	# the hierarchy. Descending from there would dismantle every cgroup on the
	# system, so refuse to do it.
	if [[ -z ${cgroup//\//} ]]; then
		echo "remove_cgroup: refusing to remove the root cgroup" >&2
		return 1
	fi

	[[ -e $sysfs_cgroup/$cgroup ]] || return 0
	root_cgroup=$(dirname "$cgroup")

	# Remove all lingering leaf cgroups if any
	for leaf_cgroup in "$sysfs_cgroup/$cgroup/"*/; do
		# When there are no children the pattern is left unexpanded (unless
		# nullglob is set) - there is nothing to descend into then.
		[[ -d $leaf_cgroup ]] || continue
		remove_cgroup "$leaf_cgroup"
	done

	# Instead of killing all the potential processes, we play it nice
	# and move them to the parent cgroup.
	move_cgroup_procs "$cgroup" "$root_cgroup"
	rmdir "$sysfs_cgroup/$cgroup"
}
1151fd9dccfSMichal Berger
# Place the current shell process in a cgroup and replace it with a command.
# Globals:   cgroup_version (read)
# Arguments: $1 - cgroup, relative to $sysfs_cgroup; remaining arguments are
#            the command line to exec
# Returns:   1 when called without any arguments; otherwise does not return
#            once exec has replaced the process.
exec_in_cgroup() {
	# Run this function as a background job - the reason why it remains {} instead
	# of being declared as a subshell is to avoid having an extra bash fork around
	# - note the exec call.

	local cgroup=$1 proc_interface

	shift || return 1

	# Threaded cgroup-v2 groups take ids through cgroup.threads.
	proc_interface=cgroup.procs
	if ((cgroup_version == 2)); then
		if is_cgroup_threaded "$cgroup"; then
			proc_interface=cgroup.threads
		fi
	fi

	# $BASHPID rather than $$ - the id written has to be the one of the
	# process that is about to exec.
	set_cgroup_attr "$cgroup" "$proc_interface" "$BASHPID"
	exec "$@"
}
1321fd9dccfSMichal Berger
# Send the default kill signal to a process of a cgroup, or to all of them.
# Globals:   sysfs_cgroup, cgroup_version (read)
# Arguments: $1 - cgroup, relative to $sysfs_cgroup
#            $2 - optional pid; it is signalled only when the cgroup lists it.
#                 Without it every id listed in the cgroup is signalled.
# Returns:   status of kill when something was signalled, 0 otherwise
kill_in_cgroup() {
	local cgroup=$1 pid=$2
	local cgroup_pids proc_interface

	# Threaded cgroup-v2 groups list their members in cgroup.threads.
	if ((cgroup_version == 2)) && is_cgroup_threaded "$cgroup"; then
		proc_interface=cgroup.threads
	else
		proc_interface=cgroup.procs
	fi

	# The list is deliberately left unquoted so each id becomes a separate
	# argument; ids end up as the indexes of cgroup_pids.
	fold_list_onto_array cgroup_pids $(< "$sysfs_cgroup/$cgroup/$proc_interface")

	if [[ -z $pid ]]; then
		if ((${#cgroup_pids[@]} > 0)); then
			kill "${cgroup_pids[@]}"
		fi
	elif [[ -n ${cgroup_pids[pid]} ]]; then
		kill "$pid"
	fi
}
1551fd9dccfSMichal Berger
# Remove the /cpuset cgroup; this is done for cgroup-v2 only.
# Globals:   cgroup_version (read)
# Returns:   0 when cgroup_version is not 2, status of remove_cgroup otherwise
remove_cpuset_cgroup() {
	((cgroup_version == 2)) || return 0
	remove_cgroup /cpuset
}
161977a5357SMichal Berger
# Print the cgroup of a process as recorded in /proc/<pid>/cgroup.
# Arguments: $1 - pid to look up, "self" when omitted
# Outputs:   whatever follows the last ":" of the file, i.e. the path found
#            in its last line
# Returns:   1 when /proc/<pid>/cgroup does not exist
get_cgroup() {
	local pid=${1:-self} cgroup
	local proc_cgroup=/proc/$pid/cgroup

	if [[ ! -e $proc_cgroup ]]; then
		return 1
	fi

	cgroup=$(< "$proc_cgroup")
	# Drop everything up to and including the last colon.
	echo "${cgroup##*:}"
}
1693d5e27dfSMichal Berger
# Print the location of a process' cgroup under $sysfs_cgroup.
# Globals:   sysfs_cgroup (read)
# Arguments: $1 - pid, handed over to get_cgroup as is
# Outputs:   $sysfs_cgroup immediately followed by the output of get_cgroup
# Returns:   1 when get_cgroup fails
get_cgroup_path() {
	local cgroup

	if ! cgroup=$(get_cgroup "$1"); then
		return 1
	fi
	echo "$sysfs_cgroup$cgroup"
}
1763d5e27dfSMichal Berger
# Write a value to an attribute of a cgroup and of its ancestors, starting
# from the topmost ancestor and finishing at the cgroup itself.
# Arguments: $1 - full path to the cgroup directory
#            $2 - attribute (file) name
#            $3 - value to write
# Returns:   status of the write to $1's attribute, 0 when $1 doesn't have it
_set_cgroup_attr_top_bottom() {
	local cgroup_path=$1 attr=$2 val=$3
	local path=$cgroup_path parent
	local -a chain=("$cgroup_path")

	# Climb up for as long as the parent directory has the attribute too. The
	# first parent without it ends the climb, anything above it is ignored.
	# A path without any "/" left is its own "parent" - stop there as well
	# instead of looking at the very same directory forever.
	while parent=${path%/*}; [[ $parent != "$path" && -e $parent/$attr ]]; do
		chain=("$parent" "${chain[@]}")
		path=$parent
	done

	# Now walk back down so that every parent is written before its child.
	for path in "${chain[@]}"; do
		if [[ -e $path/$attr ]]; then
			echo "$val" > "$path/$attr"
		fi
	done
}
1883d5e27dfSMichal Berger
# Write a value to an attribute of the cgroup a process belongs to, and to
# the same attribute of the cgroup's ancestors, top to bottom.
# Arguments: $1 - pid whose cgroup is looked up via get_cgroup_path
#            $2 - attribute (file) name
#            $3 - value to write
# Returns:   status of _set_cgroup_attr_top_bottom; a failed lookup is not
#            detected here, the helper is then called with an empty path.
set_cgroup_attr_top_bottom() {
	local pid=$1 attr=$2 val=$3

	_set_cgroup_attr_top_bottom \
		"$(get_cgroup_path "$pid")" \
		"$attr" \
		"$val"
}
1923d5e27dfSMichal Berger
# Report the PF_* task flags set for a process.
# Arguments: $1 - pid, looked up as /proc/<pid>/stat
#            $2 - name of a single flag to test for; every known flag is
#                 considered when omitted (or when set to "all")
# Outputs:   comma-separated names of the matching flags - on stderr
# Returns:   0 when at least one of the considered flags is set, 1 otherwise
#            or when the stat file can't be read
id_proc() {
	local pid=$1 flag_to_check=${2:-all}
	local flag flags flags_map=() comm stats tflags

	[[ -e /proc/$pid/stat ]] || return 1
	# Comm is wrapped in () but the name of the thread itself may include "()", giving in result
	# something similar to: ((sd-pam))
	comm=$(< "/proc/$pid/stat") || return 1

	# Cut everything through the last ") " and split what remains into
	# fields. With pid and comm gone, the flags end up at index 6.
	read -ra stats <<< "${comm##*) }"
	tflags=${stats[6]}

	# include/linux/sched.h
	flags_map=(
		[0x1]=PF_VCPU
		[0x2]=PF_IDLE
		[0x4]=PF_EXITING
		[0x8]=PF_POSTCOREDUMP
		[0x10]=PF_IO_WORKER
		[0x20]=PF_WQ_WORKER
		[0x40]=PF_FORK_NO_EXEC
		[0x80]=PF_MCE_PROCESS
		[0x100]=PF_SUPERPRIV
		[0x200]=PF_DUMPCORE
		[0x400]=PF_SIGNALED
		[0x800]=PF_MEMALLOC
		[0x1000]=PF_NPROC_EXCEEDED
		[0x2000]=PF_USED_MATH
		[0x4000]=PF_USER_WORKER
		[0x8000]=PF_NOFREEZE
		[0x20000]=PF_KSWAPD
		[0x40000]=PF_MEMALLOC_NOFS
		[0x80000]=PF_MEMALLOC_NOIO
		[0x100000]=PF_LOCAL_THROTTLE
		[0x00200000]=PF_KTHREAD
		[0x00400000]=PF_RANDOMIZE
		[0x04000000]=PF_NO_SETAFFINITY
		[0x08000000]=PF_MCE_EARLY
		[0x10000000]=PF_MEMALLOC_PIN
		[0x80000000]=PF_SUSPEND_TASK
	)

	# Indexes of the map are the bit masks, visited in ascending order.
	for flag in "${!flags_map[@]}"; do
		[[ $flag_to_check == "${flags_map[flag]}" || $flag_to_check == all ]] || continue
		((tflags & flag)) && flags=${flags:+$flags,}"${flags_map[flag]}"
	done

	if [[ -n $flags ]]; then
		# stderr on purpose - callers interested in the names redirect it.
		echo "$flags" >&2
		return 0
	fi
	return 1
}
242b78531c7SMichal Berger
# Root of the cgroup hierarchy all the helpers above operate on. Read-only,
# so it can't be changed (or declared again) afterwards.
declare -r sysfs_cgroup="/sys/fs/cgroup"
# Detected once, at load time: 2, 1 or empty - see check_cgroup.
cgroup_version="$(check_cgroup)"
245