xref: /spdk/test/scheduler/cgroups.sh (revision b22f1b34d9eaedfda6df16d178770048cce4e5dc)
1#  SPDX-License-Identifier: BSD-3-Clause
2#  Copyright (C) 2021 Intel Corporation.
3#  All rights reserved.
4
# Print the cgroup version that can be used for cpuset management.
#
# Globals:  sysfs_cgroup (read)
# Outputs:  "2" when the cgroup-v2 interface is mounted and lists the cpuset
#           controller, "1" when the cgroup-v1 cpuset hierarchy is mounted,
#           nothing when neither interface is found.
# Returns:  1 when cgroup-v2 is mounted without the cpuset controller (or the
#           version could not be printed), 0 otherwise - including the case
#           where no cgroup interface was found at all.
check_cgroup() {
	local v2_controllers=$sysfs_cgroup/cgroup.controllers
	local v1_tasks=$sysfs_cgroup/cpuset/tasks

	# Both versions are supported; they are told apart by probing for an
	# interface file specific to each of them.
	if [[ -e $v2_controllers ]]; then
		# cgroup2 is mounted - usable only if it offers the cpuset controller
		[[ $(< "$v2_controllers") == *cpuset* ]] || return 1
		echo 2 || return 1
	elif [[ -e $v1_tasks ]]; then
		# cgroup's cpuset subsystem is mounted
		echo 1 || return 1
	fi
	return 0
}
16
# Prepare the /cpuset cgroup for use.
#
# Globals:  cgroup_version (read)
# Returns:  status of the last helper that was called; 0 when cgroup_version
#           is neither 1 nor 2 (nothing is done in that case).
init_cpuset_cgroup() {
	if ((cgroup_version == 2)); then
		# For cgroup-v2 we need to prepare cpuset subsystem on our own:
		# enable the controller at the root, create /cpuset and enable the
		# controller for the children of /cpuset as well.
		set_cgroup_attr / cgroup.subtree_control "+cpuset"
		create_cgroup /cpuset
		set_cgroup_attr /cpuset cgroup.subtree_control "+cpuset"
	elif ((cgroup_version == 1)); then
		# For cgroup-v1 the PID of the current shell is written to the
		# cgroup.procs of the already existing /cpuset hierarchy.
		set_cgroup_attr /cpuset cgroup.procs "$$"
	fi
}
30
# Check if the given cgroup is of the "threaded" type.
#
# Arguments: $1 - cgroup path relative to $sysfs_cgroup
# Returns:   0 when the cgroup.type file exists and holds exactly "threaded",
#            1 otherwise.
is_cgroup_threaded() {
	local type_file=$sysfs_cgroup/$1/cgroup.type

	if [[ ! -e $type_file ]]; then
		return 1
	fi
	[[ $(< "$type_file") == threaded ]]
}
35
# Move every process (or thread) listed in one cgroup to another one.
#
# Arguments: $1 - source cgroup, relative to $sysfs_cgroup
#            $2 - destination cgroup, relative to $sysfs_cgroup
# Globals:   sysfs_cgroup, cgroup_version (read)
# Outputs:   a summary of moved/failed attempts on stderr
# Returns:   0 - also when either of the cgroups does not exist
move_cgroup_procs() {
	local old_cgroup=$1 new_cgroup=$2
	local src_interface=cgroup.procs dst_interface=cgroup.procs
	local task tasks migrated=0 failed

	# Without both ends in place there is nothing to migrate.
	if [[ ! -e $sysfs_cgroup/$old_cgroup || ! -e $sysfs_cgroup/$new_cgroup ]]; then
		return 0
	fi

	# Under cgroup-v2, threaded cgroups are handled through cgroup.threads
	# instead of cgroup.procs.
	if ((cgroup_version == 2)); then
		if is_cgroup_threaded "$new_cgroup"; then
			dst_interface=cgroup.threads
		fi
		if is_cgroup_threaded "$old_cgroup"; then
			src_interface=cgroup.threads
		fi
	fi

	# The list is deliberately left unquoted - each id becomes a separate
	# argument. The ids are then read back as the indexes of the array.
	fold_list_onto_array tasks $(< "$sysfs_cgroup/$old_cgroup/$src_interface")

	for task in "${!tasks[@]}"; do
		# Not every kernel thread can be moved around and any process may
		# exit at any time, hence failures are tolerated here and merely
		# counted for debugging purposes.
		move_proc "$task" "$new_cgroup" "$old_cgroup" "$dst_interface" || continue
		migrated=$((migrated + 1))
	done

	failed=$((${#tasks[@]} - migrated))
	echo "Moved $migrated processes, failed $failed" >&2
}
70
# Move a single process (or thread) to the given cgroup.
#
# Arguments: $1 - id of the process/thread to move
#            $2 - destination cgroup, relative to $sysfs_cgroup
#            $3 - source cgroup, used for logging only; looked up with
#                 get_cgroup when omitted or empty
#            $4 - interface file of the destination cgroup to write the id
#                 to; when omitted or empty it is cgroup.threads for a
#                 threaded cgroup-v2 destination and cgroup.procs otherwise
# Outputs:   progress and failure messages on stderr
# Returns:   0 on success, 1 when the id could not be written
move_proc() {
	local proc=$1 new_cgroup=$2 old_cgroup=${3:-"$(get_cgroup "$1")"} attr=$4 write_fail

	# An empty attribute would make set_cgroup_attr target the cgroup
	# directory itself, so pick the interface the same way exec_in_cgroup
	# and kill_in_cgroup do.
	if [[ -z $attr ]]; then
		attr=cgroup.procs
		if ((cgroup_version == 2)) && is_cgroup_threaded "$new_cgroup"; then
			attr=cgroup.threads
		fi
	fi

	# id_proc reports on stderr, hence the redirect inside the substitution.
	echo "Moving $proc ($(id_proc "$proc" 2>&1)) to $new_cgroup from $old_cgroup" >&2
	if ! write_fail=$(set_cgroup_attr "$new_cgroup" "$attr" "$proc" 2>&1); then
		# Keep only what follows the last ": " of the captured error.
		echo "Moving $proc failed: ${write_fail##*: }" >&2
		return 1
	fi
}
80
# Write a value to an attribute (interface file) of the given cgroup.
#
# Arguments: $1 - cgroup path relative to $sysfs_cgroup
#            $2 - name of the attribute
#            $3 - value to write; nothing is written when it is empty
# Returns:   1 when the attribute does not exist, otherwise the status of the
#            write (0 when there was nothing to write).
set_cgroup_attr() {
	local cgroup=$1 attr=$2 val=$3
	local attr_file=$sysfs_cgroup/$cgroup/$attr

	if [[ ! -e $attr_file ]]; then
		return 1
	fi

	[[ -z $val ]] || echo "$val" > "$attr_file"
}
92
# Create a cgroup unless it already exists.
#
# Arguments: $1 - cgroup path relative to $sysfs_cgroup
# Globals:   sysfs_cgroup, cgroup_version (read)
# Returns:   0 when the cgroup already exists or was created, the status of
#            mkdir when the directory could not be created, or the status of
#            the cgroup.type write for cgroup-v2.
create_cgroup() {
	local cgroup_dir=$sysfs_cgroup/$1

	[[ ! -e $cgroup_dir ]] || return 0

	# Bail out right away if the cgroup can't be created - there would be
	# nothing to configure and the failure must not be reported as success.
	mkdir "$cgroup_dir" || return

	if ((cgroup_version == 2)); then
		# Under cgroup-v2 newly created cgroups are switched to the threaded type.
		echo "threaded" > "$cgroup_dir/cgroup.type"
	fi
}
100
# Remove a cgroup together with all the cgroups nested under it.
#
# Arguments: $1 - cgroup to remove, either relative to $sysfs_cgroup or
#                 prefixed with it
# Returns:   0 when the cgroup does not exist, 1 when asked to remove the
#            root of the hierarchy, otherwise the status of the final rmdir.
remove_cgroup() {
	local cgroup=${1#"$sysfs_cgroup"} root_cgroup leaf_cgroup

	# An empty or slash-only name resolves to $sysfs_cgroup itself. Walking
	# it would tear down every cgroup found in the hierarchy, so refuse.
	if [[ -z ${cgroup//\//} ]]; then
		echo "Refusing to remove the root cgroup" >&2
		return 1
	fi

	[[ -e $sysfs_cgroup/$cgroup ]] || return 0
	root_cgroup=$(dirname "$cgroup")

	# Remove all lingering leaf cgroups if any
	for leaf_cgroup in "$sysfs_cgroup/$cgroup/"*/; do
		# With no sub-directories the pattern is handed over verbatim
		[[ -d $leaf_cgroup ]] || continue
		remove_cgroup "$leaf_cgroup"
	done
	# Instead of killing all the potential processes, we play it nice
	# and move them to the parent cgroup.
	move_cgroup_procs "$cgroup" "$root_cgroup"
	rmdir "$sysfs_cgroup/$cgroup"
}
115
# Put the current process into the given cgroup and replace it with a command.
#
# Arguments: $1   - cgroup path relative to $sysfs_cgroup
#            $2.. - command (with arguments) to exec
# Returns:   1 when called without any arguments; otherwise control is handed
#            over to exec.
exec_in_cgroup() {
	# This is meant to be started as a background job. It stays a {} group
	# rather than a subshell body so that no extra bash fork is left around
	# - the exec at the bottom takes over the process.

	local cgroup=$1 proc_interface

	(($# > 0)) || return 1
	shift

	proc_interface=cgroup.procs
	if ((cgroup_version == 2)); then
		if is_cgroup_threaded "$cgroup"; then
			proc_interface=cgroup.threads
		fi
	fi

	# $BASHPID rather than $$ - it names the process that is about to exec.
	set_cgroup_attr "$cgroup" "$proc_interface" "$BASHPID"
	exec "$@"
}
132
# Signal (with kill's default signal) processes that are members of a cgroup.
#
# Arguments: $1 - cgroup path relative to $sysfs_cgroup
#            $2 - optional id; when given, only this id is signalled and only
#                 if it is listed in the cgroup. When omitted, every listed
#                 id is signalled.
# Returns:   status of kill, or 0 when there was nothing to signal.
kill_in_cgroup() {
	local cgroup=$1 pid=$2
	local proc_interface cgroup_pids

	if ((cgroup_version == 2)) && is_cgroup_threaded "$cgroup"; then
		proc_interface=cgroup.threads
	else
		proc_interface=cgroup.procs
	fi

	# Left unquoted on purpose - each listed id becomes a separate argument.
	fold_list_onto_array cgroup_pids $(< "$sysfs_cgroup/$cgroup/$proc_interface")

	if [[ -z $pid ]]; then
		# No particular target, go after everything found in the cgroup.
		((${#cgroup_pids[@]} == 0)) || kill "${cgroup_pids[@]}"
		return
	fi

	# The ids are looked up by index, see the fold above.
	[[ -z ${cgroup_pids[pid]} ]] || kill "$pid"
}
155
# Remove the /cpuset cgroup. Done for cgroup-v2 only; for any other
# cgroup_version nothing is removed and 0 is returned.
remove_cpuset_cgroup() {
	((cgroup_version == 2)) || return 0
	remove_cgroup /cpuset
}
161
# Print the cgroup the given process belongs to.
#
# Arguments: $1 - pid to look up, "self" when omitted or empty
# Outputs:   whatever follows the last ":" in /proc/<pid>/cgroup
# Returns:   1 when /proc/<pid>/cgroup does not exist
get_cgroup() {
	local pid=${1:-self}
	local membership_file=/proc/$pid/cgroup membership

	if [[ ! -e $membership_file ]]; then
		return 1
	fi

	membership=$(< "$membership_file")
	echo "${membership##*:}"
}
169
# Print the location, under $sysfs_cgroup, of the cgroup the given process
# belongs to.
#
# Arguments: $1 - pid, handed over to get_cgroup as is
# Returns:   1 when get_cgroup fails (nothing is printed then)
get_cgroup_path() {
	local relative_path

	if ! relative_path=$(get_cgroup "$1"); then
		return 1
	fi
	echo "${sysfs_cgroup}${relative_path}"
}
176
# Write a value to the given attribute of a cgroup and of its ancestors,
# starting from the topmost one.
#
# The walk up continues for as long as the parent directory holds the
# attribute; each level is written only after all the levels above it.
#
# Arguments: $1 - full path to the cgroup directory
#            $2 - name of the attribute
#            $3 - value to write
# Returns:   1 when no attribute name was given, otherwise the status of the
#            write to the cgroup itself (0 if it does not hold the attribute).
_set_cgroup_attr_top_bottom() {
	local cgroup_path=$1 attr=$2 val=$3
	local parent_path=${cgroup_path%/*}

	# With an empty name every existing directory would "hold" the
	# attribute and the writes would target the directories themselves.
	[[ -n $attr ]] || return 1

	# Stripping the last component of a path without any "/" yields the
	# very same path - don't recurse then, it would never terminate.
	if [[ $parent_path != "$cgroup_path" && -e $parent_path/$attr ]]; then
		_set_cgroup_attr_top_bottom "$parent_path" "$attr" "$val"
	fi

	if [[ -e $cgroup_path/$attr ]]; then
		echo "$val" > "$cgroup_path/$attr"
	fi
}
188
# Set an attribute across the cgroup path of the given process.
#
# Arguments: $1 - pid, resolved to a cgroup path with get_cgroup_path
#            $2 - name of the attribute
#            $3 - value to write
# Returns:   status of _set_cgroup_attr_top_bottom. A failed path lookup is
#            not checked - the helper is then called with an empty path.
set_cgroup_attr_top_bottom() {
	local target_pid=$1 attr_name=$2 attr_value=$3

	_set_cgroup_attr_top_bottom \
		"$(get_cgroup_path "$target_pid")" \
		"$attr_name" \
		"$attr_value"
}
192
# Report which of the known task flags are set for the given process.
#
# Arguments: $1 - pid, looked up under /proc
#            $2 - name of a single flag to check (e.g. PF_KTHREAD) or "all"
#                 (the default) to check every flag known to this function
# Outputs:   comma-separated names of the matching flags - on stderr
# Returns:   0 when at least one of the checked flags is set, 1 otherwise or
#            when /proc/<pid>/stat can't be read
id_proc() {
	local pid=$1 flag_to_check=${2:-all}
	local flag flags stat stats tflags
	local -a flags_map

	[[ -e /proc/$pid/stat ]] || return 1
	stat=$(< "/proc/$pid/stat") || return 1

	# Comm is wrapped in () but the name of the thread itself may include
	# "()", giving in result something similar to: ((sd-pam)). Drop all up
	# to the last ") " and split what is left - the flags then end up as
	# the 7th field.
	read -r -a stats <<< "${stat##*) }"
	tflags=${stats[6]}

	# include/linux/sched.h
	flags_map=(
		[0x1]=PF_VCPU
		[0x2]=PF_IDLE
		[0x4]=PF_EXITING
		[0x8]=PF_POSTCOREDUMP
		[0x10]=PF_IO_WORKER
		[0x20]=PF_WQ_WORKER
		[0x40]=PF_FORK_NO_EXEC
		[0x80]=PF_MCE_PROCESS
		[0x100]=PF_SUPERPRIV
		[0x200]=PF_DUMPCORE
		[0x400]=PF_SIGNALED
		[0x800]=PF_MEMALLOC
		[0x1000]=PF_NPROC_EXCEEDED
		[0x2000]=PF_USED_MATH
		[0x4000]=PF_USER_WORKER
		[0x8000]=PF_NOFREEZE
		[0x20000]=PF_KSWAPD
		[0x40000]=PF_MEMALLOC_NOFS
		[0x80000]=PF_MEMALLOC_NOIO
		[0x100000]=PF_LOCAL_THROTTLE
		[0x00200000]=PF_KTHREAD
		[0x00400000]=PF_RANDOMIZE
		[0x04000000]=PF_NO_SETAFFINITY
		[0x08000000]=PF_MCE_EARLY
		[0x10000000]=PF_MEMALLOC_PIN
		[0x80000000]=PF_SUSPEND_TASK
	)

	# Indexes of the map are the bit masks, walked in ascending order.
	for flag in "${!flags_map[@]}"; do
		if [[ $flag_to_check != all && $flag_to_check != "${flags_map[flag]}" ]]; then
			continue
		fi
		if ((tflags & flag)); then
			flags=${flags:+$flags,}${flags_map[flag]}
		fi
	done

	[[ -n $flags ]] || return 1
	echo "$flags" >&2
	return 0
}
242
# Mount point of the cgroup filesystem, used by all the helpers in this file.
# The declaration fails if the variable is already read-only, e.g. when this
# file gets sourced more than once - tolerate that as long as the value that
# is already in place is the expected one.
declare -r sysfs_cgroup=/sys/fs/cgroup 2> /dev/null \
	|| [[ $sysfs_cgroup == /sys/fs/cgroup ]]
# Version of the cgroup interface as printed by check_cgroup: 1, 2 or empty.
cgroup_version=$(check_cgroup)
245