# xref: /spdk/test/scheduler/cgroups.sh (revision 18c8b52afa69f39481ebb75711b2f30b11693f9d)
# Report which cgroup flavour is usable under $sysfs_cgroup.
#
# Outputs: "2" when cgroup.controllers exists and lists the cpuset controller,
#          "1" when the cpuset/tasks interface exists instead.
# Returns: 1 when cgroup.controllers exists but does not mention cpuset (or
#          when the version could not be printed); 0 otherwise - including
#          the case where neither interface exists and nothing is printed.
check_cgroup() {
	# cgroup.controllers is checked first, so it wins when both interfaces
	# are present.
	if [[ -e $sysfs_cgroup/cgroup.controllers ]]; then
		if [[ $(< "$sysfs_cgroup/cgroup.controllers") != *cpuset* ]]; then
			# cgroup2 is there, but without the cpuset controller
			return 1
		fi
		echo 2 || return 1
		return 0
	fi

	if [[ -e $sysfs_cgroup/cpuset/tasks ]]; then
		# cgroup's cpuset subsystem is mounted
		echo 1 || return 1
	fi

	return 0
}
12
# Place the running processes under the /cpuset cgroup.
#
# Globals: cgroup_version (read). Works through set_cgroup_attr,
#          create_cgroup, get_cgroup and move_cgroup_procs.
# Note:    the /proc/+([0-9]) pattern needs extglob to be enabled by the time
#          this definition is parsed.
init_cpuset_cgroup() {
	local entry name
	local -A found=()

	if ! ((cgroup_version == 2)); then
		# With cgroup-v1 it's enough to attach this shell to /cpuset.
		if ((cgroup_version == 1)); then
			set_cgroup_attr /cpuset cgroup.procs "$$"
		fi
		return
	fi

	# For cgroup-v2 the cpuset subsystem has to be prepared by hand.
	set_cgroup_attr / cgroup.subtree_control "+cpuset"
	create_cgroup /cpuset
	set_cgroup_attr /cpuset cgroup.subtree_control "+cpuset"

	# On distros which use cgroup-v2 under systemd, each process is kept
	# under a separate, pre-configured subtree. Because of the "internal
	# processes are not permitted" rule we won't find ourselves under the
	# subsystem's root but rather at the bottom of the cgroup maintaining the
	# user's session. To recreate the simple /cpuset setup known from v1,
	# collect every non-root cgroup currently in use, move its members to
	# the top cgroup / and finally migrate / to the /cpuset created above.
	for entry in /proc/+([0-9]); do
		if ! name=$(get_cgroup "${entry##*/}"); then
			continue
		fi
		if [[ $name == / ]]; then
			continue
		fi
		# Keyed by name so that each cgroup is recorded only once.
		found["$name"]=$name
	done 2> /dev/null

	for name in "${!found[@]}"; do
		move_cgroup_procs "$name" /
	done

	# Now, move all the threads to the cpuset
	move_cgroup_procs / /cpuset
}
43
# Check whether the cgroup $1 (relative to $sysfs_cgroup) is of type
# "threaded".
# Returns: 0 when its cgroup.type reads exactly "threaded"; 1 when the file
#          is missing or holds anything else.
is_cgroup_threaded() {
	local type_file=$sysfs_cgroup/$1/cgroup.type

	if [[ ! -e $type_file ]]; then
		return 1
	fi

	[[ $(< "$type_file") == threaded ]]
}
48
# Move the members of one cgroup into another.
#   $1 - source cgroup, relative to $sysfs_cgroup
#   $2 - destination cgroup, relative to $sysfs_cgroup
# Returns 0 right away when either of the cgroups doesn't exist.
move_cgroup_procs() {
	local src=$1 dst=$2
	local src_interface=cgroup.procs dst_interface=cgroup.procs
	local proc procs

	# Nothing to do unless both ends are there.
	if [[ ! -e $sysfs_cgroup/$src ]]; then
		return 0
	fi
	if [[ ! -e $sysfs_cgroup/$dst ]]; then
		return 0
	fi

	# Under cgroup-v2 a threaded cgroup is accessed via cgroup.threads.
	if ((cgroup_version == 2)); then
		if is_cgroup_threaded "$dst"; then
			dst_interface=cgroup.threads
		fi
		if is_cgroup_threaded "$src"; then
			src_interface=cgroup.threads
		fi
	fi

	# The list is left unquoted on purpose so every id becomes a separate
	# argument. fold_list_onto_array is defined outside of this file -
	# presumably it keys each id by itself inside the named array.
	fold_list_onto_array procs $(< "$sysfs_cgroup/$src/$src_interface")

	for proc in "${!procs[@]}"; do
		# We can't move every kernel thread around and every process can
		# exit at any point, hence ids without a resolvable exe link are
		# skipped and failed writes are ignored.
		# FIXME: Check PF_KTHREAD instead?
		if [[ -z $(readlink -f "/proc/$proc/exe") ]]; then
			continue
		fi
		echo "$proc" > "$sysfs_cgroup/$dst/$dst_interface" 2> /dev/null || :
	done
}
79
# Write a value to a cgroup attribute.
#   $1 - cgroup, relative to $sysfs_cgroup
#   $2 - attribute (file) name
#   $3 - value; when empty nothing is written
# Returns: 1 when the attribute doesn't exist, otherwise the status of the
#          write (0 when there was nothing to write).
set_cgroup_attr() {
	local cgroup=$1 attr=$2 val=$3
	local attr_file=$sysfs_cgroup/$cgroup/$attr

	if [[ ! -e $attr_file ]]; then
		return 1
	fi

	[[ -z $val ]] || echo "$val" > "$attr_file"
}
91
# Create the cgroup $1 (relative to $sysfs_cgroup) unless it already exists.
# Under cgroup-v2 the freshly created cgroup is switched to the "threaded"
# type.
# Returns: 0 when the cgroup already exists or was created; the status of
#          mkdir when the directory can't be created; the status of the
#          cgroup.type write under cgroup-v2.
create_cgroup() {
	local path=$sysfs_cgroup/$1

	[[ ! -e $path ]] || return 0

	# Report a failed mkdir to the caller instead of carrying on - there is
	# no point in touching cgroup.type of a cgroup that is not there, and
	# the caller must not be told that the cgroup exists.
	mkdir "$path" || return

	if ((cgroup_version == 2)); then
		echo "threaded" > "$path/cgroup.type"
	fi
}
99
# Remove the cgroup $1 (relative to $sysfs_cgroup).
# Its members are first handed over to the parent cgroup (dirname of $1) via
# move_cgroup_procs, then the directory is removed.
# Returns: 0 when the cgroup doesn't exist, otherwise the status of rmdir.
remove_cgroup() {
	local parent
	parent=$(dirname "$1")

	if [[ -e $sysfs_cgroup/$1 ]]; then
		move_cgroup_procs "$1" "$parent"
		rmdir "$sysfs_cgroup/$1"
	fi
}
108
# Attach the current shell process to a cgroup and replace it with a command.
#   $1    - cgroup, relative to $sysfs_cgroup
#   $2... - command (with arguments) to exec
# Returns 1 when called without any argument.
#
# Meant to be run as a background job. It is kept as a {} function rather than
# a subshell one to avoid an extra bash fork hanging around - note the exec
# call at the end.
exec_in_cgroup() {
	local target=$1
	local interface

	shift || return 1

	# A threaded cgroup (cgroup-v2 only) takes ids via cgroup.threads.
	if ((cgroup_version == 2)) && is_cgroup_threaded "$target"; then
		interface=cgroup.threads
	else
		interface=cgroup.procs
	fi

	# $BASHPID, not $$ - it's the id of the very process doing the exec.
	set_cgroup_attr "$target" "$interface" "$BASHPID"
	exec "$@"
}
125
# Signal (plain kill) members of a cgroup.
#   $1 - cgroup, relative to $sysfs_cgroup
#   $2 - optional id. When given, only this id is killed and only if the
#        cgroup lists it; when omitted, every listed id is killed.
# Returns: the status of kill when it was called, 0 when there was nothing to
#          kill.
kill_in_cgroup() {
	local cgroup=$1 pid=$2
	local cgroup_pids
	local proc_interface

	if ((cgroup_version == 2)) && is_cgroup_threaded "$cgroup"; then
		proc_interface=cgroup.threads
	else
		proc_interface=cgroup.procs
	fi

	# The list is left unquoted on purpose so every id becomes a separate
	# argument. fold_list_onto_array is defined outside of this file -
	# presumably it stores each id under an index equal to the id itself,
	# which is what the lookup below depends on.
	fold_list_onto_array cgroup_pids $(< "$sysfs_cgroup/$cgroup/$proc_interface")

	if [[ -z $pid ]]; then
		if ((${#cgroup_pids[@]} > 0)); then
			kill "${cgroup_pids[@]}"
		fi
	elif [[ -n ${cgroup_pids[pid]} ]]; then
		kill "$pid"
	fi
}
148
# Remove the /cpuset cgroup. Only done for cgroup-v2; for any other version
# this is a no-op returning 0.
remove_cpuset_cgroup() {
	((cgroup_version == 2)) || return 0

	remove_cgroup /cpuset
}
154
# Print the cgroup of a process.
#   $1 - pid to look up; defaults to "self"
# Outputs: whatever follows the last ':' in /proc/<pid>/cgroup.
# Returns: 1 when /proc/<pid>/cgroup doesn't exist.
get_cgroup() {
	local pid=${1:-self}
	local cgroup_file=/proc/$pid/cgroup
	local cgroup

	if [[ ! -e $cgroup_file ]]; then
		return 1
	fi

	cgroup=$(< "$cgroup_file")
	# Drop everything up to, and including, the last colon.
	echo "${cgroup##*:}"
}
162
# Print the path of the cgroup the process $1 belongs to, i.e. the output of
# get_cgroup prefixed with $sysfs_cgroup.
# Returns: 1 (printing nothing) when get_cgroup fails.
get_cgroup_path() {
	local relative

	if ! relative=$(get_cgroup "$1"); then
		return 1
	fi

	echo "$sysfs_cgroup$relative"
}
169
# Write a value to an attribute of a cgroup and of its ancestors, starting
# with the topmost ancestor and finishing with the cgroup itself.
#   $1 - full path to the cgroup directory
#   $2 - attribute (file) name
#   $3 - value to write
# The walk up the tree ends at the first parent directory lacking the
# attribute. Directories without the attribute are never written to.
_set_cgroup_attr_top_bottom() {
	local cgroup_path=$1 attr=$2 val=$3
	local -a chain=("$cgroup_path")
	local node=$cgroup_path

	# Collect the ancestors, topmost first.
	while [[ -e ${node%/*}/$attr ]]; do
		node=${node%/*}
		chain=("$node" "${chain[@]}")
	done

	# Apply the value from the top down to the requested cgroup.
	for node in "${chain[@]}"; do
		if [[ -e $node/$attr ]]; then
			echo "$val" > "$node/$attr"
		fi
	done
}
181
# Set an attribute on the cgroup of a given process and on its ancestors.
#   $1 - pid whose cgroup is looked up through get_cgroup_path
#   $2 - attribute (file) name
#   $3 - value to write
set_cgroup_attr_top_bottom() {
	local cgroup_path

	# A failed lookup is not treated as an error here - the path simply ends
	# up empty and is passed on as such.
	cgroup_path=$(get_cgroup_path "$1") || :

	_set_cgroup_attr_top_bottom "$cgroup_path" "$2" "$3"
}
185
# Location of the cgroup hierarchy used by every function in this file.
# Read-only, so it can't be assigned again once this line has run.
declare -r sysfs_cgroup="/sys/fs/cgroup"
# 1 or 2, as printed by check_cgroup. Left empty when check_cgroup prints
# nothing, in which case the ((cgroup_version == N)) checks above see it as 0.
cgroup_version="$(check_cgroup)"
188