# SPDX-License-Identifier: BSD-3-Clause
# Copyright (C) 2021 Intel Corporation.
# All rights reserved.

# Helpers for working with cgroups (v1 and v2), mainly around a dedicated
# /cpuset cgroup. This file is meant to be sourced. It expects
# fold_list_onto_array() to be provided by the sourcing environment; the code
# below relies on it filling the named array so that it is keyed by PID.

# Print the cgroup version in use (1 or 2) on stdout.
# Returns 1 if cgroup-v2 is mounted but cpuset is not listed in
# cgroup.controllers. If neither of the known interfaces exists, nothing is
# printed and 0 is returned.
check_cgroup() {
	# Try to work with both, cgroup-v1 and cgroup-v2. Verify which version is
	# in use by looking up interfaces common for either of the versions.
	if [[ -e $sysfs_cgroup/cgroup.controllers ]]; then
		# cgroup2 is mounted, check if cpuset controller is available
		[[ $(< "$sysfs_cgroup/cgroup.controllers") == *cpuset* ]] && echo 2
	elif [[ -e $sysfs_cgroup/cpuset/tasks ]]; then
		# cgroup's cpuset subsystem is mounted
		echo 1
	fi || return 1
}

# Prepare the /cpuset cgroup and move processes into it.
# Globals: cgroup_version (read), sysfs_cgroup (read, via helpers)
init_cpuset_cgroup() {
	local cgroup pid
	local -A cgroups=()

	# For cgroup-v2 we need to prepare cpuset subsystem on our own
	if ((cgroup_version == 2)); then
		set_cgroup_attr / cgroup.subtree_control "+cpuset"
		create_cgroup /cpuset
		set_cgroup_attr /cpuset cgroup.subtree_control "+cpuset"
		# On distros which use cgroup-v2 under systemd, each process is
		# maintained under separate, pre-configured subtree. With the rule of
		# "internal processes are not permitted" this means that we won't find
		# ourselves under subsystem's root, rather on the bottom of the cgroup
		# maintaining user's session. To recreate the simple /cpuset setup from
		# v1, move all the threads from all the existing cgroups to the top
		# cgroup / and then migrate it to the /cpuset we created above.
		#
		# A plain glob plus an all-digits check is used here rather than the
		# +([0-9]) extglob, so that parsing this file does not depend on the
		# caller having enabled extglob beforehand.
		for pid in /proc/[0-9]*; do
			[[ ${pid##*/} != *[!0-9]* ]] || continue
			cgroup=$(get_cgroup "${pid##*/}") || continue
			# An empty name is not a valid associative array subscript
			[[ -n $cgroup && $cgroup != / ]] || continue
			cgroups["$cgroup"]=$cgroup
		done 2> /dev/null
		for cgroup in "${!cgroups[@]}"; do
			move_cgroup_procs "$cgroup" /
		done
		# Now, move all the threads to the cpuset
		move_cgroup_procs / /cpuset
	elif ((cgroup_version == 1)); then
		set_cgroup_attr /cpuset cgroup.procs "$$"
	fi
}

# Return 0 if cgroup $1 has a cgroup.type attribute reading "threaded".
is_cgroup_threaded() {
	[[ -e $sysfs_cgroup/$1/cgroup.type ]] || return 1
	[[ $(< "$sysfs_cgroup/$1/cgroup.type") == threaded ]]
}

# Move every process (or thread, for threaded v2 cgroups) from one cgroup to
# another. Individual failures are counted and reported, not propagated.
# Arguments: $1 - source cgroup, $2 - destination cgroup
# Outputs:   progress and a summary on stderr
move_cgroup_procs() {
	local old_cgroup=$1
	local new_cgroup=$2
	local proc procs old_proc_interface new_proc_interface

	# If target cgroups don't exist then there's nothing to do.
	[[ -e $sysfs_cgroup/$old_cgroup ]] || return 0
	[[ -e $sysfs_cgroup/$new_cgroup ]] || return 0

	old_proc_interface=cgroup.procs
	new_proc_interface=cgroup.procs
	if ((cgroup_version == 2)); then
		if is_cgroup_threaded "$new_cgroup"; then
			new_proc_interface=cgroup.threads
		fi
		if is_cgroup_threaded "$old_cgroup"; then
			old_proc_interface=cgroup.threads
		fi
	fi

	# Deliberately unquoted: every PID read from the file is a separate argument
	fold_list_onto_array procs $(< "$sysfs_cgroup/$old_cgroup/$old_proc_interface")

	local moved=0
	for proc in "${!procs[@]}"; do
		# We can't move every kernel thread around and every process can
		# exit at any point so ignore any failures upon writing the
		# processes out but keep count of any failed attempts for debugging
		# purposes.
		if move_proc "$proc" "$new_cgroup" "$old_cgroup" "$new_proc_interface"; then
			((++moved))
		fi
	done
	echo "Moved $moved processes, failed $((${#procs[@]} - moved))" >&2
}

# Write a single PID into the given attribute of the destination cgroup.
# Arguments: $1 - PID, $2 - destination cgroup, $3 - source cgroup (used for
#            logging only, defaults to N/A), $4 - attribute to write to
# Returns:   1 if the write failed, 0 otherwise
move_proc() {
	local proc=$1 new_cgroup=$2 old_cgroup=${3:-N/A} attr=$4 write_fail

	# id_proc reports on stderr, hence the 2>&1 inside the substitution
	echo "Moving $proc ($(id_proc "$proc" 2>&1)) to $new_cgroup from $old_cgroup" >&2
	if ! write_fail=$(set_cgroup_attr "$new_cgroup" "$attr" "$proc" 2>&1); then
		# Keep only the part after the last ": " of the captured error
		echo "Moving $proc failed: ${write_fail##*: }" >&2
		return 1
	fi
}

# Write a value to a cgroup attribute.
# Arguments: $1 - cgroup, $2 - attribute name, $3 - value
# Returns:   1 if the attribute does not exist, otherwise the status of the
#            write (0 when the value is empty and nothing is written)
set_cgroup_attr() {
	local cgroup=$1
	local attr=$2
	local val=$3

	[[ -e $sysfs_cgroup/$cgroup/$attr ]] || return 1

	if [[ -n $val ]]; then
		echo "$val" > "$sysfs_cgroup/$cgroup/$attr"
	fi
}

# Create cgroup $1 unless it already exists. Under cgroup-v2 the new cgroup is
# switched to the "threaded" type.
create_cgroup() {
	[[ ! -e $sysfs_cgroup/$1 ]] || return 0
	mkdir "$sysfs_cgroup/$1"
	if ((cgroup_version == 2)); then
		echo "threaded" > "$sysfs_cgroup/$1/cgroup.type"
	fi
}

# Remove cgroup $1 after moving its processes to the parent cgroup.
remove_cgroup() {
	local root_cgroup
	root_cgroup=$(dirname "$1")

	[[ -e $sysfs_cgroup/$1 ]] || return 0
	move_cgroup_procs "$1" "$root_cgroup"
	rmdir "$sysfs_cgroup/$1"
}

# Attach the current process to cgroup $1 and exec the remaining arguments.
exec_in_cgroup() {
	# Run this function as a background job - the reason why it remains {} instead
	# of being declared as a subshell is to avoid having an extra bash fork around
	# - note the exec call.

	local cgroup=$1
	local proc_interface=cgroup.procs

	shift || return 1

	if ((cgroup_version == 2)) && is_cgroup_threaded "$cgroup"; then
		proc_interface=cgroup.threads
	fi
	set_cgroup_attr "$cgroup" "$proc_interface" "$BASHPID"
	exec "$@"
}

# Send the default kill signal to processes attached to a cgroup.
# Arguments: $1 - cgroup, $2 - optional PID; when given, only that PID is
#            killed and only if it is listed in the cgroup, otherwise every
#            listed PID is killed
kill_in_cgroup() {
	local cgroup=$1
	local pid=$2
	local proc_interface=cgroup.procs
	local cgroup_pids

	if ((cgroup_version == 2)) && is_cgroup_threaded "$cgroup"; then
		proc_interface=cgroup.threads
	fi

	# Deliberately unquoted: every PID read from the file is a separate argument
	fold_list_onto_array \
		cgroup_pids \
		$(< "$sysfs_cgroup/$cgroup/$proc_interface")

	if [[ -n $pid ]]; then
		if [[ -n ${cgroup_pids[pid]} ]]; then
			kill "$pid"
		fi
	elif ((${#cgroup_pids[@]} > 0)); then
		kill "${cgroup_pids[@]}"
	fi
}

# Remove the /cpuset cgroup. Does nothing unless cgroup-v2 is in use.
remove_cpuset_cgroup() {
	if ((cgroup_version == 2)); then
		remove_cgroup /cpuset
	fi
}

# Print the cgroup of a process: whatever follows the last ':' in
# /proc/<pid>/cgroup.
# Arguments: $1 - PID (defaults to "self")
# Returns:   1 if the file does not exist or cannot be read
get_cgroup() {
	local pid=${1:-self} cgroup

	[[ -e /proc/$pid/cgroup ]] || return 1
	# The process may be gone by now, don't report an empty cgroup as success
	cgroup=$(< "/proc/$pid/cgroup") || return 1
	echo "${cgroup##*:}"
}

# Print the path under $sysfs_cgroup of the cgroup of process $1.
get_cgroup_path() {
	local cgroup

	cgroup=$(get_cgroup "$1") || return 1
	echo "$sysfs_cgroup$cgroup"
}

# Write $3 to attribute $2 of the directory $1 and of each of its ancestors
# for as long as the ancestor exposes that attribute. Ancestors are written
# first, $1 last.
_set_cgroup_attr_top_bottom() {
	local cgroup_path=$1 attr=$2 val=$3

	# Recurse upwards first so that the topmost directory is written first
	if [[ -e ${cgroup_path%/*}/$attr ]]; then
		_set_cgroup_attr_top_bottom "${cgroup_path%/*}" "$attr" "$val"
	fi

	if [[ -e $cgroup_path/$attr ]]; then
		echo "$val" > "$cgroup_path/$attr"
	fi
}

# Same as above, starting at the cgroup of the process given as $1.
# Arguments: $1 - PID, $2 - attribute, $3 - value
set_cgroup_attr_top_bottom() {
	_set_cgroup_attr_top_bottom "$(get_cgroup_path "$1")" "$2" "$3"
}

# Report which PF_* flags are set for a task, as a comma-separated list.
# Arguments: $1 - PID, $2 - name of a single flag to look for (defaults to
#            "all")
# Outputs:   the list of matching flags, on stderr
# Returns:   0 if at least one requested flag is set, 1 otherwise or when
#            /proc/<pid>/stat is not available
id_proc() {
	local pid=$1 flag_to_check=${2:-all}
	local flag flags flags_map=() comm stats tflags

	[[ -e /proc/$pid/stat ]] || return 1
	# Comm is wrapped in () but the name of the thread itself may include "()", giving in result
	# something similar to: ((sd-pam))
	comm=$(< "/proc/$pid/stat") || return 1

	# Drop everything up to the last ") ". What remains starts with the state
	# field, which puts the task flags at index 6.
	stats=(${comm/*) /})
	tflags=${stats[6]}

	# include/linux/sched.h
	flags_map[0x1]=PF_VCPU
	flags_map[0x2]=PF_IDLE
	flags_map[0x4]=PF_EXITING
	flags_map[0x8]=PF_POSTCOREDUMP
	flags_map[0x10]=PF_IO_WORKER
	flags_map[0x20]=PF_WQ_WORKER
	flags_map[0x40]=PF_FORK_NO_EXEC
	flags_map[0x80]=PF_MCE_PROCESS
	flags_map[0x100]=PF_SUPERPRIV
	flags_map[0x200]=PF_DUMPCORE
	flags_map[0x400]=PF_SIGNALED
	flags_map[0x800]=PF_MEMALLOC
	flags_map[0x1000]=PF_NPROC_EXCEEDED
	flags_map[0x2000]=PF_USED_MATH
	flags_map[0x4000]=PF_USER_WORKER
	flags_map[0x8000]=PF_NOFREEZE
	flags_map[0x20000]=PF_KSWAPD
	flags_map[0x40000]=PF_MEMALLOC_NOFS
	flags_map[0x80000]=PF_MEMALLOC_NOIO
	flags_map[0x100000]=PF_LOCAL_THROTTLE
	flags_map[0x00200000]=PF_KTHREAD
	flags_map[0x00400000]=PF_RANDOMIZE
	flags_map[0x04000000]=PF_NO_SETAFFINITY
	flags_map[0x08000000]=PF_MCE_EARLY
	flags_map[0x10000000]=PF_MEMALLOC_PIN
	flags_map[0x80000000]=PF_SUSPEND_TASK

	for flag in "${!flags_map[@]}"; do
		[[ $flag_to_check == "${flags_map[flag]}" || $flag_to_check == all ]] || continue
		((tflags & flag)) && flags=${flags:+$flags,}"${flags_map[flag]}"
	done
	if [[ -n $flags ]]; then
		echo "$flags" >&2
		return 0
	fi
	return 1
}

# Declaring an already readonly variable again is an error, so skip the
# declaration if this file has been sourced before.
[[ ${sysfs_cgroup:-} == /sys/fs/cgroup ]] || declare -r sysfs_cgroup=/sys/fs/cgroup
cgroup_version=$(check_cgroup)