; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=slp-vectorizer -S %s | FileCheck %s
; RUN: opt -aa-pipeline=basic-aa -passes='slp-vectorizer' -S %s | FileCheck %s

; SLP vectorization of scalar load/icmp/select/store chains that compare
; adjacent elements of %ptr against the constant 16383 and store the selected
; value back in place.

target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
target triple = "arm64-apple-ios5.0.0"

; Eight adjacent i16 elements, each replaced by `x ult 16383 ? x : 16383`.
; Expected: a single <8 x i16> load/icmp ult/select/store. %x is unused.
define void @select_umin_8xi16(ptr %ptr, i16 %x) {
; CHECK-LABEL: @select_umin_8xi16(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <8 x i16> [[TMP1]], splat (i16 16383)
; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> splat (i16 16383)
; CHECK-NEXT:    store <8 x i16> [[TMP3]], ptr [[PTR]], align 2
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i16, ptr %ptr
  %cmp.0 = icmp ult i16 %l.0, 16383
  %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
  store i16 %s.0, ptr %ptr, align 2

  %gep.1 = getelementptr inbounds i16, ptr %ptr, i16 1
  %l.1 = load i16, ptr %gep.1
  %cmp.1 = icmp ult i16 %l.1, 16383
  %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
  store i16 %s.1, ptr %gep.1, align 2

  %gep.2 = getelementptr inbounds i16, ptr %ptr, i16 2
  %l.2 = load i16, ptr %gep.2
  %cmp.2 = icmp ult i16 %l.2, 16383
  %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
  store i16 %s.2, ptr %gep.2, align 2

  %gep.3 = getelementptr inbounds i16, ptr %ptr, i16 3
  %l.3 = load i16, ptr %gep.3
  %cmp.3 = icmp ult i16 %l.3, 16383
  %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
  store i16 %s.3, ptr %gep.3, align 2

  %gep.4 = getelementptr inbounds i16, ptr %ptr, i16 4
  %l.4 = load i16, ptr %gep.4
  %cmp.4 = icmp ult i16 %l.4, 16383
  %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
  store i16 %s.4, ptr %gep.4, align 2

  %gep.5 = getelementptr inbounds i16, ptr %ptr, i16 5
  %l.5 = load i16, ptr %gep.5
  %cmp.5 = icmp ult i16 %l.5, 16383
  %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
  store i16 %s.5, ptr %gep.5, align 2

  %gep.6 = getelementptr inbounds i16, ptr %ptr, i16 6
  %l.6 = load i16, ptr %gep.6
  %cmp.6 = icmp ult i16 %l.6, 16383
  %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
  store i16 %s.6, ptr %gep.6, align 2

  %gep.7 = getelementptr inbounds i16, ptr %ptr, i16 7
  %l.7 = load i16, ptr %gep.7
  %cmp.7 = icmp ult i16 %l.7, 16383
  %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
  store i16 %s.7, ptr %gep.7, align 2
  ret void
}

; Four adjacent i32 elements, each replaced by `x ult 16383 ? x : 16383`.
; Expected: a single <4 x i32> load/icmp ult/select/store. %x is unused.
define void @select_umin_4xi32(ptr %ptr, i32 %x) {
; CHECK-LABEL: @select_umin_4xi32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <4 x i32> [[TMP1]], splat (i32 16383)
; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> splat (i32 16383)
; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[PTR]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i32, ptr %ptr
  %cmp.0 = icmp ult i32 %l.0, 16383
  %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
  store i32 %s.0, ptr %ptr, align 4

  %gep.1 = getelementptr inbounds i32, ptr %ptr, i32 1
  %l.1 = load i32, ptr %gep.1
  %cmp.1 = icmp ult i32 %l.1, 16383
  %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
  store i32 %s.1, ptr %gep.1, align 4

  %gep.2 = getelementptr inbounds i32, ptr %ptr, i32 2
  %l.2 = load i32, ptr %gep.2
  %cmp.2 = icmp ult i32 %l.2, 16383
  %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
  store i32 %s.2, ptr %gep.2, align 4

  %gep.3 = getelementptr inbounds i32, ptr %ptr, i32 3
  %l.3 = load i32, ptr %gep.3
  %cmp.3 = icmp ult i32 %l.3, 16383
  %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
  store i32 %s.3, ptr %gep.3, align 4

  ret void
}

; Mixed predicates: lanes 0 and 2 compare with `ult`, lanes 1 and 3 with `ugt`
; (note the function name says "ule" although the IR uses `ult`).
; Expected: two vector compares blended by a shufflevector feeding one select.
; %x is unused.
define void @select_ule_ugt_mix_4xi32(ptr %ptr, i32 %x) {
; CHECK-LABEL: @select_ule_ugt_mix_4xi32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <4 x i32> [[TMP1]], splat (i32 16383)
; CHECK-NEXT:    [[TMP3:%.*]] = icmp ugt <4 x i32> [[TMP1]], splat (i32 16383)
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i1> [[TMP2]], <4 x i1> [[TMP3]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT:    [[TMP5:%.*]] = select <4 x i1> [[TMP4]], <4 x i32> [[TMP1]], <4 x i32> splat (i32 16383)
; CHECK-NEXT:    store <4 x i32> [[TMP5]], ptr [[PTR]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i32, ptr %ptr
  %cmp.0 = icmp ult i32 %l.0, 16383
  %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
  store i32 %s.0, ptr %ptr, align 4

  %gep.1 = getelementptr inbounds i32, ptr %ptr, i32 1
  %l.1 = load i32, ptr %gep.1
  %cmp.1 = icmp ugt i32 %l.1, 16383
  %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
  store i32 %s.1, ptr %gep.1, align 4

  %gep.2 = getelementptr inbounds i32, ptr %ptr, i32 2
  %l.2 = load i32, ptr %gep.2
  %cmp.2 = icmp ult i32 %l.2, 16383
  %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
  store i32 %s.2, ptr %gep.2, align 4

  %gep.3 = getelementptr inbounds i32, ptr %ptr, i32 3
  %l.3 = load i32, ptr %gep.3
  %cmp.3 = icmp ugt i32 %l.3, 16383
  %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
  store i32 %s.3, ptr %gep.3, align 4

  ret void
}

; There is no <2 x i64> version of umin, but we can efficiently lower
; compare/select pairs with uniform predicates.
; Two adjacent i64 elements, each replaced by `x ult 16383 ? x : 16383`.
; Expected: a single <2 x i64> load/icmp ult/select/store; the vector store
; keeps the scalar stores' `align 4`. %x is unused.
define void @select_umin_2xi64(ptr %ptr, i64 %x) {
; CHECK-LABEL: @select_umin_2xi64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <2 x i64> [[TMP1]], splat (i64 16383)
; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> splat (i64 16383)
; CHECK-NEXT:    store <2 x i64> [[TMP3]], ptr [[PTR]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i64, ptr %ptr
  %cmp.0 = icmp ult i64 %l.0, 16383
  %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
  store i64 %s.0, ptr %ptr, align 4

  %gep.1 = getelementptr inbounds i64, ptr %ptr, i64 1
  %l.1 = load i64, ptr %gep.1
  %cmp.1 = icmp ult i64 %l.1, 16383
  %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
  store i64 %s.1, ptr %gep.1, align 4

  ret void
}


; Eight adjacent i16 elements, each replaced by `x ule 16383 ? x : 16383`.
; Expected: a single <8 x i16> load/icmp ule/select/store. %x is unused.
define void @select_umin_ule_8xi16(ptr %ptr, i16 %x) {
; CHECK-LABEL: @select_umin_ule_8xi16(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ule <8 x i16> [[TMP1]], splat (i16 16383)
; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> splat (i16 16383)
; CHECK-NEXT:    store <8 x i16> [[TMP3]], ptr [[PTR]], align 2
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i16, ptr %ptr
  %cmp.0 = icmp ule i16 %l.0, 16383
  %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
  store i16 %s.0, ptr %ptr, align 2

  %gep.1 = getelementptr inbounds i16, ptr %ptr, i16 1
  %l.1 = load i16, ptr %gep.1
  %cmp.1 = icmp ule i16 %l.1, 16383
  %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
  store i16 %s.1, ptr %gep.1, align 2

  %gep.2 = getelementptr inbounds i16, ptr %ptr, i16 2
  %l.2 = load i16, ptr %gep.2
  %cmp.2 = icmp ule i16 %l.2, 16383
  %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
  store i16 %s.2, ptr %gep.2, align 2

  %gep.3 = getelementptr inbounds i16, ptr %ptr, i16 3
  %l.3 = load i16, ptr %gep.3
  %cmp.3 = icmp ule i16 %l.3, 16383
  %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
  store i16 %s.3, ptr %gep.3, align 2

  %gep.4 = getelementptr inbounds i16, ptr %ptr, i16 4
  %l.4 = load i16, ptr %gep.4
  %cmp.4 = icmp ule i16 %l.4, 16383
  %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
  store i16 %s.4, ptr %gep.4, align 2

  %gep.5 = getelementptr inbounds i16, ptr %ptr, i16 5
  %l.5 = load i16, ptr %gep.5
  %cmp.5 = icmp ule i16 %l.5, 16383
  %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
  store i16 %s.5, ptr %gep.5, align 2

  %gep.6 = getelementptr inbounds i16, ptr %ptr, i16 6
  %l.6 = load i16, ptr %gep.6
  %cmp.6 = icmp ule i16 %l.6, 16383
  %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
  store i16 %s.6, ptr %gep.6, align 2

  %gep.7 = getelementptr inbounds i16, ptr %ptr, i16 7
  %l.7 = load i16, ptr %gep.7
  %cmp.7 = icmp ule i16 %l.7, 16383
  %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
  store i16 %s.7, ptr %gep.7, align 2
  ret void
}

; Four adjacent i32 elements, each replaced by `x ule 16383 ? x : 16383`.
; Expected: a single <4 x i32> load/icmp ule/select/store. %x is unused.
define void @select_umin_ule_4xi32(ptr %ptr, i32 %x) {
; CHECK-LABEL: @select_umin_ule_4xi32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ule <4 x i32> [[TMP1]], splat (i32 16383)
; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> splat (i32 16383)
; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[PTR]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i32, ptr %ptr
  %cmp.0 = icmp ule i32 %l.0, 16383
  %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
  store i32 %s.0, ptr %ptr, align 4

  %gep.1 = getelementptr inbounds i32, ptr %ptr, i32 1
  %l.1 = load i32, ptr %gep.1
  %cmp.1 = icmp ule i32 %l.1, 16383
  %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
  store i32 %s.1, ptr %gep.1, align 4

  %gep.2 = getelementptr inbounds i32, ptr %ptr, i32 2
  %l.2 = load i32, ptr %gep.2
  %cmp.2 = icmp ule i32 %l.2, 16383
  %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
  store i32 %s.2, ptr %gep.2, align 4

  %gep.3 = getelementptr inbounds i32, ptr %ptr, i32 3
  %l.3 = load i32, ptr %gep.3
  %cmp.3 = icmp ule i32 %l.3, 16383
  %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
  store i32 %s.3, ptr %gep.3, align 4

  ret void
}

; There is no <2 x i64> version of umin, but we can efficiently lower
; compare/select pairs with uniform predicates.
; Two adjacent i64 elements using `ule`; expected: a single <2 x i64>
; load/icmp ule/select/store. %x is unused.
define void @select_umin_ule_2xi64(ptr %ptr, i64 %x) {
; CHECK-LABEL: @select_umin_ule_2xi64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ule <2 x i64> [[TMP1]], splat (i64 16383)
; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> splat (i64 16383)
; CHECK-NEXT:    store <2 x i64> [[TMP3]], ptr [[PTR]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i64, ptr %ptr
  %cmp.0 = icmp ule i64 %l.0, 16383
  %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
  store i64 %s.0, ptr %ptr, align 4

  %gep.1 = getelementptr inbounds i64, ptr %ptr, i64 1
  %l.1 = load i64, ptr %gep.1
  %cmp.1 = icmp ule i64 %l.1, 16383
  %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
  store i64 %s.1, ptr %gep.1, align 4

  ret void
}

; Eight adjacent i16 elements, each replaced by `x slt 16383 ? x : 16383`.
; Expected: a single <8 x i16> load/icmp slt/select/store. %x is unused.
define void @select_smin_8xi16(ptr %ptr, i16 %x) {
; CHECK-LABEL: @select_smin_8xi16(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2
; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt <8 x i16> [[TMP1]], splat (i16 16383)
; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> splat (i16 16383)
; CHECK-NEXT:    store <8 x i16> [[TMP3]], ptr [[PTR]], align 2
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i16, ptr %ptr
  %cmp.0 = icmp slt i16 %l.0, 16383
  %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
  store i16 %s.0, ptr %ptr, align 2

  %gep.1 = getelementptr inbounds i16, ptr %ptr, i16 1
  %l.1 = load i16, ptr %gep.1
  %cmp.1 = icmp slt i16 %l.1, 16383
  %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
  store i16 %s.1, ptr %gep.1, align 2

  %gep.2 = getelementptr inbounds i16, ptr %ptr, i16 2
  %l.2 = load i16, ptr %gep.2
  %cmp.2 = icmp slt i16 %l.2, 16383
  %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
  store i16 %s.2, ptr %gep.2, align 2

  %gep.3 = getelementptr inbounds i16, ptr %ptr, i16 3
  %l.3 = load i16, ptr %gep.3
  %cmp.3 = icmp slt i16 %l.3, 16383
  %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
  store i16 %s.3, ptr %gep.3, align 2

  %gep.4 = getelementptr inbounds i16, ptr %ptr, i16 4
  %l.4 = load i16, ptr %gep.4
  %cmp.4 = icmp slt i16 %l.4, 16383
  %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
  store i16 %s.4, ptr %gep.4, align 2

  %gep.5 = getelementptr inbounds i16, ptr %ptr, i16 5
  %l.5 = load i16, ptr %gep.5
  %cmp.5 = icmp slt i16 %l.5, 16383
  %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
  store i16 %s.5, ptr %gep.5, align 2

  %gep.6 = getelementptr inbounds i16, ptr %ptr, i16 6
  %l.6 = load i16, ptr %gep.6
  %cmp.6 = icmp slt i16 %l.6, 16383
  %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
  store i16 %s.6, ptr %gep.6, align 2

  %gep.7 = getelementptr inbounds i16, ptr %ptr, i16 7
  %l.7 = load i16, ptr %gep.7
  %cmp.7 = icmp slt i16 %l.7, 16383
  %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
  store i16 %s.7, ptr %gep.7, align 2
  ret void
}

; Four adjacent i32 elements, each replaced by `x slt 16383 ? x : 16383`.
; Expected: a single <4 x i32> load/icmp slt/select/store. %x is unused.
define void @select_smin_4xi32(ptr %ptr, i32 %x) {
; CHECK-LABEL: @select_smin_4xi32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt <4 x i32> [[TMP1]], splat (i32 16383)
; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> splat (i32 16383)
; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[PTR]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i32, ptr %ptr
  %cmp.0 = icmp slt i32 %l.0, 16383
  %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
  store i32 %s.0, ptr %ptr, align 4

  %gep.1 = getelementptr inbounds i32, ptr %ptr, i32 1
  %l.1 = load i32, ptr %gep.1
  %cmp.1 = icmp slt i32 %l.1, 16383
  %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
  store i32 %s.1, ptr %gep.1, align 4

  %gep.2 = getelementptr inbounds i32, ptr %ptr, i32 2
  %l.2 = load i32, ptr %gep.2
  %cmp.2 = icmp slt i32 %l.2, 16383
  %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
  store i32 %s.2, ptr %gep.2, align 4

  %gep.3 = getelementptr inbounds i32, ptr %ptr, i32 3
  %l.3 = load i32, ptr %gep.3
  %cmp.3 = icmp slt i32 %l.3, 16383
  %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
  store i32 %s.3, ptr %gep.3, align 4

  ret void
}

; There is no <2 x i64> version of smin, but we can efficiently lower
; compare/select pairs with uniform predicates.
; Two adjacent i64 elements, each replaced by `x slt 16383 ? x : 16383`.
; Expected: a single <2 x i64> load/icmp slt/select/store; the vector store
; keeps the scalar stores' `align 4`. %x is unused.
define void @select_smin_2xi64(ptr %ptr, i64 %x) {
; CHECK-LABEL: @select_smin_2xi64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt <2 x i64> [[TMP1]], splat (i64 16383)
; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> splat (i64 16383)
; CHECK-NEXT:    store <2 x i64> [[TMP3]], ptr [[PTR]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i64, ptr %ptr
  %cmp.0 = icmp slt i64 %l.0, 16383
  %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
  store i64 %s.0, ptr %ptr, align 4

  %gep.1 = getelementptr inbounds i64, ptr %ptr, i64 1
  %l.1 = load i64, ptr %gep.1
  %cmp.1 = icmp slt i64 %l.1, 16383
  %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
  store i64 %s.1, ptr %gep.1, align 4

  ret void
}

; Eight adjacent i16 elements, each replaced by `x sle 16383 ? x : 16383`.
; Expected: a single <8 x i16> load/icmp sle/select/store. %x is unused.
define void @select_smin_sle_8xi16(ptr %ptr, i16 %x) {
; CHECK-LABEL: @select_smin_sle_8xi16(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2
; CHECK-NEXT:    [[TMP2:%.*]] = icmp sle <8 x i16> [[TMP1]], splat (i16 16383)
; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> splat (i16 16383)
; CHECK-NEXT:    store <8 x i16> [[TMP3]], ptr [[PTR]], align 2
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i16, ptr %ptr
  %cmp.0 = icmp sle i16 %l.0, 16383
  %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
  store i16 %s.0, ptr %ptr, align 2

  %gep.1 = getelementptr inbounds i16, ptr %ptr, i16 1
  %l.1 = load i16, ptr %gep.1
  %cmp.1 = icmp sle i16 %l.1, 16383
  %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
  store i16 %s.1, ptr %gep.1, align 2

  %gep.2 = getelementptr inbounds i16, ptr %ptr, i16 2
  %l.2 = load i16, ptr %gep.2
  %cmp.2 = icmp sle i16 %l.2, 16383
  %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
  store i16 %s.2, ptr %gep.2, align 2

  %gep.3 = getelementptr inbounds i16, ptr %ptr, i16 3
  %l.3 = load i16, ptr %gep.3
  %cmp.3 = icmp sle i16 %l.3, 16383
  %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
  store i16 %s.3, ptr %gep.3, align 2

  %gep.4 = getelementptr inbounds i16, ptr %ptr, i16 4
  %l.4 = load i16, ptr %gep.4
  %cmp.4 = icmp sle i16 %l.4, 16383
  %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
  store i16 %s.4, ptr %gep.4, align 2

  %gep.5 = getelementptr inbounds i16, ptr %ptr, i16 5
  %l.5 = load i16, ptr %gep.5
  %cmp.5 = icmp sle i16 %l.5, 16383
  %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
  store i16 %s.5, ptr %gep.5, align 2

  %gep.6 = getelementptr inbounds i16, ptr %ptr, i16 6
  %l.6 = load i16, ptr %gep.6
  %cmp.6 = icmp sle i16 %l.6, 16383
  %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
  store i16 %s.6, ptr %gep.6, align 2

  %gep.7 = getelementptr inbounds i16, ptr %ptr, i16 7
  %l.7 = load i16, ptr %gep.7
  %cmp.7 = icmp sle i16 %l.7, 16383
  %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
  store i16 %s.7, ptr %gep.7, align 2
  ret void
}

; Four adjacent i32 elements, each replaced by `x sle 16383 ? x : 16383`.
; Expected: a single <4 x i32> load/icmp sle/select/store. %x is unused.
define void @select_smin_sle_4xi32(ptr %ptr, i32 %x) {
; CHECK-LABEL: @select_smin_sle_4xi32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = icmp sle <4 x i32> [[TMP1]], splat (i32 16383)
; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> splat (i32 16383)
; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[PTR]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i32, ptr %ptr
  %cmp.0 = icmp sle i32 %l.0, 16383
  %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
  store i32 %s.0, ptr %ptr, align 4

  %gep.1 = getelementptr inbounds i32, ptr %ptr, i32 1
  %l.1 = load i32, ptr %gep.1
  %cmp.1 = icmp sle i32 %l.1, 16383
  %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
  store i32 %s.1, ptr %gep.1, align 4

  %gep.2 = getelementptr inbounds i32, ptr %ptr, i32 2
  %l.2 = load i32, ptr %gep.2
  %cmp.2 = icmp sle i32 %l.2, 16383
  %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
  store i32 %s.2, ptr %gep.2, align 4

  %gep.3 = getelementptr inbounds i32, ptr %ptr, i32 3
  %l.3 = load i32, ptr %gep.3
  %cmp.3 = icmp sle i32 %l.3, 16383
  %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
  store i32 %s.3, ptr %gep.3, align 4

  ret void
}

; There is no <2 x i64> version of smin, but we can efficiently lower
; compare/select pairs with uniform predicates.
; Two adjacent i64 elements using `sle`; expected: a single <2 x i64>
; load/icmp sle/select/store. %x is unused.
define void @select_smin_sle_2xi64(ptr %ptr, i64 %x) {
; CHECK-LABEL: @select_smin_sle_2xi64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = icmp sle <2 x i64> [[TMP1]], splat (i64 16383)
; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> splat (i64 16383)
; CHECK-NEXT:    store <2 x i64> [[TMP3]], ptr [[PTR]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i64, ptr %ptr
  %cmp.0 = icmp sle i64 %l.0, 16383
  %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
  store i64 %s.0, ptr %ptr, align 4

  %gep.1 = getelementptr inbounds i64, ptr %ptr, i64 1
  %l.1 = load i64, ptr %gep.1
  %cmp.1 = icmp sle i64 %l.1, 16383
  %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
  store i64 %s.1, ptr %gep.1, align 4

  ret void
}

; Eight adjacent i16 elements, each replaced by `x ugt 16383 ? x : 16383`.
; Expected: a single <8 x i16> load/icmp ugt/select/store. %x is unused.
define void @select_umax_8xi16(ptr %ptr, i16 %x) {
; CHECK-LABEL: @select_umax_8xi16(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <8 x i16> [[TMP1]], splat (i16 16383)
; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> splat (i16 16383)
; CHECK-NEXT:    store <8 x i16> [[TMP3]], ptr [[PTR]], align 2
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i16, ptr %ptr
  %cmp.0 = icmp ugt i16 %l.0, 16383
  %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
  store i16 %s.0, ptr %ptr, align 2

  %gep.1 = getelementptr inbounds i16, ptr %ptr, i16 1
  %l.1 = load i16, ptr %gep.1
  %cmp.1 = icmp ugt i16 %l.1, 16383
  %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
  store i16 %s.1, ptr %gep.1, align 2

  %gep.2 = getelementptr inbounds i16, ptr %ptr, i16 2
  %l.2 = load i16, ptr %gep.2
  %cmp.2 = icmp ugt i16 %l.2, 16383
  %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
  store i16 %s.2, ptr %gep.2, align 2

  %gep.3 = getelementptr inbounds i16, ptr %ptr, i16 3
  %l.3 = load i16, ptr %gep.3
  %cmp.3 = icmp ugt i16 %l.3, 16383
  %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
  store i16 %s.3, ptr %gep.3, align 2

  %gep.4 = getelementptr inbounds i16, ptr %ptr, i16 4
  %l.4 = load i16, ptr %gep.4
  %cmp.4 = icmp ugt i16 %l.4, 16383
  %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
  store i16 %s.4, ptr %gep.4, align 2

  %gep.5 = getelementptr inbounds i16, ptr %ptr, i16 5
  %l.5 = load i16, ptr %gep.5
  %cmp.5 = icmp ugt i16 %l.5, 16383
  %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
  store i16 %s.5, ptr %gep.5, align 2

  %gep.6 = getelementptr inbounds i16, ptr %ptr, i16 6
  %l.6 = load i16, ptr %gep.6
  %cmp.6 = icmp ugt i16 %l.6, 16383
  %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
  store i16 %s.6, ptr %gep.6, align 2

  %gep.7 = getelementptr inbounds i16, ptr %ptr, i16 7
  %l.7 = load i16, ptr %gep.7
  %cmp.7 = icmp ugt i16 %l.7, 16383
  %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
  store i16 %s.7, ptr %gep.7, align 2
  ret void
}

; Four adjacent i32 elements, each replaced by `x ugt 16383 ? x : 16383`.
; Expected: a single <4 x i32> load/icmp ugt/select/store. %x is unused.
define void @select_umax_4xi32(ptr %ptr, i32 %x) {
; CHECK-LABEL: @select_umax_4xi32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <4 x i32> [[TMP1]], splat (i32 16383)
; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> splat (i32 16383)
; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[PTR]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i32, ptr %ptr
  %cmp.0 = icmp ugt i32 %l.0, 16383
  %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
  store i32 %s.0, ptr %ptr, align 4

  %gep.1 = getelementptr inbounds i32, ptr %ptr, i32 1
  %l.1 = load i32, ptr %gep.1
  %cmp.1 = icmp ugt i32 %l.1, 16383
  %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
  store i32 %s.1, ptr %gep.1, align 4

  %gep.2 = getelementptr inbounds i32, ptr %ptr, i32 2
  %l.2 = load i32, ptr %gep.2
  %cmp.2 = icmp ugt i32 %l.2, 16383
  %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
  store i32 %s.2, ptr %gep.2, align 4

  %gep.3 = getelementptr inbounds i32, ptr %ptr, i32 3
  %l.3 = load i32, ptr %gep.3
  %cmp.3 = icmp ugt i32 %l.3, 16383
  %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
  store i32 %s.3, ptr %gep.3, align 4

  ret void
}

; There is no <2 x i64> version of umax, but we can efficiently lower
; compare/select pairs with uniform predicates.
; Two adjacent i64 elements, each replaced by `x ugt 16383 ? x : 16383`.
; Expected: a single <2 x i64> load/icmp ugt/select/store; the vector store
; keeps the scalar stores' `align 4`. %x is unused.
define void @select_umax_2xi64(ptr %ptr, i64 %x) {
; CHECK-LABEL: @select_umax_2xi64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <2 x i64> [[TMP1]], splat (i64 16383)
; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> splat (i64 16383)
; CHECK-NEXT:    store <2 x i64> [[TMP3]], ptr [[PTR]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i64, ptr %ptr
  %cmp.0 = icmp ugt i64 %l.0, 16383
  %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
  store i64 %s.0, ptr %ptr, align 4

  %gep.1 = getelementptr inbounds i64, ptr %ptr, i64 1
  %l.1 = load i64, ptr %gep.1
  %cmp.1 = icmp ugt i64 %l.1, 16383
  %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
  store i64 %s.1, ptr %gep.1, align 4

  ret void
}

; Eight adjacent i16 elements, each replaced by `x uge 16383 ? x : 16383`.
; Expected: a single <8 x i16> load/icmp uge/select/store. %x is unused.
define void @select_umax_uge_8xi16(ptr %ptr, i16 %x) {
; CHECK-LABEL: @select_umax_uge_8xi16(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2
; CHECK-NEXT:    [[TMP2:%.*]] = icmp uge <8 x i16> [[TMP1]], splat (i16 16383)
; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> splat (i16 16383)
; CHECK-NEXT:    store <8 x i16> [[TMP3]], ptr [[PTR]], align 2
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i16, ptr %ptr
  %cmp.0 = icmp uge i16 %l.0, 16383
  %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
  store i16 %s.0, ptr %ptr, align 2

  %gep.1 = getelementptr inbounds i16, ptr %ptr, i16 1
  %l.1 = load i16, ptr %gep.1
  %cmp.1 = icmp uge i16 %l.1, 16383
  %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
  store i16 %s.1, ptr %gep.1, align 2

  %gep.2 = getelementptr inbounds i16, ptr %ptr, i16 2
  %l.2 = load i16, ptr %gep.2
  %cmp.2 = icmp uge i16 %l.2, 16383
  %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
  store i16 %s.2, ptr %gep.2, align 2

  %gep.3 = getelementptr inbounds i16, ptr %ptr, i16 3
  %l.3 = load i16, ptr %gep.3
  %cmp.3 = icmp uge i16 %l.3, 16383
  %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
  store i16 %s.3, ptr %gep.3, align 2

  %gep.4 = getelementptr inbounds i16, ptr %ptr, i16 4
  %l.4 = load i16, ptr %gep.4
  %cmp.4 = icmp uge i16 %l.4, 16383
  %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
  store i16 %s.4, ptr %gep.4, align 2

  %gep.5 = getelementptr inbounds i16, ptr %ptr, i16 5
  %l.5 = load i16, ptr %gep.5
  %cmp.5 = icmp uge i16 %l.5, 16383
  %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
  store i16 %s.5, ptr %gep.5, align 2

  %gep.6 = getelementptr inbounds i16, ptr %ptr, i16 6
  %l.6 = load i16, ptr %gep.6
  %cmp.6 = icmp uge i16 %l.6, 16383
  %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
  store i16 %s.6, ptr %gep.6, align 2

  %gep.7 = getelementptr inbounds i16, ptr %ptr, i16 7
  %l.7 = load i16, ptr %gep.7
  %cmp.7 = icmp uge i16 %l.7, 16383
  %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
  store i16 %s.7, ptr %gep.7, align 2
  ret void
}

; Four adjacent i32 elements, each replaced by `x uge 16383 ? x : 16383`.
; Expected: a single <4 x i32> load/icmp uge/select/store. %x is unused.
define void @select_umax_uge_4xi32(ptr %ptr, i32 %x) {
; CHECK-LABEL: @select_umax_uge_4xi32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = icmp uge <4 x i32> [[TMP1]], splat (i32 16383)
; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> splat (i32 16383)
; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[PTR]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i32, ptr %ptr
  %cmp.0 = icmp uge i32 %l.0, 16383
  %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
  store i32 %s.0, ptr %ptr, align 4

  %gep.1 = getelementptr inbounds i32, ptr %ptr, i32 1
  %l.1 = load i32, ptr %gep.1
  %cmp.1 = icmp uge i32 %l.1, 16383
  %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
  store i32 %s.1, ptr %gep.1, align 4

  %gep.2 = getelementptr inbounds i32, ptr %ptr, i32 2
  %l.2 = load i32, ptr %gep.2
  %cmp.2 = icmp uge i32 %l.2, 16383
  %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
  store i32 %s.2, ptr %gep.2, align 4

  %gep.3 = getelementptr inbounds i32, ptr %ptr, i32 3
  %l.3 = load i32, ptr %gep.3
  %cmp.3 = icmp uge i32 %l.3, 16383
  %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
  store i32 %s.3, ptr %gep.3, align 4

  ret void
}

; There is no <2 x i64> version of umax, but we can efficiently lower
; compare/select pairs with uniform predicates.
; Two adjacent i64 elements using `uge`; expected: a single <2 x i64>
; load/icmp uge/select/store. %x is unused.
define void @select_umax_uge_2xi64(ptr %ptr, i64 %x) {
; CHECK-LABEL: @select_umax_uge_2xi64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = icmp uge <2 x i64> [[TMP1]], splat (i64 16383)
; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> splat (i64 16383)
; CHECK-NEXT:    store <2 x i64> [[TMP3]], ptr [[PTR]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i64, ptr %ptr
  %cmp.0 = icmp uge i64 %l.0, 16383
  %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
  store i64 %s.0, ptr %ptr, align 4

  %gep.1 = getelementptr inbounds i64, ptr %ptr, i64 1
  %l.1 = load i64, ptr %gep.1
  %cmp.1 = icmp uge i64 %l.1, 16383
  %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
  store i64 %s.1, ptr %gep.1, align 4

  ret void
}

; Eight adjacent i16 elements, each replaced by `x sgt 16383 ? x : 16383`.
; Expected: a single <8 x i16> load/icmp sgt/select/store. %x is unused.
define void @select_smax_8xi16(ptr %ptr, i16 %x) {
; CHECK-LABEL: @select_smax_8xi16(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2
; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <8 x i16> [[TMP1]], splat (i16 16383)
; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> splat (i16 16383)
; CHECK-NEXT:    store <8 x i16> [[TMP3]], ptr [[PTR]], align 2
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i16, ptr %ptr
  %cmp.0 = icmp sgt i16 %l.0, 16383
  %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
  store i16 %s.0, ptr %ptr, align 2

  %gep.1 = getelementptr inbounds i16, ptr %ptr, i16 1
  %l.1 = load i16, ptr %gep.1
  %cmp.1 = icmp sgt i16 %l.1, 16383
  %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
  store i16 %s.1, ptr %gep.1, align 2

  %gep.2 = getelementptr inbounds i16, ptr %ptr, i16 2
  %l.2 = load i16, ptr %gep.2
  %cmp.2 = icmp sgt i16 %l.2, 16383
  %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
  store i16 %s.2, ptr %gep.2, align 2

  %gep.3 = getelementptr inbounds i16, ptr %ptr, i16 3
  %l.3 = load i16, ptr %gep.3
  %cmp.3 = icmp sgt i16 %l.3, 16383
  %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
  store i16 %s.3, ptr %gep.3, align 2

  %gep.4 = getelementptr inbounds i16, ptr %ptr, i16 4
  %l.4 = load i16, ptr %gep.4
  %cmp.4 = icmp sgt i16 %l.4, 16383
  %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
  store i16 %s.4, ptr %gep.4, align 2

  %gep.5 = getelementptr inbounds i16, ptr %ptr, i16 5
  %l.5 = load i16, ptr %gep.5
  %cmp.5 = icmp sgt i16 %l.5, 16383
  %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
  store i16 %s.5, ptr %gep.5, align 2

  %gep.6 = getelementptr inbounds i16, ptr %ptr, i16 6
  %l.6 = load i16, ptr %gep.6
  %cmp.6 = icmp sgt i16 %l.6, 16383
  %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
  store i16 %s.6, ptr %gep.6, align 2

  %gep.7 = getelementptr inbounds i16, ptr %ptr, i16 7
  %l.7 = load i16, ptr %gep.7
  %cmp.7 = icmp sgt i16 %l.7, 16383
  %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
  store i16 %s.7, ptr %gep.7, align 2
  ret void
}

; Four adjacent i32 elements, each replaced by `x sgt 16383 ? x : 16383`.
; Expected: a single <4 x i32> load/icmp sgt/select/store. %x is unused.
define void @select_smax_4xi32(ptr %ptr, i32 %x) {
; CHECK-LABEL: @select_smax_4xi32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <4 x i32> [[TMP1]], splat (i32 16383)
; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> splat (i32 16383)
; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[PTR]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i32, ptr %ptr
  %cmp.0 = icmp sgt i32 %l.0, 16383
  %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
  store i32 %s.0, ptr %ptr, align 4

  %gep.1 = getelementptr inbounds i32, ptr %ptr, i32 1
  %l.1 = load i32, ptr %gep.1
  %cmp.1 = icmp sgt i32 %l.1, 16383
  %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
  store i32 %s.1, ptr %gep.1, align 4

  %gep.2 = getelementptr inbounds i32, ptr %ptr, i32 2
  %l.2 = load i32, ptr %gep.2
  %cmp.2 = icmp sgt i32 %l.2, 16383
  %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
  store i32 %s.2, ptr %gep.2, align 4

  %gep.3 = getelementptr inbounds i32, ptr %ptr, i32 3
  %l.3 = load i32, ptr %gep.3
  %cmp.3 = icmp sgt i32 %l.3, 16383
  %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
  store i32 %s.3, ptr %gep.3, align 4

  ret void
}

; There is no <2 x i64> version of smax, but we can efficiently lower
; compare/select pairs with uniform predicates.
define void @select_smax_2xi64(ptr %ptr, i64 %x) {
; CHECK-LABEL: @select_smax_2xi64(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i64> [[TMP1]], splat (i64 16383)
; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> splat (i64 16383)
; CHECK-NEXT: store <2 x i64> [[TMP3]], ptr [[PTR]], align 4
; CHECK-NEXT: ret void
;
entry:
  ; Lane 0 lives at %ptr itself: keep the loaded value when it is strictly
  ; greater than 16383, otherwise write back 16383.
  %val.0 = load i64, ptr %ptr
  %is.gt.0 = icmp sgt i64 %val.0, 16383
  %max.0 = select i1 %is.gt.0, i64 %val.0, i64 16383
  store i64 %max.0, ptr %ptr, align 4

  ; Lane 1, one i64 element past %ptr.
  %slot.1 = getelementptr inbounds i64, ptr %ptr, i64 1
  %val.1 = load i64, ptr %slot.1
  %is.gt.1 = icmp sgt i64 %val.1, 16383
  %max.1 = select i1 %is.gt.1, i64 %val.1, i64 16383
  store i64 %max.1, ptr %slot.1, align 4

  ret void
}


; Eight consecutive i16 lanes, each updated in place with the same
; "icmp sge against 16383, then select" pair (signed max written with the
; non-strict predicate).
define void @select_smax_sge_8xi16(ptr %ptr, i16 %x) {
; CHECK-LABEL: @select_smax_sge_8xi16(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2
; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <8 x i16> [[TMP1]], splat (i16 16383)
; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> splat (i16 16383)
; CHECK-NEXT: store <8 x i16> [[TMP3]], ptr [[PTR]], align 2
; CHECK-NEXT: ret void
;
entry:
  ; Lane 0 (at %ptr).
  %val.0 = load i16, ptr %ptr
  %is.ge.0 = icmp sge i16 %val.0, 16383
  %max.0 = select i1 %is.ge.0, i16 %val.0, i16 16383
  store i16 %max.0, ptr %ptr, align 2

  ; Lane 1.
  %slot.1 = getelementptr inbounds i16, ptr %ptr, i16 1
  %val.1 = load i16, ptr %slot.1
  %is.ge.1 = icmp sge i16 %val.1, 16383
  %max.1 = select i1 %is.ge.1, i16 %val.1, i16 16383
  store i16 %max.1, ptr %slot.1, align 2

  ; Lane 2.
  %slot.2 = getelementptr inbounds i16, ptr %ptr, i16 2
  %val.2 = load i16, ptr %slot.2
  %is.ge.2 = icmp sge i16 %val.2, 16383
  %max.2 = select i1 %is.ge.2, i16 %val.2, i16 16383
  store i16 %max.2, ptr %slot.2, align 2

  ; Lane 3.
  %slot.3 = getelementptr inbounds i16, ptr %ptr, i16 3
  %val.3 = load i16, ptr %slot.3
  %is.ge.3 = icmp sge i16 %val.3, 16383
  %max.3 = select i1 %is.ge.3, i16 %val.3, i16 16383
  store i16 %max.3, ptr %slot.3, align 2

  ; Lane 4.
  %slot.4 = getelementptr inbounds i16, ptr %ptr, i16 4
  %val.4 = load i16, ptr %slot.4
  %is.ge.4 = icmp sge i16 %val.4, 16383
  %max.4 = select i1 %is.ge.4, i16 %val.4, i16 16383
  store i16 %max.4, ptr %slot.4, align 2

  ; Lane 5.
  %slot.5 = getelementptr inbounds i16, ptr %ptr, i16 5
  %val.5 = load i16, ptr %slot.5
  %is.ge.5 = icmp sge i16 %val.5, 16383
  %max.5 = select i1 %is.ge.5, i16 %val.5, i16 16383
  store i16 %max.5, ptr %slot.5, align 2

  ; Lane 6.
  %slot.6 = getelementptr inbounds i16, ptr %ptr, i16 6
  %val.6 = load i16, ptr %slot.6
  %is.ge.6 = icmp sge i16 %val.6, 16383
  %max.6 = select i1 %is.ge.6, i16 %val.6, i16 16383
  store i16 %max.6, ptr %slot.6, align 2

  ; Lane 7.
  %slot.7 = getelementptr inbounds i16, ptr %ptr, i16 7
  %val.7 = load i16, ptr %slot.7
  %is.ge.7 = icmp sge i16 %val.7, 16383
  %max.7 = select i1 %is.ge.7, i16 %val.7, i16 16383
  store i16 %max.7, ptr %slot.7, align 2
  ret void
}

; Four consecutive i32 lanes with the same sge-compare/select pair against
; the constant 16383.
define void @select_smax_sge_4xi32(ptr %ptr, i32 %x) {
; CHECK-LABEL: @select_smax_sge_4xi32(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <4 x i32> [[TMP1]], splat (i32 16383)
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> splat (i32 16383)
; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr [[PTR]], align 4
; CHECK-NEXT: ret void
;
entry:
  ; Lane 0 (at %ptr).
  %val.0 = load i32, ptr %ptr
  %is.ge.0 = icmp sge i32 %val.0, 16383
  %max.0 = select i1 %is.ge.0, i32 %val.0, i32 16383
  store i32 %max.0, ptr %ptr, align 4

  ; Lane 1.
  %slot.1 = getelementptr inbounds i32, ptr %ptr, i32 1
  %val.1 = load i32, ptr %slot.1
  %is.ge.1 = icmp sge i32 %val.1, 16383
  %max.1 = select i1 %is.ge.1, i32 %val.1, i32 16383
  store i32 %max.1, ptr %slot.1, align 4

  ; Lane 2.
  %slot.2 = getelementptr inbounds i32, ptr %ptr, i32 2
  %val.2 = load i32, ptr %slot.2
  %is.ge.2 = icmp sge i32 %val.2, 16383
  %max.2 = select i1 %is.ge.2, i32 %val.2, i32 16383
  store i32 %max.2, ptr %slot.2, align 4

  ; Lane 3.
  %slot.3 = getelementptr inbounds i32, ptr %ptr, i32 3
  %val.3 = load i32, ptr %slot.3
  %is.ge.3 = icmp sge i32 %val.3, 16383
  %max.3 = select i1 %is.ge.3, i32 %val.3, i32 16383
  store i32 %max.3, ptr %slot.3, align 4

  ret void
}

; There is no <2 x i64> version of smax, but we can efficiently lower
; compare/select pairs with uniform predicates.
define void @select_smax_sge_2xi64(ptr %ptr, i64 %x) {
; CHECK-LABEL: @select_smax_sge_2xi64(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <2 x i64> [[TMP1]], splat (i64 16383)
; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> splat (i64 16383)
; CHECK-NEXT: store <2 x i64> [[TMP3]], ptr [[PTR]], align 4
; CHECK-NEXT: ret void
;
entry:
  ; Lane 0 (at %ptr).
  %val.0 = load i64, ptr %ptr
  %is.ge.0 = icmp sge i64 %val.0, 16383
  %max.0 = select i1 %is.ge.0, i64 %val.0, i64 16383
  store i64 %max.0, ptr %ptr, align 4

  ; Lane 1, one i64 element past %ptr.
  %slot.1 = getelementptr inbounds i64, ptr %ptr, i64 1
  %val.1 = load i64, ptr %slot.1
  %is.ge.1 = icmp sge i64 %val.1, 16383
  %max.1 = select i1 %is.ge.1, i64 %val.1, i64 16383
  store i64 %max.1, ptr %slot.1, align 4

  ret void
}