; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s

; Test plan:
;   1. The generic pattern: a select between two arbitrary constant vectors.
;   2. Special cases in which every lane of the two constants differs by exactly one.
;   3. Extra special cases in which the constants are built from 0, 1 or -1.
; Every minimal select test (condition passed in as an argument) is followed by a
; more typical variant in which the condition is produced by a compare.

; Generic case: two unrelated constant vectors, <4 x i1> condition argument.
define <4 x i32> @sel_C1_or_C2_vec(<4 x i1> %cond) {
; CHECK-LABEL: sel_C1_or_C2_vec:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
; CHECK-NEXT:    adrp x8, .LCPI0_0
; CHECK-NEXT:    adrp x9, .LCPI0_1
; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI0_0]
; CHECK-NEXT:    ldr q2, [x9, :lo12:.LCPI0_1]
; CHECK-NEXT:    shl v0.4s, v0.4s, #31
; CHECK-NEXT:    cmlt v0.4s, v0.4s, #0
; CHECK-NEXT:    bsl v0.16b, v2.16b, v1.16b
; CHECK-NEXT:    ret
  %sel = select <4 x i1> %cond, <4 x i32> <i32 3000, i32 1, i32 -1, i32 0>, <4 x i32> <i32 42, i32 0, i32 -2, i32 -1>
  ret <4 x i32> %sel
}

; Generic case again, with the condition coming from an equality compare.
define <4 x i32> @cmp_sel_C1_or_C2_vec(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: cmp_sel_C1_or_C2_vec:
; CHECK:       // %bb.0:
; CHECK-NEXT:    adrp x8, .LCPI1_0
; CHECK-NEXT:    cmeq v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    adrp x9, .LCPI1_1
; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI1_0]
; CHECK-NEXT:    ldr q2, [x9, :lo12:.LCPI1_1]
; CHECK-NEXT:    bsl v0.16b, v2.16b, v1.16b
; CHECK-NEXT:    ret
  %eq = icmp eq <4 x i32> %x, %y
  %sel = select <4 x i1> %eq, <4 x i32> <i32 3000, i32 1, i32 -1, i32 0>, <4 x i32> <i32 42, i32 0, i32 -2, i32 -1>
  ret <4 x i32> %sel
}

; Off-by-one: each true lane equals the matching false lane plus one.
define <4 x i32> @sel_Cplus1_or_C_vec(<4 x i1> %cond) {
; CHECK-LABEL: sel_Cplus1_or_C_vec:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
; CHECK-NEXT:    adrp x8, .LCPI2_0
; CHECK-NEXT:    adrp x9, .LCPI2_1
; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI2_0]
; CHECK-NEXT:    ldr q2, [x9, :lo12:.LCPI2_1]
; CHECK-NEXT:    shl v0.4s, v0.4s, #31
; CHECK-NEXT:    cmlt v0.4s, v0.4s, #0
; CHECK-NEXT:    bsl v0.16b, v2.16b, v1.16b
; CHECK-NEXT:    ret
  %sel = select <4 x i1> %cond, <4 x i32> <i32 43, i32 1, i32 -1, i32 0>, <4 x i32> <i32 42, i32 0, i32 -2, i32 -1>
  ret <4 x i32> %sel
}

; Off-by-one (C+1 or C), with the condition coming from an equality compare.
define <4 x i32> @cmp_sel_Cplus1_or_C_vec(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: cmp_sel_Cplus1_or_C_vec:
; CHECK:       // %bb.0:
; CHECK-NEXT:    adrp x8, .LCPI3_0
; CHECK-NEXT:    cmeq v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    adrp x9, .LCPI3_1
; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI3_0]
; CHECK-NEXT:    ldr q2, [x9, :lo12:.LCPI3_1]
; CHECK-NEXT:    bsl v0.16b, v2.16b, v1.16b
; CHECK-NEXT:    ret
  %eq = icmp eq <4 x i32> %x, %y
  %sel = select <4 x i1> %eq, <4 x i32> <i32 43, i32 1, i32 -1, i32 0>, <4 x i32> <i32 42, i32 0, i32 -2, i32 -1>
  ret <4 x i32> %sel
}

; Off-by-one: each true lane equals the matching false lane minus one.
define <4 x i32> @sel_Cminus1_or_C_vec(<4 x i1> %cond) {
; CHECK-LABEL: sel_Cminus1_or_C_vec:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
; CHECK-NEXT:    adrp x8, .LCPI4_0
; CHECK-NEXT:    adrp x9, .LCPI4_1
; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI4_0]
; CHECK-NEXT:    ldr q2, [x9, :lo12:.LCPI4_1]
; CHECK-NEXT:    shl v0.4s, v0.4s, #31
; CHECK-NEXT:    cmlt v0.4s, v0.4s, #0
; CHECK-NEXT:    bsl v0.16b, v2.16b, v1.16b
; CHECK-NEXT:    ret
  %sel = select <4 x i1> %cond, <4 x i32> <i32 43, i32 1, i32 -1, i32 0>, <4 x i32> <i32 44, i32 2, i32 0, i32 1>
  ret <4 x i32> %sel
}

; Off-by-one (C-1 or C), with the condition coming from an equality compare.
define <4 x i32> @cmp_sel_Cminus1_or_C_vec(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: cmp_sel_Cminus1_or_C_vec:
; CHECK:       // %bb.0:
; CHECK-NEXT:    adrp x8, .LCPI5_0
; CHECK-NEXT:    cmeq v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    adrp x9, .LCPI5_1
; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI5_0]
; CHECK-NEXT:    ldr q2, [x9, :lo12:.LCPI5_1]
; CHECK-NEXT:    bsl v0.16b, v2.16b, v1.16b
; CHECK-NEXT:    ret
  %eq = icmp eq <4 x i32> %x, %y
  %sel = select <4 x i1> %eq, <4 x i32> <i32 43, i32 1, i32 -1, i32 0>, <4 x i32> <i32 44, i32 2, i32 0, i32 1>
  ret <4 x i32> %sel
}

; All-ones when true, zero when false: expected to lower to the widened mask itself.
define <4 x i32> @sel_minus1_or_0_vec(<4 x i1> %cond) {
; CHECK-LABEL: sel_minus1_or_0_vec:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
; CHECK-NEXT:    shl v0.4s, v0.4s, #31
; CHECK-NEXT:    cmlt v0.4s, v0.4s, #0
; CHECK-NEXT:    ret
  %sel = select <4 x i1> %cond, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> zeroinitializer
  ret <4 x i32> %sel
}

; All-ones when equal, zero otherwise: the compare result is returned directly.
define <4 x i32> @cmp_sel_minus1_or_0_vec(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: cmp_sel_minus1_or_0_vec:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmeq v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %eq = icmp eq <4 x i32> %x, %y
  %sel = select <4 x i1> %eq, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> zeroinitializer
  ret <4 x i32> %sel
}

; Zero when true, all-ones when false: the inverted form of the widened mask.
define <4 x i32> @sel_0_or_minus1_vec(<4 x i1> %cond) {
; CHECK-LABEL: sel_0_or_minus1_vec:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
; CHECK-NEXT:    shl v0.4s, v0.4s, #31
; CHECK-NEXT:    cmge v0.4s, v0.4s, #0
; CHECK-NEXT:    ret
  %sel = select <4 x i1> %cond, <4 x i32> zeroinitializer, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %sel
}

; Zero when equal, all-ones otherwise: compare followed by a bitwise not.
define <4 x i32> @cmp_sel_0_or_minus1_vec(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: cmp_sel_0_or_minus1_vec:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmeq v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    mvn v0.16b, v0.16b
; CHECK-NEXT:    ret
  %eq = icmp eq <4 x i32> %x, %y
  %sel = select <4 x i1> %eq, <4 x i32> zeroinitializer, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %sel
}

; One when true, zero when false: the widened mask and-ed with a splat of 1.
define <4 x i32> @sel_1_or_0_vec(<4 x i1> %cond) {
; CHECK-LABEL: sel_1_or_0_vec:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
; CHECK-NEXT:    movi v1.4s, #1
; CHECK-NEXT:    shl v0.4s, v0.4s, #31
; CHECK-NEXT:    cmlt v0.4s, v0.4s, #0
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %sel = select <4 x i1> %cond, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> zeroinitializer
  ret <4 x i32> %sel
}

; One when equal, zero otherwise: the compare result and-ed with a splat of 1.
define <4 x i32> @cmp_sel_1_or_0_vec(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: cmp_sel_1_or_0_vec:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v2.4s, #1
; CHECK-NEXT:    cmeq v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
; CHECK-NEXT:    ret
  %eq = icmp eq <4 x i32> %x, %y
  %sel = select <4 x i1> %eq, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> zeroinitializer
  ret <4 x i32> %sel
}

; Zero when true, one when false: the inverted mask and-ed with a splat of 1.
define <4 x i32> @sel_0_or_1_vec(<4 x i1> %cond) {
; CHECK-LABEL: sel_0_or_1_vec:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
; CHECK-NEXT:    movi v1.4s, #1
; CHECK-NEXT:    shl v0.4s, v0.4s, #31
; CHECK-NEXT:    cmge v0.4s, v0.4s, #0
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %sel = select <4 x i1> %cond, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %sel
}

; Zero when equal, one otherwise: a splat of 1 with the compare-result bits cleared.
define <4 x i32> @cmp_sel_0_or_1_vec(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: cmp_sel_0_or_1_vec:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v2.4s, #1
; CHECK-NEXT:    cmeq v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    bic v0.16b, v2.16b, v0.16b
; CHECK-NEXT:    ret
  %eq = icmp eq <4 x i32> %x, %y
  %sel = select <4 x i1> %eq, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %sel
}

; Keep %b in lanes where %a is negative, zero elsewhere: sign mask and-ed with %b.
define <16 x i8> @signbit_mask_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: signbit_mask_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.16b, v0.16b, #0
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %isneg = icmp slt <16 x i8> %a, zeroinitializer
  %res = select <16 x i1> %isneg, <16 x i8> %b, <16 x i8> zeroinitializer
  ret <16 x i8> %res
}

; Swap cmp pred and select ops. This is logically equivalent to the above test.

; Inverted predicate (sgt -1) with the select arms exchanged: zero where %a is
; non-negative, %b where it is negative.
define <16 x i8> @signbit_mask_swap_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: signbit_mask_swap_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.16b, v0.16b, #0
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %nonneg = icmp sgt <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %res = select <16 x i1> %nonneg, <16 x i8> zeroinitializer, <16 x i8> %b
  ret <16 x i8> %res
}

; Sign mask and-ed with %b, 16-bit lanes.
define <8 x i16> @signbit_mask_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: signbit_mask_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.8h, v0.8h, #0
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %isneg = icmp slt <8 x i16> %a, zeroinitializer
  %res = select <8 x i1> %isneg, <8 x i16> %b, <8 x i16> zeroinitializer
  ret <8 x i16> %res
}

; Sign mask and-ed with %b, 32-bit lanes.
define <4 x i32> @signbit_mask_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: signbit_mask_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.4s, v0.4s, #0
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %isneg = icmp slt <4 x i32> %a, zeroinitializer
  %res = select <4 x i1> %isneg, <4 x i32> %b, <4 x i32> zeroinitializer
  ret <4 x i32> %res
}

; Sign mask and-ed with %b, 64-bit lanes.
define <2 x i64> @signbit_mask_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: signbit_mask_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.2d, v0.2d, #0
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %isneg = icmp slt <2 x i64> %a, zeroinitializer
  %res = select <2 x i1> %isneg, <2 x i64> %b, <2 x i64> zeroinitializer
  ret <2 x i64> %res
}

; All-ones in lanes where %a is negative, %b elsewhere: sign mask or-ed with %b.
define <16 x i8> @signbit_setmask_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: signbit_setmask_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.16b, v0.16b, #0
; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %isneg = icmp slt <16 x i8> %a, zeroinitializer
  %res = select <16 x i1> %isneg, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %b
  ret <16 x i8> %res
}

; Sign mask or-ed with %b, 16-bit lanes.
define <8 x i16> @signbit_setmask_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: signbit_setmask_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.8h, v0.8h, #0
; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %isneg = icmp slt <8 x i16> %a, zeroinitializer
  %res = select <8 x i1> %isneg, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %b
  ret <8 x i16> %res
}

; Swap cmp pred and select ops. This is logically equivalent to the above test.

define <8 x i16> @signbit_setmask_swap_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: signbit_setmask_swap_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.8h, v0.8h, #0
; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %nonneg = icmp sgt <8 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %res = select <8 x i1> %nonneg, <8 x i16> %b, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  ret <8 x i16> %res
}

; Sign mask or-ed with %b, 32-bit lanes.
define <4 x i32> @signbit_setmask_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: signbit_setmask_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.4s, v0.4s, #0
; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %isneg = icmp slt <4 x i32> %a, zeroinitializer
  %res = select <4 x i1> %isneg, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %b
  ret <4 x i32> %res
}

; Sign mask or-ed with %b, 64-bit lanes.
define <2 x i64> @signbit_setmask_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: signbit_setmask_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.2d, v0.2d, #0
; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %isneg = icmp slt <2 x i64> %a, zeroinitializer
  %res = select <2 x i1> %isneg, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %b
  ret <2 x i64> %res
}

; Keep %b in lanes where %a is non-negative, zero elsewhere: the inverted sign
; mask (compare >= 0) and-ed with %b.
define <16 x i8> @not_signbit_mask_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: not_signbit_mask_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmge v0.16b, v0.16b, #0
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %nonneg = icmp sgt <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %res = select <16 x i1> %nonneg, <16 x i8> %b, <16 x i8> zeroinitializer
  ret <16 x i8> %res
}

; Inverted sign mask and-ed with %b, 16-bit lanes.
define <8 x i16> @not_signbit_mask_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: not_signbit_mask_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmge v0.8h, v0.8h, #0
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %nonneg = icmp sgt <8 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %res = select <8 x i1> %nonneg, <8 x i16> %b, <8 x i16> zeroinitializer
  ret <8 x i16> %res
}

; Inverted sign mask and-ed with %b, 32-bit lanes.
define <4 x i32> @not_signbit_mask_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: not_signbit_mask_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmge v0.4s, v0.4s, #0
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %nonneg = icmp sgt <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
  %res = select <4 x i1> %nonneg, <4 x i32> %b, <4 x i32> zeroinitializer
  ret <4 x i32> %res
}

; Swap cmp pred and select ops. This is logically equivalent to the above test.

; Inverted predicate (slt 0) with the select arms exchanged: zero where %a is
; negative, %b where it is non-negative.
define <4 x i32> @not_signbit_mask_swap_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: not_signbit_mask_swap_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmge v0.4s, v0.4s, #0
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %isneg = icmp slt <4 x i32> %a, zeroinitializer
  %res = select <4 x i1> %isneg, <4 x i32> zeroinitializer, <4 x i32> %b
  ret <4 x i32> %res
}

; Inverted sign mask and-ed with %b, 64-bit lanes.
define <2 x i64> @not_signbit_mask_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: not_signbit_mask_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmge v0.2d, v0.2d, #0
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %nonneg = icmp sgt <2 x i64> %a, <i64 -1, i64 -1>
  %res = select <2 x i1> %nonneg, <2 x i64> %b, <2 x i64> zeroinitializer
  ret <2 x i64> %res
}

; SVE

; Scalable-vector case: zero in lanes where %a is negative, %a xor %b elsewhere.
; Attribute group #0 enables the SVE target feature for this function.
define <vscale x 16 x i8> @signbit_mask_xor_nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: signbit_mask_xor_nxv16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    cmplt p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    mov z0.b, p0/m, #0 // =0x0
; CHECK-NEXT:    ret
  %isneg = icmp slt <vscale x 16 x i8> %a, zeroinitializer
  %a_xor_b = xor <vscale x 16 x i8> %a, %b
  %res = select <vscale x 16 x i1> %isneg, <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> %a_xor_b
  ret <vscale x 16 x i8> %res
}

attributes #0 = { "target-features"="+sve" }