; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc -mtriple=aarch64-unknown-linux-gnu -global-isel < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI

; BIT Bitwise Insert if True
;
; Every function in this file builds the same bitwise-select pattern
;     or (and %C, %B), (and (xor %C, -1), %A)
; which takes the bits of %B where the mask %C is set and the bits of %A
; elsewhere. The assertions record the code expected from the default
; selector (SD prefix) and from the -global-isel run (GI prefix); the shared
; prefix is used where both runs are expected to emit the same code.
;
; The assertion comments are generated; refresh them with
; utils/update_llc_test_checks.py rather than editing them by hand.
;
; 8-bit vectors tests

; Single i8 element. The default selector is expected to emit one 8-byte
; "bit"; the GlobalISel assertions instead expect the lane to be moved to
; general-purpose registers and combined with and/bic/orr.
define <1 x i8> @test_bit_v1i8(<1 x i8> %A, <1 x i8> %B, <1 x i8> %C) {
; CHECK-SD-LABEL: test_bit_v1i8:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    bit v0.8b, v1.8b, v2.8b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_bit_v1i8:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-GI-NEXT:    umov w8, v2.b[0]
; CHECK-GI-NEXT:    umov w9, v1.b[0]
; CHECK-GI-NEXT:    umov w10, v0.b[0]
; CHECK-GI-NEXT:    and w9, w8, w9
; CHECK-GI-NEXT:    bic w8, w10, w8
; CHECK-GI-NEXT:    orr w8, w9, w8
; CHECK-GI-NEXT:    fmov s0, w8
; CHECK-GI-NEXT:    ret
  %and = and <1 x i8> %C, %B
  %neg = xor <1 x i8> %C, <i8 -1>
  %and1 = and <1 x i8> %neg, %A
  %or = or <1 x i8> %and, %and1
  ret <1 x i8> %or
}

; 16-bit vectors tests

; Single i16 element. Same expectations as the i8 case, with halfword lane
; moves in the GlobalISel assertions.
define <1 x i16> @test_bit_v1i16(<1 x i16> %A, <1 x i16> %B, <1 x i16> %C) {
; CHECK-SD-LABEL: test_bit_v1i16:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    bit v0.8b, v1.8b, v2.8b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_bit_v1i16:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-GI-NEXT:    umov w8, v2.h[0]
; CHECK-GI-NEXT:    umov w9, v1.h[0]
; CHECK-GI-NEXT:    umov w10, v0.h[0]
; CHECK-GI-NEXT:    and w9, w8, w9
; CHECK-GI-NEXT:    bic w8, w10, w8
; CHECK-GI-NEXT:    orr w8, w9, w8
; CHECK-GI-NEXT:    fmov s0, w8
; CHECK-GI-NEXT:    ret
  %and = and <1 x i16> %C, %B
  %neg = xor <1 x i16> %C, <i16 -1>
  %and1 = and <1 x i16> %neg, %A
  %or = or <1 x i16> %and, %and1
  ret <1 x i16> %or
}

; 32-bit vectors tests

; Single i32 element. The GlobalISel assertions expect the select to be done
; on w registers and the result inserted back into lane 0.
define <1 x i32> @test_bit_v1i32(<1 x i32> %A, <1 x i32> %B, <1 x i32> %C) {
; CHECK-SD-LABEL: test_bit_v1i32:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    bit v0.8b, v1.8b, v2.8b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_bit_v1i32:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    fmov w8, s2
; CHECK-GI-NEXT:    fmov w9, s1
; CHECK-GI-NEXT:    fmov w10, s0
; CHECK-GI-NEXT:    and w9, w8, w9
; CHECK-GI-NEXT:    bic w8, w10, w8
; CHECK-GI-NEXT:    orr w8, w9, w8
; CHECK-GI-NEXT:    mov v0.s[0], w8
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT:    ret
  %and = and <1 x i32> %C, %B
  %neg = xor <1 x i32> %C, <i32 -1>
  %and1 = and <1 x i32> %neg, %A
  %or = or <1 x i32> %and, %and1
  ret <1 x i32> %or
}

; 64-bit vectors tests

; Single i64 element. The GlobalISel assertions expect the select to be done
; on x registers.
define <1 x i64> @test_bit_v1i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C) {
; CHECK-SD-LABEL: test_bit_v1i64:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    bit v0.8b, v1.8b, v2.8b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_bit_v1i64:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    fmov x8, d2
; CHECK-GI-NEXT:    fmov x9, d1
; CHECK-GI-NEXT:    fmov x10, d0
; CHECK-GI-NEXT:    and x9, x8, x9
; CHECK-GI-NEXT:    bic x8, x10, x8
; CHECK-GI-NEXT:    orr x8, x9, x8
; CHECK-GI-NEXT:    fmov d0, x8
; CHECK-GI-NEXT:    ret
  %and = and <1 x i64> %C, %B
  %neg = xor <1 x i64> %C, <i64 -1>
  %and1 = and <1 x i64> %neg, %A
  %or = or <1 x i64> %and, %and1
  ret <1 x i64> %or
}

; Multi-element 64-bit vectors: both runs are expected to emit one 8-byte
; "bit" regardless of the element width.
define <2 x i32> @test_bit_v2i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C) {
; CHECK-LABEL: test_bit_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bit v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %and = and <2 x i32> %C, %B
  %neg = xor <2 x i32> %C, <i32 -1, i32 -1>
  %and1 = and <2 x i32> %neg, %A
  %or = or <2 x i32> %and, %and1
  ret <2 x i32> %or
}

define <4 x i16> @test_bit_v4i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C) {
; CHECK-LABEL: test_bit_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bit v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %and = and <4 x i16> %C, %B
  %neg = xor <4 x i16> %C, <i16 -1, i16 -1, i16 -1, i16 -1>
  %and1 = and <4 x i16> %neg, %A
  %or = or <4 x i16> %and, %and1
  ret <4 x i16> %or
}

define <8 x i8> @test_bit_v8i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C) {
; CHECK-LABEL: test_bit_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bit v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %and = and <8 x i8> %C, %B
  %neg = xor <8 x i8> %C, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %and1 = and <8 x i8> %neg, %A
  %or = or <8 x i8> %and, %and1
  ret <8 x i8> %or
}

; 128-bit vectors tests

; Both runs are expected to emit one 16-byte "bit" regardless of the element
; width.
define <2 x i64> @test_bit_v2i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C) {
; CHECK-LABEL: test_bit_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bit v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %and = and <2 x i64> %C, %B
  %neg = xor <2 x i64> %C, <i64 -1, i64 -1>
  %and1 = and <2 x i64> %neg, %A
  %or = or <2 x i64> %and, %and1
  ret <2 x i64> %or
}

define <4 x i32> @test_bit_v4i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) {
; CHECK-LABEL: test_bit_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bit v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %and = and <4 x i32> %C, %B
  %neg = xor <4 x i32> %C, <i32 -1, i32 -1, i32 -1, i32 -1>
  %and1 = and <4 x i32> %neg, %A
  %or = or <4 x i32> %and, %and1
  ret <4 x i32> %or
}

define <8 x i16> @test_bit_v8i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C) {
; CHECK-LABEL: test_bit_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bit v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %and = and <8 x i16> %C, %B
  %neg = xor <8 x i16> %C, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %and1 = and <8 x i16> %neg, %A
  %or = or <8 x i16> %and, %and1
  ret <8 x i16> %or
}

define <16 x i8> @test_bit_v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) {
; CHECK-LABEL: test_bit_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bit v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %and = and <16 x i8> %C, %B
  %neg = xor <16 x i8> %C, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %and1 = and <16 x i8> %neg, %A
  %or = or <16 x i8> %and, %and1
  ret <16 x i8> %or
}

; The inverted mask (%0) is computed in the entry block while the and/and/or
; that consume it live in the loop body. Both runs are still expected to emit
; a single "bit" inside the loop. Presumably this guards the sinking of the
; inverted-mask operand next to its users, as the function name suggests.
;
; NOTE(review): %i.0 takes the values 0..4 but the vectors have only four
; elements, so the final iteration uses insertelement index 4. The LangRef
; defines an out-of-range insertelement index as producing poison; the
; expected code keeps only the low two bits of the index. Confirm the trip
; count of 5 is intentional before changing it, since the assertions depend
; on it.
define <4 x i32> @test_bit_sink_operand(<4 x i32> %src, <4 x i32> %dst, <4 x i32> %mask, i32 %scratch) {
; CHECK-SD-LABEL: test_bit_sink_operand:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    sub sp, sp, #32
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
; CHECK-SD-NEXT:    add w8, w0, w0, lsr #31
; CHECK-SD-NEXT:    mov w9, wzr
; CHECK-SD-NEXT:    asr w8, w8, #1
; CHECK-SD-NEXT:  .LBB11_1: // %do.body
; CHECK-SD-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-SD-NEXT:    bit v1.16b, v0.16b, v2.16b
; CHECK-SD-NEXT:    add x10, sp, #16
; CHECK-SD-NEXT:    mov x11, sp
; CHECK-SD-NEXT:    bfi x10, x9, #2, #2
; CHECK-SD-NEXT:    bfi x11, x9, #2, #2
; CHECK-SD-NEXT:    add w9, w9, #1
; CHECK-SD-NEXT:    cmp w9, #5
; CHECK-SD-NEXT:    str q1, [sp, #16]
; CHECK-SD-NEXT:    str w0, [x10]
; CHECK-SD-NEXT:    ldr q1, [sp, #16]
; CHECK-SD-NEXT:    str q0, [sp]
; CHECK-SD-NEXT:    str w8, [x11]
; CHECK-SD-NEXT:    ldr q0, [sp]
; CHECK-SD-NEXT:    b.ne .LBB11_1
; CHECK-SD-NEXT:  // %bb.2: // %do.end
; CHECK-SD-NEXT:    mov v0.16b, v1.16b
; CHECK-SD-NEXT:    add sp, sp, #32
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_bit_sink_operand:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    sub sp, sp, #32
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 32
; CHECK-GI-NEXT:    asr w9, w0, #31
; CHECK-GI-NEXT:    mov w8, wzr
; CHECK-GI-NEXT:    add x10, sp, #16
; CHECK-GI-NEXT:    mov x11, sp
; CHECK-GI-NEXT:    add w9, w0, w9, lsr #31
; CHECK-GI-NEXT:    asr w9, w9, #1
; CHECK-GI-NEXT:  .LBB11_1: // %do.body
; CHECK-GI-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-GI-NEXT:    bit v1.16b, v0.16b, v2.16b
; CHECK-GI-NEXT:    mov w12, w8
; CHECK-GI-NEXT:    add w8, w8, #1
; CHECK-GI-NEXT:    and x12, x12, #0x3
; CHECK-GI-NEXT:    cmp w8, #5
; CHECK-GI-NEXT:    str q1, [sp, #16]
; CHECK-GI-NEXT:    str w0, [x10, x12, lsl #2]
; CHECK-GI-NEXT:    ldr q1, [sp, #16]
; CHECK-GI-NEXT:    str q0, [sp]
; CHECK-GI-NEXT:    str w9, [x11, x12, lsl #2]
; CHECK-GI-NEXT:    ldr q0, [sp]
; CHECK-GI-NEXT:    b.ne .LBB11_1
; CHECK-GI-NEXT:  // %bb.2: // %do.end
; CHECK-GI-NEXT:    mov v0.16b, v1.16b
; CHECK-GI-NEXT:    add sp, sp, #32
; CHECK-GI-NEXT:    ret

entry:
  %0 = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %div = sdiv i32 %scratch, 2
  br label %do.body

do.body:
  %dst.addr.0 = phi <4 x i32> [ %dst, %entry ], [ %vecins, %do.body ]
  %src.addr.0 = phi <4 x i32> [ %src, %entry ], [ %vecins1, %do.body ]
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %do.body ]
  %vbsl3.i = and <4 x i32> %src.addr.0, %mask
  %vbsl4.i = and <4 x i32> %dst.addr.0, %0
  %vbsl5.i = or <4 x i32> %vbsl3.i, %vbsl4.i
  %vecins = insertelement <4 x i32> %vbsl5.i, i32 %scratch, i32 %i.0
  %vecins1 = insertelement <4 x i32> %src.addr.0, i32 %div, i32 %i.0
  %inc = add nuw nsw i32 %i.0, 1
  %exitcond.not = icmp eq i32 %inc, 5
  br i1 %exitcond.not, label %do.end, label %do.body

do.end:
  ret <4 x i32> %vecins
}