; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
; RUN: | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
; RUN: | FileCheck %s

; These tests check that a vector select between (sub a, b) and (add a, b)
; over the same operands is lowered to a masked negation of the subtrahend
; (vrsub.vi ..., 0, v0.t) followed by a single unmasked vadd.vv, instead of
; materializing both the add and the sub and merging the results.

define <1 x i32> @select_addsub_v1i32(<1 x i1> %cc, <1 x i32> %a, <1 x i32> %b) {
; CHECK-LABEL: select_addsub_v1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, mu
; CHECK-NEXT:    vrsub.vi v9, v9, 0, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    ret
  %sub = sub <1 x i32> %a, %b
  %add = add <1 x i32> %a, %b
  %res = select <1 x i1> %cc, <1 x i32> %sub, <1 x i32> %add
  ret <1 x i32> %res
}

define <2 x i32> @select_addsub_v2i32(<2 x i1> %cc, <2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: select_addsub_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; CHECK-NEXT:    vrsub.vi v9, v9, 0, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    ret
  %sub = sub <2 x i32> %a, %b
  %add = add <2 x i32> %a, %b
  %res = select <2 x i1> %cc, <2 x i32> %sub, <2 x i32> %add
  ret <2 x i32> %res
}

define <4 x i32> @select_addsub_v4i32(<4 x i1> %cc, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: select_addsub_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vrsub.vi v9, v9, 0, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    ret
  %sub = sub <4 x i32> %a, %b
  %add = add <4 x i32> %a, %b
  %res = select <4 x i1> %cc, <4 x i32> %sub, <4 x i32> %add
  ret <4 x i32> %res
}

; Select arms swapped (add on the true side): the fold still applies, with the
; mask inverted via vmnot.m before the masked negate.
define <4 x i32> @select_addsub_v4i32_select_swapped(<4 x i1> %cc, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: select_addsub_v4i32_select_swapped:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vmnot.m v0, v0
; CHECK-NEXT:    vrsub.vi v9, v9, 0, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    ret
  %sub = sub <4 x i32> %a, %b
  %add = add <4 x i32> %a, %b
  %res = select <4 x i1> %cc, <4 x i32> %add, <4 x i32> %sub
  ret <4 x i32> %res
}

; Add operands commuted (b + a): same lowering as the canonical form.
define <4 x i32> @select_addsub_v4i32_add_swapped(<4 x i1> %cc, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: select_addsub_v4i32_add_swapped:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vrsub.vi v9, v9, 0, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    ret
  %sub = sub <4 x i32> %a, %b
  %add = add <4 x i32> %b, %a
  %res = select <4 x i1> %cc, <4 x i32> %sub, <4 x i32> %add
  ret <4 x i32> %res
}

; Both the select arms and the add operands are swapped.
define <4 x i32> @select_addsub_v4i32_both_swapped(<4 x i1> %cc, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: select_addsub_v4i32_both_swapped:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vmnot.m v0, v0
; CHECK-NEXT:    vrsub.vi v9, v9, 0, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    ret
  %sub = sub <4 x i32> %a, %b
  %add = add <4 x i32> %b, %a
  %res = select <4 x i1> %cc, <4 x i32> %add, <4 x i32> %sub
  ret <4 x i32> %res
}

; Sub operands reversed (b - a): %a becomes the operand that is negated.
define <4 x i32> @select_addsub_v4i32_sub_swapped(<4 x i1> %cc, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: select_addsub_v4i32_sub_swapped:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vrsub.vi v8, v8, 0, v0.t
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    ret
  %sub = sub <4 x i32> %b, %a
  %add = add <4 x i32> %a, %b
  %res = select <4 x i1> %cc, <4 x i32> %sub, <4 x i32> %add
  ret <4 x i32> %res
}

define <8 x i32> @select_addsub_v8i32(<8 x i1> %cc, <8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: select_addsub_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
; CHECK-NEXT:    vrsub.vi v10, v10, 0, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v10
; CHECK-NEXT:    ret
  %sub = sub <8 x i32> %a, %b
  %add = add <8 x i32> %a, %b
  %res = select <8 x i1> %cc, <8 x i32> %sub, <8 x i32> %add
  ret <8 x i32> %res
}

define <16 x i32> @select_addsub_v16i32(<16 x i1> %cc, <16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: select_addsub_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
; CHECK-NEXT:    vrsub.vi v12, v12, 0, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v12
; CHECK-NEXT:    ret
  %sub = sub <16 x i32> %a, %b
  %add = add <16 x i32> %a, %b
  %res = select <16 x i1> %cc, <16 x i32> %sub, <16 x i32> %add
  ret <16 x i32> %res
}

define <32 x i32> @select_addsub_v32i32(<32 x i1> %cc, <32 x i32> %a, <32 x i32> %b) {
; CHECK-LABEL: select_addsub_v32i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 32
; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
; CHECK-NEXT:    vrsub.vi v16, v16, 0, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v16
; CHECK-NEXT:    ret
  %sub = sub <32 x i32> %a, %b
  %add = add <32 x i32> %a, %b
  %res = select <32 x i1> %cc, <32 x i32> %sub, <32 x i32> %add
  ret <32 x i32> %res
}

; v64i32 exceeds a single LMUL=8 register group, so the operation is split in
; two: one half of %a is spilled around the computation, %b's halves are
; loaded through a0, and the mask is slid down (vslidedown.vi) for the high
; half. The vrsub.vi + vadd.vv fold still fires for each half.
define <64 x i32> @select_addsub_v64i32(<64 x i1> %cc, <64 x i32> %a, <64 x i32> %b) {
; CHECK-LABEL: select_addsub_v64i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    sub sp, sp, a1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; CHECK-NEXT:    addi a1, sp, 16
; CHECK-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT:    vmv8r.v v16, v8
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 128
; CHECK-NEXT:    vle32.v v24, (a0)
; CHECK-NEXT:    vrsub.vi v8, v8, 0, v0.t
; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
; CHECK-NEXT:    vslidedown.vi v0, v0, 4
; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
; CHECK-NEXT:    vadd.vv v8, v16, v8
; CHECK-NEXT:    vrsub.vi v24, v24, 0, v0.t
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vadd.vv v16, v16, v24
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %sub = sub <64 x i32> %a, %b
  %add = add <64 x i32> %a, %b
  %res = select <64 x i1> %cc, <64 x i32> %sub, <64 x i32> %add
  ret <64 x i32> %res
}

; Other element widths: i64, i16, i8.
define <8 x i64> @select_addsub_v8i64(<8 x i1> %cc, <8 x i64> %a, <8 x i64> %b) {
; CHECK-LABEL: select_addsub_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
; CHECK-NEXT:    vrsub.vi v12, v12, 0, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v12
; CHECK-NEXT:    ret
  %sub = sub <8 x i64> %a, %b
  %add = add <8 x i64> %a, %b
  %res = select <8 x i1> %cc, <8 x i64> %sub, <8 x i64> %add
  ret <8 x i64> %res
}

define <8 x i16> @select_addsub_v8i16(<8 x i1> %cc, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: select_addsub_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
; CHECK-NEXT:    vrsub.vi v9, v9, 0, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    ret
  %sub = sub <8 x i16> %a, %b
  %add = add <8 x i16> %a, %b
  %res = select <8 x i1> %cc, <8 x i16> %sub, <8 x i16> %add
  ret <8 x i16> %res
}

define <8 x i8> @select_addsub_v8i8(<8 x i1> %cc, <8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: select_addsub_v8i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vrsub.vi v9, v9, 0, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    ret
  %sub = sub <8 x i8> %a, %b
  %add = add <8 x i8> %a, %b
  %res = select <8 x i1> %cc, <8 x i8> %sub, <8 x i8> %add
  ret <8 x i8> %res
}

; For i1 elements add and sub are both xor (mod-2 arithmetic), so both select
; arms are identical and the whole pattern collapses to a single vmxor.mm.
define <8 x i1> @select_addsub_v8i1(<8 x i1> %cc, <8 x i1> %a, <8 x i1> %b) {
; CHECK-LABEL: select_addsub_v8i1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vmxor.mm v0, v8, v9
; CHECK-NEXT:    ret
  %sub = sub <8 x i1> %a, %b
  %add = add <8 x i1> %a, %b
  %res = select <8 x i1> %cc, <8 x i1> %sub, <8 x i1> %add
  ret <8 x i1> %res
}

; Illegal element type i2 is legalized to e8 operations; the fold still fires.
define <8 x i2> @select_addsub_v8i2(<8 x i1> %cc, <8 x i2> %a, <8 x i2> %b) {
; CHECK-LABEL: select_addsub_v8i2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vrsub.vi v9, v9, 0, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    ret
  %sub = sub <8 x i2> %a, %b
  %add = add <8 x i2> %a, %b
  %res = select <8 x i1> %cc, <8 x i2> %sub, <8 x i2> %add
  ret <8 x i2> %res
}

; Constant condition mask, materialized with vmv.v.i (0b0101 = 5).
define <4 x i32> @select_addsub_v4i32_constmask(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: select_addsub_v4i32_constmask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v0, 5
; CHECK-NEXT:    vrsub.vi v9, v9, 0, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    ret
  %sub = sub <4 x i32> %a, %b
  %add = add <4 x i32> %a, %b
  %res = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> %sub, <4 x i32> %add
  ret <4 x i32> %res
}

; Constant mask with the select arms swapped and sub reversed: the inverted
; mask constant (0b1010 = 10) is materialized directly and %a is negated.
define <4 x i32> @select_addsub_v4i32_constmask2(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: select_addsub_v4i32_constmask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v0, 10
; CHECK-NEXT:    vrsub.vi v8, v8, 0, v0.t
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    ret
  %sub = sub <4 x i32> %b, %a
  %add = add <4 x i32> %a, %b
  %res = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> %add, <4 x i32> %sub
  ret <4 x i32> %res
}

; Same pattern as above, but the select is disguised as a shuffle
define <4 x i32> @select_addsub_v4i32_as_shuffle(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: select_addsub_v4i32_as_shuffle:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v0, 5
; CHECK-NEXT:    vrsub.vi v9, v9, 0, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    ret
  %sub = sub <4 x i32> %a, %b
  %add = add <4 x i32> %a, %b
  %res = shufflevector <4 x i32> %sub, <4 x i32> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x i32> %res
}

; Same pattern as above, but the select is disguised as a shuffle
define <4 x i32> @select_addsub_v4i32_as_shuffle2(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: select_addsub_v4i32_as_shuffle2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v0, 10
; CHECK-NEXT:    vrsub.vi v8, v8, 0, v0.t
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    ret
  %sub = sub <4 x i32> %b, %a
  %add = add <4 x i32> %a, %b
  %res = shufflevector <4 x i32> %add, <4 x i32> %sub, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x i32> %res
}