; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

define <4 x i32> @add_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: add_constant_rhs:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv.v.x v8, a0
; CHECK-NEXT:    lui a0, %hi(.LCPI0_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI0_0)
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vslide1down.vx v8, v8, a1
; CHECK-NEXT:    vslide1down.vx v8, v8, a2
; CHECK-NEXT:    vslide1down.vx v8, v8, a3
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    ret
  %e0 = add i32 %a, 23
  %e1 = add i32 %b, 25
  %e2 = add i32 %c, 1
  %e3 = add i32 %d, 2355
  %v0 = insertelement <4 x i32> poison, i32 %e0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %e1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %e2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %e3, i32 3
  ret <4 x i32> %v3
}

define <8 x i32> @add_constant_rhs_8xi32(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h) {
; CHECK-LABEL: add_constant_rhs_8xi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vmv.v.x v8, a0
; CHECK-NEXT:    lui a0, %hi(.LCPI1_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI1_0)
; CHECK-NEXT:    vslide1down.vx v8, v8, a1
; CHECK-NEXT:    vslide1down.vx v8, v8, a2
; CHECK-NEXT:    vslide1down.vx v8, v8, a3
; CHECK-NEXT:    vslide1down.vx v8, v8, a4
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vslide1down.vx v8, v8, a5
; CHECK-NEXT:    vslide1down.vx v8, v8, a6
; CHECK-NEXT:    vslide1down.vx v8, v8, a7
; CHECK-NEXT:    vadd.vv v8, v8, v10
; CHECK-NEXT:    ret
  %e0 = add i32 %a, 23
  %e1 = add i32 %b, 25
  %e2 = add i32 %c, 1
  %e3 = add i32 %d, 2355
  %e4 = add i32 %e, 23
  %e5 = add i32 %f, 23
  %e6 = add i32 %g, 22
  %e7 = add i32 %h, 23
  %v0 = insertelement <8 x i32> poison, i32 %e0, i32 0
  %v1 = insertelement <8 x i32> %v0, i32 %e1, i32 1
  %v2 = insertelement <8 x i32> %v1, i32 %e2, i32 2
  %v3 = insertelement <8 x i32> %v2, i32 %e3, i32 3
  %v4 = insertelement <8 x i32> %v3, i32 %e4, i32 4
  %v5 = insertelement <8 x i32> %v4, i32 %e5, i32 5
  %v6 = insertelement <8 x i32> %v5, i32 %e6, i32 6
  %v7 = insertelement <8 x i32> %v6, i32 %e7, i32 7
  ret <8 x i32> %v7
}


define <4 x i32> @sub_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: sub_constant_rhs:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv.v.x v8, a0
; CHECK-NEXT:    lui a0, %hi(.LCPI2_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI2_0)
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vslide1down.vx v8, v8, a1
; CHECK-NEXT:    vslide1down.vx v8, v8, a2
; CHECK-NEXT:    vslide1down.vx v8, v8, a3
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    ret
  %e0 = sub i32 %a, 23
  %e1 = sub i32 %b, 25
  %e2 = sub i32 %c, 1
  %e3 = sub i32 %d, 2355
  %v0 = insertelement <4 x i32> poison, i32 %e0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %e1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %e2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %e3, i32 3
  ret <4 x i32> %v3
}

define <4 x i32> @mul_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: mul_constant_rhs:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv.v.x v8, a0
; CHECK-NEXT:    lui a0, %hi(.LCPI3_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI3_0)
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vslide1down.vx v8, v8, a1
; CHECK-NEXT:    vslide1down.vx v8, v8, a2
; CHECK-NEXT:    vslide1down.vx v8, v8, a3
; CHECK-NEXT:    vmul.vv v8, v8, v9
; CHECK-NEXT:    ret
  %e0 = mul i32 %a, 23
  %e1 = mul i32 %b, 25
  %e2 = mul i32 %c, 27
  %e3 = mul i32 %d, 2355
  %v0 = insertelement <4 x i32> poison, i32 %e0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %e1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %e2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %e3, i32 3
  ret <4 x i32> %v3
}

define <4 x i32> @udiv_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: udiv_constant_rhs:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv.v.x v8, a0
; CHECK-NEXT:    lui a0, %hi(.LCPI4_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI4_0)
; CHECK-NEXT:    vmv.v.i v9, 0
; CHECK-NEXT:    vslide1down.vx v8, v8, a1
; CHECK-NEXT:    lui a1, 524288
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    lui a0, %hi(.LCPI4_1)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI4_1)
; CHECK-NEXT:    vslide1down.vx v9, v9, a1
; CHECK-NEXT:    vle32.v v11, (a0)
; CHECK-NEXT:    vslide1down.vx v8, v8, a2
; CHECK-NEXT:    vslide1down.vx v8, v8, a3
; CHECK-NEXT:    vmulhu.vv v10, v8, v10
; CHECK-NEXT:    vsub.vv v12, v8, v10
; CHECK-NEXT:    vmulhu.vv v9, v12, v9
; CHECK-NEXT:    vadd.vv v9, v9, v10
; CHECK-NEXT:    vmv.v.i v0, 4
; CHECK-NEXT:    vsrl.vv v9, v9, v11
; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
; CHECK-NEXT:    ret
  %e0 = udiv i32 %a, 23
  %e1 = udiv i32 %b, 25
  %e2 = udiv i32 %c, 1
  %e3 = udiv i32 %d, 235
  %v0 = insertelement <4 x i32> poison, i32 %e0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %e1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %e2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %e3, i32 3
  ret <4 x i32> %v3
}


define <4 x float> @fadd_constant_rhs(float %a, float %b, float %c, float %d) {
; CHECK-LABEL: fadd_constant_rhs:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vfmv.v.f v8, fa0
; CHECK-NEXT:    lui a0, %hi(.LCPI5_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI5_0)
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vfslide1down.vf v8, v8, fa1
; CHECK-NEXT:    vfslide1down.vf v8, v8, fa2
; CHECK-NEXT:    vfslide1down.vf v8, v8, fa3
; CHECK-NEXT:    vfadd.vv v8, v8, v9
; CHECK-NEXT:    ret
  %e0 = fadd float %a, 23.0
  %e1 = fadd float %b, 25.0
  %e2 = fadd float %c, 2.0
  %e3 = fadd float %d, 23.0
  %v0 = insertelement <4 x float> poison, float %e0, i32 0
  %v1 = insertelement <4 x float> %v0, float %e1, i32 1
  %v2 = insertelement <4 x float> %v1, float %e2, i32 2
  %v3 = insertelement <4 x float> %v2, float %e3, i32 3
  ret <4 x float> %v3
}

define <4 x float> @fdiv_constant_rhs(float %a, float %b, float %c, float %d) {
; CHECK-LABEL: fdiv_constant_rhs:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vfmv.v.f v8, fa0
; CHECK-NEXT:    lui a0, %hi(.LCPI6_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI6_0)
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vfslide1down.vf v8, v8, fa1
; CHECK-NEXT:    vfslide1down.vf v8, v8, fa2
; CHECK-NEXT:    vfslide1down.vf v8, v8, fa3
; CHECK-NEXT:    vfdiv.vv v8, v8, v9
; CHECK-NEXT:    ret
  %e0 = fdiv float %a, 23.0
  %e1 = fdiv float %b, 25.0
  %e2 = fdiv float %c, 10.0
  %e3 = fdiv float %d, 23.0
  %v0 = insertelement <4 x float> poison, float %e0, i32 0
  %v1 = insertelement <4 x float> %v0, float %e1, i32 1
  %v2 = insertelement <4 x float> %v1, float %e2, i32 2
  %v3 = insertelement <4 x float> %v2, float %e3, i32 3
  ret <4 x float> %v3
}

define <4 x i32> @add_constant_rhs_splat(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: add_constant_rhs_splat:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv.v.x v8, a0
; CHECK-NEXT:    vslide1down.vx v8, v8, a1
; CHECK-NEXT:    vslide1down.vx v8, v8, a2
; CHECK-NEXT:    vslide1down.vx v8, v8, a3
; CHECK-NEXT:    li a0, 23
; CHECK-NEXT:    vadd.vx v8, v8, a0
; CHECK-NEXT:    ret
  %e0 = add i32 %a, 23
  %e1 = add i32 %b, 23
  %e2 = add i32 %c, 23
  %e3 = add i32 %d, 23
  %v0 = insertelement <4 x i32> poison, i32 %e0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %e1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %e2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %e3, i32 3
  ret <4 x i32> %v3
}

define <4 x i32> @add_constant_rhs_with_identity(i32 %a, i32 %b, i32 %c, i32 %d) {
; RV32-LABEL: add_constant_rhs_with_identity:
; RV32:       # %bb.0:
; RV32-NEXT:    addi a1, a1, 25
; RV32-NEXT:    addi a2, a2, 1
; RV32-NEXT:    addi a3, a3, 2047
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v8, a0
; RV32-NEXT:    addi a0, a3, 308
; RV32-NEXT:    vslide1down.vx v8, v8, a1
; RV32-NEXT:    vslide1down.vx v8, v8, a2
; RV32-NEXT:    vslide1down.vx v8, v8, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: add_constant_rhs_with_identity:
; RV64:       # %bb.0:
; RV64-NEXT:    addiw a1, a1, 25
; RV64-NEXT:    addiw a2, a2, 1
; RV64-NEXT:    addi a3, a3, 2047
; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT:    vmv.v.x v8, a0
; RV64-NEXT:    addiw a0, a3, 308
; RV64-NEXT:    vslide1down.vx v8, v8, a1
; RV64-NEXT:    vslide1down.vx v8, v8, a2
; RV64-NEXT:    vslide1down.vx v8, v8, a0
; RV64-NEXT:    ret
  %e0 = add i32 %a, 0
  %e1 = add i32 %b, 25
  %e2 = add i32 %c, 1
  %e3 = add i32 %d, 2355
  %v0 = insertelement <4 x i32> poison, i32 %e0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %e1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %e2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %e3, i32 3
  ret <4 x i32> %v3
}

define <4 x i32> @add_constant_rhs_identity(i32 %a, i32 %b, i32 %c, i32 %d) {
; RV32-LABEL: add_constant_rhs_identity:
; RV32:       # %bb.0:
; RV32-NEXT:    addi a1, a1, 25
; RV32-NEXT:    addi a2, a2, 1
; RV32-NEXT:    addi a3, a3, 2047
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v8, a0
; RV32-NEXT:    addi a0, a3, 308
; RV32-NEXT:    vslide1down.vx v8, v8, a1
; RV32-NEXT:    vslide1down.vx v8, v8, a2
; RV32-NEXT:    vslide1down.vx v8, v8, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: add_constant_rhs_identity:
; RV64:       # %bb.0:
; RV64-NEXT:    addiw a1, a1, 25
; RV64-NEXT:    addiw a2, a2, 1
; RV64-NEXT:    addi a3, a3, 2047
; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT:    vmv.v.x v8, a0
; RV64-NEXT:    addiw a0, a3, 308
; RV64-NEXT:    vslide1down.vx v8, v8, a1
; RV64-NEXT:    vslide1down.vx v8, v8, a2
; RV64-NEXT:    vslide1down.vx v8, v8, a0
; RV64-NEXT:    ret
  %e0 = add i32 %a, 0
  %e1 = add i32 %b, 25
  %e2 = add i32 %c, 1
  %e3 = add i32 %d, 2355
  %v0 = insertelement <4 x i32> poison, i32 %e0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %e1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %e2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %e3, i32 3
  ret <4 x i32> %v3
}

define <4 x i32> @add_constant_rhs_identity2(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: add_constant_rhs_identity2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, 23
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv.v.x v8, a0
; CHECK-NEXT:    vslide1down.vx v8, v8, a1
; CHECK-NEXT:    vslide1down.vx v8, v8, a2
; CHECK-NEXT:    vslide1down.vx v8, v8, a3
; CHECK-NEXT:    ret
  %e0 = add i32 %a, 23
  %v0 = insertelement <4 x i32> poison, i32 %e0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %b, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %c, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %d, i32 3
  ret <4 x i32> %v3
}

define <4 x i32> @add_constant_rhs_inverse(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: add_constant_rhs_inverse:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv.v.x v8, a0
; CHECK-NEXT:    lui a0, %hi(.LCPI11_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI11_0)
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vslide1down.vx v8, v8, a1
; CHECK-NEXT:    vslide1down.vx v8, v8, a2
; CHECK-NEXT:    vslide1down.vx v8, v8, a3
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    ret
  %e0 = sub i32 %a, 1
  %e1 = add i32 %b, 25
  %e2 = add i32 %c, 1
  %e3 = add i32 %d, 2355
  %v0 = insertelement <4 x i32> poison, i32 %e0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %e1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %e2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %e3, i32 3
  ret <4 x i32> %v3
}

define <4 x i32> @add_constant_rhs_commute(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: add_constant_rhs_commute:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv.v.x v8, a0
; CHECK-NEXT:    lui a0, %hi(.LCPI12_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI12_0)
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vslide1down.vx v8, v8, a1
; CHECK-NEXT:    vslide1down.vx v8, v8, a2
; CHECK-NEXT:    vslide1down.vx v8, v8, a3
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    ret
  %e0 = add i32 %a, 23
  %e1 = add i32 %b, 25
  %e2 = add i32 1, %c
  %e3 = add i32 %d, 2355
  %v0 = insertelement <4 x i32> poison, i32 %e0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %e1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %e2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %e3, i32 3
  ret <4 x i32> %v3
}


define <4 x i32> @add_general_rhs(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h) {
; RV32-LABEL: add_general_rhs:
; RV32:       # %bb.0:
; RV32-NEXT:    add a0, a0, a4
; RV32-NEXT:    add a1, a1, a5
; RV32-NEXT:    add a2, a2, a6
; RV32-NEXT:    add a3, a3, a7
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v8, a0
; RV32-NEXT:    vslide1down.vx v8, v8, a1
; RV32-NEXT:    vslide1down.vx v8, v8, a2
; RV32-NEXT:    vslide1down.vx v8, v8, a3
; RV32-NEXT:    ret
;
; RV64-LABEL: add_general_rhs:
; RV64:       # %bb.0:
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    addw a1, a1, a5
; RV64-NEXT:    addw a2, a2, a6
; RV64-NEXT:    addw a3, a3, a7
; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT:    vmv.v.x v8, a0
; RV64-NEXT:    vslide1down.vx v8, v8, a1
; RV64-NEXT:    vslide1down.vx v8, v8, a2
; RV64-NEXT:    vslide1down.vx v8, v8, a3
; RV64-NEXT:    ret
  %e0 = add i32 %a, %e
  %e1 = add i32 %b, %f
  %e2 = add i32 %c, %g
  %e3 = add i32 %d, %h
  %v0 = insertelement <4 x i32> poison, i32 %e0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %e1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %e2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %e3, i32 3
  ret <4 x i32> %v3
}

define <4 x i32> @add_general_splat(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
; RV32-LABEL: add_general_splat:
; RV32:       # %bb.0:
; RV32-NEXT:    add a0, a0, a4
; RV32-NEXT:    add a1, a1, a4
; RV32-NEXT:    add a2, a2, a4
; RV32-NEXT:    add a3, a3, a4
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v8, a0
; RV32-NEXT:    vslide1down.vx v8, v8, a1
; RV32-NEXT:    vslide1down.vx v8, v8, a2
; RV32-NEXT:    vslide1down.vx v8, v8, a3
; RV32-NEXT:    ret
;
; RV64-LABEL: add_general_splat:
; RV64:       # %bb.0:
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    addw a1, a1, a4
; RV64-NEXT:    addw a2, a2, a4
; RV64-NEXT:    addw a3, a3, a4
; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT:    vmv.v.x v8, a0
; RV64-NEXT:    vslide1down.vx v8, v8, a1
; RV64-NEXT:    vslide1down.vx v8, v8, a2
; RV64-NEXT:    vslide1down.vx v8, v8, a3
; RV64-NEXT:    ret
  %e0 = add i32 %a, %e
  %e1 = add i32 %b, %e
  %e2 = add i32 %c, %e
  %e3 = add i32 %d, %e
  %v0 = insertelement <4 x i32> poison, i32 %e0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %e1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %e2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %e3, i32 3
  ret <4 x i32> %v3
}

; This test previously failed with an assertion failure because constant shift
; amounts are type legalized early.
define void @buggy(i32 %0) #0 {
; RV32-LABEL: buggy:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v8, a0
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vi v8, v8, 1
; RV32-NEXT:    vrgather.vi v9, v8, 0
; RV32-NEXT:    vse32.v v9, (zero)
; RV32-NEXT:    ret
;
; RV64-LABEL: buggy:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    slli a0, a0, 1
; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT:    vmv.v.x v8, a0
; RV64-NEXT:    vor.vi v8, v8, 1
; RV64-NEXT:    vrgather.vi v9, v8, 0
; RV64-NEXT:    vse32.v v9, (zero)
; RV64-NEXT:    ret
entry:
  %mul.us.us.i.3 = shl i32 %0, 1
  %1 = insertelement <4 x i32> zeroinitializer, i32 %mul.us.us.i.3, i64 0
  %2 = or <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
  %3 = shufflevector <4 x i32> %2, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
  store <4 x i32> %3, ptr null, align 16
  ret void
}


define <8 x i32> @add_constant_rhs_8xi32_vector_in(<8 x i32> %vin, i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: add_constant_rhs_8xi32_vector_in:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, 23
; CHECK-NEXT:    addi a1, a1, 25
; CHECK-NEXT:    addi a2, a2, 1
; CHECK-NEXT:    addi a3, a3, 2047
; CHECK-NEXT:    addi a3, a3, 308
; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v8, a0
; CHECK-NEXT:    vmv.s.x v10, a1
; CHECK-NEXT:    vslideup.vi v8, v10, 1
; CHECK-NEXT:    vmv.s.x v10, a2
; CHECK-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
; CHECK-NEXT:    vslideup.vi v8, v10, 2
; CHECK-NEXT:    vmv.s.x v10, a3
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
; CHECK-NEXT:    vslideup.vi v8, v10, 3
; CHECK-NEXT:    ret
  %e0 = add i32 %a, 23
  %e1 = add i32 %b, 25
  %e2 = add i32 %c, 1
  %e3 = add i32 %d, 2355
  %v0 = insertelement <8 x i32> %vin, i32 %e0, i32 0
  %v1 = insertelement <8 x i32> %v0, i32 %e1, i32 1
  %v2 = insertelement <8 x i32> %v1, i32 %e2, i32 2
  %v3 = insertelement <8 x i32> %v2, i32 %e3, i32 3
  ret <8 x i32> %v3
}

define <8 x i32> @add_constant_rhs_8xi32_vector_in2(<8 x i32> %vin, i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: add_constant_rhs_8xi32_vector_in2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, 23
; CHECK-NEXT:    addi a1, a1, 25
; CHECK-NEXT:    addi a2, a2, 1
; CHECK-NEXT:    addi a3, a3, 2047
; CHECK-NEXT:    addi a3, a3, 308
; CHECK-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vslideup.vi v8, v10, 4
; CHECK-NEXT:    vmv.s.x v10, a1
; CHECK-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
; CHECK-NEXT:    vslideup.vi v8, v10, 5
; CHECK-NEXT:    vmv.s.x v10, a2
; CHECK-NEXT:    vsetivli zero, 7, e32, m2, tu, ma
; CHECK-NEXT:    vslideup.vi v8, v10, 6
; CHECK-NEXT:    vmv.s.x v10, a3
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v10, 7
; CHECK-NEXT:    ret
  %e0 = add i32 %a, 23
  %e1 = add i32 %b, 25
  %e2 = add i32 %c, 1
  %e3 = add i32 %d, 2355
  %v0 = insertelement <8 x i32> %vin, i32 %e0, i32 4
  %v1 = insertelement <8 x i32> %v0, i32 %e1, i32 5
  %v2 = insertelement <8 x i32> %v1, i32 %e2, i32 6
  %v3 = insertelement <8 x i32> %v2, i32 %e3, i32 7
  ret <8 x i32> %v3
}

define <8 x i32> @add_constant_rhs_8xi32_vector_in3(<8 x i32> %vin, i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: add_constant_rhs_8xi32_vector_in3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, 23
; CHECK-NEXT:    addi a1, a1, 25
; CHECK-NEXT:    addi a2, a2, 1
; CHECK-NEXT:    addi a3, a3, 2047
; CHECK-NEXT:    addi a3, a3, 308
; CHECK-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v8, a0
; CHECK-NEXT:    vmv.s.x v10, a1
; CHECK-NEXT:    vslideup.vi v8, v10, 2
; CHECK-NEXT:    vmv.s.x v10, a2
; CHECK-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
; CHECK-NEXT:    vslideup.vi v8, v10, 4
; CHECK-NEXT:    vmv.s.x v10, a3
; CHECK-NEXT:    vsetivli zero, 7, e32, m2, tu, ma
; CHECK-NEXT:    vslideup.vi v8, v10, 6
; CHECK-NEXT:    ret
  %e0 = add i32 %a, 23
  %e1 = add i32 %b, 25
  %e2 = add i32 %c, 1
  %e3 = add i32 %d, 2355
  %v0 = insertelement <8 x i32> %vin, i32 %e0, i32 0
  %v1 = insertelement <8 x i32> %v0, i32 %e1, i32 2
  %v2 = insertelement <8 x i32> %v1, i32 %e2, i32 4
  %v3 = insertelement <8 x i32> %v2, i32 %e3, i32 6
  ret <8 x i32> %v3
}

define <8 x i32> @add_constant_rhs_8xi32_partial(<8 x i32> %vin, i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: add_constant_rhs_8xi32_partial:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vmv.s.x v12, a1
; CHECK-NEXT:    vslideup.vi v8, v10, 4
; CHECK-NEXT:    vmv.s.x v10, a2
; CHECK-NEXT:    lui a0, %hi(.LCPI19_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI19_0)
; CHECK-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
; CHECK-NEXT:    vslideup.vi v8, v12, 5
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v12, (a0)
; CHECK-NEXT:    vsetivli zero, 7, e32, m2, tu, ma
; CHECK-NEXT:    vslideup.vi v8, v10, 6
; CHECK-NEXT:    vmv.s.x v10, a3
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v10, 7
; CHECK-NEXT:    vadd.vv v8, v8, v12
; CHECK-NEXT:    ret
  %vadd = add <8 x i32> %vin, <i32 1, i32 2, i32 3, i32 5, i32 undef, i32 undef, i32 undef, i32 undef>
  %e0 = add i32 %a, 23
  %e1 = add i32 %b, 25
  %e2 = add i32 %c, 1
  %e3 = add i32 %d, 2355
  %v0 = insertelement <8 x i32> %vadd, i32 %e0, i32 4
  %v1 = insertelement <8 x i32> %v0, i32 %e1, i32 5
  %v2 = insertelement <8 x i32> %v1, i32 %e2, i32 6
  %v3 = insertelement <8 x i32> %v2, i32 %e3, i32 7
  ret <8 x i32> %v3
}

; Here we cannot pull the ashr through into the vector domain due to the
; truncate semantics of the build_vector. Doing so would truncate before
; the ashr instead of after it, so if %a or %b is e.g. UINT32_MAX+1 we
; would get a different result.
define <2 x i32> @build_vec_of_trunc_op(i64 %a, i64 %b) {
; RV32-LABEL: build_vec_of_trunc_op:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    slli a1, a1, 31
; RV32-NEXT:    srli a0, a0, 1
; RV32-NEXT:    slli a3, a3, 31
; RV32-NEXT:    srli a2, a2, 1
; RV32-NEXT:    or a0, a0, a1
; RV32-NEXT:    or a2, a2, a3
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT:    vmv.v.x v8, a0
; RV32-NEXT:    vslide1down.vx v8, v8, a2
; RV32-NEXT:    ret
;
; RV64-LABEL: build_vec_of_trunc_op:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    srli a0, a0, 1
; RV64-NEXT:    srli a1, a1, 1
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vmv.v.x v8, a0
; RV64-NEXT:    vslide1down.vx v8, v8, a1
; RV64-NEXT:    ret
entry:
  %conv11.i = ashr i64 %a, 1
  %conv11.2 = ashr i64 %b, 1
  %0 = trunc i64 %conv11.i to i32
  %1 = trunc i64 %conv11.2 to i32
  %2 = insertelement <2 x i32> zeroinitializer, i32 %0, i64 0
  %3 = insertelement <2 x i32> %2, i32 %1, i64 1
  ret <2 x i32> %3
}
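
; A worked instance of the comment before @build_vec_of_trunc_op (the value is
; illustrative only, not one of the checked cases): with %a = 4294967296
; (UINT32_MAX+1), ashr i64 by 1 gives 2147483648, which truncates to the i32
; lane value 0x80000000; truncating first would give 0, and 0 >> 1 is still 0,
; so hoisting the ashr past the truncating build_vector would change the result.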