; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,RV64

define void @vselect_vv_v6i32(ptr %a, ptr %b, ptr %cc, ptr %z) {
; RV32-LABEL: vselect_vv_v6i32:
; RV32:       # %bb.0:
; RV32-NEXT:    lbu a2, 0(a2)
; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
; RV32-NEXT:    vle32.v v8, (a1)
; RV32-NEXT:    slli a1, a2, 30
; RV32-NEXT:    andi a4, a2, 1
; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT:    vmv.v.x v10, a4
; RV32-NEXT:    slli a4, a2, 29
; RV32-NEXT:    srli a1, a1, 31
; RV32-NEXT:    vslide1down.vx v10, v10, a1
; RV32-NEXT:    slli a1, a2, 28
; RV32-NEXT:    srli a4, a4, 31
; RV32-NEXT:    vslide1down.vx v10, v10, a4
; RV32-NEXT:    slli a4, a2, 27
; RV32-NEXT:    srli a2, a2, 5
; RV32-NEXT:    srli a1, a1, 31
; RV32-NEXT:    srli a4, a4, 31
; RV32-NEXT:    vslide1down.vx v10, v10, a1
; RV32-NEXT:    vslide1down.vx v10, v10, a4
; RV32-NEXT:    vslide1down.vx v10, v10, a2
; RV32-NEXT:    vslidedown.vi v10, v10, 2
; RV32-NEXT:    vand.vi v10, v10, 1
; RV32-NEXT:    vmsne.vi v0, v10, 0
; RV32-NEXT:    vsetivli zero, 6, e32, m2, tu, mu
; RV32-NEXT:    vle32.v v8, (a0), v0.t
; RV32-NEXT:    vse32.v v8, (a3)
; RV32-NEXT:    ret
;
; RV64-LABEL: vselect_vv_v6i32:
; RV64:       # %bb.0:
; RV64-NEXT:    lbu a2, 0(a2)
; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
; RV64-NEXT:    vle32.v v8, (a1)
; RV64-NEXT:    slli a1, a2, 62
; RV64-NEXT:    andi a4, a2, 1
; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT:    vmv.v.x v10, a4
; RV64-NEXT:    slli a4, a2, 61
; RV64-NEXT:    srli a1, a1, 63
; RV64-NEXT:    vslide1down.vx v10, v10, a1
; RV64-NEXT:    slli a1, a2, 60
; RV64-NEXT:    srli a4, a4, 63
; RV64-NEXT:    vslide1down.vx v10, v10, a4
; RV64-NEXT:    slli a4, a2, 59
; RV64-NEXT:    srli a2, a2, 5
; RV64-NEXT:    srli a1, a1, 63
; RV64-NEXT:    srli a4, a4, 63
; RV64-NEXT:    vslide1down.vx v10, v10, a1
; RV64-NEXT:    vslide1down.vx v10, v10, a4
; RV64-NEXT:    vslide1down.vx v10, v10, a2
; RV64-NEXT:    vslidedown.vi v10, v10, 2
; RV64-NEXT:    vand.vi v10, v10, 1
; RV64-NEXT:    vmsne.vi v0, v10, 0
; RV64-NEXT:    vsetivli zero, 6, e32, m2, tu, mu
; RV64-NEXT:    vle32.v v8, (a0), v0.t
; RV64-NEXT:    vse32.v v8, (a3)
; RV64-NEXT:    ret
  %va = load <6 x i32>, ptr %a
  %vb = load <6 x i32>, ptr %b
  %vcc = load <6 x i1>, ptr %cc
  %vsel = select <6 x i1> %vcc, <6 x i32> %va, <6 x i32> %vb
  store <6 x i32> %vsel, ptr %z
  ret void
}

define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) {
; RV32-LABEL: vselect_vx_v6i32:
; RV32:       # %bb.0:
; RV32-NEXT:    lbu a2, 0(a2)
; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
; RV32-NEXT:    vle32.v v8, (a1)
; RV32-NEXT:    slli a1, a2, 30
; RV32-NEXT:    andi a4, a2, 1
; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT:    vmv.v.x v10, a4
; RV32-NEXT:    slli a4, a2, 29
; RV32-NEXT:    srli a1, a1, 31
; RV32-NEXT:    vslide1down.vx v10, v10, a1
; RV32-NEXT:    slli a1, a2, 28
; RV32-NEXT:    srli a4, a4, 31
; RV32-NEXT:    vslide1down.vx v10, v10, a4
; RV32-NEXT:    slli a4, a2, 27
; RV32-NEXT:    srli a2, a2, 5
; RV32-NEXT:    srli a1, a1, 31
; RV32-NEXT:    srli a4, a4, 31
; RV32-NEXT:    vslide1down.vx v10, v10, a1
; RV32-NEXT:    vslide1down.vx v10, v10, a4
; RV32-NEXT:    vslide1down.vx v10, v10, a2
; RV32-NEXT:    vslidedown.vi v10, v10, 2
; RV32-NEXT:    vand.vi v10, v10, 1
; RV32-NEXT:    vmsne.vi v0, v10, 0
; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
; RV32-NEXT:    vmerge.vxm v8, v8, a0, v0
; RV32-NEXT:    vse32.v v8, (a3)
; RV32-NEXT:    ret
;
; RV64-LABEL: vselect_vx_v6i32:
; RV64:       # %bb.0:
; RV64-NEXT:    lbu a2, 0(a2)
; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
; RV64-NEXT:    vle32.v v8, (a1)
; RV64-NEXT:    slli a1, a2, 62
; RV64-NEXT:    andi a4, a2, 1
; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT:    vmv.v.x v10, a4
; RV64-NEXT:    slli a4, a2, 61
; RV64-NEXT:    srli a1, a1, 63
; RV64-NEXT:    vslide1down.vx v10, v10, a1
; RV64-NEXT:    slli a1, a2, 60
; RV64-NEXT:    srli a4, a4, 63
; RV64-NEXT:    vslide1down.vx v10, v10, a4
; RV64-NEXT:    slli a4, a2, 59
; RV64-NEXT:    srli a2, a2, 5
; RV64-NEXT:    srli a1, a1, 63
; RV64-NEXT:    srli a4, a4, 63
; RV64-NEXT:    vslide1down.vx v10, v10, a1
; RV64-NEXT:    vslide1down.vx v10, v10, a4
; RV64-NEXT:    vslide1down.vx v10, v10, a2
; RV64-NEXT:    vslidedown.vi v10, v10, 2
; RV64-NEXT:    vand.vi v10, v10, 1
; RV64-NEXT:    vmsne.vi v0, v10, 0
; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
; RV64-NEXT:    vmerge.vxm v8, v8, a0, v0
; RV64-NEXT:    vse32.v v8, (a3)
; RV64-NEXT:    ret
  %vb = load <6 x i32>, ptr %b
  %ahead = insertelement <6 x i32> poison, i32 %a, i32 0
  %va = shufflevector <6 x i32> %ahead, <6 x i32> poison, <6 x i32> zeroinitializer
  %vcc = load <6 x i1>, ptr %cc
  %vsel = select <6 x i1> %vcc, <6 x i32> %va, <6 x i32> %vb
  store <6 x i32> %vsel, ptr %z
  ret void
}

define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) {
; RV32-LABEL: vselect_vi_v6i32:
; RV32:       # %bb.0:
; RV32-NEXT:    lbu a1, 0(a1)
; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
; RV32-NEXT:    vle32.v v8, (a0)
; RV32-NEXT:    slli a0, a1, 30
; RV32-NEXT:    andi a3, a1, 1
; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT:    vmv.v.x v10, a3
; RV32-NEXT:    slli a3, a1, 29
; RV32-NEXT:    srli a0, a0, 31
; RV32-NEXT:    vslide1down.vx v10, v10, a0
; RV32-NEXT:    slli a0, a1, 28
; RV32-NEXT:    srli a3, a3, 31
; RV32-NEXT:    vslide1down.vx v10, v10, a3
; RV32-NEXT:    slli a3, a1, 27
; RV32-NEXT:    srli a1, a1, 5
; RV32-NEXT:    srli a0, a0, 31
; RV32-NEXT:    srli a3, a3, 31
; RV32-NEXT:    vslide1down.vx v10, v10, a0
; RV32-NEXT:    vslide1down.vx v10, v10, a3
; RV32-NEXT:    vslide1down.vx v10, v10, a1
; RV32-NEXT:    vslidedown.vi v10, v10, 2
; RV32-NEXT:    vand.vi v10, v10, 1
; RV32-NEXT:    vmsne.vi v0, v10, 0
; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
; RV32-NEXT:    vmerge.vim v8, v8, -1, v0
; RV32-NEXT:    vse32.v v8, (a2)
; RV32-NEXT:    ret
;
; RV64-LABEL: vselect_vi_v6i32:
; RV64:       # %bb.0:
; RV64-NEXT:    lbu a1, 0(a1)
; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
; RV64-NEXT:    vle32.v v8, (a0)
; RV64-NEXT:    slli a0, a1, 62
; RV64-NEXT:    andi a3, a1, 1
; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT:    vmv.v.x v10, a3
; RV64-NEXT:    slli a3, a1, 61
; RV64-NEXT:    srli a0, a0, 63
; RV64-NEXT:    vslide1down.vx v10, v10, a0
; RV64-NEXT:    slli a0, a1, 60
; RV64-NEXT:    srli a3, a3, 63
; RV64-NEXT:    vslide1down.vx v10, v10, a3
; RV64-NEXT:    slli a3, a1, 59
; RV64-NEXT:    srli a1, a1, 5
; RV64-NEXT:    srli a0, a0, 63
; RV64-NEXT:    srli a3, a3, 63
; RV64-NEXT:    vslide1down.vx v10, v10, a0
; RV64-NEXT:    vslide1down.vx v10, v10, a3
; RV64-NEXT:    vslide1down.vx v10, v10, a1
; RV64-NEXT:    vslidedown.vi v10, v10, 2
; RV64-NEXT:    vand.vi v10, v10, 1
; RV64-NEXT:    vmsne.vi v0, v10, 0
; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
; RV64-NEXT:    vmerge.vim v8, v8, -1, v0
; RV64-NEXT:    vse32.v v8, (a2)
; RV64-NEXT:    ret
  %vb = load <6 x i32>, ptr %b
  %vcc = load <6 x i1>, ptr %cc
  %vsel = select <6 x i1> %vcc, <6 x i32> splat (i32 -1), <6 x i32> %vb
  store <6 x i32> %vsel, ptr %z
  ret void
}


define void @vselect_vv_v6f32(ptr %a, ptr %b, ptr %cc, ptr %z) {
; RV32-LABEL: vselect_vv_v6f32:
; RV32:       # %bb.0:
; RV32-NEXT:    lbu a2, 0(a2)
; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
; RV32-NEXT:    vle32.v v8, (a1)
; RV32-NEXT:    slli a1, a2, 30
; RV32-NEXT:    andi a4, a2, 1
; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT:    vmv.v.x v10, a4
; RV32-NEXT:    slli a4, a2, 29
; RV32-NEXT:    srli a1, a1, 31
; RV32-NEXT:    vslide1down.vx v10, v10, a1
; RV32-NEXT:    slli a1, a2, 28
; RV32-NEXT:    srli a4, a4, 31
; RV32-NEXT:    vslide1down.vx v10, v10, a4
; RV32-NEXT:    slli a4, a2, 27
; RV32-NEXT:    srli a2, a2, 5
; RV32-NEXT:    srli a1, a1, 31
; RV32-NEXT:    srli a4, a4, 31
; RV32-NEXT:    vslide1down.vx v10, v10, a1
; RV32-NEXT:    vslide1down.vx v10, v10, a4
; RV32-NEXT:    vslide1down.vx v10, v10, a2
; RV32-NEXT:    vslidedown.vi v10, v10, 2
; RV32-NEXT:    vand.vi v10, v10, 1
; RV32-NEXT:    vmsne.vi v0, v10, 0
; RV32-NEXT:    vsetivli zero, 6, e32, m2, tu, mu
; RV32-NEXT:    vle32.v v8, (a0), v0.t
; RV32-NEXT:    vse32.v v8, (a3)
; RV32-NEXT:    ret
;
; RV64-LABEL: vselect_vv_v6f32:
; RV64:       # %bb.0:
; RV64-NEXT:    lbu a2, 0(a2)
; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
; RV64-NEXT:    vle32.v v8, (a1)
; RV64-NEXT:    slli a1, a2, 62
; RV64-NEXT:    andi a4, a2, 1
; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT:    vmv.v.x v10, a4
; RV64-NEXT:    slli a4, a2, 61
; RV64-NEXT:    srli a1, a1, 63
; RV64-NEXT:    vslide1down.vx v10, v10, a1
; RV64-NEXT:    slli a1, a2, 60
; RV64-NEXT:    srli a4, a4, 63
; RV64-NEXT:    vslide1down.vx v10, v10, a4
; RV64-NEXT:    slli a4, a2, 59
; RV64-NEXT:    srli a2, a2, 5
; RV64-NEXT:    srli a1, a1, 63
; RV64-NEXT:    srli a4, a4, 63
; RV64-NEXT:    vslide1down.vx v10, v10, a1
; RV64-NEXT:    vslide1down.vx v10, v10, a4
; RV64-NEXT:    vslide1down.vx v10, v10, a2
; RV64-NEXT:    vslidedown.vi v10, v10, 2
; RV64-NEXT:    vand.vi v10, v10, 1
; RV64-NEXT:    vmsne.vi v0, v10, 0
; RV64-NEXT:    vsetivli zero, 6, e32, m2, tu, mu
; RV64-NEXT:    vle32.v v8, (a0), v0.t
; RV64-NEXT:    vse32.v v8, (a3)
; RV64-NEXT:    ret
  %va = load <6 x float>, ptr %a
  %vb = load <6 x float>, ptr %b
  %vcc = load <6 x i1>, ptr %cc
  %vsel = select <6 x i1> %vcc, <6 x float> %va, <6 x float> %vb
  store <6 x float> %vsel, ptr %z
  ret void
}

define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) {
; RV32-LABEL: vselect_vx_v6f32:
; RV32:       # %bb.0:
; RV32-NEXT:    lbu a1, 0(a1)
; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
; RV32-NEXT:    vle32.v v8, (a0)
; RV32-NEXT:    slli a0, a1, 30
; RV32-NEXT:    andi a3, a1, 1
; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT:    vmv.v.x v10, a3
; RV32-NEXT:    slli a3, a1, 29
; RV32-NEXT:    srli a0, a0, 31
; RV32-NEXT:    vslide1down.vx v10, v10, a0
; RV32-NEXT:    slli a0, a1, 28
; RV32-NEXT:    srli a3, a3, 31
; RV32-NEXT:    vslide1down.vx v10, v10, a3
; RV32-NEXT:    slli a3, a1, 27
; RV32-NEXT:    srli a1, a1, 5
; RV32-NEXT:    srli a0, a0, 31
; RV32-NEXT:    srli a3, a3, 31
; RV32-NEXT:    vslide1down.vx v10, v10, a0
; RV32-NEXT:    vslide1down.vx v10, v10, a3
; RV32-NEXT:    vslide1down.vx v10, v10, a1
; RV32-NEXT:    vslidedown.vi v10, v10, 2
; RV32-NEXT:    vand.vi v10, v10, 1
; RV32-NEXT:    vmsne.vi v0, v10, 0
; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
; RV32-NEXT:    vfmerge.vfm v8, v8, fa0, v0
; RV32-NEXT:    vse32.v v8, (a2)
; RV32-NEXT:    ret
;
; RV64-LABEL: vselect_vx_v6f32:
; RV64:       # %bb.0:
; RV64-NEXT:    lbu a1, 0(a1)
; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
; RV64-NEXT:    vle32.v v8, (a0)
; RV64-NEXT:    slli a0, a1, 62
; RV64-NEXT:    andi a3, a1, 1
; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT:    vmv.v.x v10, a3
; RV64-NEXT:    slli a3, a1, 61
; RV64-NEXT:    srli a0, a0, 63
; RV64-NEXT:    vslide1down.vx v10, v10, a0
; RV64-NEXT:    slli a0, a1, 60
; RV64-NEXT:    srli a3, a3, 63
; RV64-NEXT:    vslide1down.vx v10, v10, a3
; RV64-NEXT:    slli a3, a1, 59
; RV64-NEXT:    srli a1, a1, 5
; RV64-NEXT:    srli a0, a0, 63
; RV64-NEXT:    srli a3, a3, 63
; RV64-NEXT:    vslide1down.vx v10, v10, a0
; RV64-NEXT:    vslide1down.vx v10, v10, a3
; RV64-NEXT:    vslide1down.vx v10, v10, a1
; RV64-NEXT:    vslidedown.vi v10, v10, 2
; RV64-NEXT:    vand.vi v10, v10, 1
; RV64-NEXT:    vmsne.vi v0, v10, 0
; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
; RV64-NEXT:    vfmerge.vfm v8, v8, fa0, v0
; RV64-NEXT:    vse32.v v8, (a2)
; RV64-NEXT:    ret
  %vb = load <6 x float>, ptr %b
  %ahead = insertelement <6 x float> poison, float %a, i32 0
  %va = shufflevector <6 x float> %ahead, <6 x float> poison, <6 x i32> zeroinitializer
  %vcc = load <6 x i1>, ptr %cc
  %vsel = select <6 x i1> %vcc, <6 x float> %va, <6 x float> %vb
  store <6 x float> %vsel, ptr %z
  ret void
}

define void @vselect_vfpzero_v6f32(ptr %b, ptr %cc, ptr %z) {
; RV32-LABEL: vselect_vfpzero_v6f32:
; RV32:       # %bb.0:
; RV32-NEXT:    lbu a1, 0(a1)
; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
; RV32-NEXT:    vle32.v v8, (a0)
; RV32-NEXT:    slli a0, a1, 30
; RV32-NEXT:    andi a3, a1, 1
; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT:    vmv.v.x v10, a3
; RV32-NEXT:    slli a3, a1, 29
; RV32-NEXT:    srli a0, a0, 31
; RV32-NEXT:    vslide1down.vx v10, v10, a0
; RV32-NEXT:    slli a0, a1, 28
; RV32-NEXT:    srli a3, a3, 31
; RV32-NEXT:    vslide1down.vx v10, v10, a3
; RV32-NEXT:    slli a3, a1, 27
; RV32-NEXT:    srli a1, a1, 5
; RV32-NEXT:    srli a0, a0, 31
; RV32-NEXT:    srli a3, a3, 31
; RV32-NEXT:    vslide1down.vx v10, v10, a0
; RV32-NEXT:    vslide1down.vx v10, v10, a3
; RV32-NEXT:    vslide1down.vx v10, v10, a1
; RV32-NEXT:    vslidedown.vi v10, v10, 2
; RV32-NEXT:    vand.vi v10, v10, 1
; RV32-NEXT:    vmsne.vi v0, v10, 0
; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
; RV32-NEXT:    vmerge.vim v8, v8, 0, v0
; RV32-NEXT:    vse32.v v8, (a2)
; RV32-NEXT:    ret
;
; RV64-LABEL: vselect_vfpzero_v6f32:
; RV64:       # %bb.0:
; RV64-NEXT:    lbu a1, 0(a1)
; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
; RV64-NEXT:    vle32.v v8, (a0)
; RV64-NEXT:    slli a0, a1, 62
; RV64-NEXT:    andi a3, a1, 1
; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT:    vmv.v.x v10, a3
; RV64-NEXT:    slli a3, a1, 61
; RV64-NEXT:    srli a0, a0, 63
; RV64-NEXT:    vslide1down.vx v10, v10, a0
; RV64-NEXT:    slli a0, a1, 60
; RV64-NEXT:    srli a3, a3, 63
; RV64-NEXT:    vslide1down.vx v10, v10, a3
; RV64-NEXT:    slli a3, a1, 59
; RV64-NEXT:    srli a1, a1, 5
; RV64-NEXT:    srli a0, a0, 63
; RV64-NEXT:    srli a3, a3, 63
; RV64-NEXT:    vslide1down.vx v10, v10, a0
; RV64-NEXT:    vslide1down.vx v10, v10, a3
; RV64-NEXT:    vslide1down.vx v10, v10, a1
; RV64-NEXT:    vslidedown.vi v10, v10, 2
; RV64-NEXT:    vand.vi v10, v10, 1
; RV64-NEXT:    vmsne.vi v0, v10, 0
; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
; RV64-NEXT:    vmerge.vim v8, v8, 0, v0
; RV64-NEXT:    vse32.v v8, (a2)
; RV64-NEXT:    ret
  %vb = load <6 x float>, ptr %b
  %vcc = load <6 x i1>, ptr %cc
  %vsel = select <6 x i1> %vcc, <6 x float> splat (float 0.0), <6 x float> %vb
  store <6 x float> %vsel, ptr %z
  ret void
}

define void @vselect_vv_v8i32(ptr %a, ptr %b, ptr %cc, ptr %z) {
; CHECK-LABEL: vselect_vv_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
; CHECK-NEXT:    vlm.v v0, (a2)
; CHECK-NEXT:    vle32.v v8, (a1)
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    vse32.v v8, (a3)
; CHECK-NEXT:    ret
  %va = load <8 x i32>, ptr %a
  %vb = load <8 x i32>, ptr %b
  %vcc = load <8 x i1>, ptr %cc
  %vsel = select <8 x i1> %vcc, <8 x i32> %va, <8 x i32> %vb
  store <8 x i32> %vsel, ptr %z
  ret void
}

define void @vselect_vx_v8i32(i32 %a, ptr %b, ptr %cc, ptr %z) {
; CHECK-LABEL: vselect_vx_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vlm.v v0, (a2)
; CHECK-NEXT:    vle32.v v8, (a1)
; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
; CHECK-NEXT:    vse32.v v8, (a3)
; CHECK-NEXT:    ret
  %vb = load <8 x i32>, ptr %b
  %ahead = insertelement <8 x i32> poison, i32 %a, i32 0
  %va = shufflevector <8 x i32> %ahead, <8 x i32> poison, <8 x i32> zeroinitializer
  %vcc = load <8 x i1>, ptr %cc
  %vsel = select <8 x i1> %vcc, <8 x i32> %va, <8 x i32> %vb
  store <8 x i32> %vsel, ptr %z
  ret void
}

define void @vselect_vi_v8i32(ptr %b, ptr %cc, ptr %z) {
; CHECK-LABEL: vselect_vi_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vlm.v v0, (a1)
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vmerge.vim v8, v8, -1, v0
; CHECK-NEXT:    vse32.v v8, (a2)
; CHECK-NEXT:    ret
  %vb = load <8 x i32>, ptr %b
  %vcc = load <8 x i1>, ptr %cc
  %vsel = select <8 x i1> %vcc, <8 x i32> splat (i32 -1), <8 x i32> %vb
  store <8 x i32> %vsel, ptr %z
  ret void
}

define void @vselect_vv_v8f32(ptr %a, ptr %b, ptr %cc, ptr %z) {
; CHECK-LABEL: vselect_vv_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
; CHECK-NEXT:    vlm.v v0, (a2)
; CHECK-NEXT:    vle32.v v8, (a1)
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    vse32.v v8, (a3)
; CHECK-NEXT:    ret
  %va = load <8 x float>, ptr %a
  %vb = load <8 x float>, ptr %b
  %vcc = load <8 x i1>, ptr %cc
  %vsel = select <8 x i1> %vcc, <8 x float> %va, <8 x float> %vb
  store <8 x float> %vsel, ptr %z
  ret void
}

define void @vselect_vx_v8f32(float %a, ptr %b, ptr %cc, ptr %z) {
; CHECK-LABEL: vselect_vx_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vlm.v v0, (a1)
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
; CHECK-NEXT:    vse32.v v8, (a2)
; CHECK-NEXT:    ret
  %vb = load <8 x float>, ptr %b
  %ahead = insertelement <8 x float> poison, float %a, i32 0
  %va = shufflevector <8 x float> %ahead, <8 x float> poison, <8 x i32> zeroinitializer
  %vcc = load <8 x i1>, ptr %cc
  %vsel = select <8 x i1> %vcc, <8 x float> %va, <8 x float> %vb
  store <8 x float> %vsel, ptr %z
  ret void
}

define void @vselect_vfpzero_v8f32(ptr %b, ptr %cc, ptr %z) {
; CHECK-LABEL: vselect_vfpzero_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vlm.v v0, (a1)
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
; CHECK-NEXT:    vse32.v v8, (a2)
; CHECK-NEXT:    ret
  %vb = load <8 x float>, ptr %b
  %vcc = load <8 x i1>, ptr %cc
  %vsel = select <8 x i1> %vcc, <8 x float> splat (float 0.0), <8 x float> %vb
  store <8 x float> %vsel, ptr %z
  ret void
}

define void @vselect_vv_v16i16(ptr %a, ptr %b, ptr %cc, ptr %z) {
; CHECK-LABEL: vselect_vv_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
; CHECK-NEXT:    vlm.v v0, (a2)
; CHECK-NEXT:    vle16.v v8, (a1)
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    vse16.v v8, (a3)
; CHECK-NEXT:    ret
  %va = load <16 x i16>, ptr %a
  %vb = load <16 x i16>, ptr %b
  %vcc = load <16 x i1>, ptr %cc
  %vsel = select <16 x i1> %vcc, <16 x i16> %va, <16 x i16> %vb
  store <16 x i16> %vsel, ptr %z
  ret void
}

define void @vselect_vx_v16i16(i16 signext %a, ptr %b, ptr %cc, ptr %z) {
; CHECK-LABEL: vselect_vx_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vlm.v v0, (a2)
; CHECK-NEXT:    vle16.v v8, (a1)
; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
; CHECK-NEXT:    vse16.v v8, (a3)
; CHECK-NEXT:    ret
  %vb = load <16 x i16>, ptr %b
  %ahead = insertelement <16 x i16> poison, i16 %a, i32 0
  %va = shufflevector <16 x i16> %ahead, <16 x i16> poison, <16 x i32> zeroinitializer
  %vcc = load <16 x i1>, ptr %cc
  %vsel = select <16 x i1> %vcc, <16 x i16> %va, <16 x i16> %vb
  store <16 x i16> %vsel, ptr %z
  ret void
}

define void @vselect_vi_v16i16(ptr %b, ptr %cc, ptr %z) {
; CHECK-LABEL: vselect_vi_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vlm.v v0, (a1)
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vmerge.vim v8, v8, 4, v0
; CHECK-NEXT:    vse16.v v8, (a2)
; CHECK-NEXT:    ret
  %vb = load <16 x i16>, ptr %b
  %vcc = load <16 x i1>, ptr %cc
  %vsel = select <16 x i1> %vcc, <16 x i16> splat (i16 4), <16 x i16> %vb
  store <16 x i16> %vsel, ptr %z
  ret void
}

define void @vselect_vv_v32f16(ptr %a, ptr %b, ptr %cc, ptr %z) {
; CHECK-LABEL: vselect_vv_v32f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a4, 32
; CHECK-NEXT:    vsetvli zero, a4, e16, m4, ta, mu
; CHECK-NEXT:    vlm.v v0, (a2)
; CHECK-NEXT:    vle16.v v8, (a1)
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    vse16.v v8, (a3)
; CHECK-NEXT:    ret
  %va = load <32 x half>, ptr %a
  %vb = load <32 x half>, ptr %b
  %vcc = load <32 x i1>, ptr %cc
  %vsel = select <32 x i1> %vcc, <32 x half> %va, <32 x half> %vb
  store <32 x half> %vsel, ptr %z
  ret void
}

define void @vselect_vx_v32f16(half %a, ptr %b, ptr %cc, ptr %z) {
; CHECK-LABEL: vselect_vx_v32f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a3, 32
; CHECK-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
; CHECK-NEXT:    vlm.v v0, (a1)
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
; CHECK-NEXT:    vse16.v v8, (a2)
; CHECK-NEXT:    ret
  %vb = load <32 x half>, ptr %b
  %ahead = insertelement <32 x half> poison, half %a, i32 0
  %va = shufflevector <32 x half> %ahead, <32 x half> poison, <32 x i32> zeroinitializer
  %vcc = load <32 x i1>, ptr %cc
  %vsel = select <32 x i1> %vcc, <32 x half> %va, <32 x half> %vb
  store <32 x half> %vsel, ptr %z
  ret void
}

define void @vselect_vfpzero_v32f16(ptr %b, ptr %cc, ptr %z) {
; CHECK-LABEL: vselect_vfpzero_v32f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a3, 32
; CHECK-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
; CHECK-NEXT:    vlm.v v0, (a1)
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
; CHECK-NEXT:    vse16.v v8, (a2)
; CHECK-NEXT:    ret
  %vb = load <32 x half>, ptr %b
  %vcc = load <32 x i1>, ptr %cc
  %vsel = select <32 x i1> %vcc, <32 x half> splat (half 0.0), <32 x half> %vb
  store <32 x half> %vsel, ptr %z
  ret void
}

define <2 x i1> @vselect_v2i1(<2 x i1> %a, <2 x i1> %b, <2 x i1> %cc) {
; CHECK-LABEL: vselect_v2i1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vmandn.mm v8, v8, v9
; CHECK-NEXT:    vmand.mm v9, v0, v9
; CHECK-NEXT:    vmor.mm v0, v9, v8
; CHECK-NEXT:    ret
  %v = select <2 x i1> %cc, <2 x i1> %a, <2 x i1> %b
  ret <2 x i1> %v
}

define <4 x i1> @vselect_v4i1(<4 x i1> %a, <4 x i1> %b, <4 x i1> %cc) {
; CHECK-LABEL: vselect_v4i1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vmandn.mm v8, v8, v9
; CHECK-NEXT:    vmand.mm v9, v0, v9
; CHECK-NEXT:    vmor.mm v0, v9, v8
; CHECK-NEXT:    ret
  %v = select <4 x i1> %cc, <4 x i1> %a, <4 x i1> %b
  ret <4 x i1> %v
}

define <8 x i1> @vselect_v8i1(<8 x i1> %a, <8 x i1> %b, <8 x i1> %cc) {
; CHECK-LABEL: vselect_v8i1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vmandn.mm v8, v8, v9
; CHECK-NEXT:    vmand.mm v9, v0, v9
; CHECK-NEXT:    vmor.mm v0, v9, v8
; CHECK-NEXT:    ret
  %v = select <8 x i1> %cc, <8 x i1> %a, <8 x i1> %b
  ret <8 x i1> %v
}

define <16 x i1> @vselect_v16i1(<16 x i1> %a, <16 x i1> %b, <16 x i1> %cc) {
; CHECK-LABEL: vselect_v16i1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vmandn.mm v8, v8, v9
; CHECK-NEXT:    vmand.mm v9, v0, v9
; CHECK-NEXT:    vmor.mm v0, v9, v8
; CHECK-NEXT:    ret
  %v = select <16 x i1> %cc, <16 x i1> %a, <16 x i1> %b
  ret <16 x i1> %v
}

define <32 x i1> @vselect_v32i1(<32 x i1> %a, <32 x i1> %b, <32 x i1> %cc) {
; CHECK-LABEL: vselect_v32i1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 32
; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
; CHECK-NEXT:    vmandn.mm v8, v8, v9
; CHECK-NEXT:    vmand.mm v9, v0, v9
; CHECK-NEXT:    vmor.mm v0, v9, v8
; CHECK-NEXT:    ret
  %v = select <32 x i1> %cc, <32 x i1> %a, <32 x i1> %b
  ret <32 x i1> %v
}

define <64 x i1> @vselect_v64i1(<64 x i1> %a, <64 x i1> %b, <64 x i1> %cc) {
; CHECK-LABEL: vselect_v64i1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 64
; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
; CHECK-NEXT:    vmandn.mm v8, v8, v9
; CHECK-NEXT:    vmand.mm v9, v0, v9
; CHECK-NEXT:    vmor.mm v0, v9, v8
; CHECK-NEXT:    ret
  %v = select <64 x i1> %cc, <64 x i1> %a, <64 x i1> %b
  ret <64 x i1> %v
}