; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -verify-machineinstrs -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zvfh,+zvfbfmin < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zvfh,+zvfbfmin < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zvfhmin,+zvfbfmin < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zvfhmin,+zvfbfmin < %s | FileCheck %s

define <1 x bfloat> @vector_compress_v1bf16(<1 x bfloat> %v, <1 x i1> %mask) {
; CHECK-LABEL: vector_compress_v1bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
; CHECK-NEXT:    vcompress.vm v9, v8, v0
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %ret = call <1 x bfloat> @llvm.experimental.vector.compress.v1bf16(<1 x bfloat> %v, <1 x i1> %mask, <1 x bfloat> undef)
  ret <1 x bfloat> %ret
}

define <1 x bfloat> @vector_compress_v1bf16_passthru(<1 x bfloat> %passthru, <1 x bfloat> %v, <1 x i1> %mask) {
; CHECK-LABEL: vector_compress_v1bf16_passthru:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, tu, ma
; CHECK-NEXT:    vcompress.vm v8, v9, v0
; CHECK-NEXT:    ret
  %ret = call <1 x bfloat> @llvm.experimental.vector.compress.v1bf16(<1 x bfloat> %v, <1 x i1> %mask, <1 x bfloat> %passthru)
  ret <1 x bfloat> %ret
}

define <2 x bfloat> @vector_compress_v2bf16(<2 x bfloat> %v, <2 x i1> %mask) {
; CHECK-LABEL: vector_compress_v2bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vcompress.vm v9, v8, v0
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %ret = call <2 x bfloat> @llvm.experimental.vector.compress.v2bf16(<2 x bfloat> %v, <2 x i1> %mask, <2 x bfloat> undef)
  ret <2 x bfloat> %ret
}

define <2 x bfloat> @vector_compress_v2bf16_passthru(<2 x bfloat> %passthru, <2 x bfloat> %v, <2 x i1> %mask) {
; CHECK-LABEL: vector_compress_v2bf16_passthru:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, tu, ma
; CHECK-NEXT:    vcompress.vm v8, v9, v0
; CHECK-NEXT:    ret
  %ret = call <2 x bfloat> @llvm.experimental.vector.compress.v2bf16(<2 x bfloat> %v, <2 x i1> %mask, <2 x bfloat> %passthru)
  ret <2 x bfloat> %ret
}

define <4 x bfloat> @vector_compress_v4bf16(<4 x bfloat> %v, <4 x i1> %mask) {
; CHECK-LABEL: vector_compress_v4bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vcompress.vm v9, v8, v0
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %ret = call <4 x bfloat> @llvm.experimental.vector.compress.v4bf16(<4 x bfloat> %v, <4 x i1> %mask, <4 x bfloat> undef)
  ret <4 x bfloat> %ret
}

define <4 x bfloat> @vector_compress_v4bf16_passthru(<4 x bfloat> %passthru, <4 x bfloat> %v, <4 x i1> %mask) {
; CHECK-LABEL: vector_compress_v4bf16_passthru:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, tu, ma
; CHECK-NEXT:    vcompress.vm v8, v9, v0
; CHECK-NEXT:    ret
  %ret = call <4 x bfloat> @llvm.experimental.vector.compress.v4bf16(<4 x bfloat> %v, <4 x i1> %mask, <4 x bfloat> %passthru)
  ret <4 x bfloat> %ret
}

define <8 x bfloat> @vector_compress_v8bf16(<8 x bfloat> %v, <8 x i1> %mask) {
; CHECK-LABEL: vector_compress_v8bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vcompress.vm v9, v8, v0
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %ret = call <8 x bfloat> @llvm.experimental.vector.compress.v8bf16(<8 x bfloat> %v, <8 x i1> %mask, <8 x bfloat> undef)
  ret <8 x bfloat> %ret
}

define <8 x bfloat> @vector_compress_v8bf16_passthru(<8 x bfloat> %passthru, <8 x bfloat> %v, <8 x i1> %mask) {
; CHECK-LABEL: vector_compress_v8bf16_passthru:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, tu, ma
; CHECK-NEXT:    vcompress.vm v8, v9, v0
; CHECK-NEXT:    ret
  %ret = call <8 x bfloat> @llvm.experimental.vector.compress.v8bf16(<8 x bfloat> %v, <8 x i1> %mask, <8 x bfloat> %passthru)
  ret <8 x bfloat> %ret
}

define <1 x half> @vector_compress_v1f16(<1 x half> %v, <1 x i1> %mask) {
; CHECK-LABEL: vector_compress_v1f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
; CHECK-NEXT:    vcompress.vm v9, v8, v0
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %ret = call <1 x half> @llvm.experimental.vector.compress.v1f16(<1 x half> %v, <1 x i1> %mask, <1 x half> undef)
  ret <1 x half> %ret
}

define <1 x half> @vector_compress_v1f16_passthru(<1 x half> %passthru, <1 x half> %v, <1 x i1> %mask) {
; CHECK-LABEL: vector_compress_v1f16_passthru:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, tu, ma
; CHECK-NEXT:    vcompress.vm v8, v9, v0
; CHECK-NEXT:    ret
  %ret = call <1 x half> @llvm.experimental.vector.compress.v1f16(<1 x half> %v, <1 x i1> %mask, <1 x half> %passthru)
  ret <1 x half> %ret
}

define <2 x half> @vector_compress_v2f16(<2 x half> %v, <2 x i1> %mask) {
; CHECK-LABEL: vector_compress_v2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vcompress.vm v9, v8, v0
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %ret = call <2 x half> @llvm.experimental.vector.compress.v2f16(<2 x half> %v, <2 x i1> %mask, <2 x half> undef)
  ret <2 x half> %ret
}

define <2 x half> @vector_compress_v2f16_passthru(<2 x half> %passthru, <2 x half> %v, <2 x i1> %mask) {
; CHECK-LABEL: vector_compress_v2f16_passthru:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, tu, ma
; CHECK-NEXT:    vcompress.vm v8, v9, v0
; CHECK-NEXT:    ret
  %ret = call <2 x half> @llvm.experimental.vector.compress.v2f16(<2 x half> %v, <2 x i1> %mask, <2 x half> %passthru)
  ret <2 x half> %ret
}

define <4 x half> @vector_compress_v4f16(<4 x half> %v, <4 x i1> %mask) {
; CHECK-LABEL: vector_compress_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vcompress.vm v9, v8, v0
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %ret = call <4 x half> @llvm.experimental.vector.compress.v4f16(<4 x half> %v, <4 x i1> %mask, <4 x half> undef)
  ret <4 x half> %ret
}

define <4 x half> @vector_compress_v4f16_passthru(<4 x half> %passthru, <4 x half> %v, <4 x i1> %mask) {
; CHECK-LABEL: vector_compress_v4f16_passthru:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, tu, ma
; CHECK-NEXT:    vcompress.vm v8, v9, v0
; CHECK-NEXT:    ret
  %ret = call <4 x half> @llvm.experimental.vector.compress.v4f16(<4 x half> %v, <4 x i1> %mask, <4 x half> %passthru)
  ret <4 x half> %ret
}

define <8 x half> @vector_compress_v8f16(<8 x half> %v, <8 x i1> %mask) {
; CHECK-LABEL: vector_compress_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vcompress.vm v9, v8, v0
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %ret = call <8 x half> @llvm.experimental.vector.compress.v8f16(<8 x half> %v, <8 x i1> %mask, <8 x half> undef)
  ret <8 x half> %ret
}

define <8 x half> @vector_compress_v8f16_passthru(<8 x half> %passthru, <8 x half> %v, <8 x i1> %mask) {
; CHECK-LABEL: vector_compress_v8f16_passthru:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, tu, ma
; CHECK-NEXT:    vcompress.vm v8, v9, v0
; CHECK-NEXT:    ret
  %ret = call <8 x half> @llvm.experimental.vector.compress.v8f16(<8 x half> %v, <8 x i1> %mask, <8 x half> %passthru)
  ret <8 x half> %ret
}

define <1 x float> @vector_compress_v1f32(<1 x float> %v, <1 x i1> %mask) {
; CHECK-LABEL: vector_compress_v1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-NEXT:    vcompress.vm v9, v8, v0
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %ret = call <1 x float> @llvm.experimental.vector.compress.v1f32(<1 x float> %v, <1 x i1> %mask, <1 x float> undef)
  ret <1 x float> %ret
}

define <1 x float> @vector_compress_v1f32_passthru(<1 x float> %passthru, <1 x float> %v, <1 x i1> %mask) {
; CHECK-LABEL: vector_compress_v1f32_passthru:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, tu, ma
; CHECK-NEXT:    vcompress.vm v8, v9, v0
; CHECK-NEXT:    ret
  %ret = call <1 x float> @llvm.experimental.vector.compress.v1f32(<1 x float> %v, <1 x i1> %mask, <1 x float> %passthru)
  ret <1 x float> %ret
}

define <2 x float> @vector_compress_v2f32(<2 x float> %v, <2 x i1> %mask) {
; CHECK-LABEL: vector_compress_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vcompress.vm v9, v8, v0
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %ret = call <2 x float> @llvm.experimental.vector.compress.v2f32(<2 x float> %v, <2 x i1> %mask, <2 x float> undef)
  ret <2 x float> %ret
}

define <2 x float> @vector_compress_v2f32_passthru(<2 x float> %passthru, <2 x float> %v, <2 x i1> %mask) {
; CHECK-LABEL: vector_compress_v2f32_passthru:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, tu, ma
; CHECK-NEXT:    vcompress.vm v8, v9, v0
; CHECK-NEXT:    ret
  %ret = call <2 x float> @llvm.experimental.vector.compress.v2f32(<2 x float> %v, <2 x i1> %mask, <2 x float> %passthru)
  ret <2 x float> %ret
}

define <4 x float> @vector_compress_v4f32(<4 x float> %v, <4 x i1> %mask) {
; CHECK-LABEL: vector_compress_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vcompress.vm v9, v8, v0
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %ret = call <4 x float> @llvm.experimental.vector.compress.v4f32(<4 x float> %v, <4 x i1> %mask, <4 x float> undef)
  ret <4 x float> %ret
}

define <4 x float> @vector_compress_v4f32_passthru(<4 x float> %passthru, <4 x float> %v, <4 x i1> %mask) {
; CHECK-LABEL: vector_compress_v4f32_passthru:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
; CHECK-NEXT:    vcompress.vm v8, v9, v0
; CHECK-NEXT:    ret
  %ret = call <4 x float> @llvm.experimental.vector.compress.v4f32(<4 x float> %v, <4 x i1> %mask, <4 x float> %passthru)
  ret <4 x float> %ret
}

define <8 x float> @vector_compress_v8f32(<8 x float> %v, <8 x i1> %mask) {
; CHECK-LABEL: vector_compress_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vcompress.vm v10, v8, v0
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %ret = call <8 x float> @llvm.experimental.vector.compress.v8f32(<8 x float> %v, <8 x i1> %mask, <8 x float> undef)
  ret <8 x float> %ret
}

define <8 x float> @vector_compress_v8f32_passthru(<8 x float> %passthru, <8 x float> %v, <8 x i1> %mask) {
; CHECK-LABEL: vector_compress_v8f32_passthru:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, tu, ma
; CHECK-NEXT:    vcompress.vm v8, v10, v0
; CHECK-NEXT:    ret
  %ret = call <8 x float> @llvm.experimental.vector.compress.v8f32(<8 x float> %v, <8 x i1> %mask, <8 x float> %passthru)
  ret <8 x float> %ret
}

define <1 x double> @vector_compress_v1f64(<1 x double> %v, <1 x i1> %mask) {
; CHECK-LABEL: vector_compress_v1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT:    vcompress.vm v9, v8, v0
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %ret = call <1 x double> @llvm.experimental.vector.compress.v1f64(<1 x double> %v, <1 x i1> %mask, <1 x double> undef)
  ret <1 x double> %ret
}

define <1 x double> @vector_compress_v1f64_passthru(<1 x double> %passthru, <1 x double> %v, <1 x i1> %mask) {
; CHECK-LABEL: vector_compress_v1f64_passthru:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, tu, ma
; CHECK-NEXT:    vcompress.vm v8, v9, v0
; CHECK-NEXT:    ret
  %ret = call <1 x double> @llvm.experimental.vector.compress.v1f64(<1 x double> %v, <1 x i1> %mask, <1 x double> %passthru)
  ret <1 x double> %ret
}

define <2 x double> @vector_compress_v2f64(<2 x double> %v, <2 x i1> %mask) {
; CHECK-LABEL: vector_compress_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vcompress.vm v9, v8, v0
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %ret = call <2 x double> @llvm.experimental.vector.compress.v2f64(<2 x double> %v, <2 x i1> %mask, <2 x double> undef)
  ret <2 x double> %ret
}

define <2 x double> @vector_compress_v2f64_passthru(<2 x double> %passthru, <2 x double> %v, <2 x i1> %mask) {
; CHECK-LABEL: vector_compress_v2f64_passthru:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, tu, ma
; CHECK-NEXT:    vcompress.vm v8, v9, v0
; CHECK-NEXT:    ret
  %ret = call <2 x double> @llvm.experimental.vector.compress.v2f64(<2 x double> %v, <2 x i1> %mask, <2 x double> %passthru)
  ret <2 x double> %ret
}

define <4 x double> @vector_compress_v4f64(<4 x double> %v, <4 x i1> %mask) {
; CHECK-LABEL: vector_compress_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vcompress.vm v10, v8, v0
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %ret = call <4 x double> @llvm.experimental.vector.compress.v4f64(<4 x double> %v, <4 x i1> %mask, <4 x double> undef)
  ret <4 x double> %ret
}

define <4 x double> @vector_compress_v4f64_passthru(<4 x double> %passthru, <4 x double> %v, <4 x i1> %mask) {
; CHECK-LABEL: vector_compress_v4f64_passthru:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, tu, ma
; CHECK-NEXT:    vcompress.vm v8, v10, v0
; CHECK-NEXT:    ret
  %ret = call <4 x double> @llvm.experimental.vector.compress.v4f64(<4 x double> %v, <4 x i1> %mask, <4 x double> %passthru)
  ret <4 x double> %ret
}

define <8 x double> @vector_compress_v8f64(<8 x double> %v, <8 x i1> %mask) {
; CHECK-LABEL: vector_compress_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    vcompress.vm v12, v8, v0
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %ret = call <8 x double> @llvm.experimental.vector.compress.v8f64(<8 x double> %v, <8 x i1> %mask, <8 x double> undef)
  ret <8 x double> %ret
}

define <8 x double> @vector_compress_v8f64_passthru(<8 x double> %passthru, <8 x double> %v, <8 x i1> %mask) {
; CHECK-LABEL: vector_compress_v8f64_passthru:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, tu, ma
; CHECK-NEXT:    vcompress.vm v8, v12, v0
; CHECK-NEXT:    ret
  %ret = call <8 x double> @llvm.experimental.vector.compress.v8f64(<8 x double> %v, <8 x i1> %mask, <8 x double> %passthru)
  ret <8 x double> %ret
}