; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=riscv32 -mattr=+v,+zvfh,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv32 -mattr=+v,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s

define <1 x bfloat> @masked_load_v1bf16(ptr %a, <1 x i1> %mask) {
; CHECK-LABEL: masked_load_v1bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <1 x bfloat> @llvm.masked.load.v1bf16(ptr %a, i32 8, <1 x i1> %mask, <1 x bfloat> undef)
  ret <1 x bfloat> %load
}

define <1 x half> @masked_load_v1f16(ptr %a, <1 x i1> %mask) {
; CHECK-LABEL: masked_load_v1f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <1 x half> @llvm.masked.load.v1f16(ptr %a, i32 8, <1 x i1> %mask, <1 x half> undef)
  ret <1 x half> %load
}

define <1 x float> @masked_load_v1f32(ptr %a, <1 x i1> %mask) {
; CHECK-LABEL: masked_load_v1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <1 x float> @llvm.masked.load.v1f32(ptr %a, i32 8, <1 x i1> %mask, <1 x float> undef)
  ret <1 x float> %load
}

define <1 x double> @masked_load_v1f64(ptr %a, <1 x i1> %mask) {
; CHECK-LABEL: masked_load_v1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <1 x double> @llvm.masked.load.v1f64(ptr %a, i32 8, <1 x i1> %mask, <1 x double> undef)
  ret <1 x double> %load
}

define <2 x bfloat> @masked_load_v2bf16(ptr %a, <2 x i1> %mask) {
; CHECK-LABEL: masked_load_v2bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <2 x bfloat> @llvm.masked.load.v2bf16(ptr %a, i32 8, <2 x i1> %mask, <2 x bfloat> undef)
  ret <2 x bfloat> %load
}

define <2 x half> @masked_load_v2f16(ptr %a, <2 x i1> %mask) {
; CHECK-LABEL: masked_load_v2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <2 x half> @llvm.masked.load.v2f16(ptr %a, i32 8, <2 x i1> %mask, <2 x half> undef)
  ret <2 x half> %load
}

define <2 x float> @masked_load_v2f32(ptr %a, <2 x i1> %mask) {
; CHECK-LABEL: masked_load_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <2 x float> @llvm.masked.load.v2f32(ptr %a, i32 8, <2 x i1> %mask, <2 x float> undef)
  ret <2 x float> %load
}

define <2 x double> @masked_load_v2f64(ptr %a, <2 x i1> %mask) {
; CHECK-LABEL: masked_load_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <2 x double> @llvm.masked.load.v2f64(ptr %a, i32 8, <2 x i1> %mask, <2 x double> undef)
  ret <2 x double> %load
}

define <4 x bfloat> @masked_load_v4bf16(ptr %a, <4 x i1> %mask) {
; CHECK-LABEL: masked_load_v4bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <4 x bfloat> @llvm.masked.load.v4bf16(ptr %a, i32 8, <4 x i1> %mask, <4 x bfloat> undef)
  ret <4 x bfloat> %load
}

define <4 x half> @masked_load_v4f16(ptr %a, <4 x i1> %mask) {
; CHECK-LABEL: masked_load_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <4 x half> @llvm.masked.load.v4f16(ptr %a, i32 8, <4 x i1> %mask, <4 x half> undef)
  ret <4 x half> %load
}

define <4 x float> @masked_load_v4f32(ptr %a, <4 x i1> %mask) {
; CHECK-LABEL: masked_load_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <4 x float> @llvm.masked.load.v4f32(ptr %a, i32 8, <4 x i1> %mask, <4 x float> undef)
  ret <4 x float> %load
}

define <4 x double> @masked_load_v4f64(ptr %a, <4 x i1> %mask) {
; CHECK-LABEL: masked_load_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <4 x double> @llvm.masked.load.v4f64(ptr %a, i32 8, <4 x i1> %mask, <4 x double> undef)
  ret <4 x double> %load
}

define <8 x bfloat> @masked_load_v8bf16(ptr %a, <8 x i1> %mask) {
; CHECK-LABEL: masked_load_v8bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <8 x bfloat> @llvm.masked.load.v8bf16(ptr %a, i32 8, <8 x i1> %mask, <8 x bfloat> undef)
  ret <8 x bfloat> %load
}

define <8 x half> @masked_load_v8f16(ptr %a, <8 x i1> %mask) {
; CHECK-LABEL: masked_load_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <8 x half> @llvm.masked.load.v8f16(ptr %a, i32 8, <8 x i1> %mask, <8 x half> undef)
  ret <8 x half> %load
}

define <8 x float> @masked_load_v8f32(ptr %a, <8 x i1> %mask) {
; CHECK-LABEL: masked_load_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <8 x float> @llvm.masked.load.v8f32(ptr %a, i32 8, <8 x i1> %mask, <8 x float> undef)
  ret <8 x float> %load
}

define <8 x double> @masked_load_v8f64(ptr %a, <8 x i1> %mask) {
; CHECK-LABEL: masked_load_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <8 x double> @llvm.masked.load.v8f64(ptr %a, i32 8, <8 x i1> %mask, <8 x double> undef)
  ret <8 x double> %load
}

define <16 x bfloat> @masked_load_v16bf16(ptr %a, <16 x i1> %mask) {
; CHECK-LABEL: masked_load_v16bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <16 x bfloat> @llvm.masked.load.v16bf16(ptr %a, i32 8, <16 x i1> %mask, <16 x bfloat> undef)
  ret <16 x bfloat> %load
}

define <16 x half> @masked_load_v16f16(ptr %a, <16 x i1> %mask) {
; CHECK-LABEL: masked_load_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <16 x half> @llvm.masked.load.v16f16(ptr %a, i32 8, <16 x i1> %mask, <16 x half> undef)
  ret <16 x half> %load
}

define <16 x float> @masked_load_v16f32(ptr %a, <16 x i1> %mask) {
; CHECK-LABEL: masked_load_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <16 x float> @llvm.masked.load.v16f32(ptr %a, i32 8, <16 x i1> %mask, <16 x float> undef)
  ret <16 x float> %load
}

define <16 x double> @masked_load_v16f64(ptr %a, <16 x i1> %mask) {
; CHECK-LABEL: masked_load_v16f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <16 x double> @llvm.masked.load.v16f64(ptr %a, i32 8, <16 x i1> %mask, <16 x double> undef)
  ret <16 x double> %load
}

define <32 x bfloat> @masked_load_v32bf16(ptr %a, <32 x i1> %mask) {
; CHECK-LABEL: masked_load_v32bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <32 x bfloat> @llvm.masked.load.v32bf16(ptr %a, i32 8, <32 x i1> %mask, <32 x bfloat> undef)
  ret <32 x bfloat> %load
}

define <32 x half> @masked_load_v32f16(ptr %a, <32 x i1> %mask) {
; CHECK-LABEL: masked_load_v32f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <32 x half> @llvm.masked.load.v32f16(ptr %a, i32 8, <32 x i1> %mask, <32 x half> undef)
  ret <32 x half> %load
}

define <32 x float> @masked_load_v32f32(ptr %a, <32 x i1> %mask) {
; CHECK-LABEL: masked_load_v32f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <32 x float> @llvm.masked.load.v32f32(ptr %a, i32 8, <32 x i1> %mask, <32 x float> undef)
  ret <32 x float> %load
}

define <32 x double> @masked_load_v32f64(ptr %a, <32 x i1> %mask) {
; CHECK-LABEL: masked_load_v32f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0), v0.t
; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT:    vslidedown.vi v0, v0, 2
; CHECK-NEXT:    addi a0, a0, 128
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v16, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <32 x double> @llvm.masked.load.v32f64(ptr %a, i32 8, <32 x i1> %mask, <32 x double> undef)
  ret <32 x double> %load
}

define <64 x bfloat> @masked_load_v64bf16(ptr %a, <64 x i1> %mask) {
; CHECK-LABEL: masked_load_v64bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 64
; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <64 x bfloat> @llvm.masked.load.v64bf16(ptr %a, i32 8, <64 x i1> %mask, <64 x bfloat> undef)
  ret <64 x bfloat> %load
}

define <64 x half> @masked_load_v64f16(ptr %a, <64 x i1> %mask) {
; CHECK-LABEL: masked_load_v64f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 64
; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <64 x half> @llvm.masked.load.v64f16(ptr %a, i32 8, <64 x i1> %mask, <64 x half> undef)
  ret <64 x half> %load
}

define <64 x float> @masked_load_v64f32(ptr %a, <64 x i1> %mask) {
; CHECK-LABEL: masked_load_v64f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
; CHECK-NEXT:    vslidedown.vi v16, v0, 4
; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    addi a0, a0, 128
; CHECK-NEXT:    vmv1r.v v0, v16
; CHECK-NEXT:    vle32.v v16, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <64 x float> @llvm.masked.load.v64f32(ptr %a, i32 8, <64 x i1> %mask, <64 x float> undef)
  ret <64 x float> %load
}

define <128 x bfloat> @masked_load_v128bf16(ptr %a, <128 x i1> %mask) {
; CHECK-LABEL: masked_load_v128bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 64
; CHECK-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v16, v0, 8
; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    addi a0, a0, 128
; CHECK-NEXT:    vmv1r.v v0, v16
; CHECK-NEXT:    vle16.v v16, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <128 x bfloat> @llvm.masked.load.v128bf16(ptr %a, i32 8, <128 x i1> %mask, <128 x bfloat> undef)
  ret <128 x bfloat> %load
}

define <128 x half> @masked_load_v128f16(ptr %a, <128 x i1> %mask) {
; CHECK-LABEL: masked_load_v128f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 64
; CHECK-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v16, v0, 8
; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    addi a0, a0, 128
; CHECK-NEXT:    vmv1r.v v0, v16
; CHECK-NEXT:    vle16.v v16, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <128 x half> @llvm.masked.load.v128f16(ptr %a, i32 8, <128 x i1> %mask, <128 x half> undef)
  ret <128 x half> %load
}