; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s

declare <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i64(
  <vscale x 4 x i32>,
  ptr,
  <vscale x 4 x i64>,
  i64);

define <vscale x 4 x i32> @test_vloxei(ptr %ptr, <vscale x 4 x i8> %offset, i64 %vl) {
; CHECK-LABEL: test_vloxei:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; CHECK-NEXT:    vzext.vf8 v12, v8
; CHECK-NEXT:    vsll.vi v12, v12, 4
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vloxei64.v v8, (a0), v12
; CHECK-NEXT:    ret
entry:
  %offset.ext = zext <vscale x 4 x i8> %offset to <vscale x 4 x i64>
  %shl = shl <vscale x 4 x i64> %offset.ext, splat (i64 4)
  %res = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i64(
    <vscale x 4 x i32> undef,
    ptr %ptr,
    <vscale x 4 x i64> %shl,
    i64 %vl)
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @test_vloxei2(ptr %ptr, <vscale x 4 x i8> %offset, i64 %vl) {
; CHECK-LABEL: test_vloxei2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; CHECK-NEXT:    vzext.vf8 v12, v8
; CHECK-NEXT:    vsll.vi v12, v12, 14
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vloxei64.v v8, (a0), v12
; CHECK-NEXT:    ret
entry:
  %offset.ext = zext <vscale x 4 x i8> %offset to <vscale x 4 x i64>
  %shl = shl <vscale x 4 x i64> %offset.ext, splat (i64 14)
  %res = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i64(
    <vscale x 4 x i32> undef,
    ptr %ptr,
    <vscale x 4 x i64> %shl,
    i64 %vl)
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @test_vloxei3(ptr %ptr, <vscale x 4 x i8> %offset, i64 %vl) {
; CHECK-LABEL: test_vloxei3:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; CHECK-NEXT:    vzext.vf8 v12, v8
; CHECK-NEXT:    vsll.vi v12, v12, 26
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vloxei64.v v8, (a0), v12
; CHECK-NEXT:    ret
entry:
  %offset.ext = zext <vscale x 4 x i8> %offset to <vscale x 4 x i64>
  %shl = shl <vscale x 4 x i64> %offset.ext, splat (i64 26)
  %res = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i64(
    <vscale x 4 x i32> undef,
    ptr %ptr,
    <vscale x 4 x i64> %shl,
    i64 %vl)
  ret <vscale x 4 x i32> %res
}

; Test using vp.zext to extend the offset.
declare <vscale x 4 x i64> @llvm.vp.zext.nxvi64.nxv1i8(<vscale x 4 x i8>, <vscale x 4 x i1>, i32)
define <vscale x 4 x i32> @test_vloxei4(ptr %ptr, <vscale x 4 x i8> %offset, <vscale x 4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: test_vloxei4:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; CHECK-NEXT:    vzext.vf8 v12, v8, v0.t
; CHECK-NEXT:    vsll.vi v12, v12, 4
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vloxei64.v v8, (a0), v12
; CHECK-NEXT:    ret
entry:
  %offset.ext = call <vscale x 4 x i64> @llvm.vp.zext.nxvi64.nxv1i8(<vscale x 4 x i8> %offset, <vscale x 4 x i1> %m, i32 %vl)
  %shl = shl <vscale x 4 x i64> %offset.ext, splat (i64 4)
  %vl.i64 = zext i32 %vl to i64
  %res = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i64(
    <vscale x 4 x i32> undef,
    ptr %ptr,
    <vscale x 4 x i64> %shl,
    i64 %vl.i64)
  ret <vscale x 4 x i32> %res
}

; Test that the original extended type is already narrow enough.
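; Here the i8 offset is zero-extended only to i16 and shifted by 12, so the
; index is computed at SEW=16 (vzext.vf2) and the 16-bit-indexed form
; (vloxei16) is used directly; no widening to i64 is involved.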
declare <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i16(
  <vscale x 4 x i32>,
  ptr,
  <vscale x 4 x i16>,
  i64);
define <vscale x 4 x i32> @test_vloxei5(ptr %ptr, <vscale x 4 x i8> %offset, i64 %vl) {
; CHECK-LABEL: test_vloxei5:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT:    vzext.vf2 v9, v8
; CHECK-NEXT:    vsll.vi v10, v9, 12
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vloxei16.v v8, (a0), v10
; CHECK-NEXT:    ret
entry:
  %offset.ext = zext <vscale x 4 x i8> %offset to <vscale x 4 x i16>
  %shl = shl <vscale x 4 x i16> %offset.ext, splat (i16 12)
  %res = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i16(
    <vscale x 4 x i32> undef,
    ptr %ptr,
    <vscale x 4 x i16> %shl,
    i64 %vl)
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @test_vloxei6(ptr %ptr, <vscale x 4 x i7> %offset, i64 %vl) {
; CHECK-LABEL: test_vloxei6:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li a2, 127
; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
; CHECK-NEXT:    vand.vx v8, v8, a2
; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT:    vzext.vf8 v12, v8
; CHECK-NEXT:    vsll.vi v12, v12, 4
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vloxei64.v v8, (a0), v12
; CHECK-NEXT:    ret
entry:
  %offset.ext = zext <vscale x 4 x i7> %offset to <vscale x 4 x i64>
  %shl = shl <vscale x 4 x i64> %offset.ext, splat (i64 4)
  %res = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i64(
    <vscale x 4 x i32> undef,
    ptr %ptr,
    <vscale x 4 x i64> %shl,
    i64 %vl)
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @test_vloxei7(ptr %ptr, <vscale x 4 x i1> %offset, i64 %vl) {
; CHECK-LABEL: test_vloxei7:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
; CHECK-NEXT:    vsll.vi v12, v8, 2
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vloxei64.v v8, (a0), v12
; CHECK-NEXT:    ret
entry:
  %offset.ext = zext <vscale x 4 x i1> %offset to <vscale x 4 x i64>
  %shl = shl <vscale x 4 x i64> %offset.ext, splat (i64 2)
  %res = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i64(
    <vscale x 4 x i32> undef,
    ptr %ptr,
    <vscale x 4 x i64> %shl,
    i64 %vl)
  ret <vscale x 4 x i32> %res
}

declare <vscale x 4 x i32> @llvm.riscv.vloxei.mask.nxv4i32.nxv4i64(
  <vscale x 4 x i32>,
  ptr,
  <vscale x 4 x i64>,
  <vscale x 4 x i1>,
  i64,
  i64);

define <vscale x 4 x i32> @test_vloxei_mask(ptr %ptr, <vscale x 4 x i8> %offset, <vscale x 4 x i1> %m, i64 %vl) {
; CHECK-LABEL: test_vloxei_mask:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; CHECK-NEXT:    vzext.vf8 v12, v8
; CHECK-NEXT:    vsll.vi v12, v12, 4
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vloxei64.v v8, (a0), v12, v0.t
; CHECK-NEXT:    ret
entry:
  %offset.ext = zext <vscale x 4 x i8> %offset to <vscale x 4 x i64>
  %shl = shl <vscale x 4 x i64> %offset.ext, splat (i64 4)
  %res = call <vscale x 4 x i32> @llvm.riscv.vloxei.mask.nxv4i32.nxv4i64(
    <vscale x 4 x i32> undef,
    ptr %ptr,
    <vscale x 4 x i64> %shl,
    <vscale x 4 x i1> %m,
    i64 %vl, i64 1)
  ret <vscale x 4 x i32> %res
}

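; The unordered indexed loads (vluxei) below exercise the same zext + shl
; index computation as the ordered (vloxei) tests above.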
declare <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i64(
  <vscale x 4 x i32>,
  ptr,
  <vscale x 4 x i64>,
  i64);

define <vscale x 4 x i32> @test_vluxei(ptr %ptr, <vscale x 4 x i8> %offset, i64 %vl) {
; CHECK-LABEL: test_vluxei:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; CHECK-NEXT:    vzext.vf8 v12, v8
; CHECK-NEXT:    vsll.vi v12, v12, 4
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vluxei64.v v8, (a0), v12
; CHECK-NEXT:    ret
entry:
  %offset.ext = zext <vscale x 4 x i8> %offset to <vscale x 4 x i64>
  %shl = shl <vscale x 4 x i64> %offset.ext, splat (i64 4)
  %res = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i64(
    <vscale x 4 x i32> undef,
    ptr %ptr,
    <vscale x 4 x i64> %shl,
    i64 %vl)
  ret <vscale x 4 x i32> %res
}

declare <vscale x 4 x i32> @llvm.riscv.vluxei.mask.nxv4i32.nxv4i64(
  <vscale x 4 x i32>,
  ptr,
  <vscale x 4 x i64>,
  <vscale x 4 x i1>,
  i64,
  i64);

define <vscale x 4 x i32> @test_vluxei_mask(ptr %ptr, <vscale x 4 x i8> %offset, <vscale x 4 x i1> %m, i64 %vl) {
; CHECK-LABEL: test_vluxei_mask:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; CHECK-NEXT:    vzext.vf8 v12, v8
; CHECK-NEXT:    vsll.vi v12, v12, 4
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vluxei64.v v8, (a0), v12, v0.t
; CHECK-NEXT:    ret
entry:
  %offset.ext = zext <vscale x 4 x i8> %offset to <vscale x 4 x i64>
  %shl = shl <vscale x 4 x i64> %offset.ext, splat (i64 4)
  %res = call <vscale x 4 x i32> @llvm.riscv.vluxei.mask.nxv4i32.nxv4i64(
    <vscale x 4 x i32> undef,
    ptr %ptr,
    <vscale x 4 x i64> %shl,
    <vscale x 4 x i1> %m,
    i64 %vl, i64 1)
  ret <vscale x 4 x i32> %res
}

declare void @llvm.riscv.vsoxei.nxv4i32.nxv4i64(
  <vscale x 4 x i32>,
  ptr,
  <vscale x 4 x i64>,
  i64);

define void @test_vsoxei(<vscale x 4 x i32> %val, ptr %ptr, <vscale x 4 x i8> %offset, i64 %vl) {
; CHECK-LABEL: test_vsoxei:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; CHECK-NEXT:    vzext.vf8 v12, v10
; CHECK-NEXT:    vsll.vi v12, v12, 4
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vsoxei64.v v8, (a0), v12
; CHECK-NEXT:    ret
entry:
  %offset.ext = zext <vscale x 4 x i8> %offset to <vscale x 4 x i64>
  %shl = shl <vscale x 4 x i64> %offset.ext, splat (i64 4)
  call void @llvm.riscv.vsoxei.nxv4i32.nxv4i64(
    <vscale x 4 x i32> %val,
    ptr %ptr,
    <vscale x 4 x i64> %shl,
    i64 %vl)
  ret void
}

declare void @llvm.riscv.vsoxei.mask.nxv4i32.nxv4i64(
  <vscale x 4 x i32>,
  ptr,
  <vscale x 4 x i64>,
  <vscale x 4 x i1>,
  i64);

define void @test_vsoxei_mask(<vscale x 4 x i32> %val, ptr %ptr, <vscale x 4 x i8> %offset, <vscale x 4 x i1> %m, i64 %vl) {
; CHECK-LABEL: test_vsoxei_mask:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; CHECK-NEXT:    vzext.vf8 v12, v10
; CHECK-NEXT:    vsll.vi v12, v12, 4
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; CHECK-NEXT:    ret
entry:
  %offset.ext = zext <vscale x 4 x i8> %offset to <vscale x 4 x i64>
  %shl = shl <vscale x 4 x i64> %offset.ext, splat (i64 4)
  call void @llvm.riscv.vsoxei.mask.nxv4i32.nxv4i64(
    <vscale x 4 x i32> %val,
    ptr %ptr,
    <vscale x 4 x i64> %shl,
    <vscale x 4 x i1> %m,
    i64 %vl)
  ret void
}

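; The unordered indexed stores (vsuxei) below mirror the ordered (vsoxei)
; tests above: the stored value stays in v8 while the widened index is built
; in v12 from the v10 offset operand.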
declare void @llvm.riscv.vsuxei.nxv4i32.nxv4i64(
  <vscale x 4 x i32>,
  ptr,
  <vscale x 4 x i64>,
  i64);

define void @test_vsuxei(<vscale x 4 x i32> %val, ptr %ptr, <vscale x 4 x i8> %offset, i64 %vl) {
; CHECK-LABEL: test_vsuxei:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; CHECK-NEXT:    vzext.vf8 v12, v10
; CHECK-NEXT:    vsll.vi v12, v12, 4
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vsuxei64.v v8, (a0), v12
; CHECK-NEXT:    ret
entry:
  %offset.ext = zext <vscale x 4 x i8> %offset to <vscale x 4 x i64>
  %shl = shl <vscale x 4 x i64> %offset.ext, splat (i64 4)
  call void @llvm.riscv.vsuxei.nxv4i32.nxv4i64(
    <vscale x 4 x i32> %val,
    ptr %ptr,
    <vscale x 4 x i64> %shl,
    i64 %vl)
  ret void
}

declare void @llvm.riscv.vsuxei.mask.nxv4i32.nxv4i64(
  <vscale x 4 x i32>,
  ptr,
  <vscale x 4 x i64>,
  <vscale x 4 x i1>,
  i64);

define void @test_vsuxei_mask(<vscale x 4 x i32> %val, ptr %ptr, <vscale x 4 x i8> %offset, <vscale x 4 x i1> %m, i64 %vl) {
; CHECK-LABEL: test_vsuxei_mask:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; CHECK-NEXT:    vzext.vf8 v12, v10
; CHECK-NEXT:    vsll.vi v12, v12, 4
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vsuxei64.v v8, (a0), v12, v0.t
; CHECK-NEXT:    ret
entry:
  %offset.ext = zext <vscale x 4 x i8> %offset to <vscale x 4 x i64>
  %shl = shl <vscale x 4 x i64> %offset.ext, splat (i64 4)
  call void @llvm.riscv.vsuxei.mask.nxv4i32.nxv4i64(
    <vscale x 4 x i32> %val,
    ptr %ptr,
    <vscale x 4 x i64> %shl,
    <vscale x 4 x i1> %m,
    i64 %vl)
  ret void
}