1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
3
;
; STNT1B, STNT1H, STNT1W: base + 32-bit unscaled offset, zero (uxtw)
; extended to 64 bits.
;   e.g. stnt1h { z0.s }, p0, [z1.s, x0]
;
9
; STNT1B: 32-bit lanes are narrowed to bytes, then scattered with the uxtw
; non-temporal store intrinsic. Expected codegen is a single stnt1b.
define void @sstnt1b_s_uxtw(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, ptr %base, <vscale x 4 x i32> %offsets) {
; CHECK-LABEL: sstnt1b_s_uxtw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stnt1b { z0.s }, p0, [z1.s, x0]
; CHECK-NEXT:    ret
  ; Narrow each lane to i8 so the intrinsic is invoked with byte-sized data.
  %bytes = trunc <vscale x 4 x i32> %data to <vscale x 4 x i8>
  call void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4i8(<vscale x 4 x i8> %bytes, <vscale x 4 x i1> %pg, ptr %base, <vscale x 4 x i32> %offsets)
  ret void
}
23
; STNT1H: 32-bit lanes are narrowed to halfwords, then scattered with the uxtw
; non-temporal store intrinsic. Expected codegen is a single stnt1h.
define void @sstnt1h_s_uxtw(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, ptr %base, <vscale x 4 x i32> %offsets) {
; CHECK-LABEL: sstnt1h_s_uxtw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stnt1h { z0.s }, p0, [z1.s, x0]
; CHECK-NEXT:    ret
  ; Narrow each lane to i16 so the intrinsic is invoked with halfword data.
  %halves = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16>
  call void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4i16(<vscale x 4 x i16> %halves, <vscale x 4 x i1> %pg, ptr %base, <vscale x 4 x i32> %offsets)
  ret void
}
37
; STNT1W: 32-bit integer lanes are passed to the uxtw non-temporal scatter
; intrinsic unchanged (no truncation). Expected codegen is a single stnt1w.
define void @sstnt1w_s_uxtw(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, ptr %base, <vscale x 4 x i32> %offsets) {
; CHECK-LABEL: sstnt1w_s_uxtw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stnt1w { z0.s }, p0, [z1.s, x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, ptr %base, <vscale x 4 x i32> %offsets)
  ret void
}
50
; STNT1W (float): same store as the integer word case, but the data operand is
; <vscale x 4 x float>. Expected codegen is the same single stnt1w.
define void @sstnt1w_s_uxtw_float(<vscale x 4 x float> %data, <vscale x 4 x i1> %pg, ptr %base, <vscale x 4 x i32> %offsets) {
; CHECK-LABEL: sstnt1w_s_uxtw_float:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stnt1w { z0.s }, p0, [z1.s, x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4f32(<vscale x 4 x float> %data, <vscale x 4 x i1> %pg, ptr %base, <vscale x 4 x i32> %offsets)
  ret void
}
62
; Intrinsic declarations, grouped by element size. Within each group the uxtw
; forms come first, then the sxtw forms; nxv4 precedes nxv2.
; The test functions visible in this file call only the uxtw nxv4 variants.

; STNT1B
declare void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i1>, ptr, <vscale x 4 x i32>)
declare void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i1>, ptr, <vscale x 2 x i32>)
declare void @llvm.aarch64.sve.stnt1.scatter.sxtw.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i1>, ptr, <vscale x 4 x i32>)
declare void @llvm.aarch64.sve.stnt1.scatter.sxtw.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i1>, ptr, <vscale x 2 x i32>)

; STNT1H
declare void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i1>, ptr, <vscale x 4 x i32>)
declare void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, ptr, <vscale x 2 x i32>)
declare void @llvm.aarch64.sve.stnt1.scatter.sxtw.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i1>, ptr, <vscale x 4 x i32>)
declare void @llvm.aarch64.sve.stnt1.scatter.sxtw.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, ptr, <vscale x 2 x i32>)

; STNT1W (integer data)
declare void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, ptr, <vscale x 4 x i32>)
declare void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, ptr, <vscale x 2 x i32>)
declare void @llvm.aarch64.sve.stnt1.scatter.sxtw.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, ptr, <vscale x 4 x i32>)
declare void @llvm.aarch64.sve.stnt1.scatter.sxtw.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, ptr, <vscale x 2 x i32>)

; STNT1W (float data)
declare void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, ptr, <vscale x 4 x i32>)
declare void @llvm.aarch64.sve.stnt1.scatter.sxtw.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, ptr, <vscale x 4 x i32>)
83