; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming < %s | FileCheck %s

;
; CADD
;

define <vscale x 16 x i8> @cadd_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cadd_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cadd z0.b, z0.b, z1.b, #90
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.cadd.x.nxv16i8(<vscale x 16 x i8> %a,
              <vscale x 16 x i8> %b,
              i32 90)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @cadd_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: cadd_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cadd z0.h, z0.h, z1.h, #90
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.cadd.x.nxv8i16(<vscale x 8 x i16> %a,
              <vscale x 8 x i16> %b,
              i32 90)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @cadd_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cadd_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cadd z0.s, z0.s, z1.s, #270
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.cadd.x.nxv4i32(<vscale x 4 x i32> %a,
              <vscale x 4 x i32> %b,
              i32 270)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @cadd_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cadd_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cadd z0.d, z0.d, z1.d, #270
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.cadd.x.nxv2i64(<vscale x 2 x i64> %a,
              <vscale x 2 x i64> %b,
              i32 270)
  ret <vscale x 2 x i64> %out
}

;
; SQCADD
;

define <vscale x 16 x i8> @sqcadd_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sqcadd_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqcadd z0.b, z0.b, z1.b, #90
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqcadd.x.nxv16i8(<vscale x 16 x i8> %a,
              <vscale x 16 x i8> %b,
              i32 90)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqcadd_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqcadd_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqcadd z0.h, z0.h, z1.h, #90
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqcadd.x.nxv8i16(<vscale x 8 x i16> %a,
              <vscale x 8 x i16> %b,
              i32 90)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqcadd_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqcadd_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqcadd z0.s, z0.s, z1.s, #270
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqcadd.x.nxv4i32(<vscale x 4 x i32> %a,
              <vscale x 4 x i32> %b,
              i32 270)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqcadd_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqcadd_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqcadd z0.d, z0.d, z1.d, #270
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqcadd.x.nxv2i64(<vscale x 2 x i64> %a,
              <vscale x 2 x i64> %b,
              i32 270)
  ret <vscale x 2 x i64> %out
}

;
; CMLA
;

define <vscale x 16 x i8> @cmla_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
; CHECK-LABEL: cmla_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmla z0.b, z1.b, z2.b, #90
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.cmla.x.nxv16i8(<vscale x 16 x i8> %a,
              <vscale x 16 x i8> %b,
              <vscale x 16 x i8> %c,
              i32 90)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @cmla_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: cmla_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmla z0.h, z1.h, z2.h, #180
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.cmla.x.nxv8i16(<vscale x 8 x i16> %a,
              <vscale x 8 x i16> %b,
              <vscale x 8 x i16> %c,
              i32 180)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @cmla_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: cmla_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmla z0.s, z1.s, z2.s, #270
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.cmla.x.nxv4i32(<vscale x 4 x i32> %a,
              <vscale x 4 x i32> %b,
              <vscale x 4 x i32> %c,
              i32 270)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @cmla_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
; CHECK-LABEL: cmla_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmla z0.d, z1.d, z2.d, #0
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.cmla.x.nxv2i64(<vscale x 2 x i64> %a,
              <vscale x 2 x i64> %b,
              <vscale x 2 x i64> %c,
              i32 0)
  ret <vscale x 2 x i64> %out
}

;
; CMLA_LANE
;

define <vscale x 8 x i16> @cmla_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: cmla_lane_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmla z0.h, z1.h, z2.h[1], #180
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.cmla.lane.x.nxv8i16(<vscale x 8 x i16> %a,
              <vscale x 8 x i16> %b,
              <vscale x 8 x i16> %c,
              i32 1,
              i32 180)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @cmla_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: cmla_lane_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmla z0.s, z1.s, z2.s[0], #270
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.cmla.lane.x.nxv4i32(<vscale x 4 x i32> %a,
              <vscale x 4 x i32> %b,
              <vscale x 4 x i32> %c,
              i32 0,
              i32 270)
  ret <vscale x 4 x i32> %out
}

;
; QRDCMLAH
;

define <vscale x 16 x i8> @sqrdcmlah_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
; CHECK-LABEL: sqrdcmlah_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdcmlah z0.b, z1.b, z2.b, #0
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrdcmlah.x.nxv16i8(<vscale x 16 x i8> %a,
              <vscale x 16 x i8> %b,
              <vscale x 16 x i8> %c,
              i32 0)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqrdcmlah_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: sqrdcmlah_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdcmlah z0.h, z1.h, z2.h, #90
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrdcmlah.x.nxv8i16(<vscale x 8 x i16> %a,
              <vscale x 8 x i16> %b,
              <vscale x 8 x i16> %c,
              i32 90)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqrdcmlah_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: sqrdcmlah_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdcmlah z0.s, z1.s, z2.s, #180
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrdcmlah.x.nxv4i32(<vscale x 4 x i32> %a,
              <vscale x 4 x i32> %b,
              <vscale x 4 x i32> %c,
              i32 180)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqrdcmlah_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
; CHECK-LABEL: sqrdcmlah_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdcmlah z0.d, z1.d, z2.d, #270
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrdcmlah.x.nxv2i64(<vscale x 2 x i64> %a,
              <vscale x 2 x i64> %b,
              <vscale x 2 x i64> %c,
              i32 270)
  ret <vscale x 2 x i64> %out
}

;
; QRDCMLAH_LANE
;

define <vscale x 8 x i16> @sqrdcmlah_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: sqrdcmlah_lane_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdcmlah z0.h, z1.h, z2.h[1], #90
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrdcmlah.lane.x.nxv8i16(<vscale x 8 x i16> %a,
              <vscale x 8 x i16> %b,
              <vscale x 8 x i16> %c,
              i32 1,
              i32 90)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqrdcmlah_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: sqrdcmlah_lane_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdcmlah z0.s, z1.s, z2.s[0], #180
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrdcmlah.lane.x.nxv4i32(<vscale x 4 x i32> %a,
              <vscale x 4 x i32> %b,
              <vscale x 4 x i32> %c,
              i32 0,
              i32 180)
  ret <vscale x 4 x i32> %out
}

declare <vscale x 16 x i8> @llvm.aarch64.sve.cadd.x.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.cadd.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.cadd.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.cadd.x.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sqcadd.x.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqcadd.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqcadd.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqcadd.x.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)

declare <vscale x 16 x i8> @llvm.aarch64.sve.cmla.x.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.cmla.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.cmla.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.cmla.x.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, i32)

declare <vscale x 8 x i16> @llvm.aarch64.sve.cmla.lane.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.cmla.lane.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, i32, i32)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sqrdcmlah.x.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrdcmlah.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrdcmlah.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqrdcmlah.x.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, i32)

declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrdcmlah.lane.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrdcmlah.lane.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, i32, i32)