; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s

;
; SABA
;
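; SABA accumulates the absolute difference of its last two operands into the
; first, i.e. roughly z0 += |z1 - z2| per element; the operation is
; unpredicated, so no governing predicate is passed.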

define <vscale x 16 x i8> @saba_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
; CHECK-LABEL: saba_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    saba z0.b, z1.b, z2.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.saba.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @saba_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: saba_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    saba z0.h, z1.h, z2.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.saba.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @saba_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: saba_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    saba z0.s, z1.s, z2.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.saba.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @saba_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
; CHECK-LABEL: saba_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    saba z0.d, z1.d, z2.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.saba.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
  ret <vscale x 2 x i64> %out
}

;
; SHADD
;
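; SHADD is a predicated halving add: each active lane becomes (a + b) >> 1,
; with the intermediate sum computed at full precision so the carry is kept.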

define <vscale x 16 x i8> @shadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: shadd_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shadd z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.shadd.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @shadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: shadd_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shadd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.shadd.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @shadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: shadd_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shadd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.shadd.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @shadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: shadd_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shadd z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.shadd.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; SHSUB
;

define <vscale x 16 x i8> @shsub_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: shsub_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shsub z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.shsub.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @shsub_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: shsub_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shsub z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.shsub.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @shsub_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: shsub_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shsub z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.shsub.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @shsub_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: shsub_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shsub z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.shsub.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; SHSUBR
;

define <vscale x 16 x i8> @shsubr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: shsubr_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shsubr z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.shsubr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @shsubr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: shsubr_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shsubr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.shsubr.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @shsubr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: shsubr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shsubr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.shsubr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @shsubr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: shsubr_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shsubr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.shsubr.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; SLI
;
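; SLI shifts each element of the second operand left by the immediate and
; inserts it into the destination, preserving the destination's low bits.
; The immediates below probe the legal range, 0 to element size - 1.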

define <vscale x 16 x i8> @sli_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sli_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sli z0.b, z1.b, #0
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sli.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 0)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sli_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sli_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sli z0.h, z1.h, #1
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sli.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, i32 1)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sli_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sli_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sli z0.s, z1.s, #30
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sli.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 30)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sli_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sli_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sli z0.d, z1.d, #63
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sli.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, i32 63)
  ret <vscale x 2 x i64> %out
}

;
; SQABS
;
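; SQABS is a merging, predicated saturating absolute value: the most negative
; value saturates to the most positive instead of wrapping. The first IR
; operand supplies the inactive (passthru) lanes.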

define <vscale x 16 x i8> @sqabs_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sqabs_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqabs z0.b, p0/m, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqabs_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqabs_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqabs z0.h, p0/m, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqabs_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqabs_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqabs z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqabs.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqabs_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqabs_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqabs z0.d, p0/m, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqabs.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; SQADD
;

define <vscale x 16 x i8> @sqadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sqadd_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqadd z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqadd.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqadd_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqadd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqadd.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqadd_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqadd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqadd_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqadd z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqadd.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; SQDMULH (Vector)
;
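; SQDMULH returns the high half of the saturating doubled product, roughly
; sat(2 * a * b) >> element size; it is unpredicated.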

define <vscale x 16 x i8> @sqdmulh_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sqdmulh_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqdmulh z0.b, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqdmulh.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqdmulh_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqdmulh_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqdmulh z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqdmulh.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqdmulh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqdmulh_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqdmulh z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqdmulh.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqdmulh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqdmulh_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqdmulh z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqdmulh.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; SQDMULH (Indexed)
;

define <vscale x 8 x i16> @sqdmulh_lane_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqdmulh_lane_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqdmulh z0.h, z0.h, z1.h[7]
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqdmulh.lane.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, i32 7)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqdmulh_lane_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqdmulh_lane_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqdmulh z0.s, z0.s, z1.s[3]
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqdmulh.lane.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 3)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqdmulh_lane_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqdmulh_lane_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqdmulh z0.d, z0.d, z1.d[1]
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqdmulh.lane.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, i32 1)
  ret <vscale x 2 x i64> %out
}

;
; SQNEG
;

define <vscale x 16 x i8> @sqneg_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sqneg_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqneg z0.b, p0/m, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqneg.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqneg_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqneg_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqneg z0.h, p0/m, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqneg.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqneg_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqneg_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqneg z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqneg.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqneg_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqneg_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqneg z0.d, p0/m, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqneg.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; SQRDMLAH (Vectors)
;
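; SQRDMLAH accumulates the rounded high half of the doubled product into its
; first operand, saturating the final result.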

define <vscale x 16 x i8> @sqrdmlah_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
; CHECK-LABEL: sqrdmlah_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmlah z0.b, z1.b, z2.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrdmlah.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqrdmlah_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: sqrdmlah_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmlah z0.h, z1.h, z2.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrdmlah.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqrdmlah_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: sqrdmlah_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmlah z0.s, z1.s, z2.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmlah.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqrdmlah_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
; CHECK-LABEL: sqrdmlah_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmlah z0.d, z1.d, z2.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmlah.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
  ret <vscale x 2 x i64> %out
}

;
; SQRDMLAH (Indexed)
;

define <vscale x 8 x i16> @sqrdmlah_lane_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: sqrdmlah_lane_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmlah z0.h, z1.h, z2.h[5]
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrdmlah.lane.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, i32 5)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqrdmlah_lane_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: sqrdmlah_lane_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmlah z0.s, z1.s, z2.s[1]
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmlah.lane.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, i32 1)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqrdmlah_lane_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
; CHECK-LABEL: sqrdmlah_lane_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmlah z0.d, z1.d, z2.d[1]
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmlah.lane.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, i32 1)
  ret <vscale x 2 x i64> %out
}

;
; SQRDMLSH (Vectors)
;

define <vscale x 16 x i8> @sqrdmlsh_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
; CHECK-LABEL: sqrdmlsh_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmlsh z0.b, z1.b, z2.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrdmlsh.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqrdmlsh_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: sqrdmlsh_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmlsh z0.h, z1.h, z2.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrdmlsh.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqrdmlsh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: sqrdmlsh_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmlsh z0.s, z1.s, z2.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmlsh.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqrdmlsh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
; CHECK-LABEL: sqrdmlsh_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmlsh z0.d, z1.d, z2.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmlsh.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
  ret <vscale x 2 x i64> %out
}

;
; SQRDMLSH (Indexed)
;

define <vscale x 8 x i16> @sqrdmlsh_lane_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: sqrdmlsh_lane_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmlsh z0.h, z1.h, z2.h[4]
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrdmlsh.lane.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, i32 4)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqrdmlsh_lane_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: sqrdmlsh_lane_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmlsh z0.s, z1.s, z2.s[0]
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmlsh.lane.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, i32 0)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqrdmlsh_lane_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
; CHECK-LABEL: sqrdmlsh_lane_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmlsh z0.d, z1.d, z2.d[1]
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmlsh.lane.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, i32 1)
  ret <vscale x 2 x i64> %out
}

;
; SQRDMULH (Vectors)
;
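; SQRDMULH is the rounding variant of SQDMULH above: a rounding constant is
; added to the doubled product before the high half is extracted.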

define <vscale x 16 x i8> @sqrdmulh_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sqrdmulh_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmulh z0.b, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrdmulh.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqrdmulh_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqrdmulh_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmulh z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrdmulh.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqrdmulh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqrdmulh_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmulh z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmulh.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqrdmulh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqrdmulh_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmulh z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmulh.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; SQRDMULH (Indexed)
;

define <vscale x 8 x i16> @sqrdmulh_lane_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqrdmulh_lane_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmulh z0.h, z0.h, z1.h[6]
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrdmulh.lane.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, i32 6)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqrdmulh_lane_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqrdmulh_lane_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmulh z0.s, z0.s, z1.s[2]
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmulh.lane.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 2)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqrdmulh_lane_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqrdmulh_lane_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmulh z0.d, z0.d, z1.d[1]
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmulh.lane.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, i32 1)
  ret <vscale x 2 x i64> %out
}

;
; SQRSHL
;
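; SQRSHL is a predicated saturating rounding shift left by a vector of signed
; amounts; negative amounts shift right, with rounding.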

define <vscale x 16 x i8> @sqrshl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sqrshl_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrshl z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshl.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqrshl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqrshl_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrshl z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshl.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqrshl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqrshl_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrshl z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshl.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqrshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqrshl_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrshl z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrshl.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; SQRSHLR
;
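; There is no separate intrinsic for the reversed form: when the operands of
; sve.sqrshl are swapped and the predicate is an all-active ptrue, isel can
; select SQRSHLR so the result stays in z0. The _noptrue test below checks
; that an arbitrary predicate defeats this and falls back to sqrshl plus a
; register move.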

define <vscale x 16 x i8> @sqrshlr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sqrshlr_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    sqrshlr z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshl.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %b, <vscale x 16 x i8> %a)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqrshlr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqrshlr_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    sqrshlr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshl.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %b, <vscale x 8 x i16> %a)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqrshlr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqrshlr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    sqrshlr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshl.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %b, <vscale x 4 x i32> %a)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqrshlr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqrshlr_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    sqrshlr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrshl.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %b, <vscale x 2 x i64> %a)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @sqrshlr_i64_noptrue(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqrshlr_i64_noptrue:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrshl z1.d, p0/m, z1.d, z0.d
; CHECK-NEXT:    mov z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrshl.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %b, <vscale x 2 x i64> %a)
  ret <vscale x 2 x i64> %out
}

;
; SQSHL (Vectors)
;

define <vscale x 16 x i8> @sqshl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sqshl_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqshl z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshl.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqshl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqshl_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqshl z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshl.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqshl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqshl_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqshl z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqshl_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqshl z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqshl.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; SQSHLR
;

define <vscale x 16 x i8> @sqshlr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sqshlr_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    sqshlr z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshl.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %b, <vscale x 16 x i8> %a)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqshlr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqshlr_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    sqshlr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshl.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %b, <vscale x 8 x i16> %a)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqshlr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqshlr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    sqshlr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %b, <vscale x 4 x i32> %a)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqshlr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqshlr_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    sqshlr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqshl.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %b, <vscale x 2 x i64> %a)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @sqshlr_i64_noptrue(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqshlr_i64_noptrue:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqshl z1.d, p0/m, z1.d, z0.d
; CHECK-NEXT:    mov z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqshl.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %b, <vscale x 2 x i64> %a)
  ret <vscale x 2 x i64> %out
}

;
; SQSHL (Scalar)
;
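; A shift amount that is a constant splat inside the legal immediate range
; (0 to element size - 1) folds to the immediate form of sqshl.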

define <vscale x 16 x i8> @sqshl_n_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: sqshl_n_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqshl z0.b, p0/m, z0.b, #7
; CHECK-NEXT:    ret
  %dup = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 7)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshl.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %dup)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqshl_n_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
; CHECK-LABEL: sqshl_n_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqshl z0.h, p0/m, z0.h, #15
; CHECK-NEXT:    ret
  %dup = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 15)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshl.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %dup)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqshl_n_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: sqshl_n_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqshl z0.s, p0/m, z0.s, #31
; CHECK-NEXT:    ret
  %dup = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %dup)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqshl_n_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: sqshl_n_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqshl z0.d, p0/m, z0.d, #63
; CHECK-NEXT:    ret
  %dup = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 63)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqshl.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %dup)
  ret <vscale x 2 x i64> %out
}

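; Amounts just past that range (8 for .b up to 64 for .d) cannot use the
; immediate encoding, so the splat is materialised and the vector form kept.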

define <vscale x 16 x i8> @sqshl_n_i8_range(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: sqshl_n_i8_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.b, #8 // =0x8
; CHECK-NEXT:    sqshl z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %dup = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 8)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshl.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %dup)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqshl_n_i16_range(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
; CHECK-LABEL: sqshl_n_i16_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.h, #16 // =0x10
; CHECK-NEXT:    sqshl z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %dup = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 16)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshl.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %dup)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqshl_n_i32_range(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: sqshl_n_i32_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.s, #32 // =0x20
; CHECK-NEXT:    sqshl z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %dup = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 32)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %dup)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqshl_n_i64_range(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: sqshl_n_i64_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.d, #64 // =0x40
; CHECK-NEXT:    sqshl z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %dup = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 64)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqshl.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %dup)
  ret <vscale x 2 x i64> %out
}

;
; SQSHLU
;
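; SQSHLU shifts signed elements left by an immediate and saturates the result
; to the corresponding unsigned range.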

define <vscale x 16 x i8> @sqshlu_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: sqshlu_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqshlu z0.b, p0/m, z0.b, #2
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshlu.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, i32 2)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqshlu_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
; CHECK-LABEL: sqshlu_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqshlu z0.h, p0/m, z0.h, #3
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshlu.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, i32 3)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqshlu_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: sqshlu_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqshlu z0.s, p0/m, z0.s, #29
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshlu.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, i32 29)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqshlu_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: sqshlu_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqshlu z0.d, p0/m, z0.d, #62
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqshlu.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, i32 62)
  ret <vscale x 2 x i64> %out
}

;
; SQSUB
;

define <vscale x 16 x i8> @sqsub_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sqsub_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqsub_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqsub_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqsub_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqsub_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqsub_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqsub_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; SQSUBR
;

define <vscale x 16 x i8> @sqsubr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sqsubr_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsubr z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqsubr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqsubr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqsubr_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsubr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqsubr.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqsubr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqsubr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsubr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqsubr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqsubr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqsubr_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsubr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqsubr.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; SRHADD
;
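; SRHADD rounds the halved sum upwards: each active lane becomes
; (a + b + 1) >> 1.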

define <vscale x 16 x i8> @srhadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: srhadd_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    srhadd z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.srhadd.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @srhadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: srhadd_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    srhadd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.srhadd.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @srhadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: srhadd_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    srhadd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.srhadd.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @srhadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: srhadd_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    srhadd z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.srhadd.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; SRI
;
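; SRI shifts each element of the second operand right by the immediate and
; inserts it beneath the destination's high bits; the legal immediates run
; from 1 up to the element size, and the tests cover both bounds.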

define <vscale x 16 x i8> @sri_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sri_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sri z0.b, z1.b, #1
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sri.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 1)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sri_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sri_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sri z0.h, z1.h, #16
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sri.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, i32 16)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sri_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sri_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sri z0.s, z1.s, #32
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sri.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 32)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sri_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sri_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sri z0.d, z1.d, #64
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sri.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, i32 64)
  ret <vscale x 2 x i64> %out
}

;
; SRSHL
;

define <vscale x 16 x i8> @srshl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: srshl_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    srshl z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.srshl.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @srshl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: srshl_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    srshl z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @srshl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: srshl_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    srshl z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.srshl.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @srshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: srshl_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    srshl z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.srshl.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; SRSHLR
;

define <vscale x 16 x i8> @srshlr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: srshlr_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    srshlr z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.srshl.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %b, <vscale x 16 x i8> %a)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @srshlr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: srshlr_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    srshlr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %b, <vscale x 8 x i16> %a)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @srshlr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: srshlr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    srshlr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.srshl.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %b, <vscale x 4 x i32> %a)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @srshlr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: srshlr_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    srshlr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.srshl.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %b, <vscale x 2 x i64> %a)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @srshlr_i64_noptrue(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: srshlr_i64_noptrue:
; CHECK:       // %bb.0:
; CHECK-NEXT:    srshl z1.d, p0/m, z1.d, z0.d
; CHECK-NEXT:    mov z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.srshl.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %b, <vscale x 2 x i64> %a)
  ret <vscale x 2 x i64> %out
}

;
; SRSHR
;
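; SRSHR is a predicated rounding arithmetic shift right by an immediate; the
; rounding increment is added before the shift.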

define <vscale x 16 x i8> @srshr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: srshr_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    srshr z0.b, p0/m, z0.b, #8
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.srshr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, i32 8)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @srshr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
; CHECK-LABEL: srshr_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    srshr z0.h, p0/m, z0.h, #1
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.srshr.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, i32 1)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @srshr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: srshr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    srshr z0.s, p0/m, z0.s, #22
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.srshr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, i32 22)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @srshr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: srshr_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    srshr z0.d, p0/m, z0.d, #54
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.srshr.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, i32 54)
  ret <vscale x 2 x i64> %out
}

;
; SRSRA
;
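; SRSRA accumulates a rounded arithmetic right shift, z0 += round(z1 >> imm);
; the SSRA tests that follow exercise the same operation without rounding.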

define <vscale x 16 x i8> @srsra_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: srsra_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    srsra z0.b, z1.b, #2
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.srsra.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 2)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @srsra_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: srsra_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    srsra z0.h, z1.h, #15
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.srsra.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, i32 15)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @srsra_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: srsra_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    srsra z0.s, z1.s, #12
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.srsra.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 12)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @srsra_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: srsra_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    srsra z0.d, z1.d, #44
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.srsra.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, i32 44)
  ret <vscale x 2 x i64> %out
}

;
; SSRA
;

define <vscale x 16 x i8> @ssra_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: ssra_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ssra z0.b, z1.b, #3
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.ssra.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 3)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @ssra_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: ssra_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ssra z0.h, z1.h, #14
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ssra.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, i32 14)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @ssra_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: ssra_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ssra z0.s, z1.s, #2
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ssra.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 2)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @ssra_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: ssra_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ssra z0.d, z1.d, #34
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ssra.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, i32 34)
  ret <vscale x 2 x i64> %out
}

;
; SUQADD
;
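; SUQADD adds an unsigned vector to a signed accumulator, saturating the
; result in the signed range.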

define <vscale x 16 x i8> @uhsub_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: uhsub_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uhsub z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uhsub.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @uhsub_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uhsub_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uhsub z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uhsub.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uhsub_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uhsub_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uhsub z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uhsub_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: uhsub_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uhsub z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uhsub.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; UHSUBR
;

define <vscale x 16 x i8> @uhsubr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: uhsubr_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uhsubr z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uhsubr.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @uhsubr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uhsubr_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uhsubr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uhsubr.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uhsubr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uhsubr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uhsubr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uhsubr.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uhsubr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: uhsubr_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uhsubr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uhsubr.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; UQADD
;
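; Predicated UQADD: unsigned saturating add; lanes clamp to the maximum
; unsigned value instead of wrapping.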

define <vscale x 16 x i8> @uqadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: uqadd_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqadd z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqadd.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @uqadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uqadd_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqadd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqadd.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uqadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uqadd_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqadd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqadd.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uqadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: uqadd_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqadd z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqadd.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; UQRSHL
;

define <vscale x 16 x i8> @uqrshl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: uqrshl_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqrshl z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqrshl.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @uqrshl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uqrshl_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqrshl z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqrshl.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uqrshl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uqrshl_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqrshl z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqrshl.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uqrshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: uqrshl_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqrshl z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqrshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; UQRSHLR
;
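; There is no separate intrinsic for the reversed form: calling uqrshl with
; its operands swapped behind an all-true predicate should select UQRSHLR so
; the result remains in z0, as the tests below check.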

define <vscale x 16 x i8> @uqrshlr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: uqrshlr_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    uqrshlr z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqrshl.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %b,
                                                                  <vscale x 16 x i8> %a)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @uqrshlr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uqrshlr_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    uqrshlr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqrshl.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %b,
                                                                  <vscale x 8 x i16> %a)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uqrshlr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uqrshlr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    uqrshlr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqrshl.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %b,
                                                                  <vscale x 4 x i32> %a)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uqrshlr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: uqrshlr_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    uqrshlr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqrshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %b,
                                                                  <vscale x 2 x i64> %a)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @uqrshlr_i64_noptrue(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: uqrshlr_i64_noptrue:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqrshl z1.d, p0/m, z1.d, z0.d
; CHECK-NEXT:    mov z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqrshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %b,
                                                                  <vscale x 2 x i64> %a)
  ret <vscale x 2 x i64> %out
}

;
; UQSHL (Vectors)
;

define <vscale x 16 x i8> @uqshl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: uqshl_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqshl z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqshl.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @uqshl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uqshl_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqshl z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqshl.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uqshl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uqshl_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqshl z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uqshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: uqshl_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqshl z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; UQSHLR
;
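; Same folding pattern as UQRSHLR: swapped uqshl operands behind a ptrue
; select UQSHLR; the noptrue test checks that an arbitrary predicate keeps
; the original operand order and needs an extra mov.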

define <vscale x 16 x i8> @uqshlr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: uqshlr_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    uqshlr z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqshl.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %b,
                                                                 <vscale x 16 x i8> %a)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @uqshlr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uqshlr_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    uqshlr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqshl.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %b,
                                                                 <vscale x 8 x i16> %a)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uqshlr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uqshlr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    uqshlr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %b,
                                                                 <vscale x 4 x i32> %a)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uqshlr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: uqshlr_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    uqshlr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b,
                                                                 <vscale x 2 x i64> %a)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @uqshlr_i64_noptrue(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: uqshlr_i64_noptrue:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqshl z1.d, p0/m, z1.d, z0.d
; CHECK-NEXT:    mov z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b,
                                                                 <vscale x 2 x i64> %a)
  ret <vscale x 2 x i64> %out
}

;
; UQSHL (Scalar)
;
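; When the shift amount is a splat constant within [0, bits-1] it folds into
; the immediate form of UQSHL; the _range tests use the first out-of-range
; amount (the element width) and must keep the vector form with a mov.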

define <vscale x 16 x i8> @uqshl_n_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: uqshl_n_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqshl z0.b, p0/m, z0.b, #7
; CHECK-NEXT:    ret
  %dup = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 7)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqshl.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %dup)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @uqshl_n_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
; CHECK-LABEL: uqshl_n_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqshl z0.h, p0/m, z0.h, #15
; CHECK-NEXT:    ret
  %dup = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 15)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqshl.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %dup)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uqshl_n_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: uqshl_n_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqshl z0.s, p0/m, z0.s, #31
; CHECK-NEXT:    ret
  %dup = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %dup)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uqshl_n_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: uqshl_n_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqshl z0.d, p0/m, z0.d, #63
; CHECK-NEXT:    ret
  %dup = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 63)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %dup)
  ret <vscale x 2 x i64> %out
}

define <vscale x 16 x i8> @uqshl_n_i8_range(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: uqshl_n_i8_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.b, #8 // =0x8
; CHECK-NEXT:    uqshl z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %dup = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 8)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqshl.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %dup)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @uqshl_n_i16_range(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
; CHECK-LABEL: uqshl_n_i16_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.h, #16 // =0x10
; CHECK-NEXT:    uqshl z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %dup = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 16)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqshl.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %dup)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uqshl_n_i32_range(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: uqshl_n_i32_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.s, #32 // =0x20
; CHECK-NEXT:    uqshl z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %dup = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 32)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %dup)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uqshl_n_i64_range(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: uqshl_n_i64_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.d, #64 // =0x40
; CHECK-NEXT:    uqshl z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %dup = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 64)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %dup)
  ret <vscale x 2 x i64> %out
}

;
; UQSUB
;
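; Predicated UQSUB: unsigned saturating subtract; lanes clamp at zero.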

define <vscale x 16 x i8> @uqsub_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: uqsub_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @uqsub_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uqsub_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uqsub_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uqsub_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uqsub_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: uqsub_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; UQSUBR
;

define <vscale x 16 x i8> @uqsubr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: uqsubr_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsubr z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqsubr.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @uqsubr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uqsubr_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsubr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqsubr.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uqsubr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uqsubr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsubr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqsubr.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uqsubr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: uqsubr_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsubr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqsubr.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; URECPE
;

define <vscale x 4 x i32> @urecpe_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
; CHECK-LABEL: urecpe_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urecpe z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.urecpe.nxv4i32(<vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

;
; URHADD
;
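; URHADD is a rounding halving add: each lane becomes (a + b + 1) >> 1,
; computed one bit wider so the sum cannot wrap.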

define <vscale x 16 x i8> @urhadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: urhadd_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urhadd z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.urhadd.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @urhadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: urhadd_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urhadd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.urhadd.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @urhadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: urhadd_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urhadd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.urhadd.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @urhadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: urhadd_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urhadd z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.urhadd.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; URSHL
;

define <vscale x 16 x i8> @urshl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: urshl_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urshl z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.urshl.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @urshl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: urshl_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urshl z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.urshl.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @urshl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: urshl_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urshl z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.urshl.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @urshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: urshl_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urshl z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.urshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; URSHLR
;
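; Reversed form of URSHL, selected by the same swapped-operands-behind-ptrue
; pattern used for UQRSHLR above.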

define <vscale x 16 x i8> @urshlr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: urshlr_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    urshlr z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.urshl.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %b,
                                                                 <vscale x 16 x i8> %a)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @urshlr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: urshlr_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    urshlr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.urshl.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %b,
                                                                 <vscale x 8 x i16> %a)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @urshlr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: urshlr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    urshlr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.urshl.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %b,
                                                                 <vscale x 4 x i32> %a)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @urshlr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: urshlr_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    urshlr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.urshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b,
                                                                 <vscale x 2 x i64> %a)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @urshlr_i64_noptrue(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: urshlr_i64_noptrue:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urshl z1.d, p0/m, z1.d, z0.d
; CHECK-NEXT:    mov z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.urshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b,
                                                                 <vscale x 2 x i64> %a)
  ret <vscale x 2 x i64> %out
}

;
; URSHR
;
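; URSHR is a rounding shift right by immediate: a rounding constant of
; 1 << (imm - 1) is added before the shift.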

define <vscale x 16 x i8> @urshr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: urshr_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urshr z0.b, p0/m, z0.b, #4
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.urshr.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %a,
                                                                 i32 4)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @urshr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
; CHECK-LABEL: urshr_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urshr z0.h, p0/m, z0.h, #13
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.urshr.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 i32 13)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @urshr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: urshr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urshr z0.s, p0/m, z0.s, #1
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.urshr.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 i32 1)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @urshr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: urshr_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urshr z0.d, p0/m, z0.d, #24
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.urshr.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 i32 24)
  ret <vscale x 2 x i64> %out
}

;
; URSQRTE
;

define <vscale x 4 x i32> @ursqrte_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
; CHECK-LABEL: ursqrte_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ursqrte z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ursqrte.nxv4i32(<vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i1> %pg,
                                                                   <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

;
; URSRA
;

define <vscale x 16 x i8> @ursra_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: ursra_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ursra z0.b, z1.b, #5
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.ursra.nxv16i8(<vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b,
                                                                 i32 5)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @ursra_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: ursra_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ursra z0.h, z1.h, #12
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ursra.nxv8i16(<vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b,
                                                                 i32 12)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @ursra_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: ursra_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ursra z0.s, z1.s, #31
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ursra.nxv4i32(<vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b,
                                                                 i32 31)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @ursra_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: ursra_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ursra z0.d, z1.d, #14
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ursra.nxv2i64(<vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b,
                                                                 i32 14)
  ret <vscale x 2 x i64> %out
}

;
; USQADD
;
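; USQADD is the mirror image of SUQADD: the signed addend z1 is added to the
; unsigned accumulator z0 and the result saturates to the unsigned range.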

define <vscale x 16 x i8> @usqadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: usqadd_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    usqadd z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.usqadd.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @usqadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: usqadd_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    usqadd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.usqadd.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @usqadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: usqadd_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    usqadd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.usqadd.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @usqadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: usqadd_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    usqadd z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.usqadd.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; USRA
;

define <vscale x 16 x i8> @usra_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: usra_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    usra z0.b, z1.b, #6
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.usra.nxv16i8(<vscale x 16 x i8> %a,
                                                                <vscale x 16 x i8> %b,
                                                                i32 6)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @usra_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: usra_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    usra z0.h, z1.h, #11
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.usra.nxv8i16(<vscale x 8 x i16> %a,
                                                                <vscale x 8 x i16> %b,
                                                                i32 11)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @usra_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: usra_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    usra z0.s, z1.s, #21
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.usra.nxv4i32(<vscale x 4 x i32> %a,
                                                                <vscale x 4 x i32> %b,
                                                                i32 21)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @usra_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: usra_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    usra z0.d, z1.d, #4
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.usra.nxv2i64(<vscale x 2 x i64> %a,
                                                                <vscale x 2 x i64> %b,
                                                                i32 4)
  ret <vscale x 2 x i64> %out
}
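
; Note the operand order below: the unary sqabs/sqneg/urecpe/ursqrte
; intrinsics take the passthru vector first and the predicate second,
; whereas the binary predicated intrinsics take the predicate first.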

declare <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8)
declare <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16)
declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64)

declare <vscale x 16 x i8> @llvm.aarch64.sve.saba.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.saba.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.saba.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.saba.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.shadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.shadd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.shadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.shadd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.shsub.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.shsub.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.shsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.shsub.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.shsubr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.shsubr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.shsubr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.shsubr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sli.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sli.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sli.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sli.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqabs.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqabs.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sqadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqadd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqadd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sqdmulh.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqdmulh.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqdmulh.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqdmulh.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.sqdmulh.lane.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqdmulh.lane.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqdmulh.lane.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sqneg.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqneg.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqneg.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqneg.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sqrdmlah.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrdmlah.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmlah.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmlah.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrdmlah.lane.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmlah.lane.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmlah.lane.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, i32)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sqrdmlsh.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrdmlsh.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmlsh.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmlsh.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrdmlsh.lane.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmlsh.lane.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmlsh.lane.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, i32)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sqrdmulh.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrdmulh.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmulh.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmulh.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrdmulh.lane.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmulh.lane.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmulh.lane.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sqrshl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrshl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrshl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqrshl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sqshl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqshl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqshl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sqshlu.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqshlu.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqshlu.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqshlu.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i32)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sqsubr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqsubr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqsubr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqsubr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.srhadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.srhadd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.srhadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.srhadd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sri.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sri.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sri.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sri.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)

declare <vscale x 16 x i8> @llvm.aarch64.sve.srshl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.srshl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.srshl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.srshr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.srshr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.srshr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.srshr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i32)

declare <vscale x 16 x i8> @llvm.aarch64.sve.srsra.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.srsra.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.srsra.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.srsra.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)

declare <vscale x 16 x i8> @llvm.aarch64.sve.ssra.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.ssra.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ssra.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ssra.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)

declare <vscale x 16 x i8> @llvm.aarch64.sve.suqadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.suqadd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.suqadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.suqadd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.uaba.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uaba.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uaba.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uaba.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.uhadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uhadd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uhadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uhadd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.uhsub.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uhsub.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uhsub.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.uhsubr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uhsubr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uhsubr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uhsubr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.uqadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uqadd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uqadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uqadd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.uqrshl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uqrshl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uqrshl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uqrshl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.uqshl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uqshl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uqshl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.uqsubr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uqsubr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uqsubr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uqsubr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.urecpe.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.urhadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.urhadd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.urhadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.urhadd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.urshl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.urshl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.urshl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.urshl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.urshr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.urshr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.urshr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.urshr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i32)

declare <vscale x 4 x i32> @llvm.aarch64.sve.ursqrte.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.ursra.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.ursra.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ursra.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ursra.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)

declare <vscale x 16 x i8> @llvm.aarch64.sve.usqadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.usqadd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.usqadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.usqadd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.usra.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.usra.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.usra.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.usra.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)

declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)