; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s

;
; ADD
;

define <vscale x 16 x i8> @add_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: add_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add z0.b, z0.b, z1.b
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.add.u.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @add_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: add_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.add.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @add_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: add_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.add.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @add_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: add_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.add.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; SUB
;

define <vscale x 16 x i8> @sub_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sub_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub z0.b, z0.b, z1.b
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sub.u.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sub_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sub_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sub.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sub_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sub_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sub_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sub_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sub.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

; As sub_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @sub_i32_ptrue_all_b(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sub_i32_ptrue_all_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As sub_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @sub_i32_ptrue_all_h(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sub_i32_ptrue_all_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As sub_i32 but where pg is i64 based, which is not compatible for i32 and
; thus inactive lanes are important and the unpredicated form cannot be used.
define <vscale x 4 x i32> @sub_i32_ptrue_all_d(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sub_i32_ptrue_all_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    sub z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

;
; MUL
;

define <vscale x 16 x i8> @mul_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: mul_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul z0.b, z0.b, z1.b
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.mul.u.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @mul_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: mul_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.mul.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @mul_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: mul_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @mul_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: mul_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.mul.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; SMULH
;

define <vscale x 16 x i8> @smulh_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: smulh_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smulh z0.b, z0.b, z1.b
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.smulh.u.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @smulh_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: smulh_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smulh z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smulh.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @smulh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: smulh_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smulh z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smulh.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @smulh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: smulh_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smulh z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smulh.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; UMULH
;

define <vscale x 16 x i8> @umulh_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: umulh_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umulh z0.b, z0.b, z1.b
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.umulh.u.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @umulh_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: umulh_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umulh z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umulh.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @umulh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: umulh_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umulh z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @umulh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: umulh_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umulh z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umulh.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

; As umulh_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @umulh_i32_ptrue_all_b(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: umulh_i32_ptrue_all_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umulh z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.u.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As umulh_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @umulh_i32_ptrue_all_h(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: umulh_i32_ptrue_all_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umulh z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.u.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As umulh_i32 but where pg is i64 based, which is not compatible for i32 and
; thus inactive lanes are important and the unpredicated form cannot be used.
define <vscale x 4 x i32> @umulh_i32_ptrue_all_d(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: umulh_i32_ptrue_all_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    umulh z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

;
; AND
;

define <vscale x 16 x i8> @and_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: and_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.and.u.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @and_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: and_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.and.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @and_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: and_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.and.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @and_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: and_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.and.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; BIC
;

define <vscale x 16 x i8> @bic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: bic_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bic z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.bic.u.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @bic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: bic_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bic z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.bic.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @bic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: bic_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bic z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.bic.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @bic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: bic_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bic z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.bic.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; EOR
;

define <vscale x 16 x i8> @eor_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: eor_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.eor.u.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @eor_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: eor_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.eor.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @eor_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: eor_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.eor.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @eor_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: eor_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.eor.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; ORR
;

define <vscale x 16 x i8> @orr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: orr_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.orr.u.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @orr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: orr_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.orr.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @orr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: orr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.orr.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @orr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: orr_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.orr.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

; As orr_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @orr_i32_ptrue_all_b(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: orr_i32_ptrue_all_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.u.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As orr_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @orr_i32_ptrue_all_h(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: orr_i32_ptrue_all_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.u.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As orr_i32 but where pg is i64 based, which is not compatible for i32 and
; thus inactive lanes are important and the unpredicated form cannot be used.
define <vscale x 4 x i32> @orr_i32_ptrue_all_d(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: orr_i32_ptrue_all_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    orr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

;
; SQADD
;

define <vscale x 16 x i8> @sqadd_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sqadd_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqadd z0.b, z0.b, z1.b
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqadd.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqadd_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqadd_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqadd z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqadd.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqadd_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqadd_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqadd z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqadd_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqadd_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqadd z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqadd.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; SQSUB
;

define <vscale x 16 x i8> @sqsub_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sqsub_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub z0.b, z0.b, z1.b
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.u.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqsub_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqsub_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqsub_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqsub_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqsub_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqsub_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; UQADD
;

define <vscale x 16 x i8> @uqadd_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: uqadd_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqadd z0.b, z0.b, z1.b
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqadd.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @uqadd_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uqadd_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqadd z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqadd.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uqadd_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uqadd_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqadd z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqadd.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uqadd_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: uqadd_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqadd z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqadd.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; UQSUB
;

define <vscale x 16 x i8> @uqsub_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: uqsub_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub z0.b, z0.b, z1.b
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.u.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @uqsub_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uqsub_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uqsub_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uqsub_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uqsub_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: uqsub_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

; As uqsub_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @uqsub_i32_ptrue_all_b(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uqsub_i32_ptrue_all_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.u.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As uqsub_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @uqsub_i32_ptrue_all_h(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uqsub_i32_ptrue_all_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.u.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As uqsub_i32 but where pg is i64 based, which is not compatible for i32 and
; thus inactive lanes are important and the unpredicated form cannot be used.
define <vscale x 4 x i32> @uqsub_i32_ptrue_all_d(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uqsub_i32_ptrue_all_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    uqsub z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

;
; ASR (wide)
;

define <vscale x 16 x i8> @asr_i8(<vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: asr_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    asr z0.b, z0.b, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.wide.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @asr_i16(<vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: asr_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    asr z0.h, z0.h, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.wide.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @asr_i32(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: asr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    asr z0.s, z0.s, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.wide.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
  ret <vscale x 4 x i32> %out
}

;
; LSL (wide)
;

define <vscale x 16 x i8> @lsl_i8(<vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: lsl_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl z0.b, z0.b, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.wide.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @lsl_i16(<vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: lsl_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl z0.h, z0.h, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.wide.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @lsl_i32(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: lsl_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl z0.s, z0.s, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.wide.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
  ret <vscale x 4 x i32> %out
}

;
; LSR (wide)
;

define <vscale x 16 x i8> @lsr_i8(<vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: lsr_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsr z0.b, z0.b, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.wide.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @lsr_i16(<vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: lsr_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsr z0.h, z0.h, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.wide.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @lsr_i32(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: lsr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsr z0.s, z0.s, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.wide.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
  ret <vscale x 4 x i32> %out
}

; As lsr_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @lsr_i32_ptrue_all_b(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: lsr_i32_ptrue_all_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsr z0.s, z0.s, z1.d
; CHECK-NEXT:    ret
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.wide.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
  ret <vscale x 4 x i32> %out
}

; As lsr_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @lsr_i32_ptrue_all_h(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: lsr_i32_ptrue_all_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsr z0.s, z0.s, z1.d
; CHECK-NEXT:    ret
  %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.wide.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
  ret <vscale x 4 x i32> %out
}

; As lsr_i32 but where pg is i64 based, which is not compatible for i32 and
; thus inactive lanes are important and the unpredicated form cannot be used.
define <vscale x 4 x i32> @lsr_i32_ptrue_all_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: lsr_i32_ptrue_all_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    lsr z0.s, p0/m, z0.s, z1.d
; CHECK-NEXT:    ret
  %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.wide.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
  ret <vscale x 4 x i32> %out
}

;
; FADD
;

define <vscale x 8 x half> @fadd_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: fadd_half:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fadd z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fadd.u.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 4 x float> @fadd_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fadd_float:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fadd z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fadd.u.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @fadd_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: fadd_double:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fadd z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fadd.u.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

;
; FSUB
;

define <vscale x 8 x half> @fsub_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: fsub_half:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fsub z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fsub.u.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 4 x float> @fsub_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fsub_float:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fsub z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fsub.u.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @fsub_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: fsub_double:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fsub z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fsub.u.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

;
; FMUL
;

define <vscale x 8 x half> @fmul_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: fmul_half:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmul z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmul.u.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 4 x float> @fmul_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fmul_float:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmul z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmul.u.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @fmul_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: fmul_double:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmul z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.u.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

declare <vscale x 16 x i8> @llvm.aarch64.sve.add.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.add.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.add.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.add.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sub.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sub.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sub.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.mul.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.mul.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.mul.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.smulh.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.smulh.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.smulh.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.smulh.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.umulh.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.umulh.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.umulh.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.umulh.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.and.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.and.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.and.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.and.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.bic.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.bic.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.bic.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.bic.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.eor.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.eor.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.eor.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.eor.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.orr.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.orr.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.orr.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.orr.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sqadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqadd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqadd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.uqadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uqadd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uqadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uqadd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.asr.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.asr.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.asr.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.lsl.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.lsl.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.lsl.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.lsr.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.lsr.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)

declare <vscale x 8 x half> @llvm.aarch64.sve.fadd.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.fadd.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fadd.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 8 x half> @llvm.aarch64.sve.fsub.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.fsub.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fsub.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 8 x half> @llvm.aarch64.sve.fmul.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.fmul.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fmul.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)