; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -verify-machineinstrs < %s | FileCheck %s

declare { <vscale x 2 x i8>, <vscale x 2 x i1> } @llvm.umul.with.overflow.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>)

define <vscale x 2 x i8> @umulo_nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i8> %y) {
; CHECK-LABEL: umulo_nxv2i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and z1.d, z1.d, #0xff
; CHECK-NEXT:    and z0.d, z0.d, #0xff
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mul z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    lsr z1.d, z0.d, #8
; CHECK-NEXT:    cmpne p0.d, p0/z, z1.d, #0
; CHECK-NEXT:    mov z0.d, p0/m, #0 // =0x0
; CHECK-NEXT:    ret
  %a = call { <vscale x 2 x i8>, <vscale x 2 x i1> } @llvm.umul.with.overflow.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i8> %y)
  %b = extractvalue { <vscale x 2 x i8>, <vscale x 2 x i1> } %a, 0
  %c = extractvalue { <vscale x 2 x i8>, <vscale x 2 x i1> } %a, 1
  %d = select <vscale x 2 x i1> %c, <vscale x 2 x i8> zeroinitializer, <vscale x 2 x i8> %b
  ret <vscale x 2 x i8> %d
}

declare { <vscale x 4 x i8>, <vscale x 4 x i1> } @llvm.umul.with.overflow.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>)

define <vscale x 4 x i8> @umulo_nxv4i8(<vscale x 4 x i8> %x, <vscale x 4 x i8> %y) {
; CHECK-LABEL: umulo_nxv4i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and z1.s, z1.s, #0xff
; CHECK-NEXT:    and z0.s, z0.s, #0xff
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mul z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    lsr z1.s, z0.s, #8
; CHECK-NEXT:    cmpne p0.s, p0/z, z1.s, #0
; CHECK-NEXT:    mov z0.s, p0/m, #0 // =0x0
; CHECK-NEXT:    ret
  %a = call { <vscale x 4 x i8>, <vscale x 4 x i1> } @llvm.umul.with.overflow.nxv4i8(<vscale x 4 x i8> %x, <vscale x 4 x i8> %y)
  %b = extractvalue { <vscale x 4 x i8>, <vscale x 4 x i1> } %a, 0
  %c = extractvalue { <vscale x 4 x i8>, <vscale x 4 x i1> } %a, 1
  %d = select <vscale x 4 x i1> %c, <vscale x 4 x i8> zeroinitializer, <vscale x 4 x i8> %b
  ret <vscale x 4 x i8> %d
}

declare { <vscale x 8 x i8>, <vscale x 8 x i1> } @llvm.umul.with.overflow.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>)

define <vscale x 8 x i8> @umulo_nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %y) {
; CHECK-LABEL: umulo_nxv8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and z1.h, z1.h, #0xff
; CHECK-NEXT:    and z0.h, z0.h, #0xff
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    mul z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    lsr z1.h, z0.h, #8
; CHECK-NEXT:    cmpne p0.h, p0/z, z1.h, #0
; CHECK-NEXT:    mov z0.h, p0/m, #0 // =0x0
; CHECK-NEXT:    ret
  %a = call { <vscale x 8 x i8>, <vscale x 8 x i1> } @llvm.umul.with.overflow.nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %y)
  %b = extractvalue { <vscale x 8 x i8>, <vscale x 8 x i1> } %a, 0
  %c = extractvalue { <vscale x 8 x i8>, <vscale x 8 x i1> } %a, 1
  %d = select <vscale x 8 x i1> %c, <vscale x 8 x i8> zeroinitializer, <vscale x 8 x i8> %b
  ret <vscale x 8 x i8> %d
}

declare { <vscale x 16 x i8>, <vscale x 16 x i1> } @llvm.umul.with.overflow.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)

define <vscale x 16 x i8> @umulo_nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> %y) {
; CHECK-LABEL: umulo_nxv16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    movprfx z2, z0
; CHECK-NEXT:    umulh z2.b, p0/m, z2.b, z1.b
; CHECK-NEXT:    mul z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    cmpne p0.b, p0/z, z2.b, #0
; CHECK-NEXT:    mov z0.b, p0/m, #0 // =0x0
; CHECK-NEXT:    ret
  %a = call { <vscale x 16 x i8>, <vscale x 16 x i1> } @llvm.umul.with.overflow.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> %y)
  %b = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i1> } %a, 0
  %c = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i1> } %a, 1
  %d = select <vscale x 16 x i1> %c, <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> %b
  ret <vscale x 16 x i8> %d
}

declare { <vscale x 32 x i8>, <vscale x 32 x i1> } @llvm.umul.with.overflow.nxv32i8(<vscale x 32 x i8>, <vscale x 32 x i8>)

define <vscale x 32 x i8> @umulo_nxv32i8(<vscale x 32 x i8> %x, <vscale x 32 x i8> %y) {
; CHECK-LABEL: umulo_nxv32i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    movprfx z4, z1
; CHECK-NEXT:    umulh z4.b, p0/m, z4.b, z3.b
; CHECK-NEXT:    movprfx z5, z0
; CHECK-NEXT:    umulh z5.b, p0/m, z5.b, z2.b
; CHECK-NEXT:    mul z1.b, p0/m, z1.b, z3.b
; CHECK-NEXT:    mul z0.b, p0/m, z0.b, z2.b
; CHECK-NEXT:    cmpne p1.b, p0/z, z4.b, #0
; CHECK-NEXT:    cmpne p0.b, p0/z, z5.b, #0
; CHECK-NEXT:    mov z0.b, p0/m, #0 // =0x0
; CHECK-NEXT:    mov z1.b, p1/m, #0 // =0x0
; CHECK-NEXT:    ret
  %a = call { <vscale x 32 x i8>, <vscale x 32 x i1> } @llvm.umul.with.overflow.nxv32i8(<vscale x 32 x i8> %x, <vscale x 32 x i8> %y)
  %b = extractvalue { <vscale x 32 x i8>, <vscale x 32 x i1> } %a, 0
  %c = extractvalue { <vscale x 32 x i8>, <vscale x 32 x i1> } %a, 1
  %d = select <vscale x 32 x i1> %c, <vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> %b
  ret <vscale x 32 x i8> %d
}

declare { <vscale x 64 x i8>, <vscale x 64 x i1> } @llvm.umul.with.overflow.nxv64i8(<vscale x 64 x i8>, <vscale x 64 x i8>)

define <vscale x 64 x i8> @umulo_nxv64i8(<vscale x 64 x i8> %x, <vscale x 64 x i8> %y) {
; CHECK-LABEL: umulo_nxv64i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    movprfx z24, z3
; CHECK-NEXT:    umulh z24.b, p0/m, z24.b, z7.b
; CHECK-NEXT:    movprfx z25, z0
; CHECK-NEXT:    umulh z25.b, p0/m, z25.b, z4.b
; CHECK-NEXT:    movprfx z26, z2
; CHECK-NEXT:    umulh z26.b, p0/m, z26.b, z6.b
; CHECK-NEXT:    movprfx z27, z1
; CHECK-NEXT:    umulh z27.b, p0/m, z27.b, z5.b
; CHECK-NEXT:    mul z3.b, p0/m, z3.b, z7.b
; CHECK-NEXT:    mul z2.b, p0/m, z2.b, z6.b
; CHECK-NEXT:    mul z1.b, p0/m, z1.b, z5.b
; CHECK-NEXT:    mul z0.b, p0/m, z0.b, z4.b
; CHECK-NEXT:    cmpne p1.b, p0/z, z25.b, #0
; CHECK-NEXT:    cmpne p2.b, p0/z, z24.b, #0
; CHECK-NEXT:    cmpne p3.b, p0/z, z26.b, #0
; CHECK-NEXT:    cmpne p0.b, p0/z, z27.b, #0
; CHECK-NEXT:    mov z0.b, p1/m, #0 // =0x0
; CHECK-NEXT:    mov z2.b, p3/m, #0 // =0x0
; CHECK-NEXT:    mov z3.b, p2/m, #0 // =0x0
; CHECK-NEXT:    mov z1.b, p0/m, #0 // =0x0
; CHECK-NEXT:    ret
  %a = call { <vscale x 64 x i8>, <vscale x 64 x i1> } @llvm.umul.with.overflow.nxv64i8(<vscale x 64 x i8> %x, <vscale x 64 x i8> %y)
  %b = extractvalue { <vscale x 64 x i8>, <vscale x 64 x i1> } %a, 0
  %c = extractvalue { <vscale x 64 x i8>, <vscale x 64 x i1> } %a, 1
  %d = select <vscale x 64 x i1> %c, <vscale x 64 x i8> zeroinitializer, <vscale x 64 x i8> %b
  ret <vscale x 64 x i8> %d
}

declare { <vscale x 2 x i16>, <vscale x 2 x i1> } @llvm.umul.with.overflow.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>)

define <vscale x 2 x i16> @umulo_nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i16> %y) {
; CHECK-LABEL: umulo_nxv2i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and z1.d, z1.d, #0xffff
; CHECK-NEXT:    and z0.d, z0.d, #0xffff
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mul z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    lsr z1.d, z0.d, #16
; CHECK-NEXT:    cmpne p0.d, p0/z, z1.d, #0
; CHECK-NEXT:    mov z0.d, p0/m, #0 // =0x0
; CHECK-NEXT:    ret
  %a = call { <vscale x 2 x i16>, <vscale x 2 x i1> } @llvm.umul.with.overflow.nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i16> %y)
  %b = extractvalue { <vscale x 2 x i16>, <vscale x 2 x i1> } %a, 0
  %c = extractvalue { <vscale x 2 x i16>, <vscale x 2 x i1> } %a, 1
  %d = select <vscale x 2 x i1> %c, <vscale x 2 x i16> zeroinitializer, <vscale x 2 x i16> %b
  ret <vscale x 2 x i16> %d
}

declare { <vscale x 4 x i16>, <vscale x 4 x i1> } @llvm.umul.with.overflow.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)

define <vscale x 4 x i16> @umulo_nxv4i16(<vscale x 4 x i16> %x, <vscale x 4 x i16> %y) {
; CHECK-LABEL: umulo_nxv4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and z1.s, z1.s, #0xffff
; CHECK-NEXT:    and z0.s, z0.s, #0xffff
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mul z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    lsr z1.s, z0.s, #16
; CHECK-NEXT:    cmpne p0.s, p0/z, z1.s, #0
; CHECK-NEXT:    mov z0.s, p0/m, #0 // =0x0
; CHECK-NEXT:    ret
  %a = call { <vscale x 4 x i16>, <vscale x 4 x i1> } @llvm.umul.with.overflow.nxv4i16(<vscale x 4 x i16> %x, <vscale x 4 x i16> %y)
  %b = extractvalue { <vscale x 4 x i16>, <vscale x 4 x i1> } %a, 0
  %c = extractvalue { <vscale x 4 x i16>, <vscale x 4 x i1> } %a, 1
  %d = select <vscale x 4 x i1> %c, <vscale x 4 x i16> zeroinitializer, <vscale x 4 x i16> %b
  ret <vscale x 4 x i16> %d
}

declare { <vscale x 8 x i16>, <vscale x 8 x i1> } @llvm.umul.with.overflow.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)

define <vscale x 8 x i16> @umulo_nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %y) {
; CHECK-LABEL: umulo_nxv8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    movprfx z2, z0
; CHECK-NEXT:    umulh z2.h, p0/m, z2.h, z1.h
; CHECK-NEXT:    mul z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    cmpne p0.h, p0/z, z2.h, #0
; CHECK-NEXT:    mov z0.h, p0/m, #0 // =0x0
; CHECK-NEXT:    ret
  %a = call { <vscale x 8 x i16>, <vscale x 8 x i1> } @llvm.umul.with.overflow.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %y)
  %b = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i1> } %a, 0
  %c = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i1> } %a, 1
  %d = select <vscale x 8 x i1> %c, <vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> %b
  ret <vscale x 8 x i16> %d
}

declare { <vscale x 16 x i16>, <vscale x 16 x i1> } @llvm.umul.with.overflow.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>)

define <vscale x 16 x i16> @umulo_nxv16i16(<vscale x 16 x i16> %x, <vscale x 16 x i16> %y) {
; CHECK-LABEL: umulo_nxv16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    movprfx z4, z1
; CHECK-NEXT:    umulh z4.h, p0/m, z4.h, z3.h
; CHECK-NEXT:    movprfx z5, z0
; CHECK-NEXT:    umulh z5.h, p0/m, z5.h, z2.h
; CHECK-NEXT:    mul z1.h, p0/m, z1.h, z3.h
; CHECK-NEXT:    mul z0.h, p0/m, z0.h, z2.h
; CHECK-NEXT:    cmpne p1.h, p0/z, z4.h, #0
; CHECK-NEXT:    cmpne p0.h, p0/z, z5.h, #0
; CHECK-NEXT:    mov z0.h, p0/m, #0 // =0x0
; CHECK-NEXT:    mov z1.h, p1/m, #0 // =0x0
; CHECK-NEXT:    ret
  %a = call { <vscale x 16 x i16>, <vscale x 16 x i1> } @llvm.umul.with.overflow.nxv16i16(<vscale x 16 x i16> %x, <vscale x 16 x i16> %y)
  %b = extractvalue { <vscale x 16 x i16>, <vscale x 16 x i1> } %a, 0
  %c = extractvalue { <vscale x 16 x i16>, <vscale x 16 x i1> } %a, 1
  %d = select <vscale x 16 x i1> %c, <vscale x 16 x i16> zeroinitializer, <vscale x 16 x i16> %b
  ret <vscale x 16 x i16> %d
}

declare { <vscale x 32 x i16>, <vscale x 32 x i1> } @llvm.umul.with.overflow.nxv32i16(<vscale x 32 x i16>, <vscale x 32 x i16>)

define <vscale x 32 x i16> @umulo_nxv32i16(<vscale x 32 x i16> %x, <vscale x 32 x i16> %y) {
; CHECK-LABEL: umulo_nxv32i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    movprfx z24, z3
; CHECK-NEXT:    umulh z24.h, p0/m, z24.h, z7.h
; CHECK-NEXT:    movprfx z25, z0
; CHECK-NEXT:    umulh z25.h, p0/m, z25.h, z4.h
; CHECK-NEXT:    movprfx z26, z2
; CHECK-NEXT:    umulh z26.h, p0/m, z26.h, z6.h
; CHECK-NEXT:    movprfx z27, z1
; CHECK-NEXT:    umulh z27.h, p0/m, z27.h, z5.h
; CHECK-NEXT:    mul z3.h, p0/m, z3.h, z7.h
; CHECK-NEXT:    mul z2.h, p0/m, z2.h, z6.h
; CHECK-NEXT:    mul z1.h, p0/m, z1.h, z5.h
; CHECK-NEXT:    mul z0.h, p0/m, z0.h, z4.h
; CHECK-NEXT:    cmpne p1.h, p0/z, z25.h, #0
; CHECK-NEXT:    cmpne p2.h, p0/z, z24.h, #0
; CHECK-NEXT:    cmpne p3.h, p0/z, z26.h, #0
; CHECK-NEXT:    cmpne p0.h, p0/z, z27.h, #0
; CHECK-NEXT:    mov z0.h, p1/m, #0 // =0x0
; CHECK-NEXT:    mov z2.h, p3/m, #0 // =0x0
; CHECK-NEXT:    mov z3.h, p2/m, #0 // =0x0
; CHECK-NEXT:    mov z1.h, p0/m, #0 // =0x0
; CHECK-NEXT:    ret
  %a = call { <vscale x 32 x i16>, <vscale x 32 x i1> } @llvm.umul.with.overflow.nxv32i16(<vscale x 32 x i16> %x, <vscale x 32 x i16> %y)
  %b = extractvalue { <vscale x 32 x i16>, <vscale x 32 x i1> } %a, 0
  %c = extractvalue { <vscale x 32 x i16>, <vscale x 32 x i1> } %a, 1
  %d = select <vscale x 32 x i1> %c, <vscale x 32 x i16> zeroinitializer, <vscale x 32 x i16> %b
  ret <vscale x 32 x i16> %d
}

declare { <vscale x 2 x i32>, <vscale x 2 x i1> } @llvm.umul.with.overflow.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>)

define <vscale x 2 x i32> @umulo_nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y) {
; CHECK-LABEL: umulo_nxv2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and z1.d, z1.d, #0xffffffff
; CHECK-NEXT:    and z0.d, z0.d, #0xffffffff
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mul z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    lsr z1.d, z0.d, #32
; CHECK-NEXT:    cmpne p0.d, p0/z, z1.d, #0
; CHECK-NEXT:    mov z0.d, p0/m, #0 // =0x0
; CHECK-NEXT:    ret
  %a = call { <vscale x 2 x i32>, <vscale x 2 x i1> } @llvm.umul.with.overflow.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y)
  %b = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i1> } %a, 0
  %c = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i1> } %a, 1
  %d = select <vscale x 2 x i1> %c, <vscale x 2 x i32> zeroinitializer, <vscale x 2 x i32> %b
  ret <vscale x 2 x i32> %d
}

declare { <vscale x 4 x i32>, <vscale x 4 x i1> } @llvm.umul.with.overflow.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)

define <vscale x 4 x i32> @umulo_nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
; CHECK-LABEL: umulo_nxv4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    movprfx z2, z0
; CHECK-NEXT:    umulh z2.s, p0/m, z2.s, z1.s
; CHECK-NEXT:    mul z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    cmpne p0.s, p0/z, z2.s, #0
; CHECK-NEXT:    mov z0.s, p0/m, #0 // =0x0
; CHECK-NEXT:    ret
  %a = call { <vscale x 4 x i32>, <vscale x 4 x i1> } @llvm.umul.with.overflow.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y)
  %b = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i1> } %a, 0
  %c = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i1> } %a, 1
  %d = select <vscale x 4 x i1> %c, <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> %b
  ret <vscale x 4 x i32> %d
}

declare { <vscale x 8 x i32>, <vscale x 8 x i1> } @llvm.umul.with.overflow.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>)

define <vscale x 8 x i32> @umulo_nxv8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %y) {
; CHECK-LABEL: umulo_nxv8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    movprfx z4, z1
; CHECK-NEXT:    umulh z4.s, p0/m, z4.s, z3.s
; CHECK-NEXT:    movprfx z5, z0
; CHECK-NEXT:    umulh z5.s, p0/m, z5.s, z2.s
; CHECK-NEXT:    mul z1.s, p0/m, z1.s, z3.s
; CHECK-NEXT:    mul z0.s, p0/m, z0.s, z2.s
; CHECK-NEXT:    cmpne p1.s, p0/z, z4.s, #0
; CHECK-NEXT:    cmpne p0.s, p0/z, z5.s, #0
; CHECK-NEXT:    mov z0.s, p0/m, #0 // =0x0
; CHECK-NEXT:    mov z1.s, p1/m, #0 // =0x0
; CHECK-NEXT:    ret
  %a = call { <vscale x 8 x i32>, <vscale x 8 x i1> } @llvm.umul.with.overflow.nxv8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %y)
  %b = extractvalue { <vscale x 8 x i32>, <vscale x 8 x i1> } %a, 0
  %c = extractvalue { <vscale x 8 x i32>, <vscale x 8 x i1> } %a, 1
  %d = select <vscale x 8 x i1> %c, <vscale x 8 x i32> zeroinitializer, <vscale x 8 x i32> %b
  ret <vscale x 8 x i32> %d
}

declare { <vscale x 16 x i32>, <vscale x 16 x i1> } @llvm.umul.with.overflow.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>)

define <vscale x 16 x i32> @umulo_nxv16i32(<vscale x 16 x i32> %x, <vscale x 16 x i32> %y) {
; CHECK-LABEL: umulo_nxv16i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    movprfx z24, z3
; CHECK-NEXT:    umulh z24.s, p0/m, z24.s, z7.s
; CHECK-NEXT:    movprfx z25, z0
; CHECK-NEXT:    umulh z25.s, p0/m, z25.s, z4.s
; CHECK-NEXT:    movprfx z26, z2
; CHECK-NEXT:    umulh z26.s, p0/m, z26.s, z6.s
; CHECK-NEXT:    movprfx z27, z1
; CHECK-NEXT:    umulh z27.s, p0/m, z27.s, z5.s
; CHECK-NEXT:    mul z3.s, p0/m, z3.s, z7.s
; CHECK-NEXT:    mul z2.s, p0/m, z2.s, z6.s
; CHECK-NEXT:    mul z1.s, p0/m, z1.s, z5.s
; CHECK-NEXT:    mul z0.s, p0/m, z0.s, z4.s
; CHECK-NEXT:    cmpne p1.s, p0/z, z25.s, #0
; CHECK-NEXT:    cmpne p2.s, p0/z, z24.s, #0
; CHECK-NEXT:    cmpne p3.s, p0/z, z26.s, #0
; CHECK-NEXT:    cmpne p0.s, p0/z, z27.s, #0
; CHECK-NEXT:    mov z0.s, p1/m, #0 // =0x0
; CHECK-NEXT:    mov z2.s, p3/m, #0 // =0x0
; CHECK-NEXT:    mov z3.s, p2/m, #0 // =0x0
; CHECK-NEXT:    mov z1.s, p0/m, #0 // =0x0
; CHECK-NEXT:    ret
  %a = call { <vscale x 16 x i32>, <vscale x 16 x i1> } @llvm.umul.with.overflow.nxv16i32(<vscale x 16 x i32> %x, <vscale x 16 x i32> %y)
  %b = extractvalue { <vscale x 16 x i32>, <vscale x 16 x i1> } %a, 0
  %c = extractvalue { <vscale x 16 x i32>, <vscale x 16 x i1> } %a, 1
  %d = select <vscale x 16 x i1> %c, <vscale x 16 x i32> zeroinitializer, <vscale x 16 x i32> %b
  ret <vscale x 16 x i32> %d
}

declare { <vscale x 2 x i64>, <vscale x 2 x i1> } @llvm.umul.with.overflow.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)

define <vscale x 2 x i64> @umulo_nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y) {
; CHECK-LABEL: umulo_nxv2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    movprfx z2, z0
; CHECK-NEXT:    umulh z2.d, p0/m, z2.d, z1.d
; CHECK-NEXT:    mul z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    cmpne p0.d, p0/z, z2.d, #0
; CHECK-NEXT:    mov z0.d, p0/m, #0 // =0x0
; CHECK-NEXT:    ret
  %a = call { <vscale x 2 x i64>, <vscale x 2 x i1> } @llvm.umul.with.overflow.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y)
  %b = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i1> } %a, 0
  %c = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i1> } %a, 1
  %d = select <vscale x 2 x i1> %c, <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> %b
  ret <vscale x 2 x i64> %d
}

declare { <vscale x 4 x i64>, <vscale x 4 x i1> } @llvm.umul.with.overflow.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>)

define <vscale x 4 x i64> @umulo_nxv4i64(<vscale x 4 x i64> %x, <vscale x 4 x i64> %y) {
; CHECK-LABEL: umulo_nxv4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    movprfx z4, z1
; CHECK-NEXT:    umulh z4.d, p0/m, z4.d, z3.d
; CHECK-NEXT:    movprfx z5, z0
; CHECK-NEXT:    umulh z5.d, p0/m, z5.d, z2.d
; CHECK-NEXT:    mul z1.d, p0/m, z1.d, z3.d
; CHECK-NEXT:    mul z0.d, p0/m, z0.d, z2.d
; CHECK-NEXT:    cmpne p1.d, p0/z, z4.d, #0
; CHECK-NEXT:    cmpne p0.d, p0/z, z5.d, #0
; CHECK-NEXT:    mov z0.d, p0/m, #0 // =0x0
; CHECK-NEXT:    mov z1.d, p1/m, #0 // =0x0
; CHECK-NEXT:    ret
  %a = call { <vscale x 4 x i64>, <vscale x 4 x i1> } @llvm.umul.with.overflow.nxv4i64(<vscale x 4 x i64> %x, <vscale x 4 x i64> %y)
  %b = extractvalue { <vscale x 4 x i64>, <vscale x 4 x i1> } %a, 0
  %c = extractvalue { <vscale x 4 x i64>, <vscale x 4 x i1> } %a, 1
  %d = select <vscale x 4 x i1> %c, <vscale x 4 x i64> zeroinitializer, <vscale x 4 x i64> %b
  ret <vscale x 4 x i64> %d
}

declare { <vscale x 8 x i64>, <vscale x 8 x i1> } @llvm.umul.with.overflow.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>)

define <vscale x 8 x i64> @umulo_nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %y) {
; CHECK-LABEL: umulo_nxv8i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    movprfx z24, z3
; CHECK-NEXT:    umulh z24.d, p0/m, z24.d, z7.d
; CHECK-NEXT:    movprfx z25, z0
; CHECK-NEXT:    umulh z25.d, p0/m, z25.d, z4.d
; CHECK-NEXT:    movprfx z26, z2
; CHECK-NEXT:    umulh z26.d, p0/m, z26.d, z6.d
; CHECK-NEXT:    movprfx z27, z1
; CHECK-NEXT:    umulh z27.d, p0/m, z27.d, z5.d
; CHECK-NEXT:    mul z3.d, p0/m, z3.d, z7.d
; CHECK-NEXT:    mul z2.d, p0/m, z2.d, z6.d
; CHECK-NEXT:    mul z1.d, p0/m, z1.d, z5.d
; CHECK-NEXT:    mul z0.d, p0/m, z0.d, z4.d
; CHECK-NEXT:    cmpne p1.d, p0/z, z25.d, #0
; CHECK-NEXT:    cmpne p2.d, p0/z, z24.d, #0
; CHECK-NEXT:    cmpne p3.d, p0/z, z26.d, #0
; CHECK-NEXT:    cmpne p0.d, p0/z, z27.d, #0
; CHECK-NEXT:    mov z0.d, p1/m, #0 // =0x0
; CHECK-NEXT:    mov z2.d, p3/m, #0 // =0x0
; CHECK-NEXT:    mov z3.d, p2/m, #0 // =0x0
; CHECK-NEXT:    mov z1.d, p0/m, #0 // =0x0
; CHECK-NEXT:    ret
  %a = call { <vscale x 8 x i64>, <vscale x 8 x i1> } @llvm.umul.with.overflow.nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %y)
  %b = extractvalue { <vscale x 8 x i64>, <vscale x 8 x i1> } %a, 0
  %c = extractvalue { <vscale x 8 x i64>, <vscale x 8 x i1> } %a, 1
  %d = select <vscale x 8 x i1> %c, <vscale x 8 x i64> zeroinitializer, <vscale x 8 x i64> %b
  ret <vscale x 8 x i64> %d
}