; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s | FileCheck %s

target triple = "aarch64-unknown-linux-gnu"

;
; SQABS (sve2_int_un_pred_arit)
;

; Check movprfx is not inserted when dstReg == srcReg
define <vscale x 16 x i8> @sqabs_i8_dupreg(<vscale x 16 x i8> %a) #0 {
; CHECK-LABEL: sqabs_i8_dupreg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    sqabs z0.b, p0/m, z0.b
; CHECK-NEXT:    ret
  %pg = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %ret = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a)
  ret <vscale x 16 x i8> %ret
}

; Check movprfx is inserted when passthru is undef
define <vscale x 16 x i8> @sqabs_i8_undef(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: sqabs_i8_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    movprfx z0, z1
; CHECK-NEXT:    sqabs z0.b, p0/m, z1.b
; CHECK-NEXT:    ret
  %pg = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %ret = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %ret
}

; Check movprfx is inserted when predicate is all active, making the passthru dead
define <vscale x 16 x i8> @sqabs_i8_active(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: sqabs_i8_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    movprfx z0, z1
; CHECK-NEXT:    sqabs z0.b, p0/m, z1.b
; CHECK-NEXT:    ret
  %pg = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %ret = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %ret
}

; Check movprfx is not inserted when predicate is not all active, making the passthru used
define <vscale x 16 x i8> @sqabs_i8_not_active(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: sqabs_i8_not_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    sqabs z0.b, p0/m, z1.b
; CHECK-NEXT:    ret
  %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %pg.to = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg)
  %ret = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg.to, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %ret
}

define <vscale x 8 x i16> @sqabs_i16_dupreg(<vscale x 8 x i16> %a) #0 {
; CHECK-LABEL: sqabs_i16_dupreg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    sqabs z0.h, p0/m, z0.h
; CHECK-NEXT:    ret
  %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %ret = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
  ret <vscale x 8 x i16> %ret
}

define <vscale x 8 x i16> @sqabs_i16_undef(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: sqabs_i16_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    movprfx z0, z1
; CHECK-NEXT:    sqabs z0.h, p0/m, z1.h
; CHECK-NEXT:    ret
  %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %ret = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %ret
}

define <vscale x 8 x i16> @sqabs_i16_active(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: sqabs_i16_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    movprfx z0, z1
; CHECK-NEXT:    sqabs z0.h, p0/m, z1.h
; CHECK-NEXT:    ret
  %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %ret = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %ret
}

define <vscale x 8 x i16> @sqabs_i16_not_active(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: sqabs_i16_not_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    sqabs z0.h, p0/m, z1.h
; CHECK-NEXT:    ret
  %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %pg.to = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg)
  %pg.from = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg.to)
  %ret = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg.from, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %ret
}

define <vscale x 4 x i32> @sqabs_i32_dupreg(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: sqabs_i32_dupreg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    sqabs z0.s, p0/m, z0.s
; CHECK-NEXT:    ret
  %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqabs.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a)
  ret <vscale x 4 x i32> %ret
}

define <vscale x 4 x i32> @sqabs_i32_undef(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: sqabs_i32_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    movprfx z0, z1
; CHECK-NEXT:    sqabs z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
  %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqabs.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %ret
}

define <vscale x 4 x i32> @sqabs_i32_active(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: sqabs_i32_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    movprfx z0, z1
; CHECK-NEXT:    sqabs z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
  %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqabs.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %ret
}

define <vscale x 4 x i32> @sqabs_i32_not_active(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: sqabs_i32_not_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    sqabs z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
  %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %pg.to = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg)
  %pg.from = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.to)
  %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqabs.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg.from, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %ret
}

define <vscale x 2 x i64> @sqabs_i64_dupreg(<vscale x 2 x i64> %a) #0 {
; CHECK-LABEL: sqabs_i64_dupreg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    sqabs z0.d, p0/m, z0.d
; CHECK-NEXT:    ret
  %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %ret = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqabs.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a)
  ret <vscale x 2 x i64> %ret
}

define <vscale x 2 x i64> @sqabs_i64_undef(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: sqabs_i64_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    movprfx z0, z1
; CHECK-NEXT:    sqabs z0.d, p0/m, z1.d
; CHECK-NEXT:    ret
  %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %ret = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqabs.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %ret
}

define <vscale x 2 x i64> @sqabs_i64_active(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: sqabs_i64_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    movprfx z0, z1
; CHECK-NEXT:    sqabs z0.d, p0/m, z1.d
; CHECK-NEXT:    ret
  %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %ret = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqabs.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %ret
}

define <vscale x 2 x i64> @sqabs_i64_not_active(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i1> %pg) #0 {
; CHECK-LABEL: sqabs_i64_not_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqabs z0.d, p0/m, z1.d
; CHECK-NEXT:    ret
  %ret = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqabs.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %ret
}

;
; URECPE (sve2_int_un_pred_arit_s)
;

define <vscale x 4 x i32> @urecpe_i32_dupreg(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: urecpe_i32_dupreg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    urecpe z0.s, p0/m, z0.s
; CHECK-NEXT:    ret
  %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.urecpe.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a)
  ret <vscale x 4 x i32> %ret
}

define <vscale x 4 x i32> @urecpe_i32_undef(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: urecpe_i32_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    movprfx z0, z1
; CHECK-NEXT:    urecpe z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
  %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.urecpe.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %ret
}

define <vscale x 4 x i32> @urecpe_i32_active(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: urecpe_i32_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    movprfx z0, z1
; CHECK-NEXT:    urecpe z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
  %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.urecpe.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %ret
}

define <vscale x 4 x i32> @urecpe_i32_not_active(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: urecpe_i32_not_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    urecpe z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
  %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %pg.to = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg)
  %pg.from = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.to)
  %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.urecpe.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg.from, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %ret
}

declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)

declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1>)

declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqabs.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqabs.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.urecpe.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)

attributes #0 = { nounwind "target-features"="+sve2" }