; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512

target triple = "aarch64-unknown-linux-gnu"

;
; CEIL -> FRINTP
;
; The tests below verify that llvm.ceil.* on NEON-sized (64/128-bit) vectors
; stays on the NEON frintp instruction, while larger fixed-length vectors are
; lowered to SVE's predicated FRINTP with a ptrue of the exact element count.

; Don't use SVE for 64-bit vectors.
define <4 x half> @frintp_v4f16(<4 x half> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintp_v4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: frintp v0.4h, v0.4h
; CHECK-NEXT: ret
  %res = call <4 x half> @llvm.ceil.v4f16(<4 x half> %op)
  ret <4 x half> %res
}

; Don't use SVE for 128-bit vectors.
define <8 x half> @frintp_v8f16(<8 x half> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintp_v8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: frintp v0.8h, v0.8h
; CHECK-NEXT: ret
  %res = call <8 x half> @llvm.ceil.v8f16(<8 x half> %op)
  ret <8 x half> %res
}

define void @frintp_v16f16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frintp_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: frintp z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
  %op = load <16 x half>, ptr %a
  %res = call <16 x half> @llvm.ceil.v16f16(<16 x half> %op)
  store <16 x half> %res, ptr %a
  ret void
}

; No vscale_range attribute: a 256-bit machine must split into two halves,
; while a 512-bit (or wider) machine handles the vector in one register.
define void @frintp_v32f16(ptr %a) #0 {
; VBITS_GE_256-LABEL: frintp_v32f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
; VBITS_GE_256-NEXT: frintp z0.h, p0/m, z0.h
; VBITS_GE_256-NEXT: frintp z1.h, p0/m, z1.h
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: frintp_v32f16:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.h, vl32
; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT: frintp z0.h, p0/m, z0.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
  %op = load <32 x half>, ptr %a
  %res = call <32 x half> @llvm.ceil.v32f16(<32 x half> %op)
  store <32 x half> %res, ptr %a
  ret void
}

define void @frintp_v64f16(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frintp_v64f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: frintp z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
  %op = load <64 x half>, ptr %a
  %res = call <64 x half> @llvm.ceil.v64f16(<64 x half> %op)
  store <64 x half> %res, ptr %a
  ret void
}

define void @frintp_v128f16(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frintp_v128f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: frintp z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
  %op = load <128 x half>, ptr %a
  %res = call <128 x half> @llvm.ceil.v128f16(<128 x half> %op)
  store <128 x half> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <2 x float> @frintp_v2f32(<2 x float> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintp_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: frintp v0.2s, v0.2s
; CHECK-NEXT: ret
  %res = call <2 x float> @llvm.ceil.v2f32(<2 x float> %op)
  ret <2 x float> %res
}

; Don't use SVE for 128-bit vectors.
; Ceil on f32/f64 fixed-length vectors: NEON frintp up to 128 bits, SVE
; predicated FRINTP beyond that (see CEIL section header above).
define <4 x float> @frintp_v4f32(<4 x float> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintp_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: frintp v0.4s, v0.4s
; CHECK-NEXT: ret
  %res = call <4 x float> @llvm.ceil.v4f32(<4 x float> %op)
  ret <4 x float> %res
}

define void @frintp_v8f32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frintp_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: frintp z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
  %op = load <8 x float>, ptr %a
  %res = call <8 x float> @llvm.ceil.v8f32(<8 x float> %op)
  store <8 x float> %res, ptr %a
  ret void
}

define void @frintp_v16f32(ptr %a) #0 {
; VBITS_GE_256-LABEL: frintp_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT: frintp z0.s, p0/m, z0.s
; VBITS_GE_256-NEXT: frintp z1.s, p0/m, z1.s
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: frintp_v16f32:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT: frintp z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
  %op = load <16 x float>, ptr %a
  %res = call <16 x float> @llvm.ceil.v16f32(<16 x float> %op)
  store <16 x float> %res, ptr %a
  ret void
}

define void @frintp_v32f32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frintp_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: frintp z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
  %op = load <32 x float>, ptr %a
  %res = call <32 x float> @llvm.ceil.v32f32(<32 x float> %op)
  store <32 x float> %res, ptr %a
  ret void
}

define void @frintp_v64f32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frintp_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: frintp z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
  %op = load <64 x float>, ptr %a
  %res = call <64 x float> @llvm.ceil.v64f32(<64 x float> %op)
  store <64 x float> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <1 x double> @frintp_v1f64(<1 x double> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintp_v1f64:
; CHECK: // %bb.0:
; CHECK-NEXT: frintp d0, d0
; CHECK-NEXT: ret
  %res = call <1 x double> @llvm.ceil.v1f64(<1 x double> %op)
  ret <1 x double> %res
}

; Don't use SVE for 128-bit vectors.
define <2 x double> @frintp_v2f64(<2 x double> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintp_v2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: frintp v0.2d, v0.2d
; CHECK-NEXT: ret
  %res = call <2 x double> @llvm.ceil.v2f64(<2 x double> %op)
  ret <2 x double> %res
}

define void @frintp_v4f64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frintp_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: frintp z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
  %op = load <4 x double>, ptr %a
  %res = call <4 x double> @llvm.ceil.v4f64(<4 x double> %op)
  store <4 x double> %res, ptr %a
  ret void
}

define void @frintp_v8f64(ptr %a) #0 {
; VBITS_GE_256-LABEL: frintp_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT: frintp z0.d, p0/m, z0.d
; VBITS_GE_256-NEXT: frintp z1.d, p0/m, z1.d
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: frintp_v8f64:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT: frintp z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
  %op = load <8 x double>, ptr %a
  %res = call <8 x double> @llvm.ceil.v8f64(<8 x double> %op)
  store <8 x double> %res, ptr %a
  ret void
}

define void @frintp_v16f64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frintp_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: frintp z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
  %op = load <16 x double>, ptr %a
  %res = call <16 x double> @llvm.ceil.v16f64(<16 x double> %op)
  store <16 x double> %res, ptr %a
  ret void
}

define void @frintp_v32f64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frintp_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: frintp z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
  %op = load <32 x double>, ptr %a
  %res = call <32 x double> @llvm.ceil.v32f64(<32 x double> %op)
  store <32 x double> %res, ptr %a
  ret void
}

;
; FLOOR -> FRINTM
;

; Don't use SVE for 64-bit vectors.
; Floor on f16 fixed-length vectors: NEON frintm up to 128 bits, SVE
; predicated FRINTM beyond that.
define <4 x half> @frintm_v4f16(<4 x half> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintm_v4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: frintm v0.4h, v0.4h
; CHECK-NEXT: ret
  %res = call <4 x half> @llvm.floor.v4f16(<4 x half> %op)
  ret <4 x half> %res
}

; Don't use SVE for 128-bit vectors.
define <8 x half> @frintm_v8f16(<8 x half> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintm_v8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: frintm v0.8h, v0.8h
; CHECK-NEXT: ret
  %res = call <8 x half> @llvm.floor.v8f16(<8 x half> %op)
  ret <8 x half> %res
}

define void @frintm_v16f16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frintm_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: frintm z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
  %op = load <16 x half>, ptr %a
  %res = call <16 x half> @llvm.floor.v16f16(<16 x half> %op)
  store <16 x half> %res, ptr %a
  ret void
}

define void @frintm_v32f16(ptr %a) #0 {
; VBITS_GE_256-LABEL: frintm_v32f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
; VBITS_GE_256-NEXT: frintm z0.h, p0/m, z0.h
; VBITS_GE_256-NEXT: frintm z1.h, p0/m, z1.h
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: frintm_v32f16:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.h, vl32
; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT: frintm z0.h, p0/m, z0.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
  %op = load <32 x half>, ptr %a
  %res = call <32 x half> @llvm.floor.v32f16(<32 x half> %op)
  store <32 x half> %res, ptr %a
  ret void
}

define void @frintm_v64f16(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frintm_v64f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: frintm z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
  %op = load <64 x half>, ptr %a
  %res = call <64 x half> @llvm.floor.v64f16(<64 x half> %op)
  store <64 x half> %res, ptr %a
  ret void
}

define void @frintm_v128f16(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frintm_v128f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: frintm z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
  %op = load <128 x half>, ptr %a
  %res = call <128 x half> @llvm.floor.v128f16(<128 x half> %op)
  store <128 x half> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <2 x float> @frintm_v2f32(<2 x float> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintm_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: frintm v0.2s, v0.2s
; CHECK-NEXT: ret
  %res = call <2 x float> @llvm.floor.v2f32(<2 x float> %op)
  ret <2 x float> %res
}

; Don't use SVE for 128-bit vectors.
; Floor on f32/f64 fixed-length vectors: NEON frintm up to 128 bits, SVE
; predicated FRINTM beyond that.
define <4 x float> @frintm_v4f32(<4 x float> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintm_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: frintm v0.4s, v0.4s
; CHECK-NEXT: ret
  %res = call <4 x float> @llvm.floor.v4f32(<4 x float> %op)
  ret <4 x float> %res
}

define void @frintm_v8f32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frintm_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: frintm z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
  %op = load <8 x float>, ptr %a
  %res = call <8 x float> @llvm.floor.v8f32(<8 x float> %op)
  store <8 x float> %res, ptr %a
  ret void
}

define void @frintm_v16f32(ptr %a) #0 {
; VBITS_GE_256-LABEL: frintm_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT: frintm z0.s, p0/m, z0.s
; VBITS_GE_256-NEXT: frintm z1.s, p0/m, z1.s
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: frintm_v16f32:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT: frintm z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
  %op = load <16 x float>, ptr %a
  %res = call <16 x float> @llvm.floor.v16f32(<16 x float> %op)
  store <16 x float> %res, ptr %a
  ret void
}

define void @frintm_v32f32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frintm_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: frintm z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
  %op = load <32 x float>, ptr %a
  %res = call <32 x float> @llvm.floor.v32f32(<32 x float> %op)
  store <32 x float> %res, ptr %a
  ret void
}

define void @frintm_v64f32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frintm_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: frintm z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
  %op = load <64 x float>, ptr %a
  %res = call <64 x float> @llvm.floor.v64f32(<64 x float> %op)
  store <64 x float> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <1 x double> @frintm_v1f64(<1 x double> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintm_v1f64:
; CHECK: // %bb.0:
; CHECK-NEXT: frintm d0, d0
; CHECK-NEXT: ret
  %res = call <1 x double> @llvm.floor.v1f64(<1 x double> %op)
  ret <1 x double> %res
}

; Don't use SVE for 128-bit vectors.
define <2 x double> @frintm_v2f64(<2 x double> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintm_v2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: frintm v0.2d, v0.2d
; CHECK-NEXT: ret
  %res = call <2 x double> @llvm.floor.v2f64(<2 x double> %op)
  ret <2 x double> %res
}

define void @frintm_v4f64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frintm_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: frintm z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
  %op = load <4 x double>, ptr %a
  %res = call <4 x double> @llvm.floor.v4f64(<4 x double> %op)
  store <4 x double> %res, ptr %a
  ret void
}

define void @frintm_v8f64(ptr %a) #0 {
; VBITS_GE_256-LABEL: frintm_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT: frintm z0.d, p0/m, z0.d
; VBITS_GE_256-NEXT: frintm z1.d, p0/m, z1.d
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: frintm_v8f64:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT: frintm z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
  %op = load <8 x double>, ptr %a
  %res = call <8 x double> @llvm.floor.v8f64(<8 x double> %op)
  store <8 x double> %res, ptr %a
  ret void
}

define void @frintm_v16f64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frintm_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: frintm z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
  %op = load <16 x double>, ptr %a
  %res = call <16 x double> @llvm.floor.v16f64(<16 x double> %op)
  store <16 x double> %res, ptr %a
  ret void
}

define void @frintm_v32f64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frintm_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: frintm z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
  %op = load <32 x double>, ptr %a
  %res = call <32 x double> @llvm.floor.v32f64(<32 x double> %op)
  store <32 x double> %res, ptr %a
  ret void
}

;
; FNEARBYINT -> FRINTI
;

; Don't use SVE for 64-bit vectors.
; Nearbyint on f16 fixed-length vectors: NEON frinti up to 128 bits, SVE
; predicated FRINTI beyond that.
define <4 x half> @frinti_v4f16(<4 x half> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frinti_v4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: frinti v0.4h, v0.4h
; CHECK-NEXT: ret
  %res = call <4 x half> @llvm.nearbyint.v4f16(<4 x half> %op)
  ret <4 x half> %res
}

; Don't use SVE for 128-bit vectors.
define <8 x half> @frinti_v8f16(<8 x half> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frinti_v8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: frinti v0.8h, v0.8h
; CHECK-NEXT: ret
  %res = call <8 x half> @llvm.nearbyint.v8f16(<8 x half> %op)
  ret <8 x half> %res
}

define void @frinti_v16f16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frinti_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: frinti z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
  %op = load <16 x half>, ptr %a
  %res = call <16 x half> @llvm.nearbyint.v16f16(<16 x half> %op)
  store <16 x half> %res, ptr %a
  ret void
}

define void @frinti_v32f16(ptr %a) #0 {
; VBITS_GE_256-LABEL: frinti_v32f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
; VBITS_GE_256-NEXT: frinti z0.h, p0/m, z0.h
; VBITS_GE_256-NEXT: frinti z1.h, p0/m, z1.h
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: frinti_v32f16:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.h, vl32
; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT: frinti z0.h, p0/m, z0.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
  %op = load <32 x half>, ptr %a
  %res = call <32 x half> @llvm.nearbyint.v32f16(<32 x half> %op)
  store <32 x half> %res, ptr %a
  ret void
}

define void @frinti_v64f16(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frinti_v64f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: frinti z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
  %op = load <64 x half>, ptr %a
  %res = call <64 x half> @llvm.nearbyint.v64f16(<64 x half> %op)
  store <64 x half> %res, ptr %a
  ret void
}

define void @frinti_v128f16(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frinti_v128f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: frinti z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
  %op = load <128 x half>, ptr %a
  %res = call <128 x half> @llvm.nearbyint.v128f16(<128 x half> %op)
  store <128 x half> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <2 x float> @frinti_v2f32(<2 x float> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frinti_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: frinti v0.2s, v0.2s
; CHECK-NEXT: ret
  %res = call <2 x float> @llvm.nearbyint.v2f32(<2 x float> %op)
  ret <2 x float> %res
}

; Don't use SVE for 128-bit vectors.
; Nearbyint on f32 fixed-length vectors: NEON frinti up to 128 bits, SVE
; predicated FRINTI beyond that.
define <4 x float> @frinti_v4f32(<4 x float> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frinti_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: frinti v0.4s, v0.4s
; CHECK-NEXT: ret
  %res = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %op)
  ret <4 x float> %res
}

define void @frinti_v8f32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frinti_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: frinti z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
  %op = load <8 x float>, ptr %a
  %res = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %op)
  store <8 x float> %res, ptr %a
  ret void
}

define void @frinti_v16f32(ptr %a) #0 {
; VBITS_GE_256-LABEL: frinti_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT: frinti z0.s, p0/m, z0.s
; VBITS_GE_256-NEXT: frinti z1.s, p0/m, z1.s
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: frinti_v16f32:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT: frinti z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
  %op = load <16 x float>, ptr %a
  %res = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %op)
  store <16 x float> %res, ptr %a
  ret void
}

define void @frinti_v32f32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frinti_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: frinti z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
  %op = load <32 x float>, ptr %a
  %res = call <32 x float> @llvm.nearbyint.v32f32(<32 x float> %op)
  store <32 x float> %res, ptr %a
  ret void
}

define void @frinti_v64f32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frinti_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: frinti z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
  %op = load <64 x float>, ptr %a
  %res = call <64 x float> @llvm.nearbyint.v64f32(<64 x float> %op)
  store <64 x float> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <1 x double> @frinti_v1f64(<1 x double> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frinti_v1f64:
; CHECK: // %bb.0:
; CHECK-NEXT: frinti d0, d0
; CHECK-NEXT: ret
  %res = call <1 x double> @llvm.nearbyint.v1f64(<1 x double> %op)
  ret <1 x double> %res
}

; Don't use SVE for 128-bit vectors.
; Nearbyint on f64 fixed-length vectors: NEON frinti up to 128 bits, SVE
; predicated FRINTI beyond that.
define <2 x double> @frinti_v2f64(<2 x double> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frinti_v2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: frinti v0.2d, v0.2d
; CHECK-NEXT: ret
  %res = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %op)
  ret <2 x double> %res
}

define void @frinti_v4f64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frinti_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: frinti z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
  %op = load <4 x double>, ptr %a
  %res = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %op)
  store <4 x double> %res, ptr %a
  ret void
}

define void @frinti_v8f64(ptr %a) #0 {
; VBITS_GE_256-LABEL: frinti_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT: frinti z0.d, p0/m, z0.d
; VBITS_GE_256-NEXT: frinti z1.d, p0/m, z1.d
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: frinti_v8f64:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT: frinti z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
  %op = load <8 x double>, ptr %a
  %res = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %op)
  store <8 x double> %res, ptr %a
  ret void
}

define void @frinti_v16f64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frinti_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: frinti z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
  %op = load <16 x double>, ptr %a
  %res = call <16 x double> @llvm.nearbyint.v16f64(<16 x double> %op)
  store <16 x double> %res, ptr %a
  ret void
}

define void @frinti_v32f64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frinti_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: frinti z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
  %op = load <32 x double>, ptr %a
  %res = call <32 x double> @llvm.nearbyint.v32f64(<32 x double> %op)
  store <32 x double> %res, ptr %a
  ret void
}

;
; RINT -> FRINTX
;

; Don't use SVE for 64-bit vectors.
define <4 x half> @frintx_v4f16(<4 x half> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintx_v4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: frintx v0.4h, v0.4h
; CHECK-NEXT: ret
  %res = call <4 x half> @llvm.rint.v4f16(<4 x half> %op)
  ret <4 x half> %res
}

; Don't use SVE for 128-bit vectors.
; Rint on f16/f32 fixed-length vectors: NEON frintx up to 128 bits, SVE
; predicated FRINTX beyond that.
define <8 x half> @frintx_v8f16(<8 x half> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintx_v8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: frintx v0.8h, v0.8h
; CHECK-NEXT: ret
  %res = call <8 x half> @llvm.rint.v8f16(<8 x half> %op)
  ret <8 x half> %res
}

define void @frintx_v16f16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frintx_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: frintx z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
  %op = load <16 x half>, ptr %a
  %res = call <16 x half> @llvm.rint.v16f16(<16 x half> %op)
  store <16 x half> %res, ptr %a
  ret void
}

define void @frintx_v32f16(ptr %a) #0 {
; VBITS_GE_256-LABEL: frintx_v32f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
; VBITS_GE_256-NEXT: frintx z0.h, p0/m, z0.h
; VBITS_GE_256-NEXT: frintx z1.h, p0/m, z1.h
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: frintx_v32f16:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.h, vl32
; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT: frintx z0.h, p0/m, z0.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
  %op = load <32 x half>, ptr %a
  %res = call <32 x half> @llvm.rint.v32f16(<32 x half> %op)
  store <32 x half> %res, ptr %a
  ret void
}

define void @frintx_v64f16(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frintx_v64f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: frintx z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
  %op = load <64 x half>, ptr %a
  %res = call <64 x half> @llvm.rint.v64f16(<64 x half> %op)
  store <64 x half> %res, ptr %a
  ret void
}

define void @frintx_v128f16(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frintx_v128f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: frintx z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
  %op = load <128 x half>, ptr %a
  %res = call <128 x half> @llvm.rint.v128f16(<128 x half> %op)
  store <128 x half> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <2 x float> @frintx_v2f32(<2 x float> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintx_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: frintx v0.2s, v0.2s
; CHECK-NEXT: ret
  %res = call <2 x float> @llvm.rint.v2f32(<2 x float> %op)
  ret <2 x float> %res
}

; Don't use SVE for 128-bit vectors.
define <4 x float> @frintx_v4f32(<4 x float> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintx_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: frintx v0.4s, v0.4s
; CHECK-NEXT: ret
  %res = call <4 x float> @llvm.rint.v4f32(<4 x float> %op)
  ret <4 x float> %res
}

define void @frintx_v8f32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frintx_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: frintx z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
  %op = load <8 x float>, ptr %a
  %res = call <8 x float> @llvm.rint.v8f32(<8 x float> %op)
  store <8 x float> %res, ptr %a
  ret void
}

define void @frintx_v16f32(ptr %a) #0 {
; VBITS_GE_256-LABEL: frintx_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT: frintx z0.s, p0/m, z0.s
; VBITS_GE_256-NEXT: frintx z1.s, p0/m, z1.s
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: frintx_v16f32:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT: frintx z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
  %op = load <16 x float>, ptr %a
  %res = call <16 x float> @llvm.rint.v16f32(<16 x float> %op)
  store <16 x float> %res, ptr %a
  ret void
}

define void @frintx_v32f32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frintx_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: frintx z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
  %op = load <32 x float>, ptr %a
  %res = call <32 x float> @llvm.rint.v32f32(<32 x float> %op)
  store <32 x float> %res, ptr %a
  ret void
}

define void @frintx_v64f32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frintx_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: frintx z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
  %op = load <64 x float>, ptr %a
  %res = call <64 x float> @llvm.rint.v64f32(<64 x float> %op)
  store <64 x float> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
; Rint on f64 fixed-length vectors, then the start of the ROUND -> FRINTA
; section: NEON instruction up to 128 bits, SVE predicated form beyond that.
define <1 x double> @frintx_v1f64(<1 x double> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintx_v1f64:
; CHECK: // %bb.0:
; CHECK-NEXT: frintx d0, d0
; CHECK-NEXT: ret
  %res = call <1 x double> @llvm.rint.v1f64(<1 x double> %op)
  ret <1 x double> %res
}

; Don't use SVE for 128-bit vectors.
define <2 x double> @frintx_v2f64(<2 x double> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintx_v2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: frintx v0.2d, v0.2d
; CHECK-NEXT: ret
  %res = call <2 x double> @llvm.rint.v2f64(<2 x double> %op)
  ret <2 x double> %res
}

define void @frintx_v4f64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frintx_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: frintx z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
  %op = load <4 x double>, ptr %a
  %res = call <4 x double> @llvm.rint.v4f64(<4 x double> %op)
  store <4 x double> %res, ptr %a
  ret void
}

define void @frintx_v8f64(ptr %a) #0 {
; VBITS_GE_256-LABEL: frintx_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT: frintx z0.d, p0/m, z0.d
; VBITS_GE_256-NEXT: frintx z1.d, p0/m, z1.d
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: frintx_v8f64:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT: frintx z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
  %op = load <8 x double>, ptr %a
  %res = call <8 x double> @llvm.rint.v8f64(<8 x double> %op)
  store <8 x double> %res, ptr %a
  ret void
}

define void @frintx_v16f64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frintx_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: frintx z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
  %op = load <16 x double>, ptr %a
  %res = call <16 x double> @llvm.rint.v16f64(<16 x double> %op)
  store <16 x double> %res, ptr %a
  ret void
}

define void @frintx_v32f64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frintx_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: frintx z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
  %op = load <32 x double>, ptr %a
  %res = call <32 x double> @llvm.rint.v32f64(<32 x double> %op)
  store <32 x double> %res, ptr %a
  ret void
}

;
; ROUND -> FRINTA
;

; Don't use SVE for 64-bit vectors.
define <4 x half> @frinta_v4f16(<4 x half> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frinta_v4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: frinta v0.4h, v0.4h
; CHECK-NEXT: ret
  %res = call <4 x half> @llvm.round.v4f16(<4 x half> %op)
  ret <4 x half> %res
}

; Don't use SVE for 128-bit vectors.
define <8 x half> @frinta_v8f16(<8 x half> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frinta_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frinta v0.8h, v0.8h
; CHECK-NEXT:    ret
  %res = call <8 x half> @llvm.round.v8f16(<8 x half> %op)
  ret <8 x half> %res
}

define void @frinta_v16f16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frinta_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    frinta z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <16 x half>, ptr %a
  %res = call <16 x half> @llvm.round.v16f16(<16 x half> %op)
  store <16 x half> %res, ptr %a
  ret void
}

define void @frinta_v32f16(ptr %a) #0 {
; VBITS_GE_256-LABEL: frinta_v32f16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0]
; VBITS_GE_256-NEXT:    frinta z0.h, p0/m, z0.h
; VBITS_GE_256-NEXT:    frinta z1.h, p0/m, z1.h
; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: frinta_v32f16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT:    frinta z0.h, p0/m, z0.h
; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op = load <32 x half>, ptr %a
  %res = call <32 x half> @llvm.round.v32f16(<32 x half> %op)
  store <32 x half> %res, ptr %a
  ret void
}

define void @frinta_v64f16(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frinta_v64f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    frinta z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <64 x half>, ptr %a
  %res = call <64 x half> @llvm.round.v64f16(<64 x half> %op)
  store <64 x half> %res, ptr %a
  ret void
}

define void @frinta_v128f16(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frinta_v128f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl128
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    frinta z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <128 x half>, ptr %a
  %res = call <128 x half> @llvm.round.v128f16(<128 x half> %op)
  store <128 x half> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <2 x float> @frinta_v2f32(<2 x float> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frinta_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frinta v0.2s, v0.2s
; CHECK-NEXT:    ret
  %res = call <2 x float> @llvm.round.v2f32(<2 x float> %op)
  ret <2 x float> %res
}

; Don't use SVE for 128-bit vectors.
define <4 x float> @frinta_v4f32(<4 x float> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frinta_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frinta v0.4s, v0.4s
; CHECK-NEXT:    ret
  %res = call <4 x float> @llvm.round.v4f32(<4 x float> %op)
  ret <4 x float> %res
}

define void @frinta_v8f32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frinta_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    frinta z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <8 x float>, ptr %a
  %res = call <8 x float> @llvm.round.v8f32(<8 x float> %op)
  store <8 x float> %res, ptr %a
  ret void
}

define void @frinta_v16f32(ptr %a) #0 {
; VBITS_GE_256-LABEL: frinta_v16f32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    frinta z0.s, p0/m, z0.s
; VBITS_GE_256-NEXT:    frinta z1.s, p0/m, z1.s
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: frinta_v16f32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    frinta z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op = load <16 x float>, ptr %a
  %res = call <16 x float> @llvm.round.v16f32(<16 x float> %op)
  store <16 x float> %res, ptr %a
  ret void
}

define void @frinta_v32f32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frinta_v32f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    frinta z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <32 x float>, ptr %a
  %res = call <32 x float> @llvm.round.v32f32(<32 x float> %op)
  store <32 x float> %res, ptr %a
  ret void
}

define void @frinta_v64f32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frinta_v64f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    frinta z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <64 x float>, ptr %a
  %res = call <64 x float> @llvm.round.v64f32(<64 x float> %op)
  store <64 x float> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <1 x double> @frinta_v1f64(<1 x double> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frinta_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frinta d0, d0
; CHECK-NEXT:    ret
  %res = call <1 x double> @llvm.round.v1f64(<1 x double> %op)
  ret <1 x double> %res
}

; Don't use SVE for 128-bit vectors.
define <2 x double> @frinta_v2f64(<2 x double> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frinta_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frinta v0.2d, v0.2d
; CHECK-NEXT:    ret
  %res = call <2 x double> @llvm.round.v2f64(<2 x double> %op)
  ret <2 x double> %res
}

define void @frinta_v4f64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frinta_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    frinta z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <4 x double>, ptr %a
  %res = call <4 x double> @llvm.round.v4f64(<4 x double> %op)
  store <4 x double> %res, ptr %a
  ret void
}

define void @frinta_v8f64(ptr %a) #0 {
; VBITS_GE_256-LABEL: frinta_v8f64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    frinta z0.d, p0/m, z0.d
; VBITS_GE_256-NEXT:    frinta z1.d, p0/m, z1.d
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: frinta_v8f64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    frinta z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op = load <8 x double>, ptr %a
  %res = call <8 x double> @llvm.round.v8f64(<8 x double> %op)
  store <8 x double> %res, ptr %a
  ret void
}

define void @frinta_v16f64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frinta_v16f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    frinta z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <16 x double>, ptr %a
  %res = call <16 x double> @llvm.round.v16f64(<16 x double> %op)
  store <16 x double> %res, ptr %a
  ret void
}

define void @frinta_v32f64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frinta_v32f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    frinta z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <32 x double>, ptr %a
  %res = call <32 x double> @llvm.round.v32f64(<32 x double> %op)
  store <32 x double> %res, ptr %a
  ret void
}

;
; ROUNDEVEN -> FRINTN
;

; Don't use SVE for 64-bit vectors.
define <4 x half> @frintn_v4f16(<4 x half> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintn_v4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frintn v0.4h, v0.4h
; CHECK-NEXT:    ret
  %res = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %op)
  ret <4 x half> %res
}

; Don't use SVE for 128-bit vectors.
define <8 x half> @frintn_v8f16(<8 x half> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintn_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frintn v0.8h, v0.8h
; CHECK-NEXT:    ret
  %res = call <8 x half> @llvm.roundeven.v8f16(<8 x half> %op)
  ret <8 x half> %res
}

define void @frintn_v16f16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frintn_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    frintn z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <16 x half>, ptr %a
  %res = call <16 x half> @llvm.roundeven.v16f16(<16 x half> %op)
  store <16 x half> %res, ptr %a
  ret void
}

define void @frintn_v32f16(ptr %a) #0 {
; VBITS_GE_256-LABEL: frintn_v32f16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0]
; VBITS_GE_256-NEXT:    frintn z0.h, p0/m, z0.h
; VBITS_GE_256-NEXT:    frintn z1.h, p0/m, z1.h
; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: frintn_v32f16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT:    frintn z0.h, p0/m, z0.h
; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op = load <32 x half>, ptr %a
  %res = call <32 x half> @llvm.roundeven.v32f16(<32 x half> %op)
  store <32 x half> %res, ptr %a
  ret void
}

define void @frintn_v64f16(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frintn_v64f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    frintn z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <64 x half>, ptr %a
  %res = call <64 x half> @llvm.roundeven.v64f16(<64 x half> %op)
  store <64 x half> %res, ptr %a
  ret void
}

define void @frintn_v128f16(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frintn_v128f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl128
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    frintn z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <128 x half>, ptr %a
  %res = call <128 x half> @llvm.roundeven.v128f16(<128 x half> %op)
  store <128 x half> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <2 x float> @frintn_v2f32(<2 x float> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintn_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frintn v0.2s, v0.2s
; CHECK-NEXT:    ret
  %res = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %op)
  ret <2 x float> %res
}

; Don't use SVE for 128-bit vectors.
define <4 x float> @frintn_v4f32(<4 x float> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintn_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frintn v0.4s, v0.4s
; CHECK-NEXT:    ret
  %res = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %op)
  ret <4 x float> %res
}

define void @frintn_v8f32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frintn_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    frintn z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <8 x float>, ptr %a
  %res = call <8 x float> @llvm.roundeven.v8f32(<8 x float> %op)
  store <8 x float> %res, ptr %a
  ret void
}

define void @frintn_v16f32(ptr %a) #0 {
; VBITS_GE_256-LABEL: frintn_v16f32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    frintn z0.s, p0/m, z0.s
; VBITS_GE_256-NEXT:    frintn z1.s, p0/m, z1.s
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: frintn_v16f32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    frintn z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op = load <16 x float>, ptr %a
  %res = call <16 x float> @llvm.roundeven.v16f32(<16 x float> %op)
  store <16 x float> %res, ptr %a
  ret void
}

define void @frintn_v32f32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frintn_v32f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    frintn z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <32 x float>, ptr %a
  %res = call <32 x float> @llvm.roundeven.v32f32(<32 x float> %op)
  store <32 x float> %res, ptr %a
  ret void
}

define void @frintn_v64f32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frintn_v64f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    frintn z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <64 x float>, ptr %a
  %res = call <64 x float> @llvm.roundeven.v64f32(<64 x float> %op)
  store <64 x float> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <1 x double> @frintn_v1f64(<1 x double> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintn_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frintn d0, d0
; CHECK-NEXT:    ret
  %res = call <1 x double> @llvm.roundeven.v1f64(<1 x double> %op)
  ret <1 x double> %res
}

; Don't use SVE for 128-bit vectors.
define <2 x double> @frintn_v2f64(<2 x double> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintn_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frintn v0.2d, v0.2d
; CHECK-NEXT:    ret
  %res = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %op)
  ret <2 x double> %res
}

define void @frintn_v4f64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frintn_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    frintn z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <4 x double>, ptr %a
  %res = call <4 x double> @llvm.roundeven.v4f64(<4 x double> %op)
  store <4 x double> %res, ptr %a
  ret void
}

define void @frintn_v8f64(ptr %a) #0 {
; VBITS_GE_256-LABEL: frintn_v8f64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    frintn z0.d, p0/m, z0.d
; VBITS_GE_256-NEXT:    frintn z1.d, p0/m, z1.d
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: frintn_v8f64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    frintn z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op = load <8 x double>, ptr %a
  %res = call <8 x double> @llvm.roundeven.v8f64(<8 x double> %op)
  store <8 x double> %res, ptr %a
  ret void
}

define void @frintn_v16f64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frintn_v16f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    frintn z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <16 x double>, ptr %a
  %res = call <16 x double> @llvm.roundeven.v16f64(<16 x double> %op)
  store <16 x double> %res, ptr %a
  ret void
}

define void @frintn_v32f64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frintn_v32f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    frintn z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <32 x double>, ptr %a
  %res = call <32 x double> @llvm.roundeven.v32f64(<32 x double> %op)
  store <32 x double> %res, ptr %a
  ret void
}

;
; TRUNC -> FRINTZ
;

; Don't use SVE for 64-bit vectors.
define <4 x half> @frintz_v4f16(<4 x half> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintz_v4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frintz v0.4h, v0.4h
; CHECK-NEXT:    ret
  %res = call <4 x half> @llvm.trunc.v4f16(<4 x half> %op)
  ret <4 x half> %res
}

; Don't use SVE for 128-bit vectors.
define <8 x half> @frintz_v8f16(<8 x half> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintz_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frintz v0.8h, v0.8h
; CHECK-NEXT:    ret
  %res = call <8 x half> @llvm.trunc.v8f16(<8 x half> %op)
  ret <8 x half> %res
}

define void @frintz_v16f16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frintz_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    frintz z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <16 x half>, ptr %a
  %res = call <16 x half> @llvm.trunc.v16f16(<16 x half> %op)
  store <16 x half> %res, ptr %a
  ret void
}

define void @frintz_v32f16(ptr %a) #0 {
; VBITS_GE_256-LABEL: frintz_v32f16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0]
; VBITS_GE_256-NEXT:    frintz z0.h, p0/m, z0.h
; VBITS_GE_256-NEXT:    frintz z1.h, p0/m, z1.h
; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: frintz_v32f16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT:    frintz z0.h, p0/m, z0.h
; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op = load <32 x half>, ptr %a
  %res = call <32 x half> @llvm.trunc.v32f16(<32 x half> %op)
  store <32 x half> %res, ptr %a
  ret void
}

define void @frintz_v64f16(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frintz_v64f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    frintz z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <64 x half>, ptr %a
  %res = call <64 x half> @llvm.trunc.v64f16(<64 x half> %op)
  store <64 x half> %res, ptr %a
  ret void
}

define void @frintz_v128f16(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frintz_v128f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl128
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    frintz z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <128 x half>, ptr %a
  %res = call <128 x half> @llvm.trunc.v128f16(<128 x half> %op)
  store <128 x half> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <2 x float> @frintz_v2f32(<2 x float> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintz_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frintz v0.2s, v0.2s
; CHECK-NEXT:    ret
  %res = call <2 x float> @llvm.trunc.v2f32(<2 x float> %op)
  ret <2 x float> %res
}

; Don't use SVE for 128-bit vectors.
define <4 x float> @frintz_v4f32(<4 x float> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintz_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frintz v0.4s, v0.4s
; CHECK-NEXT:    ret
  %res = call <4 x float> @llvm.trunc.v4f32(<4 x float> %op)
  ret <4 x float> %res
}

define void @frintz_v8f32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frintz_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    frintz z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <8 x float>, ptr %a
  %res = call <8 x float> @llvm.trunc.v8f32(<8 x float> %op)
  store <8 x float> %res, ptr %a
  ret void
}

define void @frintz_v16f32(ptr %a) #0 {
; VBITS_GE_256-LABEL: frintz_v16f32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    frintz z0.s, p0/m, z0.s
; VBITS_GE_256-NEXT:    frintz z1.s, p0/m, z1.s
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: frintz_v16f32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    frintz z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op = load <16 x float>, ptr %a
  %res = call <16 x float> @llvm.trunc.v16f32(<16 x float> %op)
  store <16 x float> %res, ptr %a
  ret void
}

define void @frintz_v32f32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frintz_v32f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    frintz z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <32 x float>, ptr %a
  %res = call <32 x float> @llvm.trunc.v32f32(<32 x float> %op)
  store <32 x float> %res, ptr %a
  ret void
}

define void @frintz_v64f32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frintz_v64f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    frintz z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <64 x float>, ptr %a
  %res = call <64 x float> @llvm.trunc.v64f32(<64 x float> %op)
  store <64 x float> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <1 x double> @frintz_v1f64(<1 x double> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintz_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frintz d0, d0
; CHECK-NEXT:    ret
  %res = call <1 x double> @llvm.trunc.v1f64(<1 x double> %op)
  ret <1 x double> %res
}

; Don't use SVE for 128-bit vectors.
define <2 x double> @frintz_v2f64(<2 x double> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: frintz_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frintz v0.2d, v0.2d
; CHECK-NEXT:    ret
  %res = call <2 x double> @llvm.trunc.v2f64(<2 x double> %op)
  ret <2 x double> %res
}

define void @frintz_v4f64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: frintz_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    frintz z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <4 x double>, ptr %a
  %res = call <4 x double> @llvm.trunc.v4f64(<4 x double> %op)
  store <4 x double> %res, ptr %a
  ret void
}

define void @frintz_v8f64(ptr %a) #0 {
; VBITS_GE_256-LABEL: frintz_v8f64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    frintz z0.d, p0/m, z0.d
; VBITS_GE_256-NEXT:    frintz z1.d, p0/m, z1.d
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: frintz_v8f64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    frintz z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op = load <8 x double>, ptr %a
  %res = call <8 x double> @llvm.trunc.v8f64(<8 x double> %op)
  store <8 x double> %res, ptr %a
  ret void
}

define void @frintz_v16f64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: frintz_v16f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    frintz z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <16 x double>, ptr %a
  %res = call <16 x double> @llvm.trunc.v16f64(<16 x double> %op)
  store <16 x double> %res, ptr %a
  ret void
}

define void @frintz_v32f64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: frintz_v32f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    frintz z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op = load <32 x double>, ptr %a
  %res = call <32 x double> @llvm.trunc.v32f64(<32 x double> %op)
  store <32 x double> %res, ptr %a
  ret void
}

attributes #0 = { "target-features"="+sve" }

declare <4 x half> @llvm.ceil.v4f16(<4 x half>)
declare <8 x half> @llvm.ceil.v8f16(<8 x half>)
declare <16 x half> @llvm.ceil.v16f16(<16 x half>)
declare <32 x half> @llvm.ceil.v32f16(<32 x half>)
declare <64 x half> @llvm.ceil.v64f16(<64 x half>)
declare <128 x half> @llvm.ceil.v128f16(<128 x half>)
declare <2 x float> @llvm.ceil.v2f32(<2 x float>)
declare <4 x float> @llvm.ceil.v4f32(<4 x float>)
declare <8 x float> @llvm.ceil.v8f32(<8 x float>)
declare <16 x float> @llvm.ceil.v16f32(<16 x float>)
declare <32 x float> @llvm.ceil.v32f32(<32 x float>)
declare <64 x float> @llvm.ceil.v64f32(<64 x float>)
declare <1 x double> @llvm.ceil.v1f64(<1 x double>)
declare <2 x double> @llvm.ceil.v2f64(<2 x double>)
declare <4 x double> @llvm.ceil.v4f64(<4 x double>)
declare <8 x double> @llvm.ceil.v8f64(<8 x double>)
declare <16 x double> @llvm.ceil.v16f64(<16 x double>)
declare <32 x double> @llvm.ceil.v32f64(<32 x double>)

declare <4 x half> @llvm.floor.v4f16(<4 x half>)
declare <8 x half> @llvm.floor.v8f16(<8 x half>)
declare <16 x half> @llvm.floor.v16f16(<16 x half>)
declare <32 x half> @llvm.floor.v32f16(<32 x half>)
declare <64 x half> @llvm.floor.v64f16(<64 x half>)
declare <128 x half> @llvm.floor.v128f16(<128 x half>)
declare <2 x float> @llvm.floor.v2f32(<2 x float>)
declare <4 x float> @llvm.floor.v4f32(<4 x float>)
declare <8 x float> @llvm.floor.v8f32(<8 x float>)
declare <16 x float> @llvm.floor.v16f32(<16 x float>)
declare <32 x float> @llvm.floor.v32f32(<32 x float>)
declare <64 x float> @llvm.floor.v64f32(<64 x float>)
declare <1 x double> @llvm.floor.v1f64(<1 x double>)
declare <2 x double> @llvm.floor.v2f64(<2 x double>)
declare <4 x double> @llvm.floor.v4f64(<4 x double>)
declare <8 x double> @llvm.floor.v8f64(<8 x double>)
declare <16 x double> @llvm.floor.v16f64(<16 x double>)
declare <32 x double> @llvm.floor.v32f64(<32 x double>)

declare <4 x half> @llvm.nearbyint.v4f16(<4 x half>)
declare <8 x half> @llvm.nearbyint.v8f16(<8 x half>)
declare <16 x half> @llvm.nearbyint.v16f16(<16 x half>)
declare <32 x half> @llvm.nearbyint.v32f16(<32 x half>)
declare <64 x half> @llvm.nearbyint.v64f16(<64 x half>)
declare <128 x half> @llvm.nearbyint.v128f16(<128 x half>)
declare <2 x float> @llvm.nearbyint.v2f32(<2 x float>)
declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>)
declare <8 x float> @llvm.nearbyint.v8f32(<8 x float>)
declare <16 x float> @llvm.nearbyint.v16f32(<16 x float>)
declare <32 x float> @llvm.nearbyint.v32f32(<32 x float>)
declare <64 x float> @llvm.nearbyint.v64f32(<64 x float>)
declare <1 x double> @llvm.nearbyint.v1f64(<1 x double>)
declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>)
declare <4 x double> @llvm.nearbyint.v4f64(<4 x double>)
declare <8 x double> @llvm.nearbyint.v8f64(<8 x double>)
declare <16 x double> @llvm.nearbyint.v16f64(<16 x double>)
declare <32 x double> @llvm.nearbyint.v32f64(<32 x double>)

declare <4 x half> @llvm.rint.v4f16(<4 x half>)
declare <8 x half> @llvm.rint.v8f16(<8 x half>)
declare <16 x half> @llvm.rint.v16f16(<16 x half>)
declare <32 x half> @llvm.rint.v32f16(<32 x half>)
declare <64 x half> @llvm.rint.v64f16(<64 x half>)
declare <128 x half> @llvm.rint.v128f16(<128 x half>)
declare <2 x float> @llvm.rint.v2f32(<2 x float>)
declare <4 x float> @llvm.rint.v4f32(<4 x float>)
declare <8 x float> @llvm.rint.v8f32(<8 x float>)
declare <16 x float> @llvm.rint.v16f32(<16 x float>)
declare <32 x float> @llvm.rint.v32f32(<32 x float>)
declare <64 x float> @llvm.rint.v64f32(<64 x float>)
declare <1 x double> @llvm.rint.v1f64(<1 x double>)
declare <2 x double> @llvm.rint.v2f64(<2 x double>)
declare <4 x double> @llvm.rint.v4f64(<4 x double>)
declare <8 x double> @llvm.rint.v8f64(<8 x double>)
declare <16 x double> @llvm.rint.v16f64(<16 x double>)
declare <32 x double> @llvm.rint.v32f64(<32 x double>)

declare <4 x half> @llvm.round.v4f16(<4 x half>)
declare <8 x half> @llvm.round.v8f16(<8 x half>)
declare <16 x half> @llvm.round.v16f16(<16 x half>)
declare <32 x half> @llvm.round.v32f16(<32 x half>)
declare <64 x half> @llvm.round.v64f16(<64 x half>)
declare <128 x half> @llvm.round.v128f16(<128 x half>)
declare <2 x float> @llvm.round.v2f32(<2 x float>)
declare <4 x float> @llvm.round.v4f32(<4 x float>)
declare <8 x float> @llvm.round.v8f32(<8 x float>)
declare <16 x float> @llvm.round.v16f32(<16 x float>)
declare <32 x float> @llvm.round.v32f32(<32 x float>)
declare <64 x float> @llvm.round.v64f32(<64 x float>)
declare <1 x double> @llvm.round.v1f64(<1 x double>)
declare <2 x double> @llvm.round.v2f64(<2 x double>)
declare <4 x double> @llvm.round.v4f64(<4 x double>)
declare <8 x double> @llvm.round.v8f64(<8 x double>)
declare <16 x double> @llvm.round.v16f64(<16 x double>)
declare <32 x double> @llvm.round.v32f64(<32 x double>)

declare <4 x half> @llvm.roundeven.v4f16(<4 x half>)
declare <8 x half> @llvm.roundeven.v8f16(<8 x half>)
declare <16 x half> @llvm.roundeven.v16f16(<16 x half>)
declare <32 x half> @llvm.roundeven.v32f16(<32 x half>)
declare <64 x half> @llvm.roundeven.v64f16(<64 x half>)
declare <128 x half> @llvm.roundeven.v128f16(<128 x half>)
declare <2 x float> @llvm.roundeven.v2f32(<2 x float>)
declare <4 x float> @llvm.roundeven.v4f32(<4 x float>)
declare <8 x float> @llvm.roundeven.v8f32(<8 x float>)
declare <16 x float> @llvm.roundeven.v16f32(<16 x float>)
declare <32 x float> @llvm.roundeven.v32f32(<32 x float>)
declare <64 x float> @llvm.roundeven.v64f32(<64 x float>)
declare <1 x double> @llvm.roundeven.v1f64(<1 x double>)
declare <2 x double> @llvm.roundeven.v2f64(<2 x double>)
declare <4 x double> @llvm.roundeven.v4f64(<4 x double>)
declare <8 x double> @llvm.roundeven.v8f64(<8 x double>)
declare <16 x double> @llvm.roundeven.v16f64(<16 x double>)
declare <32 x double> @llvm.roundeven.v32f64(<32 x double>)

declare <4 x half> @llvm.trunc.v4f16(<4 x half>)
declare <8 x half> @llvm.trunc.v8f16(<8 x half>)
declare <16 x half> @llvm.trunc.v16f16(<16 x half>)
declare <32 x half> @llvm.trunc.v32f16(<32 x half>)
declare <64 x half> @llvm.trunc.v64f16(<64 x half>)
declare <128 x half> @llvm.trunc.v128f16(<128 x half>)
declare <2 x float> @llvm.trunc.v2f32(<2 x float>)
declare <4 x float> @llvm.trunc.v4f32(<4 x float>)
declare <8 x float> @llvm.trunc.v8f32(<8 x float>)
declare <16 x float> @llvm.trunc.v16f32(<16 x float>)
declare <32 x float> @llvm.trunc.v32f32(<32 x float>)
declare <64 x float> @llvm.trunc.v64f32(<64 x float>)
declare <1 x double> @llvm.trunc.v1f64(<1 x double>)
declare <2 x double> @llvm.trunc.v2f64(<2 x double>)
declare <4 x double> @llvm.trunc.v4f64(<4 x double>)
declare <8 x double> @llvm.trunc.v8f64(<8 x double>)
declare <16 x double>
@llvm.trunc.v16f64(<16 x double>) 2017declare <32 x double> @llvm.trunc.v32f64(<32 x double>) 2018