; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; NOTE: the min=2048 run is intentionally checked with the VBITS_GE_512 prefixes.

target triple = "aarch64-unknown-linux-gnu"

;
; FCVTZU H -> H
;

; Don't use SVE for 64-bit vectors.
define <4 x i16> @fcvtzu_v4f16_v4i16(<4 x half> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v4f16_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtzu v0.4h, v0.4h
; CHECK-NEXT:    ret
  %res = fptoui <4 x half> %op1 to <4 x i16>
  ret <4 x i16> %res
}

; Don't use SVE for 128-bit vectors.
define void @fcvtzu_v8f16_v8i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v8f16_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    fcvtzu v0.8h, v0.8h
; CHECK-NEXT:    str q0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <8 x half>, ptr %a
  %res = fptoui <8 x half> %op1 to <8 x i16>
  store <8 x i16> %res, ptr %b
  ret void
}

define void @fcvtzu_v16f16_v16i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v16f16_v16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    fcvtzu z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <16 x half>, ptr %a
  %res = fptoui <16 x half> %op1 to <16 x i16>
  store <16 x i16> %res, ptr %b
  ret void
}

; No vscale_range attribute here, so both the 256-bit (split into two
; registers) and 512-bit (single register) check variants apply.
define void @fcvtzu_v32f16_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvtzu_v32f16_v32i16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0]
; VBITS_GE_256-NEXT:    fcvtzu z0.h, p0/m, z0.h
; VBITS_GE_256-NEXT:    fcvtzu z1.h, p0/m, z1.h
; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x1, x8, lsl #1]
; VBITS_GE_256-NEXT:    st1h { z1.h }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fcvtzu_v32f16_v32i16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT:    fcvtzu z0.h, p0/m, z0.h
; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <32 x half>, ptr %a
  %res = fptoui <32 x half> %op1 to <32 x i16>
  store <32 x i16> %res, ptr %b
  ret void
}

define void @fcvtzu_v64f16_v64i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzu_v64f16_v64i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    fcvtzu z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <64 x half>, ptr %a
  %res = fptoui <64 x half> %op1 to <64 x i16>
  store <64 x i16> %res, ptr %b
  ret void
}

define void @fcvtzu_v128f16_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzu_v128f16_v128i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl128
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    fcvtzu z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <128 x half>, ptr %a
  %res = fptoui <128 x half> %op1 to <128 x i16>
  store <128 x i16> %res, ptr %b
  ret void
}

;
; FCVTZU H -> S
;

; Don't use SVE for 64-bit vectors.
define <2 x i32> @fcvtzu_v2f16_v2i32(<2 x half> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v2f16_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtl v0.4s, v0.4h
; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %res = fptoui <2 x half> %op1 to <2 x i32>
  ret <2 x i32> %res
}

; Don't use SVE for 128-bit vectors.
define <4 x i32> @fcvtzu_v4f16_v4i32(<4 x half> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v4f16_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtl v0.4s, v0.4h
; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
; CHECK-NEXT:    ret
  %res = fptoui <4 x half> %op1 to <4 x i32>
  ret <4 x i32> %res
}

define void @fcvtzu_v8f16_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v8f16_v8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    uunpklo z0.s, z0.h
; CHECK-NEXT:    fcvtzu z0.s, p0/m, z0.h
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <8 x half>, ptr %a
  %res = fptoui <8 x half> %op1 to <8 x i32>
  store <8 x i32> %res, ptr %b
  ret void
}

define void @fcvtzu_v16f16_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvtzu_v16f16_v16i32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    uunpklo z1.s, z0.h
; VBITS_GE_256-NEXT:    ext z0.b, z0.b, z0.b, #16
; VBITS_GE_256-NEXT:    uunpklo z0.s, z0.h
; VBITS_GE_256-NEXT:    fcvtzu z1.s, p0/m, z1.h
; VBITS_GE_256-NEXT:    fcvtzu z0.s, p0/m, z0.h
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x1]
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fcvtzu_v16f16_v16i32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1h { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    fcvtzu z0.s, p0/m, z0.h
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <16 x half>, ptr %a
  %res = fptoui <16 x half> %op1 to <16 x i32>
  store <16 x i32> %res, ptr %b
  ret void
}

define void @fcvtzu_v32f16_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzu_v32f16_v32i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT:    fcvtzu z0.s, p0/m, z0.h
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x half>, ptr %a
  %res = fptoui <32 x half> %op1 to <32 x i32>
  store <32 x i32> %res, ptr %b
  ret void
}

define void @fcvtzu_v64f16_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzu_v64f16_v64i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT:    fcvtzu z0.s, p0/m, z0.h
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <64 x half>, ptr %a
  %res = fptoui <64 x half> %op1 to <64 x i32>
  store <64 x i32> %res, ptr %b
  ret void
}

;
; FCVTZU H -> D
;

; Don't use SVE for 64-bit vectors.
define <1 x i64> @fcvtzu_v1f16_v1i64(<1 x half> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v1f16_v1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtzu x8, h0
; CHECK-NEXT:    fmov d0, x8
; CHECK-NEXT:    ret
  %res = fptoui <1 x half> %op1 to <1 x i64>
  ret <1 x i64> %res
}

; v2f16 is not legal for NEON, so use SVE
define <2 x i64> @fcvtzu_v2f16_v2i64(<2 x half> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v2f16_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    uunpklo z0.s, z0.h
; CHECK-NEXT:    uunpklo z0.d, z0.s
; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.h
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
  %res = fptoui <2 x half> %op1 to <2 x i64>
  ret <2 x i64> %res
}

define void @fcvtzu_v4f16_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v4f16_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    uunpklo z0.s, z0.h
; CHECK-NEXT:    uunpklo z0.d, z0.s
; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.h
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <4 x half>, ptr %a
  %res = fptoui <4 x half> %op1 to <4 x i64>
  store <4 x i64> %res, ptr %b
  ret void
}

define void @fcvtzu_v8f16_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvtzu_v8f16_v8i64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ldr q0, [x0]
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
; VBITS_GE_256-NEXT:    uunpklo z0.s, z0.h
; VBITS_GE_256-NEXT:    uunpklo z1.s, z1.h
; VBITS_GE_256-NEXT:    uunpklo z0.d, z0.s
; VBITS_GE_256-NEXT:    uunpklo z1.d, z1.s
; VBITS_GE_256-NEXT:    fcvtzu z0.d, p0/m, z0.h
; VBITS_GE_256-NEXT:    fcvtzu z1.d, p0/m, z1.h
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x1]
; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fcvtzu_v8f16_v8i64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ldr q0, [x0]
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    uunpklo z0.s, z0.h
; VBITS_GE_512-NEXT:    uunpklo z0.d, z0.s
; VBITS_GE_512-NEXT:    fcvtzu z0.d, p0/m, z0.h
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x half>, ptr %a
  %res = fptoui <8 x half> %op1 to <8 x i64>
  store <8 x i64> %res, ptr %b
  ret void
}

define void @fcvtzu_v16f16_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzu_v16f16_v16i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.h
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <16 x half>, ptr %a
  %res = fptoui <16 x half> %op1 to <16 x i64>
  store <16 x i64> %res, ptr %b
  ret void
}

define void @fcvtzu_v32f16_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzu_v32f16_v32i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.h
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x half>, ptr %a
  %res = fptoui <32 x half> %op1 to <32 x i64>
  store <32 x i64> %res, ptr %b
  ret void
}

;
; FCVTZU S -> H
;

; Don't use SVE for 64-bit vectors.
; NOTE(review): the autogenerated checks expect a *signed* fcvtzs here even
; though the IR is fptoui; regenerate with update_llc_test_checks.py rather
; than editing the expected assembly by hand.
define <2 x i16> @fcvtzu_v2f32_v2i16(<2 x float> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v2f32_v2i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtzs v0.2s, v0.2s
; CHECK-NEXT:    ret
  %res = fptoui <2 x float> %op1 to <2 x i16>
  ret <2 x i16> %res
}

; Don't use SVE for 128-bit vectors.
define <4 x i16> @fcvtzu_v4f32_v4i16(<4 x float> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v4f32_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtzu v1.4s, v0.4s
; CHECK-NEXT:    mov w8, v1.s[1]
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov w9, v1.s[2]
; CHECK-NEXT:    mov v0.h[1], w8
; CHECK-NEXT:    mov w8, v1.s[3]
; CHECK-NEXT:    mov v0.h[2], w9
; CHECK-NEXT:    mov v0.h[3], w8
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %res = fptoui <4 x float> %op1 to <4 x i16>
  ret <4 x i16> %res
}

define <8 x i16> @fcvtzu_v8f32_v8i16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v8f32_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    fcvtzu z0.s, p0/m, z0.s
; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
  %op1 = load <8 x float>, ptr %a
  %res = fptoui <8 x float> %op1 to <8 x i16>
  ret <8 x i16> %res
}

define void @fcvtzu_v16f32_v16i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvtzu_v16f32_v16i16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    fcvtzu z0.s, p0/m, z0.s
; VBITS_GE_256-NEXT:    fcvtzu z1.s, p0/m, z1.s
; VBITS_GE_256-NEXT:    ptrue p0.h, vl8
; VBITS_GE_256-NEXT:    uzp1 z0.h, z0.h, z0.h
; VBITS_GE_256-NEXT:    uzp1 z1.h, z1.h, z1.h
; VBITS_GE_256-NEXT:    splice z1.h, p0, z1.h, z0.h
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    st1h { z1.h }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fcvtzu_v16f32_v16i16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    fcvtzu z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT:    st1h { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <16 x float>, ptr %a
  %res = fptoui <16 x float> %op1 to <16 x i16>
  store <16 x i16> %res, ptr %b
  ret void
}

define void @fcvtzu_v32f32_v32i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzu_v32f32_v32i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    fcvtzu z0.s, p0/m, z0.s
; CHECK-NEXT:    st1h { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x float>, ptr %a
  %res = fptoui <32 x float> %op1 to <32 x i16>
  store <32 x i16> %res, ptr %b
  ret void
}

define void @fcvtzu_v64f32_v64i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzu_v64f32_v64i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    fcvtzu z0.s, p0/m, z0.s
; CHECK-NEXT:    st1h { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <64 x float>, ptr %a
  %res = fptoui <64 x float> %op1 to <64 x i16>
  store <64 x i16> %res, ptr %b
  ret void
}

;
; FCVTZU S -> S
;

; Don't use SVE for 64-bit vectors.
define <2 x i32> @fcvtzu_v2f32_v2i32(<2 x float> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v2f32_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtzu v0.2s, v0.2s
; CHECK-NEXT:    ret
  %res = fptoui <2 x float> %op1 to <2 x i32>
  ret <2 x i32> %res
}

; Don't use SVE for 128-bit vectors.
define <4 x i32> @fcvtzu_v4f32_v4i32(<4 x float> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v4f32_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
; CHECK-NEXT:    ret
  %res = fptoui <4 x float> %op1 to <4 x i32>
  ret <4 x i32> %res
}

define void @fcvtzu_v8f32_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v8f32_v8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    fcvtzu z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <8 x float>, ptr %a
  %res = fptoui <8 x float> %op1 to <8 x i32>
  store <8 x i32> %res, ptr %b
  ret void
}

; No vscale_range attribute here, so both the 256-bit (two registers) and
; 512-bit (single register) check variants apply.
define void @fcvtzu_v16f32_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvtzu_v16f32_v16i32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    fcvtzu z0.s, p0/m, z0.s
; VBITS_GE_256-NEXT:    fcvtzu z1.s, p0/m, z1.s
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fcvtzu_v16f32_v16i32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    fcvtzu z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <16 x float>, ptr %a
  %res = fptoui <16 x float> %op1 to <16 x i32>
  store <16 x i32> %res, ptr %b
  ret void
}

define void @fcvtzu_v32f32_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzu_v32f32_v32i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    fcvtzu z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x float>, ptr %a
  %res = fptoui <32 x float> %op1 to <32 x i32>
  store <32 x i32> %res, ptr %b
  ret void
}

define void @fcvtzu_v64f32_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzu_v64f32_v64i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    fcvtzu z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <64 x float>, ptr %a
  %res = fptoui <64 x float> %op1 to <64 x i32>
  store <64 x i32> %res, ptr %b
  ret void
}

;
; FCVTZU S -> D
;

; Don't use SVE for 64-bit vectors.
define <1 x i64> @fcvtzu_v1f32_v1i64(<1 x float> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v1f32_v1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtl v0.2d, v0.2s
; CHECK-NEXT:    fcvtzu v0.2d, v0.2d
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %res = fptoui <1 x float> %op1 to <1 x i64>
  ret <1 x i64> %res
}

; Don't use SVE for 128-bit vectors.
define <2 x i64> @fcvtzu_v2f32_v2i64(<2 x float> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v2f32_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtl v0.2d, v0.2s
; CHECK-NEXT:    fcvtzu v0.2d, v0.2d
; CHECK-NEXT:    ret
  %res = fptoui <2 x float> %op1 to <2 x i64>
  ret <2 x i64> %res
}

define void @fcvtzu_v4f32_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v4f32_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    uunpklo z0.d, z0.s
; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.s
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <4 x float>, ptr %a
  %res = fptoui <4 x float> %op1 to <4 x i64>
  store <4 x i64> %res, ptr %b
  ret void
}

define void @fcvtzu_v8f32_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvtzu_v8f32_v8i64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    uunpklo z1.d, z0.s
; VBITS_GE_256-NEXT:    ext z0.b, z0.b, z0.b, #16
; VBITS_GE_256-NEXT:    uunpklo z0.d, z0.s
; VBITS_GE_256-NEXT:    fcvtzu z1.d, p0/m, z1.s
; VBITS_GE_256-NEXT:    fcvtzu z0.d, p0/m, z0.s
; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x1]
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fcvtzu_v8f32_v8i64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1w { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    fcvtzu z0.d, p0/m, z0.s
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x float>, ptr %a
  %res = fptoui <8 x float> %op1 to <8 x i64>
  store <8 x i64> %res, ptr %b
  ret void
}

define void @fcvtzu_v16f32_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzu_v16f32_v16i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.s
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <16 x float>, ptr %a
  %res = fptoui <16 x float> %op1 to <16 x i64>
  store <16 x i64> %res, ptr %b
  ret void
}

define void @fcvtzu_v32f32_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzu_v32f32_v32i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.s
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x float>, ptr %a
  %res = fptoui <32 x float> %op1 to <32 x i64>
  store <32 x i64> %res, ptr %b
  ret void
}


;
; FCVTZU D -> H
;

; v1f64 is preferred to be widened to v4f64, so use SVE
define <1 x i16> @fcvtzu_v1f64_v1i16(<1 x double> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v1f64_v1i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %res = fptoui <1 x double> %op1 to <1 x i16>
  ret <1 x i16> %res
}

; Don't use SVE for 128-bit vectors.
; NOTE(review): the autogenerated checks expect a *signed* fcvtzs here even
; though the IR is fptoui; regenerate with update_llc_test_checks.py rather
; than editing the expected assembly by hand.
define <2 x i16> @fcvtzu_v2f64_v2i16(<2 x double> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v2f64_v2i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
; CHECK-NEXT:    xtn v0.2s, v0.2d
; CHECK-NEXT:    ret
  %res = fptoui <2 x double> %op1 to <2 x i16>
  ret <2 x i16> %res
}

define <4 x i16> @fcvtzu_v4f64_v4i16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v4f64_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %op1 = load <4 x double>, ptr %a
  %res = fptoui <4 x double> %op1 to <4 x i16>
  ret <4 x i16> %res
}

define <8 x i16> @fcvtzu_v8f64_v8i16(ptr %a) #0 {
; VBITS_GE_256-LABEL: fcvtzu_v8f64_v8i16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    fcvtzu z0.d, p0/m, z0.d
; VBITS_GE_256-NEXT:    fcvtzu z1.d, p0/m, z1.d
; VBITS_GE_256-NEXT:    uzp1 z0.s, z0.s, z0.s
; VBITS_GE_256-NEXT:    uzp1 z1.s, z1.s, z1.s
; VBITS_GE_256-NEXT:    uzp1 z2.h, z0.h, z0.h
; VBITS_GE_256-NEXT:    uzp1 z0.h, z1.h, z1.h
; VBITS_GE_256-NEXT:    mov v0.d[1], v2.d[0]
; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 killed $z0
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fcvtzu_v8f64_v8i16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    fcvtzu z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT:    uzp1 z0.s, z0.s, z0.s
; VBITS_GE_512-NEXT:    uzp1 z0.h, z0.h, z0.h
; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 killed $z0
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x double>, ptr %a
  %res = fptoui <8 x double> %op1 to <8 x i16>
  ret <8 x i16> %res
}

define void @fcvtzu_v16f64_v16i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzu_v16f64_v16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT:    st1h { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <16 x double>, ptr %a
  %res = fptoui <16 x double> %op1 to <16 x i16>
  store <16 x i16> %res, ptr %b
  ret void
}

define void @fcvtzu_v32f64_v32i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzu_v32f64_v32i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT:    st1h { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x double>, ptr %a
  %res = fptoui <32 x double> %op1 to <32 x i16>
  store <32 x i16> %res, ptr %b
  ret void
}

;
; FCVTZU D -> S
;

; Don't use SVE for 64-bit vectors.
define <1 x i32> @fcvtzu_v1f64_v1i32(<1 x double> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v1f64_v1i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fcvtzu v0.2d, v0.2d
; CHECK-NEXT:    xtn v0.2s, v0.2d
; CHECK-NEXT:    ret
  %res = fptoui <1 x double> %op1 to <1 x i32>
  ret <1 x i32> %res
}

; Don't use SVE for 128-bit vectors.
define <2 x i32> @fcvtzu_v2f64_v2i32(<2 x double> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v2f64_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtzu v0.2d, v0.2d
; CHECK-NEXT:    xtn v0.2s, v0.2d
; CHECK-NEXT:    ret
  %res = fptoui <2 x double> %op1 to <2 x i32>
  ret <2 x i32> %res
}

define <4 x i32> @fcvtzu_v4f64_v4i32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v4f64_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
  %op1 = load <4 x double>, ptr %a
  %res = fptoui <4 x double> %op1 to <4 x i32>
  ret <4 x i32> %res
}

define void @fcvtzu_v8f64_v8i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvtzu_v8f64_v8i32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    fcvtzu z0.d, p0/m, z0.d
; VBITS_GE_256-NEXT:    fcvtzu z1.d, p0/m, z1.d
; VBITS_GE_256-NEXT:    ptrue p0.s, vl4
; VBITS_GE_256-NEXT:    uzp1 z0.s, z0.s, z0.s
; VBITS_GE_256-NEXT:    uzp1 z1.s, z1.s, z1.s
; VBITS_GE_256-NEXT:    splice z1.s, p0, z1.s, z0.s
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fcvtzu_v8f64_v8i32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    fcvtzu z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT:    st1w { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x double>, ptr %a
  %res = fptoui <8 x double> %op1 to <8 x i32>
  store <8 x i32> %res, ptr %b
  ret void
}

define void @fcvtzu_v16f64_v16i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzu_v16f64_v16i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT:    st1w { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <16 x double>, ptr %a
  %res = fptoui <16 x double> %op1 to <16 x i32>
  store <16 x i32> %res, ptr %b
  ret void
}

define void @fcvtzu_v32f64_v32i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzu_v32f64_v32i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT:    st1w { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x double>, ptr %a
  %res = fptoui <32 x double> %op1 to <32 x i32>
  store <32 x i32> %res, ptr %b
  ret void
}

;
; FCVTZU D -> D
;

; Don't use SVE for 64-bit vectors.
define <1 x i64> @fcvtzu_v1f64_v1i64(<1 x double> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v1f64_v1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtzu x8, d0
; CHECK-NEXT:    fmov d0, x8
; CHECK-NEXT:    ret
  %res = fptoui <1 x double> %op1 to <1 x i64>
  ret <1 x i64> %res
}

; Don't use SVE for 128-bit vectors.
define <2 x i64> @fcvtzu_v2f64_v2i64(<2 x double> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v2f64_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtzu v0.2d, v0.2d
; CHECK-NEXT:    ret
  %res = fptoui <2 x double> %op1 to <2 x i64>
  ret <2 x i64> %res
}

define void @fcvtzu_v4f64_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v4f64_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <4 x double>, ptr %a
  %res = fptoui <4 x double> %op1 to <4 x i64>
  store <4 x i64> %res, ptr %b
  ret void
}

; No vscale_range attribute here, so both the 256-bit (two registers) and
; 512-bit (single register) check variants apply.
define void @fcvtzu_v8f64_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvtzu_v8f64_v8i64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    fcvtzu z0.d, p0/m, z0.d
; VBITS_GE_256-NEXT:    fcvtzu z1.d, p0/m, z1.d
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fcvtzu_v8f64_v8i64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    fcvtzu z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x double>, ptr %a
  %res = fptoui <8 x double> %op1 to <8 x i64>
  store <8 x i64> %res, ptr %b
  ret void
}

define void @fcvtzu_v16f64_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzu_v16f64_v16i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <16 x double>, ptr %a
  %res = fptoui <16 x double> %op1 to <16 x i64>
  store <16 x i64> %res, ptr %b
  ret void
}

define void @fcvtzu_v32f64_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzu_v32f64_v32i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x double>, ptr %a
  %res = fptoui <32 x double> %op1 to <32 x i64>
  store <32 x i64> %res, ptr %b
  ret void
}

;
; FCVTZS H -> H
;

; Don't use SVE for 64-bit vectors.
define <4 x i16> @fcvtzs_v4f16_v4i16(<4 x half> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzs_v4f16_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtzs v0.4h, v0.4h
; CHECK-NEXT:    ret
  %res = fptosi <4 x half> %op1 to <4 x i16>
  ret <4 x i16> %res
}

; Don't use SVE for 128-bit vectors.
define void @fcvtzs_v8f16_v8i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzs_v8f16_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    fcvtzs v0.8h, v0.8h
; CHECK-NEXT:    str q0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <8 x half>, ptr %a
  %res = fptosi <8 x half> %op1 to <8 x i16>
  store <8 x i16> %res, ptr %b
  ret void
}

define void @fcvtzs_v16f16_v16i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzs_v16f16_v16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    fcvtzs z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <16 x half>, ptr %a
  %res = fptosi <16 x half> %op1 to <16 x i16>
  store <16 x i16> %res, ptr %b
  ret void
}

; No vscale_range attribute here, so both the 256-bit (two registers) and
; 512-bit (single register) check variants apply.
define void @fcvtzs_v32f16_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvtzs_v32f16_v32i16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0]
; VBITS_GE_256-NEXT:    fcvtzs z0.h, p0/m, z0.h
; VBITS_GE_256-NEXT:    fcvtzs z1.h, p0/m, z1.h
; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x1, x8, lsl #1]
; VBITS_GE_256-NEXT:    st1h { z1.h }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fcvtzs_v32f16_v32i16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT:    fcvtzs z0.h, p0/m, z0.h
; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <32 x half>, ptr %a
  %res = fptosi <32 x half> %op1 to <32 x i16>
  store <32 x i16> %res, ptr %b
  ret void
}

define void @fcvtzs_v64f16_v64i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzs_v64f16_v64i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    fcvtzs z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <64 x half>, ptr %a
  %res = fptosi <64 x half> %op1 to <64 x i16>
  store <64 x i16> %res, ptr %b
  ret void
}

define void @fcvtzs_v128f16_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzs_v128f16_v128i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl128
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    fcvtzs z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <128 x half>, ptr %a
  %res = fptosi <128 x half> %op1 to <128 x i16>
  store <128 x i16> %res, ptr %b
  ret void
}

;
; FCVTZS H -> S
;

; Don't use SVE for 64-bit vectors.
define <2 x i32> @fcvtzs_v2f16_v2i32(<2 x half> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzs_v2f16_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtl v0.4s, v0.4h
; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %res = fptosi <2 x half> %op1 to <2 x i32>
  ret <2 x i32> %res
}

; Don't use SVE for 128-bit vectors.
1014define <4 x i32> @fcvtzs_v4f16_v4i32(<4 x half> %op1) vscale_range(2,0) #0 { 1015; CHECK-LABEL: fcvtzs_v4f16_v4i32: 1016; CHECK: // %bb.0: 1017; CHECK-NEXT: fcvtl v0.4s, v0.4h 1018; CHECK-NEXT: fcvtzs v0.4s, v0.4s 1019; CHECK-NEXT: ret 1020 %res = fptosi <4 x half> %op1 to <4 x i32> 1021 ret <4 x i32> %res 1022} 1023 1024define void @fcvtzs_v8f16_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 { 1025; CHECK-LABEL: fcvtzs_v8f16_v8i32: 1026; CHECK: // %bb.0: 1027; CHECK-NEXT: ldr q0, [x0] 1028; CHECK-NEXT: ptrue p0.s, vl8 1029; CHECK-NEXT: uunpklo z0.s, z0.h 1030; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.h 1031; CHECK-NEXT: st1w { z0.s }, p0, [x1] 1032; CHECK-NEXT: ret 1033 %op1 = load <8 x half>, ptr %a 1034 %res = fptosi <8 x half> %op1 to <8 x i32> 1035 store <8 x i32> %res, ptr %b 1036 ret void 1037} 1038 1039define void @fcvtzs_v16f16_v16i32(ptr %a, ptr %b) #0 { 1040; VBITS_GE_256-LABEL: fcvtzs_v16f16_v16i32: 1041; VBITS_GE_256: // %bb.0: 1042; VBITS_GE_256-NEXT: ptrue p0.h, vl16 1043; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 1044; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0] 1045; VBITS_GE_256-NEXT: ptrue p0.s, vl8 1046; VBITS_GE_256-NEXT: uunpklo z1.s, z0.h 1047; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16 1048; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h 1049; VBITS_GE_256-NEXT: fcvtzs z1.s, p0/m, z1.h 1050; VBITS_GE_256-NEXT: fcvtzs z0.s, p0/m, z0.h 1051; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x1] 1052; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1, x8, lsl #2] 1053; VBITS_GE_256-NEXT: ret 1054; 1055; VBITS_GE_512-LABEL: fcvtzs_v16f16_v16i32: 1056; VBITS_GE_512: // %bb.0: 1057; VBITS_GE_512-NEXT: ptrue p0.s, vl16 1058; VBITS_GE_512-NEXT: ld1h { z0.s }, p0/z, [x0] 1059; VBITS_GE_512-NEXT: fcvtzs z0.s, p0/m, z0.h 1060; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1] 1061; VBITS_GE_512-NEXT: ret 1062 %op1 = load <16 x half>, ptr %a 1063 %res = fptosi <16 x half> %op1 to <16 x i32> 1064 store <16 x i32> %res, ptr %b 1065 ret void 1066} 1067 1068define void @fcvtzs_v32f16_v32i32(ptr %a, 
ptr %b) vscale_range(8,0) #0 { 1069; CHECK-LABEL: fcvtzs_v32f16_v32i32: 1070; CHECK: // %bb.0: 1071; CHECK-NEXT: ptrue p0.s, vl32 1072; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0] 1073; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.h 1074; CHECK-NEXT: st1w { z0.s }, p0, [x1] 1075; CHECK-NEXT: ret 1076 %op1 = load <32 x half>, ptr %a 1077 %res = fptosi <32 x half> %op1 to <32 x i32> 1078 store <32 x i32> %res, ptr %b 1079 ret void 1080} 1081 1082define void @fcvtzs_v64f16_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 { 1083; CHECK-LABEL: fcvtzs_v64f16_v64i32: 1084; CHECK: // %bb.0: 1085; CHECK-NEXT: ptrue p0.s, vl64 1086; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0] 1087; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.h 1088; CHECK-NEXT: st1w { z0.s }, p0, [x1] 1089; CHECK-NEXT: ret 1090 %op1 = load <64 x half>, ptr %a 1091 %res = fptosi <64 x half> %op1 to <64 x i32> 1092 store <64 x i32> %res, ptr %b 1093 ret void 1094} 1095 1096; 1097; FCVTZS H -> D 1098; 1099 1100; Don't use SVE for 64-bit vectors. 1101define <1 x i64> @fcvtzs_v1f16_v1i64(<1 x half> %op1) vscale_range(2,0) #0 { 1102; CHECK-LABEL: fcvtzs_v1f16_v1i64: 1103; CHECK: // %bb.0: 1104; CHECK-NEXT: fcvtzs x8, h0 1105; CHECK-NEXT: fmov d0, x8 1106; CHECK-NEXT: ret 1107 %res = fptosi <1 x half> %op1 to <1 x i64> 1108 ret <1 x i64> %res 1109} 1110 1111; v2f16 is not legal for NEON, so use SVE 1112define <2 x i64> @fcvtzs_v2f16_v2i64(<2 x half> %op1) vscale_range(2,0) #0 { 1113; CHECK-LABEL: fcvtzs_v2f16_v2i64: 1114; CHECK: // %bb.0: 1115; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 1116; CHECK-NEXT: ptrue p0.d, vl4 1117; CHECK-NEXT: uunpklo z0.s, z0.h 1118; CHECK-NEXT: uunpklo z0.d, z0.s 1119; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h 1120; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 1121; CHECK-NEXT: ret 1122 %res = fptosi <2 x half> %op1 to <2 x i64> 1123 ret <2 x i64> %res 1124} 1125 1126define void @fcvtzs_v4f16_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 { 1127; CHECK-LABEL: fcvtzs_v4f16_v4i64: 1128; CHECK: // %bb.0: 1129; CHECK-NEXT: 
ldr d0, [x0] 1130; CHECK-NEXT: ptrue p0.d, vl4 1131; CHECK-NEXT: uunpklo z0.s, z0.h 1132; CHECK-NEXT: uunpklo z0.d, z0.s 1133; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h 1134; CHECK-NEXT: st1d { z0.d }, p0, [x1] 1135; CHECK-NEXT: ret 1136 %op1 = load <4 x half>, ptr %a 1137 %res = fptosi <4 x half> %op1 to <4 x i64> 1138 store <4 x i64> %res, ptr %b 1139 ret void 1140} 1141 1142define void @fcvtzs_v8f16_v8i64(ptr %a, ptr %b) #0 { 1143; VBITS_GE_256-LABEL: fcvtzs_v8f16_v8i64: 1144; VBITS_GE_256: // %bb.0: 1145; VBITS_GE_256-NEXT: ldr q0, [x0] 1146; VBITS_GE_256-NEXT: ptrue p0.d, vl4 1147; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 1148; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8 1149; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h 1150; VBITS_GE_256-NEXT: uunpklo z1.s, z1.h 1151; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s 1152; VBITS_GE_256-NEXT: uunpklo z1.d, z1.s 1153; VBITS_GE_256-NEXT: fcvtzs z0.d, p0/m, z0.h 1154; VBITS_GE_256-NEXT: fcvtzs z1.d, p0/m, z1.h 1155; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1] 1156; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1, x8, lsl #3] 1157; VBITS_GE_256-NEXT: ret 1158; 1159; VBITS_GE_512-LABEL: fcvtzs_v8f16_v8i64: 1160; VBITS_GE_512: // %bb.0: 1161; VBITS_GE_512-NEXT: ldr q0, [x0] 1162; VBITS_GE_512-NEXT: ptrue p0.d, vl8 1163; VBITS_GE_512-NEXT: uunpklo z0.s, z0.h 1164; VBITS_GE_512-NEXT: uunpklo z0.d, z0.s 1165; VBITS_GE_512-NEXT: fcvtzs z0.d, p0/m, z0.h 1166; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1] 1167; VBITS_GE_512-NEXT: ret 1168 %op1 = load <8 x half>, ptr %a 1169 %res = fptosi <8 x half> %op1 to <8 x i64> 1170 store <8 x i64> %res, ptr %b 1171 ret void 1172} 1173 1174define void @fcvtzs_v16f16_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 { 1175; CHECK-LABEL: fcvtzs_v16f16_v16i64: 1176; CHECK: // %bb.0: 1177; CHECK-NEXT: ptrue p0.d, vl16 1178; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0] 1179; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h 1180; CHECK-NEXT: st1d { z0.d }, p0, [x1] 1181; CHECK-NEXT: ret 1182 %op1 = load <16 x half>, ptr %a 1183 %res = fptosi <16 
x half> %op1 to <16 x i64> 1184 store <16 x i64> %res, ptr %b 1185 ret void 1186} 1187 1188define void @fcvtzs_v32f16_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 { 1189; CHECK-LABEL: fcvtzs_v32f16_v32i64: 1190; CHECK: // %bb.0: 1191; CHECK-NEXT: ptrue p0.d, vl32 1192; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0] 1193; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h 1194; CHECK-NEXT: st1d { z0.d }, p0, [x1] 1195; CHECK-NEXT: ret 1196 %op1 = load <32 x half>, ptr %a 1197 %res = fptosi <32 x half> %op1 to <32 x i64> 1198 store <32 x i64> %res, ptr %b 1199 ret void 1200} 1201 1202; 1203; FCVTZS S -> H 1204; 1205 1206; Don't use SVE for 64-bit vectors. 1207define <2 x i16> @fcvtzs_v2f32_v2i16(<2 x float> %op1) vscale_range(2,0) #0 { 1208; CHECK-LABEL: fcvtzs_v2f32_v2i16: 1209; CHECK: // %bb.0: 1210; CHECK-NEXT: fcvtzs v0.2s, v0.2s 1211; CHECK-NEXT: ret 1212 %res = fptosi <2 x float> %op1 to <2 x i16> 1213 ret <2 x i16> %res 1214} 1215 1216; Don't use SVE for 128-bit vectors. 1217define <4 x i16> @fcvtzs_v4f32_v4i16(<4 x float> %op1) vscale_range(2,0) #0 { 1218; CHECK-LABEL: fcvtzs_v4f32_v4i16: 1219; CHECK: // %bb.0: 1220; CHECK-NEXT: fcvtzs v1.4s, v0.4s 1221; CHECK-NEXT: mov w8, v1.s[1] 1222; CHECK-NEXT: mov v0.16b, v1.16b 1223; CHECK-NEXT: mov w9, v1.s[2] 1224; CHECK-NEXT: mov v0.h[1], w8 1225; CHECK-NEXT: mov w8, v1.s[3] 1226; CHECK-NEXT: mov v0.h[2], w9 1227; CHECK-NEXT: mov v0.h[3], w8 1228; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 1229; CHECK-NEXT: ret 1230 %res = fptosi <4 x float> %op1 to <4 x i16> 1231 ret <4 x i16> %res 1232} 1233 1234define <8 x i16> @fcvtzs_v8f32_v8i16(ptr %a) vscale_range(2,0) #0 { 1235; CHECK-LABEL: fcvtzs_v8f32_v8i16: 1236; CHECK: // %bb.0: 1237; CHECK-NEXT: ptrue p0.s, vl8 1238; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] 1239; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s 1240; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h 1241; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 1242; CHECK-NEXT: ret 1243 %op1 = load <8 x float>, ptr %a 1244 %res = fptosi <8 x float> %op1 
to <8 x i16> 1245 ret <8 x i16> %res 1246} 1247 1248define void @fcvtzs_v16f32_v16i16(ptr %a, ptr %b) #0 { 1249; VBITS_GE_256-LABEL: fcvtzs_v16f32_v16i16: 1250; VBITS_GE_256: // %bb.0: 1251; VBITS_GE_256-NEXT: ptrue p0.s, vl8 1252; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 1253; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] 1254; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] 1255; VBITS_GE_256-NEXT: fcvtzs z0.s, p0/m, z0.s 1256; VBITS_GE_256-NEXT: fcvtzs z1.s, p0/m, z1.s 1257; VBITS_GE_256-NEXT: ptrue p0.h, vl8 1258; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h 1259; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h 1260; VBITS_GE_256-NEXT: splice z1.h, p0, z1.h, z0.h 1261; VBITS_GE_256-NEXT: ptrue p0.h, vl16 1262; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x1] 1263; VBITS_GE_256-NEXT: ret 1264; 1265; VBITS_GE_512-LABEL: fcvtzs_v16f32_v16i16: 1266; VBITS_GE_512: // %bb.0: 1267; VBITS_GE_512-NEXT: ptrue p0.s, vl16 1268; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0] 1269; VBITS_GE_512-NEXT: fcvtzs z0.s, p0/m, z0.s 1270; VBITS_GE_512-NEXT: st1h { z0.s }, p0, [x1] 1271; VBITS_GE_512-NEXT: ret 1272 %op1 = load <16 x float>, ptr %a 1273 %res = fptosi <16 x float> %op1 to <16 x i16> 1274 store <16 x i16> %res, ptr %b 1275 ret void 1276} 1277 1278define void @fcvtzs_v32f32_v32i16(ptr %a, ptr %b) vscale_range(8,0) #0 { 1279; CHECK-LABEL: fcvtzs_v32f32_v32i16: 1280; CHECK: // %bb.0: 1281; CHECK-NEXT: ptrue p0.s, vl32 1282; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] 1283; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s 1284; CHECK-NEXT: st1h { z0.s }, p0, [x1] 1285; CHECK-NEXT: ret 1286 %op1 = load <32 x float>, ptr %a 1287 %res = fptosi <32 x float> %op1 to <32 x i16> 1288 store <32 x i16> %res, ptr %b 1289 ret void 1290} 1291 1292define void @fcvtzs_v64f32_v64i16(ptr %a, ptr %b) vscale_range(16,0) #0 { 1293; CHECK-LABEL: fcvtzs_v64f32_v64i16: 1294; CHECK: // %bb.0: 1295; CHECK-NEXT: ptrue p0.s, vl64 1296; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] 1297; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s 1298; CHECK-NEXT: 
st1h { z0.s }, p0, [x1] 1299; CHECK-NEXT: ret 1300 %op1 = load <64 x float>, ptr %a 1301 %res = fptosi <64 x float> %op1 to <64 x i16> 1302 store <64 x i16> %res, ptr %b 1303 ret void 1304} 1305 1306; 1307; FCVTZS S -> S 1308; 1309 1310; Don't use SVE for 64-bit vectors. 1311define <2 x i32> @fcvtzs_v2f32_v2i32(<2 x float> %op1) vscale_range(2,0) #0 { 1312; CHECK-LABEL: fcvtzs_v2f32_v2i32: 1313; CHECK: // %bb.0: 1314; CHECK-NEXT: fcvtzs v0.2s, v0.2s 1315; CHECK-NEXT: ret 1316 %res = fptosi <2 x float> %op1 to <2 x i32> 1317 ret <2 x i32> %res 1318} 1319 1320; Don't use SVE for 128-bit vectors. 1321define <4 x i32> @fcvtzs_v4f32_v4i32(<4 x float> %op1) vscale_range(2,0) #0 { 1322; CHECK-LABEL: fcvtzs_v4f32_v4i32: 1323; CHECK: // %bb.0: 1324; CHECK-NEXT: fcvtzs v0.4s, v0.4s 1325; CHECK-NEXT: ret 1326 %res = fptosi <4 x float> %op1 to <4 x i32> 1327 ret <4 x i32> %res 1328} 1329 1330define void @fcvtzs_v8f32_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 { 1331; CHECK-LABEL: fcvtzs_v8f32_v8i32: 1332; CHECK: // %bb.0: 1333; CHECK-NEXT: ptrue p0.s, vl8 1334; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] 1335; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s 1336; CHECK-NEXT: st1w { z0.s }, p0, [x1] 1337; CHECK-NEXT: ret 1338 %op1 = load <8 x float>, ptr %a 1339 %res = fptosi <8 x float> %op1 to <8 x i32> 1340 store <8 x i32> %res, ptr %b 1341 ret void 1342} 1343 1344define void @fcvtzs_v16f32_v16i32(ptr %a, ptr %b) #0 { 1345; VBITS_GE_256-LABEL: fcvtzs_v16f32_v16i32: 1346; VBITS_GE_256: // %bb.0: 1347; VBITS_GE_256-NEXT: ptrue p0.s, vl8 1348; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 1349; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] 1350; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] 1351; VBITS_GE_256-NEXT: fcvtzs z0.s, p0/m, z0.s 1352; VBITS_GE_256-NEXT: fcvtzs z1.s, p0/m, z1.s 1353; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1, x8, lsl #2] 1354; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x1] 1355; VBITS_GE_256-NEXT: ret 1356; 1357; VBITS_GE_512-LABEL: fcvtzs_v16f32_v16i32: 1358; 
VBITS_GE_512: // %bb.0: 1359; VBITS_GE_512-NEXT: ptrue p0.s, vl16 1360; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0] 1361; VBITS_GE_512-NEXT: fcvtzs z0.s, p0/m, z0.s 1362; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1] 1363; VBITS_GE_512-NEXT: ret 1364 %op1 = load <16 x float>, ptr %a 1365 %res = fptosi <16 x float> %op1 to <16 x i32> 1366 store <16 x i32> %res, ptr %b 1367 ret void 1368} 1369 1370define void @fcvtzs_v32f32_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 { 1371; CHECK-LABEL: fcvtzs_v32f32_v32i32: 1372; CHECK: // %bb.0: 1373; CHECK-NEXT: ptrue p0.s, vl32 1374; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] 1375; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s 1376; CHECK-NEXT: st1w { z0.s }, p0, [x1] 1377; CHECK-NEXT: ret 1378 %op1 = load <32 x float>, ptr %a 1379 %res = fptosi <32 x float> %op1 to <32 x i32> 1380 store <32 x i32> %res, ptr %b 1381 ret void 1382} 1383 1384define void @fcvtzs_v64f32_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 { 1385; CHECK-LABEL: fcvtzs_v64f32_v64i32: 1386; CHECK: // %bb.0: 1387; CHECK-NEXT: ptrue p0.s, vl64 1388; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] 1389; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s 1390; CHECK-NEXT: st1w { z0.s }, p0, [x1] 1391; CHECK-NEXT: ret 1392 %op1 = load <64 x float>, ptr %a 1393 %res = fptosi <64 x float> %op1 to <64 x i32> 1394 store <64 x i32> %res, ptr %b 1395 ret void 1396} 1397 1398; 1399; FCVTZS S -> D 1400; 1401 1402; Don't use SVE for 64-bit vectors. 1403define <1 x i64> @fcvtzs_v1f32_v1i64(<1 x float> %op1) vscale_range(2,0) #0 { 1404; CHECK-LABEL: fcvtzs_v1f32_v1i64: 1405; CHECK: // %bb.0: 1406; CHECK-NEXT: fcvtl v0.2d, v0.2s 1407; CHECK-NEXT: fcvtzs v0.2d, v0.2d 1408; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 1409; CHECK-NEXT: ret 1410 %res = fptosi <1 x float> %op1 to <1 x i64> 1411 ret <1 x i64> %res 1412} 1413 1414; Don't use SVE for 128-bit vectors. 
1415define <2 x i64> @fcvtzs_v2f32_v2i64(<2 x float> %op1) vscale_range(2,0) #0 { 1416; CHECK-LABEL: fcvtzs_v2f32_v2i64: 1417; CHECK: // %bb.0: 1418; CHECK-NEXT: fcvtl v0.2d, v0.2s 1419; CHECK-NEXT: fcvtzs v0.2d, v0.2d 1420; CHECK-NEXT: ret 1421 %res = fptosi <2 x float> %op1 to <2 x i64> 1422 ret <2 x i64> %res 1423} 1424 1425define void @fcvtzs_v4f32_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 { 1426; CHECK-LABEL: fcvtzs_v4f32_v4i64: 1427; CHECK: // %bb.0: 1428; CHECK-NEXT: ldr q0, [x0] 1429; CHECK-NEXT: ptrue p0.d, vl4 1430; CHECK-NEXT: uunpklo z0.d, z0.s 1431; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s 1432; CHECK-NEXT: st1d { z0.d }, p0, [x1] 1433; CHECK-NEXT: ret 1434 %op1 = load <4 x float>, ptr %a 1435 %res = fptosi <4 x float> %op1 to <4 x i64> 1436 store <4 x i64> %res, ptr %b 1437 ret void 1438} 1439 1440define void @fcvtzs_v8f32_v8i64(ptr %a, ptr %b) #0 { 1441; VBITS_GE_256-LABEL: fcvtzs_v8f32_v8i64: 1442; VBITS_GE_256: // %bb.0: 1443; VBITS_GE_256-NEXT: ptrue p0.s, vl8 1444; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 1445; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0] 1446; VBITS_GE_256-NEXT: ptrue p0.d, vl4 1447; VBITS_GE_256-NEXT: uunpklo z1.d, z0.s 1448; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16 1449; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s 1450; VBITS_GE_256-NEXT: fcvtzs z1.d, p0/m, z1.s 1451; VBITS_GE_256-NEXT: fcvtzs z0.d, p0/m, z0.s 1452; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1] 1453; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1, x8, lsl #3] 1454; VBITS_GE_256-NEXT: ret 1455; 1456; VBITS_GE_512-LABEL: fcvtzs_v8f32_v8i64: 1457; VBITS_GE_512: // %bb.0: 1458; VBITS_GE_512-NEXT: ptrue p0.d, vl8 1459; VBITS_GE_512-NEXT: ld1w { z0.d }, p0/z, [x0] 1460; VBITS_GE_512-NEXT: fcvtzs z0.d, p0/m, z0.s 1461; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1] 1462; VBITS_GE_512-NEXT: ret 1463 %op1 = load <8 x float>, ptr %a 1464 %res = fptosi <8 x float> %op1 to <8 x i64> 1465 store <8 x i64> %res, ptr %b 1466 ret void 1467} 1468 1469define void @fcvtzs_v16f32_v16i64(ptr %a, ptr 
%b) vscale_range(8,0) #0 { 1470; CHECK-LABEL: fcvtzs_v16f32_v16i64: 1471; CHECK: // %bb.0: 1472; CHECK-NEXT: ptrue p0.d, vl16 1473; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0] 1474; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s 1475; CHECK-NEXT: st1d { z0.d }, p0, [x1] 1476; CHECK-NEXT: ret 1477 %op1 = load <16 x float>, ptr %a 1478 %res = fptosi <16 x float> %op1 to <16 x i64> 1479 store <16 x i64> %res, ptr %b 1480 ret void 1481} 1482 1483define void @fcvtzs_v32f32_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 { 1484; CHECK-LABEL: fcvtzs_v32f32_v32i64: 1485; CHECK: // %bb.0: 1486; CHECK-NEXT: ptrue p0.d, vl32 1487; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0] 1488; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s 1489; CHECK-NEXT: st1d { z0.d }, p0, [x1] 1490; CHECK-NEXT: ret 1491 %op1 = load <32 x float>, ptr %a 1492 %res = fptosi <32 x float> %op1 to <32 x i64> 1493 store <32 x i64> %res, ptr %b 1494 ret void 1495} 1496 1497 1498; 1499; FCVTZS D -> H 1500; 1501 1502; v1f64 is preferred to be widened to v4f64, so use SVE 1503define <1 x i16> @fcvtzs_v1f64_v1i16(<1 x double> %op1) vscale_range(2,0) #0 { 1504; CHECK-LABEL: fcvtzs_v1f64_v1i16: 1505; CHECK: // %bb.0: 1506; CHECK-NEXT: ptrue p0.d, vl4 1507; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 1508; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d 1509; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s 1510; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h 1511; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 1512; CHECK-NEXT: ret 1513 %res = fptosi <1 x double> %op1 to <1 x i16> 1514 ret <1 x i16> %res 1515} 1516 1517; Don't use SVE for 128-bit vectors. 
1518define <2 x i16> @fcvtzs_v2f64_v2i16(<2 x double> %op1) vscale_range(2,0) #0 { 1519; CHECK-LABEL: fcvtzs_v2f64_v2i16: 1520; CHECK: // %bb.0: 1521; CHECK-NEXT: fcvtzs v0.2d, v0.2d 1522; CHECK-NEXT: xtn v0.2s, v0.2d 1523; CHECK-NEXT: ret 1524 %res = fptosi <2 x double> %op1 to <2 x i16> 1525 ret <2 x i16> %res 1526} 1527 1528define <4 x i16> @fcvtzs_v4f64_v4i16(ptr %a) vscale_range(2,0) #0 { 1529; CHECK-LABEL: fcvtzs_v4f64_v4i16: 1530; CHECK: // %bb.0: 1531; CHECK-NEXT: ptrue p0.d, vl4 1532; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] 1533; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d 1534; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s 1535; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h 1536; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 1537; CHECK-NEXT: ret 1538 %op1 = load <4 x double>, ptr %a 1539 %res = fptosi <4 x double> %op1 to <4 x i16> 1540 ret <4 x i16> %res 1541} 1542 1543define <8 x i16> @fcvtzs_v8f64_v8i16(ptr %a) #0 { 1544; VBITS_GE_256-LABEL: fcvtzs_v8f64_v8i16: 1545; VBITS_GE_256: // %bb.0: 1546; VBITS_GE_256-NEXT: ptrue p0.d, vl4 1547; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 1548; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] 1549; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] 1550; VBITS_GE_256-NEXT: fcvtzs z0.d, p0/m, z0.d 1551; VBITS_GE_256-NEXT: fcvtzs z1.d, p0/m, z1.d 1552; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s 1553; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s 1554; VBITS_GE_256-NEXT: uzp1 z2.h, z0.h, z0.h 1555; VBITS_GE_256-NEXT: uzp1 z0.h, z1.h, z1.h 1556; VBITS_GE_256-NEXT: mov v0.d[1], v2.d[0] 1557; VBITS_GE_256-NEXT: // kill: def $q0 killed $q0 killed $z0 1558; VBITS_GE_256-NEXT: ret 1559; 1560; VBITS_GE_512-LABEL: fcvtzs_v8f64_v8i16: 1561; VBITS_GE_512: // %bb.0: 1562; VBITS_GE_512-NEXT: ptrue p0.d, vl8 1563; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0] 1564; VBITS_GE_512-NEXT: fcvtzs z0.d, p0/m, z0.d 1565; VBITS_GE_512-NEXT: uzp1 z0.s, z0.s, z0.s 1566; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h 1567; VBITS_GE_512-NEXT: // kill: def $q0 killed $q0 killed $z0 
1568; VBITS_GE_512-NEXT: ret 1569 %op1 = load <8 x double>, ptr %a 1570 %res = fptosi <8 x double> %op1 to <8 x i16> 1571 ret <8 x i16> %res 1572} 1573 1574define void @fcvtzs_v16f64_v16i16(ptr %a, ptr %b) vscale_range(8,0) #0 { 1575; CHECK-LABEL: fcvtzs_v16f64_v16i16: 1576; CHECK: // %bb.0: 1577; CHECK-NEXT: ptrue p0.d, vl16 1578; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] 1579; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d 1580; CHECK-NEXT: st1h { z0.d }, p0, [x1] 1581; CHECK-NEXT: ret 1582 %op1 = load <16 x double>, ptr %a 1583 %res = fptosi <16 x double> %op1 to <16 x i16> 1584 store <16 x i16> %res, ptr %b 1585 ret void 1586} 1587 1588define void @fcvtzs_v32f64_v32i16(ptr %a, ptr %b) vscale_range(16,0) #0 { 1589; CHECK-LABEL: fcvtzs_v32f64_v32i16: 1590; CHECK: // %bb.0: 1591; CHECK-NEXT: ptrue p0.d, vl32 1592; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] 1593; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d 1594; CHECK-NEXT: st1h { z0.d }, p0, [x1] 1595; CHECK-NEXT: ret 1596 %op1 = load <32 x double>, ptr %a 1597 %res = fptosi <32 x double> %op1 to <32 x i16> 1598 store <32 x i16> %res, ptr %b 1599 ret void 1600} 1601 1602; 1603; FCVTZS D -> S 1604; 1605 1606; Don't use SVE for 64-bit vectors. 1607define <1 x i32> @fcvtzs_v1f64_v1i32(<1 x double> %op1) vscale_range(2,0) #0 { 1608; CHECK-LABEL: fcvtzs_v1f64_v1i32: 1609; CHECK: // %bb.0: 1610; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 1611; CHECK-NEXT: fcvtzs v0.2d, v0.2d 1612; CHECK-NEXT: xtn v0.2s, v0.2d 1613; CHECK-NEXT: ret 1614 %res = fptosi <1 x double> %op1 to <1 x i32> 1615 ret <1 x i32> %res 1616} 1617 1618; Don't use SVE for 128-bit vectors. 
1619define <2 x i32> @fcvtzs_v2f64_v2i32(<2 x double> %op1) vscale_range(2,0) #0 { 1620; CHECK-LABEL: fcvtzs_v2f64_v2i32: 1621; CHECK: // %bb.0: 1622; CHECK-NEXT: fcvtzs v0.2d, v0.2d 1623; CHECK-NEXT: xtn v0.2s, v0.2d 1624; CHECK-NEXT: ret 1625 %res = fptosi <2 x double> %op1 to <2 x i32> 1626 ret <2 x i32> %res 1627} 1628 1629define <4 x i32> @fcvtzs_v4f64_v4i32(ptr %a) vscale_range(2,0) #0 { 1630; CHECK-LABEL: fcvtzs_v4f64_v4i32: 1631; CHECK: // %bb.0: 1632; CHECK-NEXT: ptrue p0.d, vl4 1633; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] 1634; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d 1635; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s 1636; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 1637; CHECK-NEXT: ret 1638 %op1 = load <4 x double>, ptr %a 1639 %res = fptosi <4 x double> %op1 to <4 x i32> 1640 ret <4 x i32> %res 1641} 1642 1643define void @fcvtzs_v8f64_v8i32(ptr %a, ptr %b) #0 { 1644; VBITS_GE_256-LABEL: fcvtzs_v8f64_v8i32: 1645; VBITS_GE_256: // %bb.0: 1646; VBITS_GE_256-NEXT: ptrue p0.d, vl4 1647; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 1648; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] 1649; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] 1650; VBITS_GE_256-NEXT: fcvtzs z0.d, p0/m, z0.d 1651; VBITS_GE_256-NEXT: fcvtzs z1.d, p0/m, z1.d 1652; VBITS_GE_256-NEXT: ptrue p0.s, vl4 1653; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s 1654; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s 1655; VBITS_GE_256-NEXT: splice z1.s, p0, z1.s, z0.s 1656; VBITS_GE_256-NEXT: ptrue p0.s, vl8 1657; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x1] 1658; VBITS_GE_256-NEXT: ret 1659; 1660; VBITS_GE_512-LABEL: fcvtzs_v8f64_v8i32: 1661; VBITS_GE_512: // %bb.0: 1662; VBITS_GE_512-NEXT: ptrue p0.d, vl8 1663; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0] 1664; VBITS_GE_512-NEXT: fcvtzs z0.d, p0/m, z0.d 1665; VBITS_GE_512-NEXT: st1w { z0.d }, p0, [x1] 1666; VBITS_GE_512-NEXT: ret 1667 %op1 = load <8 x double>, ptr %a 1668 %res = fptosi <8 x double> %op1 to <8 x i32> 1669 store <8 x i32> %res, ptr %b 1670 ret void 
1671} 1672 1673define void @fcvtzs_v16f64_v16i32(ptr %a, ptr %b) vscale_range(8,0) #0 { 1674; CHECK-LABEL: fcvtzs_v16f64_v16i32: 1675; CHECK: // %bb.0: 1676; CHECK-NEXT: ptrue p0.d, vl16 1677; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] 1678; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d 1679; CHECK-NEXT: st1w { z0.d }, p0, [x1] 1680; CHECK-NEXT: ret 1681 %op1 = load <16 x double>, ptr %a 1682 %res = fptosi <16 x double> %op1 to <16 x i32> 1683 store <16 x i32> %res, ptr %b 1684 ret void 1685} 1686 1687define void @fcvtzs_v32f64_v32i32(ptr %a, ptr %b) vscale_range(16,0) #0 { 1688; CHECK-LABEL: fcvtzs_v32f64_v32i32: 1689; CHECK: // %bb.0: 1690; CHECK-NEXT: ptrue p0.d, vl32 1691; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] 1692; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d 1693; CHECK-NEXT: st1w { z0.d }, p0, [x1] 1694; CHECK-NEXT: ret 1695 %op1 = load <32 x double>, ptr %a 1696 %res = fptosi <32 x double> %op1 to <32 x i32> 1697 store <32 x i32> %res, ptr %b 1698 ret void 1699} 1700 1701; 1702; FCVTZS D -> D 1703; 1704 1705; Don't use SVE for 64-bit vectors. 1706define <1 x i64> @fcvtzs_v1f64_v1i64(<1 x double> %op1) vscale_range(2,0) #0 { 1707; CHECK-LABEL: fcvtzs_v1f64_v1i64: 1708; CHECK: // %bb.0: 1709; CHECK-NEXT: fcvtzs x8, d0 1710; CHECK-NEXT: fmov d0, x8 1711; CHECK-NEXT: ret 1712 %res = fptosi <1 x double> %op1 to <1 x i64> 1713 ret <1 x i64> %res 1714} 1715 1716; Don't use SVE for 128-bit vectors. 
1717define <2 x i64> @fcvtzs_v2f64_v2i64(<2 x double> %op1) vscale_range(2,0) #0 { 1718; CHECK-LABEL: fcvtzs_v2f64_v2i64: 1719; CHECK: // %bb.0: 1720; CHECK-NEXT: fcvtzs v0.2d, v0.2d 1721; CHECK-NEXT: ret 1722 %res = fptosi <2 x double> %op1 to <2 x i64> 1723 ret <2 x i64> %res 1724} 1725 1726define void @fcvtzs_v4f64_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 { 1727; CHECK-LABEL: fcvtzs_v4f64_v4i64: 1728; CHECK: // %bb.0: 1729; CHECK-NEXT: ptrue p0.d, vl4 1730; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] 1731; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d 1732; CHECK-NEXT: st1d { z0.d }, p0, [x1] 1733; CHECK-NEXT: ret 1734 %op1 = load <4 x double>, ptr %a 1735 %res = fptosi <4 x double> %op1 to <4 x i64> 1736 store <4 x i64> %res, ptr %b 1737 ret void 1738} 1739 1740define void @fcvtzs_v8f64_v8i64(ptr %a, ptr %b) #0 { 1741; VBITS_GE_256-LABEL: fcvtzs_v8f64_v8i64: 1742; VBITS_GE_256: // %bb.0: 1743; VBITS_GE_256-NEXT: ptrue p0.d, vl4 1744; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 1745; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] 1746; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] 1747; VBITS_GE_256-NEXT: fcvtzs z0.d, p0/m, z0.d 1748; VBITS_GE_256-NEXT: fcvtzs z1.d, p0/m, z1.d 1749; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1, x8, lsl #3] 1750; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1] 1751; VBITS_GE_256-NEXT: ret 1752; 1753; VBITS_GE_512-LABEL: fcvtzs_v8f64_v8i64: 1754; VBITS_GE_512: // %bb.0: 1755; VBITS_GE_512-NEXT: ptrue p0.d, vl8 1756; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0] 1757; VBITS_GE_512-NEXT: fcvtzs z0.d, p0/m, z0.d 1758; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1] 1759; VBITS_GE_512-NEXT: ret 1760 %op1 = load <8 x double>, ptr %a 1761 %res = fptosi <8 x double> %op1 to <8 x i64> 1762 store <8 x i64> %res, ptr %b 1763 ret void 1764} 1765 1766define void @fcvtzs_v16f64_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 { 1767; CHECK-LABEL: fcvtzs_v16f64_v16i64: 1768; CHECK: // %bb.0: 1769; CHECK-NEXT: ptrue p0.d, vl16 1770; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] 
1771; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d 1772; CHECK-NEXT: st1d { z0.d }, p0, [x1] 1773; CHECK-NEXT: ret 1774 %op1 = load <16 x double>, ptr %a 1775 %res = fptosi <16 x double> %op1 to <16 x i64> 1776 store <16 x i64> %res, ptr %b 1777 ret void 1778} 1779 1780define void @fcvtzs_v32f64_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 { 1781; CHECK-LABEL: fcvtzs_v32f64_v32i64: 1782; CHECK: // %bb.0: 1783; CHECK-NEXT: ptrue p0.d, vl32 1784; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] 1785; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d 1786; CHECK-NEXT: st1d { z0.d }, p0, [x1] 1787; CHECK-NEXT: ret 1788 %op1 = load <32 x double>, ptr %a 1789 %res = fptosi <32 x double> %op1 to <32 x i64> 1790 store <32 x i64> %res, ptr %b 1791 ret void 1792} 1793 1794attributes #0 = { "target-features"="+sve" } 1795