1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256 3; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512 4; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512 5 6target triple = "aarch64-unknown-linux-gnu" 7 8; 9; UCVTF H -> H 10; 11 12; Don't use SVE for 64-bit vectors. 13define <4 x half> @ucvtf_v4i16_v4f16(<4 x i16> %op1) vscale_range(2,0) #0 { 14; CHECK-LABEL: ucvtf_v4i16_v4f16: 15; CHECK: // %bb.0: 16; CHECK-NEXT: ucvtf v0.4h, v0.4h 17; CHECK-NEXT: ret 18 %res = uitofp <4 x i16> %op1 to <4 x half> 19 ret <4 x half> %res 20} 21 22; Don't use SVE for 128-bit vectors. 23define void @ucvtf_v8i16_v8f16(ptr %a, ptr %b) vscale_range(2,0) #0 { 24; CHECK-LABEL: ucvtf_v8i16_v8f16: 25; CHECK: // %bb.0: 26; CHECK-NEXT: ldr q0, [x0] 27; CHECK-NEXT: ucvtf v0.8h, v0.8h 28; CHECK-NEXT: str q0, [x1] 29; CHECK-NEXT: ret 30 %op1 = load <8 x i16>, ptr %a 31 %res = uitofp <8 x i16> %op1 to <8 x half> 32 store <8 x half> %res, ptr %b 33 ret void 34} 35 36define void @ucvtf_v16i16_v16f16(ptr %a, ptr %b) vscale_range(2,0) #0 { 37; CHECK-LABEL: ucvtf_v16i16_v16f16: 38; CHECK: // %bb.0: 39; CHECK-NEXT: ptrue p0.h, vl16 40; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] 41; CHECK-NEXT: ucvtf z0.h, p0/m, z0.h 42; CHECK-NEXT: st1h { z0.h }, p0, [x1] 43; CHECK-NEXT: ret 44 %op1 = load <16 x i16>, ptr %a 45 %res = uitofp <16 x i16> %op1 to <16 x half> 46 store <16 x half> %res, ptr %b 47 ret void 48} 49 50define void @ucvtf_v32i16_v32f16(ptr %a, ptr %b) #0 { 51; VBITS_GE_256-LABEL: ucvtf_v32i16_v32f16: 52; VBITS_GE_256: // %bb.0: 53; VBITS_GE_256-NEXT: ptrue p0.h, vl16 54; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 55; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] 56; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] 57; VBITS_GE_256-NEXT: ucvtf z0.h, p0/m, z0.h 58; 
VBITS_GE_256-NEXT: ucvtf z1.h, p0/m, z1.h 59; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x1, x8, lsl #1] 60; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x1] 61; VBITS_GE_256-NEXT: ret 62; 63; VBITS_GE_512-LABEL: ucvtf_v32i16_v32f16: 64; VBITS_GE_512: // %bb.0: 65; VBITS_GE_512-NEXT: ptrue p0.h, vl32 66; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0] 67; VBITS_GE_512-NEXT: ucvtf z0.h, p0/m, z0.h 68; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x1] 69; VBITS_GE_512-NEXT: ret 70 %op1 = load <32 x i16>, ptr %a 71 %res = uitofp <32 x i16> %op1 to <32 x half> 72 store <32 x half> %res, ptr %b 73 ret void 74} 75 76define void @ucvtf_v64i16_v64f16(ptr %a, ptr %b) vscale_range(8,0) #0 { 77; CHECK-LABEL: ucvtf_v64i16_v64f16: 78; CHECK: // %bb.0: 79; CHECK-NEXT: ptrue p0.h, vl64 80; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] 81; CHECK-NEXT: ucvtf z0.h, p0/m, z0.h 82; CHECK-NEXT: st1h { z0.h }, p0, [x1] 83; CHECK-NEXT: ret 84 %op1 = load <64 x i16>, ptr %a 85 %res = uitofp <64 x i16> %op1 to <64 x half> 86 store <64 x half> %res, ptr %b 87 ret void 88} 89 90define void @ucvtf_v128i16_v128f16(ptr %a, ptr %b) vscale_range(16,0) #0 { 91; CHECK-LABEL: ucvtf_v128i16_v128f16: 92; CHECK: // %bb.0: 93; CHECK-NEXT: ptrue p0.h, vl128 94; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] 95; CHECK-NEXT: ucvtf z0.h, p0/m, z0.h 96; CHECK-NEXT: st1h { z0.h }, p0, [x1] 97; CHECK-NEXT: ret 98 %op1 = load <128 x i16>, ptr %a 99 %res = uitofp <128 x i16> %op1 to <128 x half> 100 store <128 x half> %res, ptr %b 101 ret void 102} 103 104; 105; UCVTF H -> S 106; 107 108; Don't use SVE for 64-bit vectors. 109define <2 x float> @ucvtf_v2i16_v2f32(<2 x i16> %op1) vscale_range(2,0) #0 { 110; CHECK-LABEL: ucvtf_v2i16_v2f32: 111; CHECK: // %bb.0: 112; CHECK-NEXT: movi d1, #0x00ffff0000ffff 113; CHECK-NEXT: and v0.8b, v0.8b, v1.8b 114; CHECK-NEXT: ucvtf v0.2s, v0.2s 115; CHECK-NEXT: ret 116 %res = uitofp <2 x i16> %op1 to <2 x float> 117 ret <2 x float> %res 118} 119 120; Don't use SVE for 128-bit vectors. 
121define <4 x float> @ucvtf_v4i16_v4f32(<4 x i16> %op1) vscale_range(2,0) #0 { 122; CHECK-LABEL: ucvtf_v4i16_v4f32: 123; CHECK: // %bb.0: 124; CHECK-NEXT: ushll v0.4s, v0.4h, #0 125; CHECK-NEXT: ucvtf v0.4s, v0.4s 126; CHECK-NEXT: ret 127 %res = uitofp <4 x i16> %op1 to <4 x float> 128 ret <4 x float> %res 129} 130 131define void @ucvtf_v8i16_v8f32(ptr %a, ptr %b) vscale_range(2,0) #0 { 132; CHECK-LABEL: ucvtf_v8i16_v8f32: 133; CHECK: // %bb.0: 134; CHECK-NEXT: ldr q0, [x0] 135; CHECK-NEXT: ptrue p0.s, vl8 136; CHECK-NEXT: uunpklo z0.s, z0.h 137; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s 138; CHECK-NEXT: st1w { z0.s }, p0, [x1] 139; CHECK-NEXT: ret 140 %op1 = load <8 x i16>, ptr %a 141 %res = uitofp <8 x i16> %op1 to <8 x float> 142 store <8 x float> %res, ptr %b 143 ret void 144} 145 146define void @ucvtf_v16i16_v16f32(ptr %a, ptr %b) #0 { 147; VBITS_GE_256-LABEL: ucvtf_v16i16_v16f32: 148; VBITS_GE_256: // %bb.0: 149; VBITS_GE_256-NEXT: ptrue p0.h, vl16 150; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 151; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0] 152; VBITS_GE_256-NEXT: ptrue p0.s, vl8 153; VBITS_GE_256-NEXT: uunpklo z1.s, z0.h 154; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16 155; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h 156; VBITS_GE_256-NEXT: ucvtf z1.s, p0/m, z1.s 157; VBITS_GE_256-NEXT: ucvtf z0.s, p0/m, z0.s 158; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x1] 159; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1, x8, lsl #2] 160; VBITS_GE_256-NEXT: ret 161; 162; VBITS_GE_512-LABEL: ucvtf_v16i16_v16f32: 163; VBITS_GE_512: // %bb.0: 164; VBITS_GE_512-NEXT: ptrue p0.s, vl16 165; VBITS_GE_512-NEXT: ld1h { z0.s }, p0/z, [x0] 166; VBITS_GE_512-NEXT: ucvtf z0.s, p0/m, z0.s 167; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1] 168; VBITS_GE_512-NEXT: ret 169 %op1 = load <16 x i16>, ptr %a 170 %res = uitofp <16 x i16> %op1 to <16 x float> 171 store <16 x float> %res, ptr %b 172 ret void 173} 174 175define void @ucvtf_v32i16_v32f32(ptr %a, ptr %b) vscale_range(8,0) #0 { 176; CHECK-LABEL: 
ucvtf_v32i16_v32f32: 177; CHECK: // %bb.0: 178; CHECK-NEXT: ptrue p0.s, vl32 179; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0] 180; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s 181; CHECK-NEXT: st1w { z0.s }, p0, [x1] 182; CHECK-NEXT: ret 183 %op1 = load <32 x i16>, ptr %a 184 %res = uitofp <32 x i16> %op1 to <32 x float> 185 store <32 x float> %res, ptr %b 186 ret void 187} 188 189define void @ucvtf_v64i16_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 { 190; CHECK-LABEL: ucvtf_v64i16_v64f32: 191; CHECK: // %bb.0: 192; CHECK-NEXT: ptrue p0.s, vl64 193; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0] 194; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s 195; CHECK-NEXT: st1w { z0.s }, p0, [x1] 196; CHECK-NEXT: ret 197 %op1 = load <64 x i16>, ptr %a 198 %res = uitofp <64 x i16> %op1 to <64 x float> 199 store <64 x float> %res, ptr %b 200 ret void 201} 202 203; 204; UCVTF H -> D 205; 206 207; v1i16 is preferred to be widened to v4i16, which pushes the output into SVE types, so use SVE 208define <1 x double> @ucvtf_v1i16_v1f64(<1 x i16> %op1) vscale_range(2,0) #0 { 209; CHECK-LABEL: ucvtf_v1i16_v1f64: 210; CHECK: // %bb.0: 211; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 212; CHECK-NEXT: ptrue p0.d, vl4 213; CHECK-NEXT: uunpklo z0.s, z0.h 214; CHECK-NEXT: uunpklo z0.d, z0.s 215; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d 216; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 217; CHECK-NEXT: ret 218 %res = uitofp <1 x i16> %op1 to <1 x double> 219 ret <1 x double> %res 220} 221 222; Don't use SVE for 128-bit vectors. 
223define <2 x double> @ucvtf_v2i16_v2f64(<2 x i16> %op1) vscale_range(2,0) #0 { 224; CHECK-LABEL: ucvtf_v2i16_v2f64: 225; CHECK: // %bb.0: 226; CHECK-NEXT: movi d1, #0x00ffff0000ffff 227; CHECK-NEXT: and v0.8b, v0.8b, v1.8b 228; CHECK-NEXT: ushll v0.2d, v0.2s, #0 229; CHECK-NEXT: ucvtf v0.2d, v0.2d 230; CHECK-NEXT: ret 231 %res = uitofp <2 x i16> %op1 to <2 x double> 232 ret <2 x double> %res 233} 234 235define void @ucvtf_v4i16_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 { 236; CHECK-LABEL: ucvtf_v4i16_v4f64: 237; CHECK: // %bb.0: 238; CHECK-NEXT: ldr d0, [x0] 239; CHECK-NEXT: ptrue p0.d, vl4 240; CHECK-NEXT: uunpklo z0.s, z0.h 241; CHECK-NEXT: uunpklo z0.d, z0.s 242; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d 243; CHECK-NEXT: st1d { z0.d }, p0, [x1] 244; CHECK-NEXT: ret 245 %op1 = load <4 x i16>, ptr %a 246 %res = uitofp <4 x i16> %op1 to <4 x double> 247 store <4 x double> %res, ptr %b 248 ret void 249} 250 251define void @ucvtf_v8i16_v8f64(ptr %a, ptr %b) #0 { 252; VBITS_GE_256-LABEL: ucvtf_v8i16_v8f64: 253; VBITS_GE_256: // %bb.0: 254; VBITS_GE_256-NEXT: ldr q0, [x0] 255; VBITS_GE_256-NEXT: ptrue p0.d, vl4 256; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 257; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8 258; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h 259; VBITS_GE_256-NEXT: uunpklo z1.s, z1.h 260; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s 261; VBITS_GE_256-NEXT: uunpklo z1.d, z1.s 262; VBITS_GE_256-NEXT: ucvtf z0.d, p0/m, z0.d 263; VBITS_GE_256-NEXT: ucvtf z1.d, p0/m, z1.d 264; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1] 265; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1, x8, lsl #3] 266; VBITS_GE_256-NEXT: ret 267; 268; VBITS_GE_512-LABEL: ucvtf_v8i16_v8f64: 269; VBITS_GE_512: // %bb.0: 270; VBITS_GE_512-NEXT: ldr q0, [x0] 271; VBITS_GE_512-NEXT: ptrue p0.d, vl8 272; VBITS_GE_512-NEXT: uunpklo z0.s, z0.h 273; VBITS_GE_512-NEXT: uunpklo z0.d, z0.s 274; VBITS_GE_512-NEXT: ucvtf z0.d, p0/m, z0.d 275; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1] 276; VBITS_GE_512-NEXT: ret 277 %op1 = 
load <8 x i16>, ptr %a 278 %res = uitofp <8 x i16> %op1 to <8 x double> 279 store <8 x double> %res, ptr %b 280 ret void 281} 282 283define void @ucvtf_v16i16_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 { 284; CHECK-LABEL: ucvtf_v16i16_v16f64: 285; CHECK: // %bb.0: 286; CHECK-NEXT: ptrue p0.d, vl16 287; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0] 288; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d 289; CHECK-NEXT: st1d { z0.d }, p0, [x1] 290; CHECK-NEXT: ret 291 %op1 = load <16 x i16>, ptr %a 292 %res = uitofp <16 x i16> %op1 to <16 x double> 293 store <16 x double> %res, ptr %b 294 ret void 295} 296 297define void @ucvtf_v32i16_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 { 298; CHECK-LABEL: ucvtf_v32i16_v32f64: 299; CHECK: // %bb.0: 300; CHECK-NEXT: ptrue p0.d, vl32 301; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0] 302; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d 303; CHECK-NEXT: st1d { z0.d }, p0, [x1] 304; CHECK-NEXT: ret 305 %op1 = load <32 x i16>, ptr %a 306 %res = uitofp <32 x i16> %op1 to <32 x double> 307 store <32 x double> %res, ptr %b 308 ret void 309} 310 311; 312; UCVTF S -> H 313; 314 315; Don't use SVE for 64-bit vectors. 316define <2 x half> @ucvtf_v2i32_v2f16(<2 x i32> %op1) vscale_range(2,0) #0 { 317; CHECK-LABEL: ucvtf_v2i32_v2f16: 318; CHECK: // %bb.0: 319; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 320; CHECK-NEXT: ucvtf v0.4s, v0.4s 321; CHECK-NEXT: fcvtn v0.4h, v0.4s 322; CHECK-NEXT: ret 323 %res = uitofp <2 x i32> %op1 to <2 x half> 324 ret <2 x half> %res 325} 326 327; Don't use SVE for 128-bit vectors. 
328define <4 x half> @ucvtf_v4i32_v4f16(<4 x i32> %op1) vscale_range(2,0) #0 { 329; CHECK-LABEL: ucvtf_v4i32_v4f16: 330; CHECK: // %bb.0: 331; CHECK-NEXT: ucvtf v0.4s, v0.4s 332; CHECK-NEXT: fcvtn v0.4h, v0.4s 333; CHECK-NEXT: ret 334 %res = uitofp <4 x i32> %op1 to <4 x half> 335 ret <4 x half> %res 336} 337 338define <8 x half> @ucvtf_v8i32_v8f16(ptr %a) vscale_range(2,0) #0 { 339; CHECK-LABEL: ucvtf_v8i32_v8f16: 340; CHECK: // %bb.0: 341; CHECK-NEXT: ptrue p0.s, vl8 342; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] 343; CHECK-NEXT: ucvtf z0.h, p0/m, z0.s 344; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h 345; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 346; CHECK-NEXT: ret 347 %op1 = load <8 x i32>, ptr %a 348 %res = uitofp <8 x i32> %op1 to <8 x half> 349 ret <8 x half> %res 350} 351 352define void @ucvtf_v16i32_v16f16(ptr %a, ptr %b) #0 { 353; VBITS_GE_256-LABEL: ucvtf_v16i32_v16f16: 354; VBITS_GE_256: // %bb.0: 355; VBITS_GE_256-NEXT: ptrue p0.s, vl8 356; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 357; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] 358; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] 359; VBITS_GE_256-NEXT: ucvtf z0.h, p0/m, z0.s 360; VBITS_GE_256-NEXT: ucvtf z1.h, p0/m, z1.s 361; VBITS_GE_256-NEXT: ptrue p0.h, vl8 362; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h 363; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h 364; VBITS_GE_256-NEXT: splice z1.h, p0, z1.h, z0.h 365; VBITS_GE_256-NEXT: ptrue p0.h, vl16 366; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x1] 367; VBITS_GE_256-NEXT: ret 368; 369; VBITS_GE_512-LABEL: ucvtf_v16i32_v16f16: 370; VBITS_GE_512: // %bb.0: 371; VBITS_GE_512-NEXT: ptrue p0.s, vl16 372; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0] 373; VBITS_GE_512-NEXT: ucvtf z0.h, p0/m, z0.s 374; VBITS_GE_512-NEXT: ptrue p0.h, vl16 375; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h 376; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x1] 377; VBITS_GE_512-NEXT: ret 378 %op1 = load <16 x i32>, ptr %a 379 %res = uitofp <16 x i32> %op1 to <16 x half> 380 store <16 x half> 
%res, ptr %b 381 ret void 382} 383 384define void @ucvtf_v32i32_v32f16(ptr %a, ptr %b) vscale_range(8,0) #0 { 385; CHECK-LABEL: ucvtf_v32i32_v32f16: 386; CHECK: // %bb.0: 387; CHECK-NEXT: ptrue p0.s, vl32 388; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] 389; CHECK-NEXT: ucvtf z0.h, p0/m, z0.s 390; CHECK-NEXT: ptrue p0.h, vl32 391; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h 392; CHECK-NEXT: st1h { z0.h }, p0, [x1] 393; CHECK-NEXT: ret 394 %op1 = load <32 x i32>, ptr %a 395 %res = uitofp <32 x i32> %op1 to <32 x half> 396 store <32 x half> %res, ptr %b 397 ret void 398} 399 400define void @ucvtf_v64i32_v64f16(ptr %a, ptr %b) vscale_range(16,0) #0 { 401; CHECK-LABEL: ucvtf_v64i32_v64f16: 402; CHECK: // %bb.0: 403; CHECK-NEXT: ptrue p0.s, vl64 404; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] 405; CHECK-NEXT: ucvtf z0.h, p0/m, z0.s 406; CHECK-NEXT: ptrue p0.h, vl64 407; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h 408; CHECK-NEXT: st1h { z0.h }, p0, [x1] 409; CHECK-NEXT: ret 410 %op1 = load <64 x i32>, ptr %a 411 %res = uitofp <64 x i32> %op1 to <64 x half> 412 store <64 x half> %res, ptr %b 413 ret void 414} 415 416; 417; UCVTF S -> S 418; 419 420; Don't use SVE for 64-bit vectors. 421define <2 x float> @ucvtf_v2i32_v2f32(<2 x i32> %op1) vscale_range(2,0) #0 { 422; CHECK-LABEL: ucvtf_v2i32_v2f32: 423; CHECK: // %bb.0: 424; CHECK-NEXT: ucvtf v0.2s, v0.2s 425; CHECK-NEXT: ret 426 %res = uitofp <2 x i32> %op1 to <2 x float> 427 ret <2 x float> %res 428} 429 430; Don't use SVE for 128-bit vectors. 
431define <4 x float> @ucvtf_v4i32_v4f32(<4 x i32> %op1) vscale_range(2,0) #0 { 432; CHECK-LABEL: ucvtf_v4i32_v4f32: 433; CHECK: // %bb.0: 434; CHECK-NEXT: ucvtf v0.4s, v0.4s 435; CHECK-NEXT: ret 436 %res = uitofp <4 x i32> %op1 to <4 x float> 437 ret <4 x float> %res 438} 439 440define void @ucvtf_v8i32_v8f32(ptr %a, ptr %b) vscale_range(2,0) #0 { 441; CHECK-LABEL: ucvtf_v8i32_v8f32: 442; CHECK: // %bb.0: 443; CHECK-NEXT: ptrue p0.s, vl8 444; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] 445; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s 446; CHECK-NEXT: st1w { z0.s }, p0, [x1] 447; CHECK-NEXT: ret 448 %op1 = load <8 x i32>, ptr %a 449 %res = uitofp <8 x i32> %op1 to <8 x float> 450 store <8 x float> %res, ptr %b 451 ret void 452} 453 454define void @ucvtf_v16i32_v16f32(ptr %a, ptr %b) #0 { 455; VBITS_GE_256-LABEL: ucvtf_v16i32_v16f32: 456; VBITS_GE_256: // %bb.0: 457; VBITS_GE_256-NEXT: ptrue p0.s, vl8 458; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 459; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] 460; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] 461; VBITS_GE_256-NEXT: ucvtf z0.s, p0/m, z0.s 462; VBITS_GE_256-NEXT: ucvtf z1.s, p0/m, z1.s 463; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1, x8, lsl #2] 464; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x1] 465; VBITS_GE_256-NEXT: ret 466; 467; VBITS_GE_512-LABEL: ucvtf_v16i32_v16f32: 468; VBITS_GE_512: // %bb.0: 469; VBITS_GE_512-NEXT: ptrue p0.s, vl16 470; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0] 471; VBITS_GE_512-NEXT: ucvtf z0.s, p0/m, z0.s 472; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1] 473; VBITS_GE_512-NEXT: ret 474 %op1 = load <16 x i32>, ptr %a 475 %res = uitofp <16 x i32> %op1 to <16 x float> 476 store <16 x float> %res, ptr %b 477 ret void 478} 479 480define void @ucvtf_v32i32_v32f32(ptr %a, ptr %b) vscale_range(8,0) #0 { 481; CHECK-LABEL: ucvtf_v32i32_v32f32: 482; CHECK: // %bb.0: 483; CHECK-NEXT: ptrue p0.s, vl32 484; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] 485; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s 486; CHECK-NEXT: st1w { 
z0.s }, p0, [x1] 487; CHECK-NEXT: ret 488 %op1 = load <32 x i32>, ptr %a 489 %res = uitofp <32 x i32> %op1 to <32 x float> 490 store <32 x float> %res, ptr %b 491 ret void 492} 493 494define void @ucvtf_v64i32_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 { 495; CHECK-LABEL: ucvtf_v64i32_v64f32: 496; CHECK: // %bb.0: 497; CHECK-NEXT: ptrue p0.s, vl64 498; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] 499; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s 500; CHECK-NEXT: st1w { z0.s }, p0, [x1] 501; CHECK-NEXT: ret 502 %op1 = load <64 x i32>, ptr %a 503 %res = uitofp <64 x i32> %op1 to <64 x float> 504 store <64 x float> %res, ptr %b 505 ret void 506} 507 508; 509; UCVTF S -> D 510; 511 512; Don't use SVE for 64-bit vectors. 513define <1 x double> @ucvtf_v1i32_v1f64(<1 x i32> %op1) vscale_range(2,0) #0 { 514; CHECK-LABEL: ucvtf_v1i32_v1f64: 515; CHECK: // %bb.0: 516; CHECK-NEXT: ushll v0.2d, v0.2s, #0 517; CHECK-NEXT: ucvtf v0.2d, v0.2d 518; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 519; CHECK-NEXT: ret 520 %res = uitofp <1 x i32> %op1 to <1 x double> 521 ret <1 x double> %res 522} 523 524; Don't use SVE for 128-bit vectors. 
525define <2 x double> @ucvtf_v2i32_v2f64(<2 x i32> %op1) vscale_range(2,0) #0 { 526; CHECK-LABEL: ucvtf_v2i32_v2f64: 527; CHECK: // %bb.0: 528; CHECK-NEXT: ushll v0.2d, v0.2s, #0 529; CHECK-NEXT: ucvtf v0.2d, v0.2d 530; CHECK-NEXT: ret 531 %res = uitofp <2 x i32> %op1 to <2 x double> 532 ret <2 x double> %res 533} 534 535define void @ucvtf_v4i32_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 { 536; CHECK-LABEL: ucvtf_v4i32_v4f64: 537; CHECK: // %bb.0: 538; CHECK-NEXT: ldr q0, [x0] 539; CHECK-NEXT: ptrue p0.d, vl4 540; CHECK-NEXT: uunpklo z0.d, z0.s 541; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d 542; CHECK-NEXT: st1d { z0.d }, p0, [x1] 543; CHECK-NEXT: ret 544 %op1 = load <4 x i32>, ptr %a 545 %res = uitofp <4 x i32> %op1 to <4 x double> 546 store <4 x double> %res, ptr %b 547 ret void 548} 549 550define void @ucvtf_v8i32_v8f64(ptr %a, ptr %b) #0 { 551; VBITS_GE_256-LABEL: ucvtf_v8i32_v8f64: 552; VBITS_GE_256: // %bb.0: 553; VBITS_GE_256-NEXT: ptrue p0.s, vl8 554; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 555; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0] 556; VBITS_GE_256-NEXT: ptrue p0.d, vl4 557; VBITS_GE_256-NEXT: uunpklo z1.d, z0.s 558; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16 559; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s 560; VBITS_GE_256-NEXT: ucvtf z1.d, p0/m, z1.d 561; VBITS_GE_256-NEXT: ucvtf z0.d, p0/m, z0.d 562; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1] 563; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1, x8, lsl #3] 564; VBITS_GE_256-NEXT: ret 565; 566; VBITS_GE_512-LABEL: ucvtf_v8i32_v8f64: 567; VBITS_GE_512: // %bb.0: 568; VBITS_GE_512-NEXT: ptrue p0.d, vl8 569; VBITS_GE_512-NEXT: ld1w { z0.d }, p0/z, [x0] 570; VBITS_GE_512-NEXT: ucvtf z0.d, p0/m, z0.d 571; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1] 572; VBITS_GE_512-NEXT: ret 573 %op1 = load <8 x i32>, ptr %a 574 %res = uitofp <8 x i32> %op1 to <8 x double> 575 store <8 x double> %res, ptr %b 576 ret void 577} 578 579define void @ucvtf_v16i32_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 { 580; CHECK-LABEL: 
ucvtf_v16i32_v16f64: 581; CHECK: // %bb.0: 582; CHECK-NEXT: ptrue p0.d, vl16 583; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0] 584; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d 585; CHECK-NEXT: st1d { z0.d }, p0, [x1] 586; CHECK-NEXT: ret 587 %op1 = load <16 x i32>, ptr %a 588 %res = uitofp <16 x i32> %op1 to <16 x double> 589 store <16 x double> %res, ptr %b 590 ret void 591} 592 593define void @ucvtf_v32i32_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 { 594; CHECK-LABEL: ucvtf_v32i32_v32f64: 595; CHECK: // %bb.0: 596; CHECK-NEXT: ptrue p0.d, vl32 597; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0] 598; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d 599; CHECK-NEXT: st1d { z0.d }, p0, [x1] 600; CHECK-NEXT: ret 601 %op1 = load <32 x i32>, ptr %a 602 %res = uitofp <32 x i32> %op1 to <32 x double> 603 store <32 x double> %res, ptr %b 604 ret void 605} 606 607; 608; UCVTF D -> H 609; 610 611; Don't use SVE for 64-bit vectors. 612define <1 x half> @ucvtf_v1i64_v1f16(<1 x i64> %op1) vscale_range(2,0) #0 { 613; CHECK-LABEL: ucvtf_v1i64_v1f16: 614; CHECK: // %bb.0: 615; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 616; CHECK-NEXT: fmov x8, d0 617; CHECK-NEXT: ucvtf h0, x8 618; CHECK-NEXT: ret 619 %res = uitofp <1 x i64> %op1 to <1 x half> 620 ret <1 x half> %res 621} 622 623; v2f16 is not legal for NEON, so use SVE 624define <2 x half> @ucvtf_v2i64_v2f16(<2 x i64> %op1) vscale_range(2,0) #0 { 625; CHECK-LABEL: ucvtf_v2i64_v2f16: 626; CHECK: // %bb.0: 627; CHECK-NEXT: ptrue p0.d, vl4 628; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 629; CHECK-NEXT: ucvtf z0.h, p0/m, z0.d 630; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s 631; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h 632; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 633; CHECK-NEXT: ret 634 %res = uitofp <2 x i64> %op1 to <2 x half> 635 ret <2 x half> %res 636} 637 638define <4 x half> @ucvtf_v4i64_v4f16(ptr %a) vscale_range(2,0) #0 { 639; CHECK-LABEL: ucvtf_v4i64_v4f16: 640; CHECK: // %bb.0: 641; CHECK-NEXT: ptrue p0.d, vl4 642; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] 
643; CHECK-NEXT: ucvtf z0.h, p0/m, z0.d 644; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s 645; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h 646; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 647; CHECK-NEXT: ret 648 %op1 = load <4 x i64>, ptr %a 649 %res = uitofp <4 x i64> %op1 to <4 x half> 650 ret <4 x half> %res 651} 652 653define <8 x half> @ucvtf_v8i64_v8f16(ptr %a) #0 { 654; VBITS_GE_256-LABEL: ucvtf_v8i64_v8f16: 655; VBITS_GE_256: // %bb.0: 656; VBITS_GE_256-NEXT: ptrue p0.d, vl4 657; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 658; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] 659; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] 660; VBITS_GE_256-NEXT: ucvtf z0.h, p0/m, z0.d 661; VBITS_GE_256-NEXT: ucvtf z1.h, p0/m, z1.d 662; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s 663; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s 664; VBITS_GE_256-NEXT: uzp1 z2.h, z0.h, z0.h 665; VBITS_GE_256-NEXT: uzp1 z0.h, z1.h, z1.h 666; VBITS_GE_256-NEXT: mov v0.d[1], v2.d[0] 667; VBITS_GE_256-NEXT: // kill: def $q0 killed $q0 killed $z0 668; VBITS_GE_256-NEXT: ret 669; 670; VBITS_GE_512-LABEL: ucvtf_v8i64_v8f16: 671; VBITS_GE_512: // %bb.0: 672; VBITS_GE_512-NEXT: ptrue p0.d, vl8 673; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0] 674; VBITS_GE_512-NEXT: ucvtf z0.h, p0/m, z0.d 675; VBITS_GE_512-NEXT: uzp1 z0.s, z0.s, z0.s 676; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h 677; VBITS_GE_512-NEXT: // kill: def $q0 killed $q0 killed $z0 678; VBITS_GE_512-NEXT: ret 679 %op1 = load <8 x i64>, ptr %a 680 %res = uitofp <8 x i64> %op1 to <8 x half> 681 ret <8 x half> %res 682} 683 684define void @ucvtf_v16i64_v16f16(ptr %a, ptr %b) vscale_range(8,0) #0 { 685; CHECK-LABEL: ucvtf_v16i64_v16f16: 686; CHECK: // %bb.0: 687; CHECK-NEXT: ptrue p0.d, vl16 688; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] 689; CHECK-NEXT: ucvtf z0.h, p0/m, z0.d 690; CHECK-NEXT: ptrue p0.s, vl16 691; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s 692; CHECK-NEXT: st1h { z0.s }, p0, [x1] 693; CHECK-NEXT: ret 694 %op1 = load <16 x i64>, ptr %a 695 %res = uitofp 
<16 x i64> %op1 to <16 x half> 696 store <16 x half> %res, ptr %b 697 ret void 698} 699 700define void @ucvtf_v32i64_v32f16(ptr %a, ptr %b) vscale_range(16,0) #0 { 701; CHECK-LABEL: ucvtf_v32i64_v32f16: 702; CHECK: // %bb.0: 703; CHECK-NEXT: ptrue p0.d, vl32 704; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] 705; CHECK-NEXT: ucvtf z0.h, p0/m, z0.d 706; CHECK-NEXT: ptrue p0.s, vl32 707; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s 708; CHECK-NEXT: st1h { z0.s }, p0, [x1] 709; CHECK-NEXT: ret 710 %op1 = load <32 x i64>, ptr %a 711 %res = uitofp <32 x i64> %op1 to <32 x half> 712 store <32 x half> %res, ptr %b 713 ret void 714} 715 716; 717; UCVTF D -> S 718; 719 720; Don't use SVE for 64-bit vectors. 721define <1 x float> @ucvtf_v1i64_v1f32(<1 x i64> %op1) vscale_range(2,0) #0 { 722; CHECK-LABEL: ucvtf_v1i64_v1f32: 723; CHECK: // %bb.0: 724; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 725; CHECK-NEXT: ucvtf v0.2d, v0.2d 726; CHECK-NEXT: fcvtn v0.2s, v0.2d 727; CHECK-NEXT: ret 728 %res = uitofp <1 x i64> %op1 to <1 x float> 729 ret <1 x float> %res 730} 731 732; Don't use SVE for 128-bit vectors. 
733define <2 x float> @ucvtf_v2i64_v2f32(<2 x i64> %op1) vscale_range(2,0) #0 { 734; CHECK-LABEL: ucvtf_v2i64_v2f32: 735; CHECK: // %bb.0: 736; CHECK-NEXT: ucvtf v0.2d, v0.2d 737; CHECK-NEXT: fcvtn v0.2s, v0.2d 738; CHECK-NEXT: ret 739 %res = uitofp <2 x i64> %op1 to <2 x float> 740 ret <2 x float> %res 741} 742 743define <4 x float> @ucvtf_v4i64_v4f32(ptr %a) vscale_range(2,0) #0 { 744; CHECK-LABEL: ucvtf_v4i64_v4f32: 745; CHECK: // %bb.0: 746; CHECK-NEXT: ptrue p0.d, vl4 747; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] 748; CHECK-NEXT: ucvtf z0.s, p0/m, z0.d 749; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s 750; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 751; CHECK-NEXT: ret 752 %op1 = load <4 x i64>, ptr %a 753 %res = uitofp <4 x i64> %op1 to <4 x float> 754 ret <4 x float> %res 755} 756 757define void @ucvtf_v8i64_v8f32(ptr %a, ptr %b) #0 { 758; VBITS_GE_256-LABEL: ucvtf_v8i64_v8f32: 759; VBITS_GE_256: // %bb.0: 760; VBITS_GE_256-NEXT: ptrue p0.d, vl4 761; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 762; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] 763; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] 764; VBITS_GE_256-NEXT: ucvtf z0.s, p0/m, z0.d 765; VBITS_GE_256-NEXT: ucvtf z1.s, p0/m, z1.d 766; VBITS_GE_256-NEXT: ptrue p0.s, vl4 767; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s 768; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s 769; VBITS_GE_256-NEXT: splice z1.s, p0, z1.s, z0.s 770; VBITS_GE_256-NEXT: ptrue p0.s, vl8 771; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x1] 772; VBITS_GE_256-NEXT: ret 773; 774; VBITS_GE_512-LABEL: ucvtf_v8i64_v8f32: 775; VBITS_GE_512: // %bb.0: 776; VBITS_GE_512-NEXT: ptrue p0.d, vl8 777; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0] 778; VBITS_GE_512-NEXT: ucvtf z0.s, p0/m, z0.d 779; VBITS_GE_512-NEXT: ptrue p0.s, vl8 780; VBITS_GE_512-NEXT: uzp1 z0.s, z0.s, z0.s 781; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1] 782; VBITS_GE_512-NEXT: ret 783 %op1 = load <8 x i64>, ptr %a 784 %res = uitofp <8 x i64> %op1 to <8 x float> 785 store <8 x float> %res, 
ptr %b 786 ret void 787} 788 789define void @ucvtf_v16i64_v16f32(ptr %a, ptr %b) vscale_range(8,0) #0 { 790; CHECK-LABEL: ucvtf_v16i64_v16f32: 791; CHECK: // %bb.0: 792; CHECK-NEXT: ptrue p0.d, vl16 793; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] 794; CHECK-NEXT: ucvtf z0.s, p0/m, z0.d 795; CHECK-NEXT: ptrue p0.s, vl16 796; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s 797; CHECK-NEXT: st1w { z0.s }, p0, [x1] 798; CHECK-NEXT: ret 799 %op1 = load <16 x i64>, ptr %a 800 %res = uitofp <16 x i64> %op1 to <16 x float> 801 store <16 x float> %res, ptr %b 802 ret void 803} 804 805define void @ucvtf_v32i64_v32f32(ptr %a, ptr %b) vscale_range(16,0) #0 { 806; CHECK-LABEL: ucvtf_v32i64_v32f32: 807; CHECK: // %bb.0: 808; CHECK-NEXT: ptrue p0.d, vl32 809; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] 810; CHECK-NEXT: ucvtf z0.s, p0/m, z0.d 811; CHECK-NEXT: ptrue p0.s, vl32 812; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s 813; CHECK-NEXT: st1w { z0.s }, p0, [x1] 814; CHECK-NEXT: ret 815 %op1 = load <32 x i64>, ptr %a 816 %res = uitofp <32 x i64> %op1 to <32 x float> 817 store <32 x float> %res, ptr %b 818 ret void 819} 820 821; 822; UCVTF D -> D 823; 824 825; Don't use SVE for 64-bit vectors. 826define <1 x double> @ucvtf_v1i64_v1f64(<1 x i64> %op1) vscale_range(2,0) #0 { 827; CHECK-LABEL: ucvtf_v1i64_v1f64: 828; CHECK: // %bb.0: 829; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 830; CHECK-NEXT: ucvtf d0, d0 831; CHECK-NEXT: ret 832 %res = uitofp <1 x i64> %op1 to <1 x double> 833 ret <1 x double> %res 834} 835 836; Don't use SVE for 128-bit vectors. 
837define <2 x double> @ucvtf_v2i64_v2f64(<2 x i64> %op1) vscale_range(2,0) #0 { 838; CHECK-LABEL: ucvtf_v2i64_v2f64: 839; CHECK: // %bb.0: 840; CHECK-NEXT: ucvtf v0.2d, v0.2d 841; CHECK-NEXT: ret 842 %res = uitofp <2 x i64> %op1 to <2 x double> 843 ret <2 x double> %res 844} 845 846define void @ucvtf_v4i64_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 { 847; CHECK-LABEL: ucvtf_v4i64_v4f64: 848; CHECK: // %bb.0: 849; CHECK-NEXT: ptrue p0.d, vl4 850; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] 851; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d 852; CHECK-NEXT: st1d { z0.d }, p0, [x1] 853; CHECK-NEXT: ret 854 %op1 = load <4 x i64>, ptr %a 855 %res = uitofp <4 x i64> %op1 to <4 x double> 856 store <4 x double> %res, ptr %b 857 ret void 858} 859 860define void @ucvtf_v8i64_v8f64(ptr %a, ptr %b) #0 { 861; VBITS_GE_256-LABEL: ucvtf_v8i64_v8f64: 862; VBITS_GE_256: // %bb.0: 863; VBITS_GE_256-NEXT: ptrue p0.d, vl4 864; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 865; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] 866; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] 867; VBITS_GE_256-NEXT: ucvtf z0.d, p0/m, z0.d 868; VBITS_GE_256-NEXT: ucvtf z1.d, p0/m, z1.d 869; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1, x8, lsl #3] 870; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1] 871; VBITS_GE_256-NEXT: ret 872; 873; VBITS_GE_512-LABEL: ucvtf_v8i64_v8f64: 874; VBITS_GE_512: // %bb.0: 875; VBITS_GE_512-NEXT: ptrue p0.d, vl8 876; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0] 877; VBITS_GE_512-NEXT: ucvtf z0.d, p0/m, z0.d 878; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1] 879; VBITS_GE_512-NEXT: ret 880 %op1 = load <8 x i64>, ptr %a 881 %res = uitofp <8 x i64> %op1 to <8 x double> 882 store <8 x double> %res, ptr %b 883 ret void 884} 885 886define void @ucvtf_v16i64_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 { 887; CHECK-LABEL: ucvtf_v16i64_v16f64: 888; CHECK: // %bb.0: 889; CHECK-NEXT: ptrue p0.d, vl16 890; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] 891; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d 892; CHECK-NEXT: st1d { z0.d 
}, p0, [x1] 893; CHECK-NEXT: ret 894 %op1 = load <16 x i64>, ptr %a 895 %res = uitofp <16 x i64> %op1 to <16 x double> 896 store <16 x double> %res, ptr %b 897 ret void 898} 899 900define void @ucvtf_v32i64_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 { 901; CHECK-LABEL: ucvtf_v32i64_v32f64: 902; CHECK: // %bb.0: 903; CHECK-NEXT: ptrue p0.d, vl32 904; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] 905; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d 906; CHECK-NEXT: st1d { z0.d }, p0, [x1] 907; CHECK-NEXT: ret 908 %op1 = load <32 x i64>, ptr %a 909 %res = uitofp <32 x i64> %op1 to <32 x double> 910 store <32 x double> %res, ptr %b 911 ret void 912} 913 914; 915; SCVTF H -> H 916; 917 918; Don't use SVE for 64-bit vectors. 919define <4 x half> @scvtf_v4i16_v4f16(<4 x i16> %op1) vscale_range(2,0) #0 { 920; CHECK-LABEL: scvtf_v4i16_v4f16: 921; CHECK: // %bb.0: 922; CHECK-NEXT: scvtf v0.4h, v0.4h 923; CHECK-NEXT: ret 924 %res = sitofp <4 x i16> %op1 to <4 x half> 925 ret <4 x half> %res 926} 927 928; Don't use SVE for 128-bit vectors. 
define void @scvtf_v8i16_v8f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v8i16_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    scvtf v0.8h, v0.8h
; CHECK-NEXT:    str q0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <8 x i16>, ptr %a
  %res = sitofp <8 x i16> %op1 to <8 x half>
  store <8 x half> %res, ptr %b
  ret void
}

define void @scvtf_v16i16_v16f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v16i16_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    scvtf z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <16 x i16>, ptr %a
  %res = sitofp <16 x i16> %op1 to <16 x half>
  store <16 x half> %res, ptr %b
  ret void
}

; No vscale_range attribute here, so both lowerings are checked: the 256-bit
; minimum splits the 512-bit operation across two registers, while the 512-bit
; minimum handles it in one.
define void @scvtf_v32i16_v32f16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: scvtf_v32i16_v32f16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0]
; VBITS_GE_256-NEXT:    scvtf z0.h, p0/m, z0.h
; VBITS_GE_256-NEXT:    scvtf z1.h, p0/m, z1.h
; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x1, x8, lsl #1]
; VBITS_GE_256-NEXT:    st1h { z1.h }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: scvtf_v32i16_v32f16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT:    scvtf z0.h, p0/m, z0.h
; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <32 x i16>, ptr %a
  %res = sitofp <32 x i16> %op1 to <32 x half>
  store <32 x half> %res, ptr %b
  ret void
}

define void @scvtf_v64i16_v64f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: scvtf_v64i16_v64f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    scvtf z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <64 x i16>, ptr %a
  %res = sitofp <64 x i16> %op1 to <64 x half>
  store <64 x half> %res, ptr %b
  ret void
}

define void @scvtf_v128i16_v128f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: scvtf_v128i16_v128f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl128
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    scvtf z0.h, p0/m, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <128 x i16>, ptr %a
  %res = sitofp <128 x i16> %op1 to <128 x half>
  store <128 x half> %res, ptr %b
  ret void
}

;
; SCVTF H -> S
;

; Don't use SVE for 64-bit vectors.
define <2 x float> @scvtf_v2i16_v2f32(<2 x i16> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v2i16_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v0.2s, v0.2s, #16
; CHECK-NEXT:    sshr v0.2s, v0.2s, #16
; CHECK-NEXT:    scvtf v0.2s, v0.2s
; CHECK-NEXT:    ret
  %res = sitofp <2 x i16> %op1 to <2 x float>
  ret <2 x float> %res
}

; Don't use SVE for 128-bit vectors.
define <4 x float> @scvtf_v4i16_v4f32(<4 x i16> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v4i16_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
; CHECK-NEXT:    scvtf v0.4s, v0.4s
; CHECK-NEXT:    ret
  %res = sitofp <4 x i16> %op1 to <4 x float>
  ret <4 x float> %res
}

define void @scvtf_v8i16_v8f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v8i16_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    sunpklo z0.s, z0.h
; CHECK-NEXT:    scvtf z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <8 x i16>, ptr %a
  %res = sitofp <8 x i16> %op1 to <8 x float>
  store <8 x float> %res, ptr %b
  ret void
}

; No vscale_range attribute, so both the 256-bit (unpack and convert the two
; halves separately) and 512-bit (single unpack/convert) lowerings are checked.
define void @scvtf_v16i16_v16f32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: scvtf_v16i16_v16f32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    sunpklo z1.s, z0.h
; VBITS_GE_256-NEXT:    ext z0.b, z0.b, z0.b, #16
; VBITS_GE_256-NEXT:    sunpklo z0.s, z0.h
; VBITS_GE_256-NEXT:    scvtf z1.s, p0/m, z1.s
; VBITS_GE_256-NEXT:    scvtf z0.s, p0/m, z0.s
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x1]
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: scvtf_v16i16_v16f32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.h, vl16
; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    sunpklo z0.s, z0.h
; VBITS_GE_512-NEXT:    scvtf z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <16 x i16>, ptr %a
  %res = sitofp <16 x i16> %op1 to <16 x float>
  store <16 x float> %res, ptr %b
  ret void
}

define void @scvtf_v32i16_v32f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: scvtf_v32i16_v32f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl32
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    sunpklo z0.s, z0.h
; CHECK-NEXT:    scvtf z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x i16>, ptr %a
  %res = sitofp <32 x i16> %op1 to <32 x float>
  store <32 x float> %res, ptr %b
  ret void
}

define void @scvtf_v64i16_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: scvtf_v64i16_v64f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    sunpklo z0.s, z0.h
; CHECK-NEXT:    scvtf z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <64 x i16>, ptr %a
  %res = sitofp <64 x i16> %op1 to <64 x float>
  store <64 x float> %res, ptr %b
  ret void
}

;
; SCVTF H -> D
;

; v1i16 is preferred to be widened to v4i16, which pushes the output into SVE types, so use SVE
define <1 x double> @scvtf_v1i16_v1f64(<1 x i16> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v1i16_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    sunpklo z0.s, z0.h
; CHECK-NEXT:    sunpklo z0.d, z0.s
; CHECK-NEXT:    scvtf z0.d, p0/m, z0.d
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %res = sitofp <1 x i16> %op1 to <1 x double>
  ret <1 x double> %res
}

; Don't use SVE for 128-bit vectors.
define <2 x double> @scvtf_v2i16_v2f64(<2 x i16> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v2i16_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v0.2s, v0.2s, #16
; CHECK-NEXT:    sshr v0.2s, v0.2s, #16
; CHECK-NEXT:    sshll v0.2d, v0.2s, #0
; CHECK-NEXT:    scvtf v0.2d, v0.2d
; CHECK-NEXT:    ret
  %res = sitofp <2 x i16> %op1 to <2 x double>
  ret <2 x double> %res
}

define void @scvtf_v4i16_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v4i16_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    sunpklo z0.s, z0.h
; CHECK-NEXT:    sunpklo z0.d, z0.s
; CHECK-NEXT:    scvtf z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <4 x i16>, ptr %a
  %res = sitofp <4 x i16> %op1 to <4 x double>
  store <4 x double> %res, ptr %b
  ret void
}

; No vscale_range attribute, so both the 256-bit (split halves, double unpack
; each) and 512-bit (single double unpack) lowerings are checked.
define void @scvtf_v8i16_v8f64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: scvtf_v8i16_v8f64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ldr q0, [x0]
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
; VBITS_GE_256-NEXT:    sunpklo z0.s, z0.h
; VBITS_GE_256-NEXT:    sunpklo z1.s, z1.h
; VBITS_GE_256-NEXT:    sunpklo z0.d, z0.s
; VBITS_GE_256-NEXT:    sunpklo z1.d, z1.s
; VBITS_GE_256-NEXT:    scvtf z0.d, p0/m, z0.d
; VBITS_GE_256-NEXT:    scvtf z1.d, p0/m, z1.d
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x1]
; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: scvtf_v8i16_v8f64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ldr q0, [x0]
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    sunpklo z0.s, z0.h
; VBITS_GE_512-NEXT:    sunpklo z0.d, z0.s
; VBITS_GE_512-NEXT:    scvtf z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x i16>, ptr %a
  %res = sitofp <8 x i16> %op1 to <8 x double>
  store <8 x double> %res, ptr %b
  ret void
}

define void @scvtf_v16i16_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: scvtf_v16i16_v16f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    sunpklo z0.s, z0.h
; CHECK-NEXT:    sunpklo z0.d, z0.s
; CHECK-NEXT:    scvtf z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <16 x i16>, ptr %a
  %res = sitofp <16 x i16> %op1 to <16 x double>
  store <16 x double> %res, ptr %b
  ret void
}

define void @scvtf_v32i16_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: scvtf_v32i16_v32f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl32
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    sunpklo z0.s, z0.h
; CHECK-NEXT:    sunpklo z0.d, z0.s
; CHECK-NEXT:    scvtf z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x i16>, ptr %a
  %res = sitofp <32 x i16> %op1 to <32 x double>
  store <32 x double> %res, ptr %b
  ret void
}

;
; SCVTF S -> H
;

; Don't use SVE for 64-bit vectors.
define <2 x half> @scvtf_v2i32_v2f16(<2 x i32> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v2i32_v2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    scvtf v0.4s, v0.4s
; CHECK-NEXT:    fcvtn v0.4h, v0.4s
; CHECK-NEXT:    ret
  %res = sitofp <2 x i32> %op1 to <2 x half>
  ret <2 x half> %res
}

; Don't use SVE for 128-bit vectors.
define <4 x half> @scvtf_v4i32_v4f16(<4 x i32> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v4i32_v4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    scvtf v0.4s, v0.4s
; CHECK-NEXT:    fcvtn v0.4h, v0.4s
; CHECK-NEXT:    ret
  %res = sitofp <4 x i32> %op1 to <4 x half>
  ret <4 x half> %res
}

define <8 x half> @scvtf_v8i32_v8f16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v8i32_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    scvtf z0.h, p0/m, z0.s
; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
  %op1 = load <8 x i32>, ptr %a
  %res = sitofp <8 x i32> %op1 to <8 x half>
  ret <8 x half> %res
}

; No vscale_range attribute, so both the 256-bit (convert two halves then
; splice) and 512-bit (single convert/narrow) lowerings are checked.
define void @scvtf_v16i32_v16f16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: scvtf_v16i32_v16f16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    scvtf z0.h, p0/m, z0.s
; VBITS_GE_256-NEXT:    scvtf z1.h, p0/m, z1.s
; VBITS_GE_256-NEXT:    ptrue p0.h, vl8
; VBITS_GE_256-NEXT:    uzp1 z0.h, z0.h, z0.h
; VBITS_GE_256-NEXT:    uzp1 z1.h, z1.h, z1.h
; VBITS_GE_256-NEXT:    splice z1.h, p0, z1.h, z0.h
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    st1h { z1.h }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: scvtf_v16i32_v16f16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    scvtf z0.h, p0/m, z0.s
; VBITS_GE_512-NEXT:    ptrue p0.h, vl16
; VBITS_GE_512-NEXT:    uzp1 z0.h, z0.h, z0.h
; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <16 x i32>, ptr %a
  %res = sitofp <16 x i32> %op1 to <16 x half>
  store <16 x half> %res, ptr %b
  ret void
}

define void @scvtf_v32i32_v32f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: scvtf_v32i32_v32f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    scvtf z0.h, p0/m, z0.s
; CHECK-NEXT:    ptrue p0.h, vl32
; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x i32>, ptr %a
  %res = sitofp <32 x i32> %op1 to <32 x half>
  store <32 x half> %res, ptr %b
  ret void
}

define void @scvtf_v64i32_v64f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: scvtf_v64i32_v64f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    scvtf z0.h, p0/m, z0.s
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <64 x i32>, ptr %a
  %res = sitofp <64 x i32> %op1 to <64 x half>
  store <64 x half> %res, ptr %b
  ret void
}

;
; SCVTF S -> S
;

; Don't use SVE for 64-bit vectors.
define <2 x float> @scvtf_v2i32_v2f32(<2 x i32> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v2i32_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    scvtf v0.2s, v0.2s
; CHECK-NEXT:    ret
  %res = sitofp <2 x i32> %op1 to <2 x float>
  ret <2 x float> %res
}

; Don't use SVE for 128-bit vectors.
define <4 x float> @scvtf_v4i32_v4f32(<4 x i32> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v4i32_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    scvtf v0.4s, v0.4s
; CHECK-NEXT:    ret
  %res = sitofp <4 x i32> %op1 to <4 x float>
  ret <4 x float> %res
}

define void @scvtf_v8i32_v8f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v8i32_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    scvtf z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <8 x i32>, ptr %a
  %res = sitofp <8 x i32> %op1 to <8 x float>
  store <8 x float> %res, ptr %b
  ret void
}

; No vscale_range attribute, so both the 256-bit (two registers) and 512-bit
; (single register) lowerings are checked.
define void @scvtf_v16i32_v16f32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: scvtf_v16i32_v16f32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    scvtf z0.s, p0/m, z0.s
; VBITS_GE_256-NEXT:    scvtf z1.s, p0/m, z1.s
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: scvtf_v16i32_v16f32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    scvtf z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <16 x i32>, ptr %a
  %res = sitofp <16 x i32> %op1 to <16 x float>
  store <16 x float> %res, ptr %b
  ret void
}

define void @scvtf_v32i32_v32f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: scvtf_v32i32_v32f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    scvtf z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x i32>, ptr %a
  %res = sitofp <32 x i32> %op1 to <32 x float>
  store <32 x float> %res, ptr %b
  ret void
}

define void @scvtf_v64i32_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: scvtf_v64i32_v64f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    scvtf z0.s, p0/m, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <64 x i32>, ptr %a
  %res = sitofp <64 x i32> %op1 to <64 x float>
  store <64 x float> %res, ptr %b
  ret void
}

;
; SCVTF S -> D
;

; Don't use SVE for 64-bit vectors.
define <1 x double> @scvtf_v1i32_v1f64(<1 x i32> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v1i32_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sshll v0.2d, v0.2s, #0
; CHECK-NEXT:    scvtf v0.2d, v0.2d
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %res = sitofp <1 x i32> %op1 to <1 x double>
  ret <1 x double> %res
}

; Don't use SVE for 128-bit vectors.
define <2 x double> @scvtf_v2i32_v2f64(<2 x i32> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v2i32_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sshll v0.2d, v0.2s, #0
; CHECK-NEXT:    scvtf v0.2d, v0.2d
; CHECK-NEXT:    ret
  %res = sitofp <2 x i32> %op1 to <2 x double>
  ret <2 x double> %res
}

define void @scvtf_v4i32_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v4i32_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    sunpklo z0.d, z0.s
; CHECK-NEXT:    scvtf z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <4 x i32>, ptr %a
  %res = sitofp <4 x i32> %op1 to <4 x double>
  store <4 x double> %res, ptr %b
  ret void
}

; No vscale_range attribute, so both the 256-bit (split halves, unpack each)
; and 512-bit (single unpack) lowerings are checked.
define void @scvtf_v8i32_v8f64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: scvtf_v8i32_v8f64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    sunpklo z1.d, z0.s
; VBITS_GE_256-NEXT:    ext z0.b, z0.b, z0.b, #16
; VBITS_GE_256-NEXT:    sunpklo z0.d, z0.s
; VBITS_GE_256-NEXT:    scvtf z1.d, p0/m, z1.d
; VBITS_GE_256-NEXT:    scvtf z0.d, p0/m, z0.d
; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x1]
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: scvtf_v8i32_v8f64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl8
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    sunpklo z0.d, z0.s
; VBITS_GE_512-NEXT:    scvtf z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x i32>, ptr %a
  %res = sitofp <8 x i32> %op1 to <8 x double>
  store <8 x double> %res, ptr %b
  ret void
}

define void @scvtf_v16i32_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: scvtf_v16i32_v16f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl16
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    sunpklo z0.d, z0.s
; CHECK-NEXT:    scvtf z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <16 x i32>, ptr %a
  %res = sitofp <16 x i32> %op1 to <16 x double>
  store <16 x double> %res, ptr %b
  ret void
}

define void @scvtf_v32i32_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: scvtf_v32i32_v32f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    sunpklo z0.d, z0.s
; CHECK-NEXT:    scvtf z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x i32>, ptr %a
  %res = sitofp <32 x i32> %op1 to <32 x double>
  store <32 x double> %res, ptr %b
  ret void
}

;
; SCVTF D -> H
;

; Don't use SVE for 64-bit vectors.
define <1 x half> @scvtf_v1i64_v1f16(<1 x i64> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v1i64_v1f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fmov x8, d0
; CHECK-NEXT:    scvtf h0, x8
; CHECK-NEXT:    ret
  %res = sitofp <1 x i64> %op1 to <1 x half>
  ret <1 x half> %res
}

; v2f16 is not legal for NEON, so use SVE
define <2 x half> @scvtf_v2i64_v2f16(<2 x i64> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v2i64_v2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    scvtf z0.h, p0/m, z0.d
; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %res = sitofp <2 x i64> %op1 to <2 x half>
  ret <2 x half> %res
}

define <4 x half> @scvtf_v4i64_v4f16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v4i64_v4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    scvtf z0.h, p0/m, z0.d
; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %op1 = load <4 x i64>, ptr %a
  %res = sitofp <4 x i64> %op1 to <4 x half>
  ret <4 x half> %res
}

; No vscale_range attribute, so both the 256-bit (convert and narrow two
; halves, then merge) and 512-bit (single convert/narrow) lowerings are
; checked.
define <8 x half> @scvtf_v8i64_v8f16(ptr %a) #0 {
; VBITS_GE_256-LABEL: scvtf_v8i64_v8f16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    scvtf z0.h, p0/m, z0.d
; VBITS_GE_256-NEXT:    scvtf z1.h, p0/m, z1.d
; VBITS_GE_256-NEXT:    uzp1 z0.s, z0.s, z0.s
; VBITS_GE_256-NEXT:    uzp1 z1.s, z1.s, z1.s
; VBITS_GE_256-NEXT:    uzp1 z2.h, z0.h, z0.h
; VBITS_GE_256-NEXT:    uzp1 z0.h, z1.h, z1.h
; VBITS_GE_256-NEXT:    mov v0.d[1], v2.d[0]
; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 killed $z0
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: scvtf_v8i64_v8f16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    scvtf z0.h, p0/m, z0.d
; VBITS_GE_512-NEXT:    uzp1 z0.s, z0.s, z0.s
; VBITS_GE_512-NEXT:    uzp1 z0.h, z0.h, z0.h
; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 killed $z0
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x i64>, ptr %a
  %res = sitofp <8 x i64> %op1 to <8 x half>
  ret <8 x half> %res
}

define void @scvtf_v16i64_v16f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: scvtf_v16i64_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    scvtf z0.h, p0/m, z0.d
; CHECK-NEXT:    ptrue p0.s, vl16
; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT:    st1h { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <16 x i64>, ptr %a
  %res = sitofp <16 x i64> %op1 to <16 x half>
  store <16 x half> %res, ptr %b
  ret void
}

define void @scvtf_v32i64_v32f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: scvtf_v32i64_v32f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    scvtf z0.h, p0/m, z0.d
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT:    st1h { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x i64>, ptr %a
  %res = sitofp <32 x i64> %op1 to <32 x half>
  store <32 x half> %res, ptr %b
  ret void
}

;
; SCVTF D -> S
;

; Don't use SVE for 64-bit vectors.
define <1 x float> @scvtf_v1i64_v1f32(<1 x i64> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v1i64_v1f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    scvtf v0.2d, v0.2d
; CHECK-NEXT:    fcvtn v0.2s, v0.2d
; CHECK-NEXT:    ret
  %res = sitofp <1 x i64> %op1 to <1 x float>
  ret <1 x float> %res
}

; Don't use SVE for 128-bit vectors.
define <2 x float> @scvtf_v2i64_v2f32(<2 x i64> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v2i64_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    scvtf v0.2d, v0.2d
; CHECK-NEXT:    fcvtn v0.2s, v0.2d
; CHECK-NEXT:    ret
  %res = sitofp <2 x i64> %op1 to <2 x float>
  ret <2 x float> %res
}

define <4 x float> @scvtf_v4i64_v4f32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v4i64_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    scvtf z0.s, p0/m, z0.d
; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
  %op1 = load <4 x i64>, ptr %a
  %res = sitofp <4 x i64> %op1 to <4 x float>
  ret <4 x float> %res
}

; No vscale_range attribute, so both the 256-bit (convert two halves then
; splice) and 512-bit (single convert/narrow) lowerings are checked.
define void @scvtf_v8i64_v8f32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: scvtf_v8i64_v8f32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    scvtf z0.s, p0/m, z0.d
; VBITS_GE_256-NEXT:    scvtf z1.s, p0/m, z1.d
; VBITS_GE_256-NEXT:    ptrue p0.s, vl4
; VBITS_GE_256-NEXT:    uzp1 z0.s, z0.s, z0.s
; VBITS_GE_256-NEXT:    uzp1 z1.s, z1.s, z1.s
; VBITS_GE_256-NEXT:    splice z1.s, p0, z1.s, z0.s
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: scvtf_v8i64_v8f32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    scvtf z0.s, p0/m, z0.d
; VBITS_GE_512-NEXT:    ptrue p0.s, vl8
; VBITS_GE_512-NEXT:    uzp1 z0.s, z0.s, z0.s
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x i64>, ptr %a
  %res = sitofp <8 x i64> %op1 to <8 x float>
  store <8 x float> %res, ptr %b
  ret void
}

define void @scvtf_v16i64_v16f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: scvtf_v16i64_v16f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    scvtf z0.s, p0/m, z0.d
; CHECK-NEXT:    ptrue p0.s, vl16
; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <16 x i64>, ptr %a
  %res = sitofp <16 x i64> %op1 to <16 x float>
  store <16 x float> %res, ptr %b
  ret void
}

define void @scvtf_v32i64_v32f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: scvtf_v32i64_v32f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    scvtf z0.s, p0/m, z0.d
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x i64>, ptr %a
  %res = sitofp <32 x i64> %op1 to <32 x float>
  store <32 x float> %res, ptr %b
  ret void
}

;
; SCVTF D -> D
;

; Don't use SVE for 64-bit vectors.
define <1 x double> @scvtf_v1i64_v1f64(<1 x i64> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v1i64_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    scvtf d0, d0
; CHECK-NEXT:    ret
  %res = sitofp <1 x i64> %op1 to <1 x double>
  ret <1 x double> %res
}

; Don't use SVE for 128-bit vectors.
define <2 x double> @scvtf_v2i64_v2f64(<2 x i64> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v2i64_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    scvtf v0.2d, v0.2d
; CHECK-NEXT:    ret
  %res = sitofp <2 x i64> %op1 to <2 x double>
  ret <2 x double> %res
}

define void @scvtf_v4i64_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: scvtf_v4i64_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    scvtf z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <4 x i64>, ptr %a
  %res = sitofp <4 x i64> %op1 to <4 x double>
  store <4 x double> %res, ptr %b
  ret void
}

; No vscale_range attribute, so both the 256-bit (two registers) and 512-bit
; (single register) lowerings are checked.
define void @scvtf_v8i64_v8f64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: scvtf_v8i64_v8f64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    scvtf z0.d, p0/m, z0.d
; VBITS_GE_256-NEXT:    scvtf z1.d, p0/m, z1.d
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: scvtf_v8i64_v8f64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    scvtf z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x i64>, ptr %a
  %res = sitofp <8 x i64> %op1 to <8 x double>
  store <8 x double> %res, ptr %b
  ret void
}

define void @scvtf_v16i64_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: scvtf_v16i64_v16f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    scvtf z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <16 x i64>, ptr %a
  %res = sitofp <16 x i64> %op1 to <16 x double>
  store <16 x double> %res, ptr %b
  ret void
}

define void @scvtf_v32i64_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: scvtf_v32i64_v32f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    scvtf z0.d, p0/m, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %op1 = load <32 x i64>, ptr %a
  %res = sitofp <32 x i64> %op1 to <32 x double>
  store <32 x double> %res, ptr %b
  ret void
}

attributes #0 = { "target-features"="+sve" }