; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

; FP_EXTEND

define <vscale x 8 x float> @fcvts_nxv8f16(<vscale x 8 x half> %a) {
; CHECK-LABEL: fcvts_nxv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uunpklo z1.s, z0.h
; CHECK-NEXT:    uunpkhi z2.s, z0.h
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    movprfx z0, z1
; CHECK-NEXT:    fcvt z0.s, p0/m, z1.h
; CHECK-NEXT:    movprfx z1, z2
; CHECK-NEXT:    fcvt z1.s, p0/m, z2.h
; CHECK-NEXT:    ret
  %res = fpext <vscale x 8 x half> %a to <vscale x 8 x float>
  ret <vscale x 8 x float> %res
}

define <vscale x 4 x double> @fcvtd_nxv4f16(<vscale x 4 x half> %a) {
; CHECK-LABEL: fcvtd_nxv4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uunpklo z1.d, z0.s
; CHECK-NEXT:    uunpkhi z2.d, z0.s
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    movprfx z0, z1
; CHECK-NEXT:    fcvt z0.d, p0/m, z1.h
; CHECK-NEXT:    movprfx z1, z2
; CHECK-NEXT:    fcvt z1.d, p0/m, z2.h
; CHECK-NEXT:    ret
  %res = fpext <vscale x 4 x half> %a to <vscale x 4 x double>
  ret <vscale x 4 x double> %res
}

define <vscale x 8 x double> @fcvtd_nxv8f16(<vscale x 8 x half> %a) {
; CHECK-LABEL: fcvtd_nxv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uunpklo z1.s, z0.h
; CHECK-NEXT:    uunpkhi z0.s, z0.h
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    uunpklo z2.d, z1.s
; CHECK-NEXT:    uunpkhi z1.d, z1.s
; CHECK-NEXT:    uunpklo z3.d, z0.s
; CHECK-NEXT:    uunpkhi z4.d, z0.s
; CHECK-NEXT:    fcvt z1.d, p0/m, z1.h
; CHECK-NEXT:    movprfx z0, z2
; CHECK-NEXT:    fcvt z0.d, p0/m, z2.h
; CHECK-NEXT:    movprfx z2, z3
; CHECK-NEXT:    fcvt z2.d, p0/m, z3.h
; CHECK-NEXT:    movprfx z3, z4
; CHECK-NEXT:    fcvt z3.d, p0/m, z4.h
; CHECK-NEXT:    ret
  %res = fpext <vscale x 8 x half> %a to <vscale x 8 x double>
  ret <vscale x 8 x double> %res
}

define <vscale x 4 x double> @fcvtd_nxv4f32(<vscale x 4 x float> %a) {
; CHECK-LABEL: fcvtd_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uunpklo z1.d, z0.s
; CHECK-NEXT:    uunpkhi z2.d, z0.s
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    movprfx z0, z1
; CHECK-NEXT:    fcvt z0.d, p0/m, z1.s
; CHECK-NEXT:    movprfx z1, z2
; CHECK-NEXT:    fcvt z1.d, p0/m, z2.s
; CHECK-NEXT:    ret
  %res = fpext <vscale x 4 x float> %a to <vscale x 4 x double>
  ret <vscale x 4 x double> %res
}

define <vscale x 8 x double> @fcvtd_nxv8f32(<vscale x 8 x float> %a) {
; CHECK-LABEL: fcvtd_nxv8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uunpklo z2.d, z0.s
; CHECK-NEXT:    uunpkhi z3.d, z0.s
; CHECK-NEXT:    uunpklo z4.d, z1.s
; CHECK-NEXT:    uunpkhi z5.d, z1.s
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    movprfx z0, z2
; CHECK-NEXT:    fcvt z0.d, p0/m, z2.s
; CHECK-NEXT:    movprfx z1, z3
; CHECK-NEXT:    fcvt z1.d, p0/m, z3.s
; CHECK-NEXT:    movprfx z2, z4
; CHECK-NEXT:    fcvt z2.d, p0/m, z4.s
; CHECK-NEXT:    movprfx z3, z5
; CHECK-NEXT:    fcvt z3.d, p0/m, z5.s
; CHECK-NEXT:    ret
  %res = fpext <vscale x 8 x float> %a to <vscale x 8 x double>
  ret <vscale x 8 x double> %res
}

; FP_ROUND

define <vscale x 8 x half> @fcvth_nxv8f32(<vscale x 8 x float> %a) {
; CHECK-LABEL: fcvth_nxv8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fcvt z1.h, p0/m, z1.s
; CHECK-NEXT:    fcvt z0.h, p0/m, z0.s
; CHECK-NEXT:    uzp1 z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = fptrunc <vscale x 8 x float> %a to <vscale x 8 x half>
  ret <vscale x 8 x half> %res
}

define <vscale x 8 x half> @fcvth_nxv8f64(<vscale x 8 x double> %a) {
; CHECK-LABEL: fcvth_nxv8f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fcvt z3.h, p0/m, z3.d
; CHECK-NEXT:    fcvt z2.h, p0/m, z2.d
; CHECK-NEXT:    fcvt z1.h, p0/m, z1.d
; CHECK-NEXT:    fcvt z0.h, p0/m, z0.d
; CHECK-NEXT:    uzp1 z2.s, z2.s, z3.s
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT:    uzp1 z0.h, z0.h, z2.h
; CHECK-NEXT:    ret
  %res = fptrunc <vscale x 8 x double> %a to <vscale x 8 x half>
  ret <vscale x 8 x half> %res
}

define <vscale x 4 x half> @fcvth_nxv4f64(<vscale x 4 x double> %a) {
; CHECK-LABEL: fcvth_nxv4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fcvt z1.h, p0/m, z1.d
; CHECK-NEXT:    fcvt z0.h, p0/m, z0.d
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = fptrunc <vscale x 4 x double> %a to <vscale x 4 x half>
  ret <vscale x 4 x half> %res
}

define <vscale x 4 x float> @fcvts_nxv4f64(<vscale x 4 x double> %a) {
; CHECK-LABEL: fcvts_nxv4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fcvt z1.s, p0/m, z1.d
; CHECK-NEXT:    fcvt z0.s, p0/m, z0.d
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = fptrunc <vscale x 4 x double> %a to <vscale x 4 x float>
  ret <vscale x 4 x float> %res
}

define <vscale x 8 x float> @fcvts_nxv8f64(<vscale x 8 x double> %a) {
; CHECK-LABEL: fcvts_nxv8f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fcvt z1.s, p0/m, z1.d
; CHECK-NEXT:    fcvt z0.s, p0/m, z0.d
; CHECK-NEXT:    fcvt z3.s, p0/m, z3.d
; CHECK-NEXT:    fcvt z2.s, p0/m, z2.d
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT:    uzp1 z1.s, z2.s, z3.s
; CHECK-NEXT:    ret
  %res = fptrunc <vscale x 8 x double> %a to <vscale x 8 x float>
  ret <vscale x 8 x float> %res
}

; FP_TO_SINT

; Split operand
define <vscale x 4 x i32> @fcvtzs_s_nxv4f64(<vscale x 4 x double> %a) {
; CHECK-LABEL: fcvtzs_s_nxv4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fcvtzs z1.d, p0/m, z1.d
; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.d
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = fptosi <vscale x 4 x double> %a to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 8 x i16> @fcvtzs_h_nxv8f64(<vscale x 8 x double> %a) {
; CHECK-LABEL: fcvtzs_h_nxv8f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fcvtzs z3.d, p0/m, z3.d
; CHECK-NEXT:    fcvtzs z2.d, p0/m, z2.d
; CHECK-NEXT:    fcvtzs z1.d, p0/m, z1.d
; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.d
; CHECK-NEXT:    uzp1 z2.s, z2.s, z3.s
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT:    uzp1 z0.h, z0.h, z2.h
; CHECK-NEXT:    ret
  %res = fptosi <vscale x 8 x double> %a to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %res
}

; Split result
define <vscale x 4 x i64> @fcvtzs_d_nxv4f32(<vscale x 4 x float> %a) {
; CHECK-LABEL: fcvtzs_d_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uunpklo z1.d, z0.s
; CHECK-NEXT:    uunpkhi z2.d, z0.s
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    movprfx z0, z1
; CHECK-NEXT:    fcvtzs z0.d, p0/m, z1.s
; CHECK-NEXT:    movprfx z1, z2
; CHECK-NEXT:    fcvtzs z1.d, p0/m, z2.s
; CHECK-NEXT:    ret
  %res = fptosi <vscale x 4 x float> %a to <vscale x 4 x i64>
  ret <vscale x 4 x i64> %res
}

define <vscale x 16 x i32> @fcvtzs_s_nxv16f16(<vscale x 16 x half> %a) {
; CHECK-LABEL: fcvtzs_s_nxv16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uunpklo z2.s, z0.h
; CHECK-NEXT:    uunpkhi z3.s, z0.h
; CHECK-NEXT:    uunpklo z4.s, z1.h
; CHECK-NEXT:    uunpkhi z5.s, z1.h
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    movprfx z0, z2
; CHECK-NEXT:    fcvtzs z0.s, p0/m, z2.h
; CHECK-NEXT:    movprfx z1, z3
; CHECK-NEXT:    fcvtzs z1.s, p0/m, z3.h
; CHECK-NEXT:    movprfx z2, z4
; CHECK-NEXT:    fcvtzs z2.s, p0/m, z4.h
; CHECK-NEXT:    movprfx z3, z5
; CHECK-NEXT:    fcvtzs z3.s, p0/m, z5.h
; CHECK-NEXT:    ret
  %res = fptosi <vscale x 16 x half> %a to <vscale x 16 x i32>
  ret <vscale x 16 x i32> %res
}

; FP_TO_UINT

; Split operand
define <vscale x 4 x i32> @fcvtzu_s_nxv4f64(<vscale x 4 x double> %a) {
; CHECK-LABEL: fcvtzu_s_nxv4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fcvtzs z1.d, p0/m, z1.d
; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.d
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = fptoui <vscale x 4 x double> %a to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

; Split result
define <vscale x 4 x i64> @fcvtzu_d_nxv4f32(<vscale x 4 x float> %a) {
; CHECK-LABEL: fcvtzu_d_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uunpklo z1.d, z0.s
; CHECK-NEXT:    uunpkhi z2.d, z0.s
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    movprfx z0, z1
; CHECK-NEXT:    fcvtzu z0.d, p0/m, z1.s
; CHECK-NEXT:    movprfx z1, z2
; CHECK-NEXT:    fcvtzu z1.d, p0/m, z2.s
; CHECK-NEXT:    ret
  %res = fptoui <vscale x 4 x float> %a to <vscale x 4 x i64>
  ret <vscale x 4 x i64> %res
}

; SINT_TO_FP

; Split operand
define <vscale x 4 x float> @scvtf_s_nxv4i64(<vscale x 4 x i64> %a) {
; CHECK-LABEL: scvtf_s_nxv4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    scvtf z1.s, p0/m, z1.d
; CHECK-NEXT:    scvtf z0.s, p0/m, z0.d
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = sitofp <vscale x 4 x i64> %a to <vscale x 4 x float>
  ret <vscale x 4 x float> %res
}

define <vscale x 8 x half> @scvtf_h_nxv8i64(<vscale x 8 x i64> %a) {
; CHECK-LABEL: scvtf_h_nxv8i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    scvtf z3.h, p0/m, z3.d
; CHECK-NEXT:    scvtf z2.h, p0/m, z2.d
; CHECK-NEXT:    scvtf z1.h, p0/m, z1.d
; CHECK-NEXT:    scvtf z0.h, p0/m, z0.d
; CHECK-NEXT:    uzp1 z2.s, z2.s, z3.s
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT:    uzp1 z0.h, z0.h, z2.h
; CHECK-NEXT:    ret
  %res = sitofp <vscale x 8 x i64> %a to <vscale x 8 x half>
  ret <vscale x 8 x half> %res
}

; Split result
define <vscale x 16 x float> @scvtf_s_nxv16i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: scvtf_s_nxv16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sunpklo z1.h, z0.b
; CHECK-NEXT:    sunpkhi z0.h, z0.b
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    sunpklo z2.s, z1.h
; CHECK-NEXT:    sunpkhi z1.s, z1.h
; CHECK-NEXT:    sunpklo z3.s, z0.h
; CHECK-NEXT:    sunpkhi z4.s, z0.h
; CHECK-NEXT:    scvtf z1.s, p0/m, z1.s
; CHECK-NEXT:    movprfx z0, z2
; CHECK-NEXT:    scvtf z0.s, p0/m, z2.s
; CHECK-NEXT:    movprfx z2, z3
; CHECK-NEXT:    scvtf z2.s, p0/m, z3.s
; CHECK-NEXT:    movprfx z3, z4
; CHECK-NEXT:    scvtf z3.s, p0/m, z4.s
; CHECK-NEXT:    ret
  %res = sitofp <vscale x 16 x i8> %a to <vscale x 16 x float>
  ret <vscale x 16 x float> %res
}

define <vscale x 4 x double> @scvtf_d_nxv4i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: scvtf_d_nxv4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sunpklo z1.d, z0.s
; CHECK-NEXT:    sunpkhi z2.d, z0.s
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    movprfx z0, z1
; CHECK-NEXT:    scvtf z0.d, p0/m, z1.d
; CHECK-NEXT:    movprfx z1, z2
; CHECK-NEXT:    scvtf z1.d, p0/m, z2.d
; CHECK-NEXT:    ret
  %res = sitofp <vscale x 4 x i32> %a to <vscale x 4 x double>
  ret <vscale x 4 x double> %res
}

define <vscale x 4 x double> @scvtf_d_nxv4i1(<vscale x 4 x i1> %a) {
; CHECK-LABEL: scvtf_d_nxv4i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    punpklo p2.h, p0.b
; CHECK-NEXT:    punpkhi p0.h, p0.b
; CHECK-NEXT:    mov z0.d, p2/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    ptrue p1.d
; CHECK-NEXT:    mov z1.d, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    scvtf z0.d, p1/m, z0.d
; CHECK-NEXT:    scvtf z1.d, p1/m, z1.d
; CHECK-NEXT:    ret
  %res = sitofp <vscale x 4 x i1> %a to <vscale x 4 x double>
  ret <vscale x 4 x double> %res
}

; UINT_TO_FP

; Split operand
define <vscale x 4 x float> @ucvtf_s_nxv4i64(<vscale x 4 x i64> %a) {
; CHECK-LABEL: ucvtf_s_nxv4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ucvtf z1.s, p0/m, z1.d
; CHECK-NEXT:    ucvtf z0.s, p0/m, z0.d
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = uitofp <vscale x 4 x i64> %a to <vscale x 4 x float>
  ret <vscale x 4 x float> %res
}

define <vscale x 8 x half> @ucvtf_h_nxv8i64(<vscale x 8 x i64> %a) {
; CHECK-LABEL: ucvtf_h_nxv8i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ucvtf z3.h, p0/m, z3.d
; CHECK-NEXT:    ucvtf z2.h, p0/m, z2.d
; CHECK-NEXT:    ucvtf z1.h, p0/m, z1.d
; CHECK-NEXT:    ucvtf z0.h, p0/m, z0.d
; CHECK-NEXT:    uzp1 z2.s, z2.s, z3.s
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT:    uzp1 z0.h, z0.h, z2.h
; CHECK-NEXT:    ret
  %res = uitofp <vscale x 8 x i64> %a to <vscale x 8 x half>
  ret <vscale x 8 x half> %res
}

; Split result
define <vscale x 4 x double> @ucvtf_d_nxv4i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: ucvtf_d_nxv4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uunpklo z1.d, z0.s
; CHECK-NEXT:    uunpkhi z2.d, z0.s
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    movprfx z0, z1
; CHECK-NEXT:    ucvtf z0.d, p0/m, z1.d
; CHECK-NEXT:    movprfx z1, z2
; CHECK-NEXT:    ucvtf z1.d, p0/m, z2.d
; CHECK-NEXT:    ret
  %res = uitofp <vscale x 4 x i32> %a to <vscale x 4 x double>
  ret <vscale x 4 x double> %res
}

define <vscale x 4 x double> @ucvtf_d_nxv4i1(<vscale x 4 x i1> %a) {
; CHECK-LABEL: ucvtf_d_nxv4i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    punpklo p2.h, p0.b
; CHECK-NEXT:    punpkhi p0.h, p0.b
; CHECK-NEXT:    mov z0.d, p2/z, #1 // =0x1
; CHECK-NEXT:    ptrue p1.d
; CHECK-NEXT:    mov z1.d, p0/z, #1 // =0x1
; CHECK-NEXT:    ucvtf z0.d, p1/m, z0.d
; CHECK-NEXT:    ucvtf z1.d, p1/m, z1.d
; CHECK-NEXT:    ret
  %res = uitofp <vscale x 4 x i1> %a to <vscale x 4 x double>
  ret <vscale x 4 x double> %res
}