; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64 -mattr=+sve | FileCheck %s

; Float

declare <vscale x 2 x i32> @llvm.fptoui.sat.nxv2f32.nxv2i32(<vscale x 2 x float>)
declare <vscale x 4 x i32> @llvm.fptoui.sat.nxv4f32.nxv4i32(<vscale x 4 x float>)
declare <vscale x 8 x i32> @llvm.fptoui.sat.nxv8f32.nxv8i32(<vscale x 8 x float>)
declare <vscale x 4 x i16> @llvm.fptoui.sat.nxv4f32.nxv4i16(<vscale x 4 x float>)
declare <vscale x 8 x i16> @llvm.fptoui.sat.nxv8f32.nxv8i16(<vscale x 8 x float>)
declare <vscale x 2 x i64> @llvm.fptoui.sat.nxv2f32.nxv2i64(<vscale x 2 x float>)
declare <vscale x 4 x i64> @llvm.fptoui.sat.nxv4f32.nxv4i64(<vscale x 4 x float>)
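
; A note on the pattern repeated below: each conversion runs a predicated
; fcvtzu, then fixes up the out-of-range lanes. Lanes where "fcmge x, #0.0"
; is false (negative inputs and NaN, which fptoui.sat defines as 0) are
; zeroed, and lanes where "fcmgt x, <bound>" is true saturate to the unsigned
; maximum. For f32 sources the bounds are 0x4f7fffff (the largest float below
; 2^32), 0x477fff00 (65535.0) and 0x5f7fffff (the largest float below 2^64).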

define <vscale x 2 x i32> @test_signed_v2f32_v2i32(<vscale x 2 x float> %f) {
; CHECK-LABEL: test_signed_v2f32_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov w8, #1333788671 // =0x4f7fffff
; CHECK-NEXT:    mov z1.s, w8
; CHECK-NEXT:    fcmge p1.s, p0/z, z0.s, #0.0
; CHECK-NEXT:    movprfx z2, z0
; CHECK-NEXT:    fcvtzu z2.d, p0/m, z0.s
; CHECK-NEXT:    not p1.b, p0/z, p1.b
; CHECK-NEXT:    fcmgt p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT:    mov z0.d, #0xffffffff
; CHECK-NEXT:    mov z2.d, p1/m, #0 // =0x0
; CHECK-NEXT:    sel z0.d, p0, z0.d, z2.d
; CHECK-NEXT:    ret
  %x = call <vscale x 2 x i32> @llvm.fptoui.sat.nxv2f32.nxv2i32(<vscale x 2 x float> %f)
  ret <vscale x 2 x i32> %x
}

define <vscale x 4 x i32> @test_signed_v4f32_v4i32(<vscale x 4 x float> %f) {
; CHECK-LABEL: test_signed_v4f32_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov w8, #1333788671 // =0x4f7fffff
; CHECK-NEXT:    mov z2.s, w8
; CHECK-NEXT:    fcmge p1.s, p0/z, z0.s, #0.0
; CHECK-NEXT:    movprfx z1, z0
; CHECK-NEXT:    fcvtzu z1.s, p0/m, z0.s
; CHECK-NEXT:    not p1.b, p0/z, p1.b
; CHECK-NEXT:    fcmgt p0.s, p0/z, z0.s, z2.s
; CHECK-NEXT:    mov z1.s, p1/m, #0 // =0x0
; CHECK-NEXT:    mov z1.s, p0/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    mov z0.d, z1.d
; CHECK-NEXT:    ret
  %x = call <vscale x 4 x i32> @llvm.fptoui.sat.nxv4f32.nxv4i32(<vscale x 4 x float> %f)
  ret <vscale x 4 x i32> %x
}

define <vscale x 8 x i32> @test_signed_v8f32_v8i32(<vscale x 8 x float> %f) {
; CHECK-LABEL: test_signed_v8f32_v8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov w8, #1333788671 // =0x4f7fffff
; CHECK-NEXT:    mov z4.s, w8
; CHECK-NEXT:    fcmge p1.s, p0/z, z0.s, #0.0
; CHECK-NEXT:    fcmge p2.s, p0/z, z1.s, #0.0
; CHECK-NEXT:    movprfx z2, z0
; CHECK-NEXT:    fcvtzu z2.s, p0/m, z0.s
; CHECK-NEXT:    movprfx z3, z1
; CHECK-NEXT:    fcvtzu z3.s, p0/m, z1.s
; CHECK-NEXT:    fcmgt p3.s, p0/z, z0.s, z4.s
; CHECK-NEXT:    not p1.b, p0/z, p1.b
; CHECK-NEXT:    not p2.b, p0/z, p2.b
; CHECK-NEXT:    fcmgt p0.s, p0/z, z1.s, z4.s
; CHECK-NEXT:    mov z2.s, p1/m, #0 // =0x0
; CHECK-NEXT:    mov z3.s, p2/m, #0 // =0x0
; CHECK-NEXT:    mov z2.s, p3/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    mov z3.s, p0/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    mov z0.d, z2.d
; CHECK-NEXT:    mov z1.d, z3.d
; CHECK-NEXT:    ret
  %x = call <vscale x 8 x i32> @llvm.fptoui.sat.nxv8f32.nxv8i32(<vscale x 8 x float> %f)
  ret <vscale x 8 x i32> %x
}

define <vscale x 4 x i16> @test_signed_v4f32_v4i16(<vscale x 4 x float> %f) {
; CHECK-LABEL: test_signed_v4f32_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov w8, #65280 // =0xff00
; CHECK-NEXT:    movk w8, #18303, lsl #16
; CHECK-NEXT:    fcmge p1.s, p0/z, z0.s, #0.0
; CHECK-NEXT:    mov z1.s, w8
; CHECK-NEXT:    movprfx z2, z0
; CHECK-NEXT:    fcvtzu z2.s, p0/m, z0.s
; CHECK-NEXT:    not p1.b, p0/z, p1.b
; CHECK-NEXT:    fcmgt p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT:    mov z0.s, #65535 // =0xffff
; CHECK-NEXT:    mov z2.s, p1/m, #0 // =0x0
; CHECK-NEXT:    sel z0.s, p0, z0.s, z2.s
; CHECK-NEXT:    ret
  %x = call <vscale x 4 x i16> @llvm.fptoui.sat.nxv4f32.nxv4i16(<vscale x 4 x float> %f)
  ret <vscale x 4 x i16> %x
}

define <vscale x 8 x i16> @test_signed_v8f32_v8i16(<vscale x 8 x float> %f) {
; CHECK-LABEL: test_signed_v8f32_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov w8, #65280 // =0xff00
; CHECK-NEXT:    movk w8, #18303, lsl #16
; CHECK-NEXT:    fcmge p1.s, p0/z, z1.s, #0.0
; CHECK-NEXT:    fcmge p2.s, p0/z, z0.s, #0.0
; CHECK-NEXT:    mov z2.s, w8
; CHECK-NEXT:    movprfx z3, z1
; CHECK-NEXT:    fcvtzu z3.s, p0/m, z1.s
; CHECK-NEXT:    movprfx z4, z0
; CHECK-NEXT:    fcvtzu z4.s, p0/m, z0.s
; CHECK-NEXT:    fcmgt p3.s, p0/z, z1.s, z2.s
; CHECK-NEXT:    not p1.b, p0/z, p1.b
; CHECK-NEXT:    not p2.b, p0/z, p2.b
; CHECK-NEXT:    fcmgt p0.s, p0/z, z0.s, z2.s
; CHECK-NEXT:    mov z0.s, #65535 // =0xffff
; CHECK-NEXT:    mov z3.s, p1/m, #0 // =0x0
; CHECK-NEXT:    mov z4.s, p2/m, #0 // =0x0
; CHECK-NEXT:    sel z1.s, p3, z0.s, z3.s
; CHECK-NEXT:    sel z0.s, p0, z0.s, z4.s
; CHECK-NEXT:    uzp1 z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %x = call <vscale x 8 x i16> @llvm.fptoui.sat.nxv8f32.nxv8i16(<vscale x 8 x float> %f)
  ret <vscale x 8 x i16> %x
}

define <vscale x 2 x i64> @test_signed_v2f32_v2i64(<vscale x 2 x float> %f) {
; CHECK-LABEL: test_signed_v2f32_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov w8, #1602224127 // =0x5f7fffff
; CHECK-NEXT:    mov z2.s, w8
; CHECK-NEXT:    fcmge p1.s, p0/z, z0.s, #0.0
; CHECK-NEXT:    movprfx z1, z0
; CHECK-NEXT:    fcvtzu z1.d, p0/m, z0.s
; CHECK-NEXT:    not p1.b, p0/z, p1.b
; CHECK-NEXT:    fcmgt p0.s, p0/z, z0.s, z2.s
; CHECK-NEXT:    mov z1.d, p1/m, #0 // =0x0
; CHECK-NEXT:    mov z1.d, p0/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    mov z0.d, z1.d
; CHECK-NEXT:    ret
  %x = call <vscale x 2 x i64> @llvm.fptoui.sat.nxv2f32.nxv2i64(<vscale x 2 x float> %f)
  ret <vscale x 2 x i64> %x
}

define <vscale x 4 x i64> @test_signed_v4f32_v4i64(<vscale x 4 x float> %f) {
; CHECK-LABEL: test_signed_v4f32_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uunpklo z2.d, z0.s
; CHECK-NEXT:    uunpkhi z3.d, z0.s
; CHECK-NEXT:    mov w8, #1602224127 // =0x5f7fffff
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z4.s, w8
; CHECK-NEXT:    fcmge p1.s, p0/z, z2.s, #0.0
; CHECK-NEXT:    fcmge p2.s, p0/z, z3.s, #0.0
; CHECK-NEXT:    movprfx z0, z2
; CHECK-NEXT:    fcvtzu z0.d, p0/m, z2.s
; CHECK-NEXT:    movprfx z1, z3
; CHECK-NEXT:    fcvtzu z1.d, p0/m, z3.s
; CHECK-NEXT:    fcmgt p3.s, p0/z, z2.s, z4.s
; CHECK-NEXT:    not p1.b, p0/z, p1.b
; CHECK-NEXT:    not p2.b, p0/z, p2.b
; CHECK-NEXT:    fcmgt p0.s, p0/z, z3.s, z4.s
; CHECK-NEXT:    mov z0.d, p1/m, #0 // =0x0
; CHECK-NEXT:    mov z1.d, p2/m, #0 // =0x0
; CHECK-NEXT:    mov z0.d, p3/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    mov z1.d, p0/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    ret
  %x = call <vscale x 4 x i64> @llvm.fptoui.sat.nxv4f32.nxv4i64(<vscale x 4 x float> %f)
  ret <vscale x 4 x i64> %x
}

; Double

declare <vscale x 2 x i32> @llvm.fptoui.sat.nxv2f64.nxv2i32(<vscale x 2 x double>)
declare <vscale x 4 x i32> @llvm.fptoui.sat.nxv4f64.nxv4i32(<vscale x 4 x double>)
declare <vscale x 8 x i32> @llvm.fptoui.sat.nxv8f64.nxv8i32(<vscale x 8 x double>)
declare <vscale x 4 x i16> @llvm.fptoui.sat.nxv4f64.nxv4i16(<vscale x 4 x double>)
declare <vscale x 8 x i16> @llvm.fptoui.sat.nxv8f64.nxv8i16(<vscale x 8 x double>)
declare <vscale x 2 x i64> @llvm.fptoui.sat.nxv2f64.nxv2i64(<vscale x 2 x double>)
declare <vscale x 4 x i64> @llvm.fptoui.sat.nxv4f64.nxv4i64(<vscale x 4 x double>)
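
; The f64 bounds are exactly representable: 0x41efffffffe00000 is
; 4294967295.0 (2^32 - 1), 0x40efffe000000000 is 65535.0, and
; 0x43efffffffffffff is the largest double below 2^64.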

define <vscale x 2 x i32> @test_signed_v2f64_v2i32(<vscale x 2 x double> %f) {
; CHECK-LABEL: test_signed_v2f64_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov x8, #281474974613504 // =0xffffffe00000
; CHECK-NEXT:    movk x8, #16879, lsl #48
; CHECK-NEXT:    fcmge p1.d, p0/z, z0.d, #0.0
; CHECK-NEXT:    mov z1.d, x8
; CHECK-NEXT:    movprfx z2, z0
; CHECK-NEXT:    fcvtzu z2.d, p0/m, z0.d
; CHECK-NEXT:    not p1.b, p0/z, p1.b
; CHECK-NEXT:    fcmgt p0.d, p0/z, z0.d, z1.d
; CHECK-NEXT:    mov z0.d, #0xffffffff
; CHECK-NEXT:    mov z2.d, p1/m, #0 // =0x0
; CHECK-NEXT:    sel z0.d, p0, z0.d, z2.d
; CHECK-NEXT:    ret
  %x = call <vscale x 2 x i32> @llvm.fptoui.sat.nxv2f64.nxv2i32(<vscale x 2 x double> %f)
  ret <vscale x 2 x i32> %x
}

define <vscale x 4 x i32> @test_signed_v4f64_v4i32(<vscale x 4 x double> %f) {
; CHECK-LABEL: test_signed_v4f64_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov x8, #281474974613504 // =0xffffffe00000
; CHECK-NEXT:    movk x8, #16879, lsl #48
; CHECK-NEXT:    fcmge p1.d, p0/z, z1.d, #0.0
; CHECK-NEXT:    fcmge p2.d, p0/z, z0.d, #0.0
; CHECK-NEXT:    mov z2.d, x8
; CHECK-NEXT:    movprfx z3, z1
; CHECK-NEXT:    fcvtzu z3.d, p0/m, z1.d
; CHECK-NEXT:    movprfx z4, z0
; CHECK-NEXT:    fcvtzu z4.d, p0/m, z0.d
; CHECK-NEXT:    fcmgt p3.d, p0/z, z1.d, z2.d
; CHECK-NEXT:    not p1.b, p0/z, p1.b
; CHECK-NEXT:    not p2.b, p0/z, p2.b
; CHECK-NEXT:    fcmgt p0.d, p0/z, z0.d, z2.d
; CHECK-NEXT:    mov z0.d, #0xffffffff
; CHECK-NEXT:    mov z3.d, p1/m, #0 // =0x0
; CHECK-NEXT:    mov z4.d, p2/m, #0 // =0x0
; CHECK-NEXT:    sel z1.d, p3, z0.d, z3.d
; CHECK-NEXT:    sel z0.d, p0, z0.d, z4.d
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %x = call <vscale x 4 x i32> @llvm.fptoui.sat.nxv4f64.nxv4i32(<vscale x 4 x double> %f)
  ret <vscale x 4 x i32> %x
}
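
; The four-register f64 tests need predicates p4-p6, which are callee-saved,
; so the prologue spills them to vector-length-scaled stack slots and the
; frame is described with a .cfi_escape expression (sp + 16 + 8 * VG).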

define <vscale x 8 x i32> @test_signed_v8f64_v8i32(<vscale x 8 x double> %f) {
; CHECK-LABEL: test_signed_v8f64_v8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    str p6, [sp, #5, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p5, [sp, #6, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov x8, #281474974613504 // =0xffffffe00000
; CHECK-NEXT:    movk x8, #16879, lsl #48
; CHECK-NEXT:    fcmge p1.d, p0/z, z1.d, #0.0
; CHECK-NEXT:    fcmge p2.d, p0/z, z0.d, #0.0
; CHECK-NEXT:    fcmge p3.d, p0/z, z3.d, #0.0
; CHECK-NEXT:    fcmge p4.d, p0/z, z2.d, #0.0
; CHECK-NEXT:    movprfx z5, z1
; CHECK-NEXT:    fcvtzu z5.d, p0/m, z1.d
; CHECK-NEXT:    mov z4.d, x8
; CHECK-NEXT:    movprfx z6, z0
; CHECK-NEXT:    fcvtzu z6.d, p0/m, z0.d
; CHECK-NEXT:    movprfx z7, z3
; CHECK-NEXT:    fcvtzu z7.d, p0/m, z3.d
; CHECK-NEXT:    movprfx z24, z2
; CHECK-NEXT:    fcvtzu z24.d, p0/m, z2.d
; CHECK-NEXT:    not p1.b, p0/z, p1.b
; CHECK-NEXT:    fcmgt p5.d, p0/z, z1.d, z4.d
; CHECK-NEXT:    fcmgt p6.d, p0/z, z0.d, z4.d
; CHECK-NEXT:    not p2.b, p0/z, p2.b
; CHECK-NEXT:    mov z0.d, #0xffffffff
; CHECK-NEXT:    not p3.b, p0/z, p3.b
; CHECK-NEXT:    mov z5.d, p1/m, #0 // =0x0
; CHECK-NEXT:    fcmgt p1.d, p0/z, z3.d, z4.d
; CHECK-NEXT:    not p4.b, p0/z, p4.b
; CHECK-NEXT:    fcmgt p0.d, p0/z, z2.d, z4.d
; CHECK-NEXT:    mov z6.d, p2/m, #0 // =0x0
; CHECK-NEXT:    mov z7.d, p3/m, #0 // =0x0
; CHECK-NEXT:    mov z24.d, p4/m, #0 // =0x0
; CHECK-NEXT:    sel z1.d, p5, z0.d, z5.d
; CHECK-NEXT:    ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    sel z2.d, p6, z0.d, z6.d
; CHECK-NEXT:    ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    sel z3.d, p1, z0.d, z7.d
; CHECK-NEXT:    ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    sel z4.d, p0, z0.d, z24.d
; CHECK-NEXT:    uzp1 z0.s, z2.s, z1.s
; CHECK-NEXT:    uzp1 z1.s, z4.s, z3.s
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %x = call <vscale x 8 x i32> @llvm.fptoui.sat.nxv8f64.nxv8i32(<vscale x 8 x double> %f)
  ret <vscale x 8 x i32> %x
}

define <vscale x 4 x i16> @test_signed_v4f64_v4i16(<vscale x 4 x double> %f) {
; CHECK-LABEL: test_signed_v4f64_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov x8, #281337537757184 // =0xffe000000000
; CHECK-NEXT:    movk x8, #16623, lsl #48
; CHECK-NEXT:    fcmge p1.d, p0/z, z1.d, #0.0
; CHECK-NEXT:    fcmge p2.d, p0/z, z0.d, #0.0
; CHECK-NEXT:    mov z2.d, x8
; CHECK-NEXT:    movprfx z3, z1
; CHECK-NEXT:    fcvtzu z3.d, p0/m, z1.d
; CHECK-NEXT:    movprfx z4, z0
; CHECK-NEXT:    fcvtzu z4.d, p0/m, z0.d
; CHECK-NEXT:    fcmgt p3.d, p0/z, z1.d, z2.d
; CHECK-NEXT:    not p1.b, p0/z, p1.b
; CHECK-NEXT:    not p2.b, p0/z, p2.b
; CHECK-NEXT:    fcmgt p0.d, p0/z, z0.d, z2.d
; CHECK-NEXT:    mov z0.d, #65535 // =0xffff
; CHECK-NEXT:    mov z3.d, p1/m, #0 // =0x0
; CHECK-NEXT:    mov z4.d, p2/m, #0 // =0x0
; CHECK-NEXT:    sel z1.d, p3, z0.d, z3.d
; CHECK-NEXT:    sel z0.d, p0, z0.d, z4.d
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %x = call <vscale x 4 x i16> @llvm.fptoui.sat.nxv4f64.nxv4i16(<vscale x 4 x double> %f)
  ret <vscale x 4 x i16> %x
}
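
; Narrowing 8 x f64 all the way to i16 takes two uzp1 stages: the four .d
; results are first interleaved pairwise into .s, then the two .s halves
; are combined into a single .h result.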

define <vscale x 8 x i16> @test_signed_v8f64_v8i16(<vscale x 8 x double> %f) {
; CHECK-LABEL: test_signed_v8f64_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    str p6, [sp, #5, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p5, [sp, #6, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov x8, #281337537757184 // =0xffe000000000
; CHECK-NEXT:    movk x8, #16623, lsl #48
; CHECK-NEXT:    fcmge p1.d, p0/z, z3.d, #0.0
; CHECK-NEXT:    fcmge p2.d, p0/z, z2.d, #0.0
; CHECK-NEXT:    fcmge p3.d, p0/z, z1.d, #0.0
; CHECK-NEXT:    fcmge p4.d, p0/z, z0.d, #0.0
; CHECK-NEXT:    movprfx z5, z3
; CHECK-NEXT:    fcvtzu z5.d, p0/m, z3.d
; CHECK-NEXT:    mov z4.d, x8
; CHECK-NEXT:    movprfx z6, z2
; CHECK-NEXT:    fcvtzu z6.d, p0/m, z2.d
; CHECK-NEXT:    movprfx z7, z1
; CHECK-NEXT:    fcvtzu z7.d, p0/m, z1.d
; CHECK-NEXT:    movprfx z24, z0
; CHECK-NEXT:    fcvtzu z24.d, p0/m, z0.d
; CHECK-NEXT:    not p1.b, p0/z, p1.b
; CHECK-NEXT:    fcmgt p5.d, p0/z, z3.d, z4.d
; CHECK-NEXT:    fcmgt p6.d, p0/z, z2.d, z4.d
; CHECK-NEXT:    not p2.b, p0/z, p2.b
; CHECK-NEXT:    mov z2.d, #65535 // =0xffff
; CHECK-NEXT:    not p3.b, p0/z, p3.b
; CHECK-NEXT:    mov z5.d, p1/m, #0 // =0x0
; CHECK-NEXT:    fcmgt p1.d, p0/z, z1.d, z4.d
; CHECK-NEXT:    not p4.b, p0/z, p4.b
; CHECK-NEXT:    fcmgt p0.d, p0/z, z0.d, z4.d
; CHECK-NEXT:    mov z6.d, p2/m, #0 // =0x0
; CHECK-NEXT:    mov z7.d, p3/m, #0 // =0x0
; CHECK-NEXT:    mov z24.d, p4/m, #0 // =0x0
; CHECK-NEXT:    sel z0.d, p5, z2.d, z5.d
; CHECK-NEXT:    ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    sel z1.d, p6, z2.d, z6.d
; CHECK-NEXT:    ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    sel z3.d, p1, z2.d, z7.d
; CHECK-NEXT:    ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    sel z2.d, p0, z2.d, z24.d
; CHECK-NEXT:    uzp1 z0.s, z1.s, z0.s
; CHECK-NEXT:    uzp1 z1.s, z2.s, z3.s
; CHECK-NEXT:    uzp1 z0.h, z1.h, z0.h
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %x = call <vscale x 8 x i16> @llvm.fptoui.sat.nxv8f64.nxv8i16(<vscale x 8 x double> %f)
  ret <vscale x 8 x i16> %x
}
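
; For full-width results there is no separate constant vector to select
; against: saturated lanes are simply overwritten with #-1 under the fcmgt
; predicate, as in the i64 tests below.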

define <vscale x 2 x i64> @test_signed_v2f64_v2i64(<vscale x 2 x double> %f) {
; CHECK-LABEL: test_signed_v2f64_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov x8, #4895412794951729151 // =0x43efffffffffffff
; CHECK-NEXT:    mov z2.d, x8
; CHECK-NEXT:    fcmge p1.d, p0/z, z0.d, #0.0
; CHECK-NEXT:    movprfx z1, z0
; CHECK-NEXT:    fcvtzu z1.d, p0/m, z0.d
; CHECK-NEXT:    not p1.b, p0/z, p1.b
; CHECK-NEXT:    fcmgt p0.d, p0/z, z0.d, z2.d
; CHECK-NEXT:    mov z1.d, p1/m, #0 // =0x0
; CHECK-NEXT:    mov z1.d, p0/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    mov z0.d, z1.d
; CHECK-NEXT:    ret
  %x = call <vscale x 2 x i64> @llvm.fptoui.sat.nxv2f64.nxv2i64(<vscale x 2 x double> %f)
  ret <vscale x 2 x i64> %x
}

define <vscale x 4 x i64> @test_signed_v4f64_v4i64(<vscale x 4 x double> %f) {
; CHECK-LABEL: test_signed_v4f64_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov x8, #4895412794951729151 // =0x43efffffffffffff
; CHECK-NEXT:    mov z4.d, x8
; CHECK-NEXT:    fcmge p1.d, p0/z, z0.d, #0.0
; CHECK-NEXT:    fcmge p2.d, p0/z, z1.d, #0.0
; CHECK-NEXT:    movprfx z2, z0
; CHECK-NEXT:    fcvtzu z2.d, p0/m, z0.d
; CHECK-NEXT:    movprfx z3, z1
; CHECK-NEXT:    fcvtzu z3.d, p0/m, z1.d
; CHECK-NEXT:    fcmgt p3.d, p0/z, z0.d, z4.d
; CHECK-NEXT:    not p1.b, p0/z, p1.b
; CHECK-NEXT:    not p2.b, p0/z, p2.b
; CHECK-NEXT:    fcmgt p0.d, p0/z, z1.d, z4.d
; CHECK-NEXT:    mov z2.d, p1/m, #0 // =0x0
; CHECK-NEXT:    mov z3.d, p2/m, #0 // =0x0
; CHECK-NEXT:    mov z2.d, p3/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    mov z3.d, p0/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    mov z0.d, z2.d
; CHECK-NEXT:    mov z1.d, z3.d
; CHECK-NEXT:    ret
  %x = call <vscale x 4 x i64> @llvm.fptoui.sat.nxv4f64.nxv4i64(<vscale x 4 x double> %f)
  ret <vscale x 4 x i64> %x
}

; half

declare <vscale x 2 x i32> @llvm.fptoui.sat.nxv2f16.nxv2i32(<vscale x 2 x half>)
declare <vscale x 4 x i32> @llvm.fptoui.sat.nxv4f16.nxv4i32(<vscale x 4 x half>)
declare <vscale x 8 x i32> @llvm.fptoui.sat.nxv8f16.nxv8i32(<vscale x 8 x half>)
declare <vscale x 4 x i16> @llvm.fptoui.sat.nxv4f16.nxv4i16(<vscale x 4 x half>)
declare <vscale x 8 x i16> @llvm.fptoui.sat.nxv8f16.nxv8i16(<vscale x 8 x half>)
declare <vscale x 2 x i64> @llvm.fptoui.sat.nxv2f16.nxv2i64(<vscale x 2 x half>)
declare <vscale x 4 x i64> @llvm.fptoui.sat.nxv4f16.nxv4i64(<vscale x 4 x half>)
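
; Half sources compare against 0x7bff (65504.0, the largest finite f16) for
; every result width; no finite half exceeds even the i16 range, so only
; +infinity takes the saturating path.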

define <vscale x 2 x i32> @test_signed_v2f16_v2i32(<vscale x 2 x half> %f) {
; CHECK-LABEL: test_signed_v2f16_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov w8, #31743 // =0x7bff
; CHECK-NEXT:    mov z1.h, w8
; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, #0.0
; CHECK-NEXT:    movprfx z2, z0
; CHECK-NEXT:    fcvtzu z2.d, p0/m, z0.h
; CHECK-NEXT:    not p1.b, p0/z, p1.b
; CHECK-NEXT:    fcmgt p0.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    mov z0.d, #0xffffffff
; CHECK-NEXT:    mov z2.d, p1/m, #0 // =0x0
; CHECK-NEXT:    sel z0.d, p0, z0.d, z2.d
; CHECK-NEXT:    ret
  %x = call <vscale x 2 x i32> @llvm.fptoui.sat.nxv2f16.nxv2i32(<vscale x 2 x half> %f)
  ret <vscale x 2 x i32> %x
}

define <vscale x 4 x i32> @test_signed_v4f16_v4i32(<vscale x 4 x half> %f) {
; CHECK-LABEL: test_signed_v4f16_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov w8, #31743 // =0x7bff
; CHECK-NEXT:    mov z2.h, w8
; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, #0.0
; CHECK-NEXT:    movprfx z1, z0
; CHECK-NEXT:    fcvtzu z1.s, p0/m, z0.h
; CHECK-NEXT:    not p1.b, p0/z, p1.b
; CHECK-NEXT:    fcmgt p0.h, p0/z, z0.h, z2.h
; CHECK-NEXT:    mov z1.s, p1/m, #0 // =0x0
; CHECK-NEXT:    mov z1.s, p0/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    mov z0.d, z1.d
; CHECK-NEXT:    ret
  %x = call <vscale x 4 x i32> @llvm.fptoui.sat.nxv4f16.nxv4i32(<vscale x 4 x half> %f)
  ret <vscale x 4 x i32> %x
}

define <vscale x 8 x i32> @test_signed_v8f16_v8i32(<vscale x 8 x half> %f) {
; CHECK-LABEL: test_signed_v8f16_v8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uunpklo z2.s, z0.h
; CHECK-NEXT:    uunpkhi z3.s, z0.h
; CHECK-NEXT:    mov w8, #31743 // =0x7bff
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov z4.h, w8
; CHECK-NEXT:    fcmge p1.h, p0/z, z2.h, #0.0
; CHECK-NEXT:    fcmge p2.h, p0/z, z3.h, #0.0
; CHECK-NEXT:    movprfx z0, z2
; CHECK-NEXT:    fcvtzu z0.s, p0/m, z2.h
; CHECK-NEXT:    movprfx z1, z3
; CHECK-NEXT:    fcvtzu z1.s, p0/m, z3.h
; CHECK-NEXT:    fcmgt p3.h, p0/z, z2.h, z4.h
; CHECK-NEXT:    not p1.b, p0/z, p1.b
; CHECK-NEXT:    not p2.b, p0/z, p2.b
; CHECK-NEXT:    fcmgt p0.h, p0/z, z3.h, z4.h
; CHECK-NEXT:    mov z0.s, p1/m, #0 // =0x0
; CHECK-NEXT:    mov z1.s, p2/m, #0 // =0x0
; CHECK-NEXT:    mov z0.s, p3/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    mov z1.s, p0/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    ret
  %x = call <vscale x 8 x i32> @llvm.fptoui.sat.nxv8f16.nxv8i32(<vscale x 8 x half> %f)
  ret <vscale x 8 x i32> %x
}

define <vscale x 4 x i16> @test_signed_v4f16_v4i16(<vscale x 4 x half> %f) {
; CHECK-LABEL: test_signed_v4f16_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov w8, #31743 // =0x7bff
; CHECK-NEXT:    mov z1.h, w8
; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, #0.0
; CHECK-NEXT:    movprfx z2, z0
; CHECK-NEXT:    fcvtzu z2.s, p0/m, z0.h
; CHECK-NEXT:    not p1.b, p0/z, p1.b
; CHECK-NEXT:    fcmgt p0.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    mov z0.s, #65535 // =0xffff
; CHECK-NEXT:    mov z2.s, p1/m, #0 // =0x0
; CHECK-NEXT:    sel z0.s, p0, z0.s, z2.s
; CHECK-NEXT:    ret
  %x = call <vscale x 4 x i16> @llvm.fptoui.sat.nxv4f16.nxv4i16(<vscale x 4 x half> %f)
  ret <vscale x 4 x i16> %x
}

define <vscale x 8 x i16> @test_signed_v8f16_v8i16(<vscale x 8 x half> %f) {
; CHECK-LABEL: test_signed_v8f16_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    mov w8, #31743 // =0x7bff
; CHECK-NEXT:    mov z2.h, w8
; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, #0.0
; CHECK-NEXT:    movprfx z1, z0
; CHECK-NEXT:    fcvtzu z1.h, p0/m, z0.h
; CHECK-NEXT:    not p1.b, p0/z, p1.b
; CHECK-NEXT:    fcmgt p0.h, p0/z, z0.h, z2.h
; CHECK-NEXT:    mov z1.h, p1/m, #0 // =0x0
; CHECK-NEXT:    mov z1.h, p0/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    mov z0.d, z1.d
; CHECK-NEXT:    ret
  %x = call <vscale x 8 x i16> @llvm.fptoui.sat.nxv8f16.nxv8i16(<vscale x 8 x half> %f)
  ret <vscale x 8 x i16> %x
}

define <vscale x 2 x i64> @test_signed_v2f16_v2i64(<vscale x 2 x half> %f) {
; CHECK-LABEL: test_signed_v2f16_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov w8, #31743 // =0x7bff
; CHECK-NEXT:    mov z2.h, w8
; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, #0.0
; CHECK-NEXT:    movprfx z1, z0
; CHECK-NEXT:    fcvtzu z1.d, p0/m, z0.h
; CHECK-NEXT:    not p1.b, p0/z, p1.b
; CHECK-NEXT:    fcmgt p0.h, p0/z, z0.h, z2.h
; CHECK-NEXT:    mov z1.d, p1/m, #0 // =0x0
; CHECK-NEXT:    mov z1.d, p0/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    mov z0.d, z1.d
; CHECK-NEXT:    ret
  %x = call <vscale x 2 x i64> @llvm.fptoui.sat.nxv2f16.nxv2i64(<vscale x 2 x half> %f)
  ret <vscale x 2 x i64> %x
}
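
; As with the other widening cases, 4 x f16 to 4 x i64 first unpacks the
; input with uunpklo/uunpkhi and converts each half separately.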

define <vscale x 4 x i64> @test_signed_v4f16_v4i64(<vscale x 4 x half> %f) {
; CHECK-LABEL: test_signed_v4f16_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uunpklo z2.d, z0.s
; CHECK-NEXT:    uunpkhi z3.d, z0.s
; CHECK-NEXT:    mov w8, #31743 // =0x7bff
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z4.h, w8
; CHECK-NEXT:    fcmge p1.h, p0/z, z2.h, #0.0
; CHECK-NEXT:    fcmge p2.h, p0/z, z3.h, #0.0
; CHECK-NEXT:    movprfx z0, z2
; CHECK-NEXT:    fcvtzu z0.d, p0/m, z2.h
; CHECK-NEXT:    movprfx z1, z3
; CHECK-NEXT:    fcvtzu z1.d, p0/m, z3.h
; CHECK-NEXT:    fcmgt p3.h, p0/z, z2.h, z4.h
; CHECK-NEXT:    not p1.b, p0/z, p1.b
; CHECK-NEXT:    not p2.b, p0/z, p2.b
; CHECK-NEXT:    fcmgt p0.h, p0/z, z3.h, z4.h
; CHECK-NEXT:    mov z0.d, p1/m, #0 // =0x0
; CHECK-NEXT:    mov z1.d, p2/m, #0 // =0x0
; CHECK-NEXT:    mov z0.d, p3/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    mov z1.d, p0/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    ret
  %x = call <vscale x 4 x i64> @llvm.fptoui.sat.nxv4f16.nxv4i64(<vscale x 4 x half> %f)
  ret <vscale x 4 x i64> %x
}