; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -verify-machineinstrs -mcpu=mvp -mattr=+simd128 | FileCheck %s

; Test that vector float-to-int, int-to-float, and float-to-float conversion
; instructions lower correctly

target triple = "wasm32-unknown-unknown"

;; i32x4 -> f32x4 conversions are expected to select a single SIMD instruction.

define <4 x float> @convert_s_v4f32(<4 x i32> %x) {
; CHECK-LABEL: convert_s_v4f32:
; CHECK:         .functype convert_s_v4f32 (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    f32x4.convert_i32x4_s
; CHECK-NEXT:    # fallthrough-return
  %a = sitofp <4 x i32> %x to <4 x float>
  ret <4 x float> %a
}

define <4 x float> @convert_u_v4f32(<4 x i32> %x) {
; CHECK-LABEL: convert_u_v4f32:
; CHECK:         .functype convert_u_v4f32 (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    f32x4.convert_i32x4_u
; CHECK-NEXT:    # fallthrough-return
  %a = uitofp <4 x i32> %x to <4 x float>
  ret <4 x float> %a
}

;; i64x2 -> f64x2: the signed case is expected to be scalarized lane by lane
;; (extract, scalar convert, splat/replace), while the unsigned case is
;; expected to expand into bitwise and floating-point vector arithmetic.

define <2 x double> @convert_s_v2f64(<2 x i64> %x) {
; CHECK-LABEL: convert_s_v2f64:
; CHECK:         .functype convert_s_v2f64 (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i64x2.extract_lane 0
; CHECK-NEXT:    f64.convert_i64_s
; CHECK-NEXT:    f64x2.splat
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i64x2.extract_lane 1
; CHECK-NEXT:    f64.convert_i64_s
; CHECK-NEXT:    f64x2.replace_lane 1
; CHECK-NEXT:    # fallthrough-return
  %a = sitofp <2 x i64> %x to <2 x double>
  ret <2 x double> %a
}

define <2 x double> @convert_u_v2f64(<2 x i64> %x) {
; CHECK-LABEL: convert_u_v2f64:
; CHECK:         .functype convert_u_v2f64 (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.const 4294967295, 4294967295
; CHECK-NEXT:    v128.and
; CHECK-NEXT:    v128.const 4841369599423283200, 4841369599423283200
; CHECK-NEXT:    v128.or
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 32
; CHECK-NEXT:    i64x2.shr_u
; CHECK-NEXT:    v128.const 4985484787499139072, 4985484787499139072
; CHECK-NEXT:    v128.or
; CHECK-NEXT:    v128.const 0x1.00000001p84, 0x1.00000001p84
; CHECK-NEXT:    f64x2.sub
; CHECK-NEXT:    f64x2.add
; CHECK-NEXT:    # fallthrough-return
  %a = uitofp <2 x i64> %x to <2 x double>
  ret <2 x double> %a
}

;; f32x4 -> i32x4: fptosi/fptoui are expected to select the saturating
;; truncation instructions.

define <4 x i32> @trunc_sat_s_v4i32(<4 x float> %x) {
; CHECK-LABEL: trunc_sat_s_v4i32:
; CHECK:         .functype trunc_sat_s_v4i32 (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.trunc_sat_f32x4_s
; CHECK-NEXT:    # fallthrough-return
  %a = fptosi <4 x float> %x to <4 x i32>
  ret <4 x i32> %a
}

define <4 x i32> @trunc_sat_u_v4i32(<4 x float> %x) {
; CHECK-LABEL: trunc_sat_u_v4i32:
; CHECK:         .functype trunc_sat_u_v4i32 (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.trunc_sat_f32x4_u
; CHECK-NEXT:    # fallthrough-return
  %a = fptoui <4 x float> %x to <4 x i32>
  ret <4 x i32> %a
}

;; f64x2 -> i64x2 is expected to be scalarized: each lane is compared against
;; a range bound before the scalar truncation, and a constant is used for the
;; lane when the comparison fails.

define <2 x i64> @trunc_sat_s_v2i64(<2 x double> %x) {
; CHECK-LABEL: trunc_sat_s_v2i64:
; CHECK:         .functype trunc_sat_s_v2i64 (v128) -> (v128)
; CHECK-NEXT:    .local f64, i64, i64
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    block
; CHECK-NEXT:    block
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    f64x2.extract_lane 1
; CHECK-NEXT:    local.tee 1
; CHECK-NEXT:    f64.abs
; CHECK-NEXT:    f64.const 0x1p63
; CHECK-NEXT:    f64.lt
; CHECK-NEXT:    i32.eqz
; CHECK-NEXT:    br_if 0 # 0: down to label1
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i64.trunc_f64_s
; CHECK-NEXT:    local.set 2
; CHECK-NEXT:    br 1 # 1: down to label0
; CHECK-NEXT:  .LBB6_2:
; CHECK-NEXT:    end_block # label1:
; CHECK-NEXT:    i64.const -9223372036854775808
; CHECK-NEXT:    local.set 2
; CHECK-NEXT:  .LBB6_3:
; CHECK-NEXT:    end_block # label0:
; CHECK-NEXT:    block
; CHECK-NEXT:    block
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    f64x2.extract_lane 0
; CHECK-NEXT:    local.tee 1
; CHECK-NEXT:    f64.abs
; CHECK-NEXT:    f64.const 0x1p63
; CHECK-NEXT:    f64.lt
; CHECK-NEXT:    i32.eqz
; CHECK-NEXT:    br_if 0 # 0: down to label3
; CHECK-NEXT:  # %bb.4:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i64.trunc_f64_s
; CHECK-NEXT:    local.set 3
; CHECK-NEXT:    br 1 # 1: down to label2
; CHECK-NEXT:  .LBB6_5:
; CHECK-NEXT:    end_block # label3:
; CHECK-NEXT:    i64.const -9223372036854775808
; CHECK-NEXT:    local.set 3
; CHECK-NEXT:  .LBB6_6:
; CHECK-NEXT:    end_block # label2:
; CHECK-NEXT:    local.get 3
; CHECK-NEXT:    i64x2.splat
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    i64x2.replace_lane 1
; CHECK-NEXT:    # fallthrough-return
  %a = fptosi <2 x double> %x to <2 x i64>
  ret <2 x i64> %a
}

define <2 x i64> @trunc_sat_u_v2i64(<2 x double> %x) {
; CHECK-LABEL: trunc_sat_u_v2i64:
; CHECK:         .functype trunc_sat_u_v2i64 (v128) -> (v128)
; CHECK-NEXT:    .local f64, i64, i64
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    block
; CHECK-NEXT:    block
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    f64x2.extract_lane 1
; CHECK-NEXT:    local.tee 1
; CHECK-NEXT:    f64.const 0x1p64
; CHECK-NEXT:    f64.lt
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    f64.const 0x0p0
; CHECK-NEXT:    f64.ge
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.eqz
; CHECK-NEXT:    br_if 0 # 0: down to label5
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i64.trunc_f64_u
; CHECK-NEXT:    local.set 2
; CHECK-NEXT:    br 1 # 1: down to label4
; CHECK-NEXT:  .LBB7_2:
; CHECK-NEXT:    end_block # label5:
; CHECK-NEXT:    i64.const 0
; CHECK-NEXT:    local.set 2
; CHECK-NEXT:  .LBB7_3:
; CHECK-NEXT:    end_block # label4:
; CHECK-NEXT:    block
; CHECK-NEXT:    block
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    f64x2.extract_lane 0
; CHECK-NEXT:    local.tee 1
; CHECK-NEXT:    f64.const 0x1p64
; CHECK-NEXT:    f64.lt
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    f64.const 0x0p0
; CHECK-NEXT:    f64.ge
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.eqz
; CHECK-NEXT:    br_if 0 # 0: down to label7
; CHECK-NEXT:  # %bb.4:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i64.trunc_f64_u
; CHECK-NEXT:    local.set 3
; CHECK-NEXT:    br 1 # 1: down to label6
; CHECK-NEXT:  .LBB7_5:
; CHECK-NEXT:    end_block # label7:
; CHECK-NEXT:    i64.const 0
; CHECK-NEXT:    local.set 3
; CHECK-NEXT:  .LBB7_6:
; CHECK-NEXT:    end_block # label6:
; CHECK-NEXT:    local.get 3
; CHECK-NEXT:    i64x2.splat
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    i64x2.replace_lane 1
; CHECK-NEXT:    # fallthrough-return
  %a = fptoui <2 x double> %x to <2 x i64>
  ret <2 x i64> %a
}

;; fptrunc combined with a shuffle against zeroinitializer, in either order,
;; is expected to select f32x4.demote_f64x2_zero.

define <4 x float> @demote_zero_v4f32(<2 x double> %x) {
; CHECK-LABEL: demote_zero_v4f32:
; CHECK:         .functype demote_zero_v4f32 (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    f32x4.demote_f64x2_zero
; CHECK-NEXT:    # fallthrough-return
  %v = shufflevector <2 x double> %x, <2 x double> zeroinitializer,
         <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %a = fptrunc <4 x double> %v to <4 x float>
  ret <4 x float> %a
}

define <4 x float> @demote_zero_v4f32_2(<2 x double> %x) {
; CHECK-LABEL: demote_zero_v4f32_2:
; CHECK:         .functype demote_zero_v4f32_2 (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    f32x4.demote_f64x2_zero
; CHECK-NEXT:    # fallthrough-return
  %v = fptrunc <2 x double> %x to <2 x float>
  %a = shufflevector <2 x float> %v, <2 x float> zeroinitializer,
         <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x float> %a
}

;; Converting the two low i32 lanes to f64x2, with the low-lane shuffle placed
;; either before the conversion or after it (the "_2" variants).

define <2 x double> @convert_low_s_v2f64(<4 x i32> %x) {
; CHECK-LABEL: convert_low_s_v2f64:
; CHECK:         .functype convert_low_s_v2f64 (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    f64x2.convert_low_i32x4_s
; CHECK-NEXT:    # fallthrough-return
  %v = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %a = sitofp <2 x i32> %v to <2 x double>
  ret <2 x double> %a
}

define <2 x double> @convert_low_u_v2f64(<4 x i32> %x) {
; CHECK-LABEL: convert_low_u_v2f64:
; CHECK:         .functype convert_low_u_v2f64 (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    f64x2.convert_low_i32x4_u
; CHECK-NEXT:    # fallthrough-return
  %v = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %a = uitofp <2 x i32> %v to <2 x double>
  ret <2 x double> %a
}


define <2 x double> @convert_low_s_v2f64_2(<4 x i32> %x) {
; CHECK-LABEL: convert_low_s_v2f64_2:
; CHECK:         .functype convert_low_s_v2f64_2 (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    f64x2.convert_low_i32x4_s
; CHECK-NEXT:    # fallthrough-return
  %v = sitofp <4 x i32> %x to <4 x double>
  %a = shufflevector <4 x double> %v, <4 x double> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x double> %a
}

define <2 x double> @convert_low_u_v2f64_2(<4 x i32> %x) {
; CHECK-LABEL: convert_low_u_v2f64_2:
; CHECK:         .functype convert_low_u_v2f64_2 (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    f64x2.convert_low_i32x4_u
; CHECK-NEXT:    # fallthrough-return
  %v = uitofp <4 x i32> %x to <4 x double>
  %a = shufflevector <4 x double> %v, <4 x double> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x double> %a
}

;; Promoting the two low f32 lanes to f64x2, again with the shuffle placed
;; either before or after the fpext.

define <2 x double> @promote_low_v2f64(<4 x float> %x) {
; CHECK-LABEL: promote_low_v2f64:
; CHECK:         .functype promote_low_v2f64 (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    # fallthrough-return
  %v = shufflevector <4 x float> %x, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  %a = fpext <2 x float> %v to <2 x double>
  ret <2 x double> %a
}

define <2 x double> @promote_low_v2f64_2(<4 x float> %x) {
; CHECK-LABEL: promote_low_v2f64_2:
; CHECK:         .functype promote_low_v2f64_2 (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    # fallthrough-return
  %v = fpext <4 x float> %x to <4 x double>
  %a = shufflevector <4 x double> %v, <4 x double> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x double> %a
}

;; Also check with illegally wide vectors

define <4 x double> @convert_low_s_v4f64(<8 x i32> %x) {
; CHECK-LABEL: convert_low_s_v4f64:
; CHECK:         .functype convert_low_s_v4f64 (i32, v128, v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
; CHECK-NEXT:    f64x2.convert_low_i32x4_s
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    f64x2.convert_low_i32x4_s
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %v = shufflevector <8 x i32> %x, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %a = sitofp <4 x i32> %v to <4 x double>
  ret <4 x double> %a
}

define <4 x double> @convert_low_u_v4f64(<8 x i32> %x) {
; CHECK-LABEL: convert_low_u_v4f64:
; CHECK:         .functype convert_low_u_v4f64 (i32, v128, v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
; CHECK-NEXT:    f64x2.convert_low_i32x4_u
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    f64x2.convert_low_i32x4_u
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %v = shufflevector <8 x i32> %x, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %a = uitofp <4 x i32> %v to <4 x double>
  ret <4 x double> %a
}


define <4 x double> @convert_low_s_v4f64_2(<8 x i32> %x) {
; CHECK-LABEL: convert_low_s_v4f64_2:
; CHECK:         .functype convert_low_s_v4f64_2 (i32, v128, v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
; CHECK-NEXT:    f64x2.convert_low_i32x4_s
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    f64x2.convert_low_i32x4_s
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %v = sitofp <8 x i32> %x to <8 x double>
  %a = shufflevector <8 x double> %v, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x double> %a
}

define <4 x double> @convert_low_u_v4f64_2(<8 x i32> %x) {
; CHECK-LABEL: convert_low_u_v4f64_2:
; CHECK:         .functype convert_low_u_v4f64_2 (i32, v128, v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
; CHECK-NEXT:    f64x2.convert_low_i32x4_u
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    f64x2.convert_low_i32x4_u
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %v = uitofp <8 x i32> %x to <8 x double>
  %a = shufflevector <8 x double> %v, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x double> %a
}

define <4 x double> @promote_low_v4f64(<8 x float> %x) {
; CHECK-LABEL: promote_low_v4f64:
; CHECK:         .functype promote_low_v4f64 (i32, v128, v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %v = shufflevector <8 x float> %x, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %a = fpext <4 x float> %v to <4 x double>
  ret <4 x double> %a
}

define <4 x double> @promote_low_v4f64_2(<8 x float> %x) {
; CHECK-LABEL: promote_low_v4f64_2:
; CHECK:         .functype promote_low_v4f64_2 (i32, v128, v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %v = fpext <8 x float> %x to <8 x double>
  %a = shufflevector <8 x double> %v, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x double> %a
}

;; Lanes drawn from two different inputs are expected to need an explicit byte
;; shuffle ahead of the promote.

define <2 x double> @promote_mixed_v2f64(<4 x float> %x, <4 x float> %y) {
; CHECK-LABEL: promote_mixed_v2f64:
; CHECK:         .functype promote_mixed_v2f64 (v128, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i8x16.shuffle 8, 9, 10, 11, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    # fallthrough-return
  %v = shufflevector <4 x float> %x, <4 x float> %y, <2 x i32> <i32 2, i32 7>
  %a = fpext <2 x float> %v to <2 x double>
  ret <2 x double> %a
}