; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=hexagon -hexagon-hvx-widen=32 < %s -verify-machineinstrs | FileCheck %s

target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
target triple = "hexagon"

; s8 -> f16
; No widening
define void @s8f16_0(ptr %a0, ptr %a1) #0 {
; CHECK-LABEL: s8f16_0:
; CHECK:         .cfi_startproc
; CHECK-NEXT:    // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:      v0 = vmem(r0+#0)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      r7 = #1
; CHECK-NEXT:      r6 = #64
; CHECK-NEXT:      v1:0.h = vunpack(v0.b)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v2.h = vsplat(r7)
; CHECK-NEXT:      r3:2 = combine(#31,#5)
; CHECK-NEXT:      v3.h = vabs(v0.h)
; CHECK-NEXT:      v4.h = vabs(v1.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v8.h = vsplat(r6)
; CHECK-NEXT:      v7.h = vsplat(r3)
; CHECK-NEXT:      v9 = vxor(v9,v9)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      r5 = ##32768
; CHECK-NEXT:      v5.uh = vcl0(v3.uh)
; CHECK-NEXT:      q0 = vcmp.gt(v9.h,v0.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v10.h = vsplat(r5)
; CHECK-NEXT:      r4 = #10
; CHECK-NEXT:      v6.uh = vcl0(v4.uh)
; CHECK-NEXT:      v5.h = vadd(v5.h,v2.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v27 = vmux(q0,v10,v9)
; CHECK-NEXT:      v6.h = vadd(v6.h,v2.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v3.h = vasl(v3.h,v5.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v4.h = vasl(v4.h,v6.h)
; CHECK-NEXT:      v13 = vand(v3,v8)
; CHECK-NEXT:      v11.h = vadd(v3.h,v7.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v14.h = vadd(v4.h,v7.h)
; CHECK-NEXT:      q2 = vcmp.eq(v13.h,v9.h)
; CHECK-NEXT:      v8 = vand(v4,v8)
; CHECK-NEXT:      q1 = vcmp.gt(v3.uh,v11.uh)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v11.uh = vlsr(v11.uh,r2)
; CHECK-NEXT:      v13 = vmux(q2,v9,v2)
; CHECK-NEXT:      q2 = vcmp.eq(v8.h,v9.h)
; CHECK-NEXT:      q3 = vcmp.gt(v4.uh,v14.uh)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v20.uh = vlsr(v14.uh,r2)
; CHECK-NEXT:      v22 = vmux(q2,v9,v2)
; CHECK-NEXT:      v21 = vmux(q1,v2,v9)
; CHECK-NEXT:      v2 = vmux(q3,v2,v9)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v19.uh = vlsr(v4.uh,r2)
; CHECK-NEXT:      v13.h = vadd(v11.h,v13.h)
; CHECK-NEXT:      v24.h = vadd(v20.h,v22.h)
; CHECK-NEXT:      v2.h = vadd(v2.h,v7.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v12.uh = vlsr(v3.uh,r2)
; CHECK-NEXT:      v23.h = vadd(v21.h,v7.h)
; CHECK-NEXT:      v2.h = vsub(v2.h,v6.h)
; CHECK-NEXT:      q3 = vcmp.gt(v9.h,v1.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v11.uh = vlsr(v11.uh,r7)
; CHECK-NEXT:      v3.h = vsub(v23.h,v5.h)
; CHECK-NEXT:      q2 = vcmp.eq(v12.h,v11.h)
; CHECK-NEXT:      q1 = vcmp.eq(v19.h,v20.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v25.uh = vlsr(v13.uh,r7)
; CHECK-NEXT:      v28 = vmux(q3,v10,v9)
; CHECK-NEXT:      q3 = vcmp.eq(v0.h,v9.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v26.uh = vlsr(v24.uh,r7)
; CHECK-NEXT:      v5 = vmux(q2,v25,v11)
; CHECK-NEXT:      q2 = vcmp.eq(v1.h,v9.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v4.uh = vlsr(v20.uh,r7)
; CHECK-NEXT:      v5 = vor(v27,v5)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v3.h = vasl(v3.h,r4)
; CHECK-NEXT:      v4 = vmux(q1,v26,v4)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v2.h = vasl(v2.h,r4)
; CHECK-NEXT:      v4 = vor(v28,v4)
; CHECK-NEXT:      v29 = vor(v5,v3)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v2 = vor(v4,v2)
; CHECK-NEXT:      v31 = vmux(q3,v9,v29)
; CHECK-NEXT:      vmem(r1+#0) = v31.new
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v30 = vmux(q2,v9,v2)
; CHECK-NEXT:      jumpr r31
; CHECK-NEXT:      vmem(r1+#1) = v30.new
; CHECK-NEXT:    }
  %v0 = load <128 x i8>, ptr %a0, align 128
  %v1 = sitofp <128 x i8> %v0 to <128 x half>
  store <128 x half> %v1, ptr %a1, align 128
  ret void
}

; Widen input
define void @s8f16_1(ptr %a0, ptr %a1) #0 {
; CHECK-LABEL: s8f16_1:
; CHECK:         .cfi_startproc
; CHECK-NEXT:    // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:      v0 = vmem(r0+#0)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      r6 = #1
; CHECK-NEXT:      r3:2 = combine(#64,#31)
; CHECK-NEXT:      v1:0.h = vunpack(v0.b)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v3.h = vsplat(r6)
; CHECK-NEXT:      v4.h = vsplat(r2)
; CHECK-NEXT:      v2.h = vabs(v0.h)
; CHECK-NEXT:      v1 = vxor(v1,v1)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v6.h = vsplat(r3)
; CHECK-NEXT:      r5:4 = combine(##32768,#5)
; CHECK-NEXT:      r2 = #10
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v8.h = vsplat(r5)
; CHECK-NEXT:      v5.uh = vcl0(v2.uh)
; CHECK-NEXT:      q3 = vcmp.eq(v0.h,v1.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v5.h = vadd(v5.h,v3.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v2.h = vasl(v2.h,v5.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v7.h = vadd(v2.h,v4.h)
; CHECK-NEXT:      v6 = vand(v2,v6)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v2.uh = vlsr(v2.uh,r4)
; CHECK-NEXT:      q0 = vcmp.eq(v6.h,v1.h)
; CHECK-NEXT:      q1 = vcmp.gt(v2.uh,v7.uh)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v25.uh = vlsr(v7.uh,r4)
; CHECK-NEXT:      v26 = vmux(q0,v1,v3)
; CHECK-NEXT:      v3 = vmux(q1,v3,v1)
; CHECK-NEXT:      q1 = vcmp.gt(v1.h,v0.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v7.h = vadd(v25.h,v26.h)
; CHECK-NEXT:      v3.h = vadd(v3.h,v4.h)
; CHECK-NEXT:      q2 = vcmp.eq(v2.h,v25.h)
; CHECK-NEXT:      v30 = vmux(q1,v8,v1)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v27.uh = vlsr(v25.uh,r6)
; CHECK-NEXT:      v28.h = vsub(v3.h,v5.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v29.uh = vlsr(v7.uh,r6)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v2.h = vasl(v28.h,r2)
; CHECK-NEXT:      v3 = vmux(q2,v29,v27)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v3 = vor(v30,v3)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v31 = vor(v3,v2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v0 = vmux(q3,v1,v31)
; CHECK-NEXT:      jumpr r31
; CHECK-NEXT:      vmem(r1+#0) = v0.new
; CHECK-NEXT:    }
  %v0 = load <64 x i8>, ptr %a0, align 128
  %v1 = sitofp <64 x i8> %v0 to <64 x half>
  store <64 x half> %v1, ptr %a1, align 128
  ret void
}


; s8 -> f32
; No widening
define void @s8f32_0(ptr %a0, ptr %a1) #0 {
; CHECK-LABEL: s8f32_0:
; CHECK:         .cfi_startproc
; CHECK-NEXT:    // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:      r7 = #64
; CHECK-NEXT:      r0 = #1
; CHECK-NEXT:      v0 = vmem(r0+#0)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v2 = vsplat(r0)
; CHECK-NEXT:      r3:2 = combine(##255,#8)
; CHECK-NEXT:      v1 = valign(v0,v0,r7)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v3 = vsplat(r3)
; CHECK-NEXT:      r7 = #512
; CHECK-NEXT:      v9:8.h = vunpack(v0.b)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v4 = vsplat(r7)
; CHECK-NEXT:      r6 = ##-2147483648
; CHECK-NEXT:      r5 = #159
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      r4 = #23
; CHECK-NEXT:      v7:6.h = vunpack(v1.b)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v8 = vsplat(r6)
; CHECK-NEXT:      v1:0.w = vunpack(v8.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v7:6.w = vunpack(v6.h)
; CHECK-NEXT:      v5.w = vabs(v0.w)
; CHECK-NEXT:      v10.w = vabs(v1.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v26.w = vabs(v6.w)
; CHECK-NEXT:      v13.w = vabs(v7.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v9.uw = vcl0(v5.uw)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v12.uw = vcl0(v26.uw)
; CHECK-NEXT:      v9.w = vadd(v9.w,v2.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v14.uw = vcl0(v13.uw)
; CHECK-NEXT:      v15.w = vadd(v12.w,v2.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v11.uw = vcl0(v10.uw)
; CHECK-NEXT:      v12.w = vadd(v14.w,v2.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v27.w = vasl(v26.w,v15.w)
; CHECK-NEXT:      v11.w = vadd(v11.w,v2.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v13.w = vasl(v13.w,v12.w)
; CHECK-NEXT:      v20 = vand(v27,v4)
; CHECK-NEXT:      v19.w = vadd(v27.w,v3.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v16.w = vasl(v5.w,v9.w)
; CHECK-NEXT:      v5 = vxor(v5,v5)
; CHECK-NEXT:      v23.w = vadd(v13.w,v3.w)
; CHECK-NEXT:      v28 = vand(v13,v4)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v17.w = vasl(v10.w,v11.w)
; CHECK-NEXT:      q3 = vcmp.eq(v20.w,v5.w)
; CHECK-NEXT:      q2 = vcmp.gt(v27.uw,v19.uw)
; CHECK-NEXT:      q0 = vcmp.gt(v5.w,v6.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v21.uw = vlsr(v27.uw,r2)
; CHECK-NEXT:      v30 = vmux(q3,v5,v2)
; CHECK-NEXT:      q3 = vcmp.eq(v28.w,v5.w)
; CHECK-NEXT:      v22 = vand(v17,v4)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v14.uw = vlsr(v19.uw,r2)
; CHECK-NEXT:      v27 = vmux(q3,v5,v2)
; CHECK-NEXT:      q1 = vcmp.eq(v22.w,v5.w)
; CHECK-NEXT:      v24 = vmux(q2,v2,v5)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v31.uw = vlsr(v23.uw,r2)
; CHECK-NEXT:      v22.w = vadd(v14.w,v30.w)
; CHECK-NEXT:      v30.w = vadd(v17.w,v3.w)
; CHECK-NEXT:      q2 = vcmp.eq(v21.w,v14.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v29.uw = vlsr(v13.uw,r2)
; CHECK-NEXT:      v28.w = vadd(v31.w,v27.w)
; CHECK-NEXT:      v3.w = vadd(v16.w,v3.w)
; CHECK-NEXT:      v4 = vand(v16,v4)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v14.uw = vlsr(v14.uw,r0)
; CHECK-NEXT:      q3 = vcmp.eq(v29.w,v31.w)
; CHECK-NEXT:      v18 = vmux(q0,v8,v5)
; CHECK-NEXT:      q0 = vcmp.gt(v5.w,v7.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v19.uw = vlsr(v31.uw,r0)
; CHECK-NEXT:      v26 = vmux(q1,v5,v2)
; CHECK-NEXT:      v31 = vmux(q0,v8,v5)
; CHECK-NEXT:      q0 = vcmp.gt(v16.uw,v3.uw)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v10 = vsplat(r5)
; CHECK-NEXT:      v29.uw = vlsr(v22.uw,r0)
; CHECK-NEXT:      v15.w = vsub(v24.w,v15.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v20.uw = vlsr(v28.uw,r0)
; CHECK-NEXT:      v14 = vmux(q2,v29,v14)
; CHECK-NEXT:      q2 = vcmp.gt(v13.uw,v23.uw)
; CHECK-NEXT:      v15.w = vadd(v15.w,v10.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v25.uw = vlsr(v30.uw,r2)
; CHECK-NEXT:      v19 = vmux(q3,v20,v19)
; CHECK-NEXT:      q3 = vcmp.eq(v4.w,v5.w)
; CHECK-NEXT:      v27 = vmux(q2,v2,v5)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v3.uw = vlsr(v3.uw,r2)
; CHECK-NEXT:      q2 = vcmp.gt(v17.uw,v30.uw)
; CHECK-NEXT:      v28.w = vadd(v25.w,v26.w)
; CHECK-NEXT:      v29 = vmux(q3,v5,v2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v17.uw = vlsr(v17.uw,r2)
; CHECK-NEXT:      v19 = vor(v31,v19)
; CHECK-NEXT:      v31 = vmux(q2,v2,v5)
; CHECK-NEXT:      v2 = vmux(q0,v2,v5)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v24.uw = vlsr(v16.uw,r2)
; CHECK-NEXT:      v30.w = vadd(v3.w,v29.w)
; CHECK-NEXT:      v2.w = vsub(v2.w,v9.w)
; CHECK-NEXT:      v11.w = vsub(v31.w,v11.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v16.uw = vlsr(v28.uw,r0)
; CHECK-NEXT:      q3 = vcmp.eq(v17.w,v25.w)
; CHECK-NEXT:      v4.w = vsub(v27.w,v12.w)
; CHECK-NEXT:      v2.w = vadd(v2.w,v10.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v13.uw = vlsr(v25.uw,r0)
; CHECK-NEXT:      q0 = vcmp.eq(v24.w,v3.w)
; CHECK-NEXT:      v21.w = vadd(v11.w,v10.w)
; CHECK-NEXT:      q2 = vcmp.gt(v5.w,v1.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v22.uw = vlsr(v30.uw,r0)
; CHECK-NEXT:      v23 = vmux(q3,v16,v13)
; CHECK-NEXT:      q3 = vcmp.gt(v5.w,v0.w)
; CHECK-NEXT:      v24 = vmux(q2,v8,v5)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v3.uw = vlsr(v3.uw,r0)
; CHECK-NEXT:      v4.w = vadd(v4.w,v10.w)
; CHECK-NEXT:      v8 = vmux(q3,v8,v5)
; CHECK-NEXT:      v10 = vor(v24,v23)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v9.w = vasl(v21.w,r4)
; CHECK-NEXT:      v3 = vmux(q0,v22,v3)
; CHECK-NEXT:      v14 = vor(v18,v14)
; CHECK-NEXT:      q2 = vcmp.eq(v1.w,v5.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v2.w = vasl(v2.w,r4)
; CHECK-NEXT:      v3 = vor(v8,v3)
; CHECK-NEXT:      v25 = vor(v10,v9)
; CHECK-NEXT:      q3 = vcmp.eq(v0.w,v5.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v15.w = vasl(v15.w,r4)
; CHECK-NEXT:      v2 = vor(v3,v2)
; CHECK-NEXT:      v27 = vmux(q2,v5,v25)
; CHECK-NEXT:      vmem(r1+#1) = v27.new
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v26.w = vasl(v4.w,r4)
; CHECK-NEXT:      v29 = vmux(q3,v5,v2)
; CHECK-NEXT:      q2 = vcmp.eq(v7.w,v5.w)
; CHECK-NEXT:      vmem(r1+#0) = v29.new
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v28 = vor(v19,v26)
; CHECK-NEXT:      v30 = vor(v14,v15)
; CHECK-NEXT:      q3 = vcmp.eq(v6.w,v5.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v0 = vmux(q2,v5,v28)
; CHECK-NEXT:      v31 = vmux(q3,v5,v30)
; CHECK-NEXT:      vmem(r1+#3) = v0.new
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      jumpr r31
; CHECK-NEXT:      vmem(r1+#2) = v31
; CHECK-NEXT:    }
  %v0 = load <128 x i8>, ptr %a0, align 128
  %v1 = sitofp <128 x i8> %v0 to <128 x float>
  store <128 x float> %v1, ptr %a1, align 128
  ret void
}

; Widen input #1
define void @s8f32_1(ptr %a0, ptr %a1) #0 {
; CHECK-LABEL: s8f32_1:
; CHECK:         .cfi_startproc
; CHECK-NEXT:    // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:      r0 = #1
; CHECK-NEXT:      v3:2.h = vunpack(v0.b)
; CHECK-NEXT:      v0.cur = vmem(r0+#0)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v1 = vsplat(r0)
; CHECK-NEXT:      r3:2 = combine(##255,#8)
; CHECK-NEXT:      r6 = #512
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v7 = vsplat(r3)
; CHECK-NEXT:      v3:2.w = vunpack(v2.h)
; CHECK-NEXT:      v22 = vxor(v22,v22)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v10 = vsplat(r6)
; CHECK-NEXT:      r7 = ##-2147483648
; CHECK-NEXT:      r5 = #159
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v9 = vsplat(r7)
; CHECK-NEXT:      v4.w = vabs(v2.w)
; CHECK-NEXT:      v5.w = vabs(v3.w)
; CHECK-NEXT:      q0 = vcmp.gt(v22.w,v2.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v12 = vsplat(r5)
; CHECK-NEXT:      r4 = #23
; CHECK-NEXT:      v11 = vmux(q0,v9,v22)
; CHECK-NEXT:      q0 = vcmp.gt(v22.w,v3.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v6.uw = vcl0(v4.uw)
; CHECK-NEXT:      v30 = vmux(q0,v9,v22)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v8.uw = vcl0(v5.uw)
; CHECK-NEXT:      v6.w = vadd(v6.w,v1.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v8.w = vadd(v8.w,v1.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v4.w = vasl(v4.w,v6.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v5.w = vasl(v5.w,v8.w)
; CHECK-NEXT:      v13 = vand(v4,v10)
; CHECK-NEXT:      v14.w = vadd(v4.w,v7.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v10 = vand(v5,v10)
; CHECK-NEXT:      v7.w = vadd(v5.w,v7.w)
; CHECK-NEXT:      q2 = vcmp.gt(v4.uw,v14.uw)
; CHECK-NEXT:      q1 = vcmp.eq(v13.w,v22.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v14.uw = vlsr(v14.uw,r2)
; CHECK-NEXT:      q3 = vcmp.eq(v10.w,v22.w)
; CHECK-NEXT:      v25 = vmux(q2,v1,v22)
; CHECK-NEXT:      q2 = vcmp.gt(v5.uw,v7.uw)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v7.uw = vlsr(v7.uw,r2)
; CHECK-NEXT:      v26 = vmux(q1,v22,v1)
; CHECK-NEXT:      v27 = vmux(q3,v22,v1)
; CHECK-NEXT:      v1 = vmux(q2,v1,v22)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v24.uw = vlsr(v5.uw,r2)
; CHECK-NEXT:      v5.w = vadd(v14.w,v26.w)
; CHECK-NEXT:      v29.w = vadd(v7.w,v27.w)
; CHECK-NEXT:      v6.w = vsub(v25.w,v6.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v23.uw = vlsr(v4.uw,r2)
; CHECK-NEXT:      v1.w = vsub(v1.w,v8.w)
; CHECK-NEXT:      v6.w = vadd(v6.w,v12.w)
; CHECK-NEXT:      q3 = vcmp.eq(v24.w,v7.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v28.uw = vlsr(v14.uw,r0)
; CHECK-NEXT:      v1.w = vadd(v1.w,v12.w)
; CHECK-NEXT:      q1 = vcmp.eq(v23.w,v14.w)
; CHECK-NEXT:      q2 = vcmp.eq(v3.w,v22.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v5.uw = vlsr(v5.uw,r0)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v7.uw = vlsr(v7.uw,r0)
; CHECK-NEXT:      v5 = vmux(q1,v5,v28)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v4.uw = vlsr(v29.uw,r0)
; CHECK-NEXT:      v5 = vor(v11,v5)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v6.w = vasl(v6.w,r4)
; CHECK-NEXT:      v4 = vmux(q3,v4,v7)
; CHECK-NEXT:      q3 = vcmp.eq(v2.w,v22.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v1.w = vasl(v1.w,r4)
; CHECK-NEXT:      v4 = vor(v30,v4)
; CHECK-NEXT:      v31 = vor(v5,v6)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v1 = vor(v4,v1)
; CHECK-NEXT:      v0 = vmux(q3,v22,v31)
; CHECK-NEXT:      vmem(r1+#0) = v0.new
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v1 = vmux(q2,v22,v1)
; CHECK-NEXT:      jumpr r31
; CHECK-NEXT:      vmem(r1+#1) = v1.new
; CHECK-NEXT:    }
  %v0 = load <64 x i8>, ptr %a0, align 128
  %v1 = sitofp <64 x i8> %v0 to <64 x float>
  store <64 x float> %v1, ptr %a1, align 128
  ret void
}

; Widen input #2
define void @s8f32_2(ptr %a0, ptr %a1) #0 {
; CHECK-LABEL: s8f32_2:
; CHECK:         .cfi_startproc
; CHECK-NEXT:    // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:      v0 = vmem(r0+#0)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      r0 = #1
; CHECK-NEXT:      r3 = #512
; CHECK-NEXT:      v1:0.h = vunpack(v0.b)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v2 = vsplat(r0)
; CHECK-NEXT:      v4 = vsplat(r3)
; CHECK-NEXT:      r2 = #255
; CHECK-NEXT:      v3 = vxor(v3,v3)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      r7:6 = combine(##-2147483648,#8)
; CHECK-NEXT:      r4 = #159
; CHECK-NEXT:      v1:0.w = vunpack(v0.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v1 = vsplat(r2)
; CHECK-NEXT:      v8 = vsplat(r4)
; CHECK-NEXT:      v5.w = vabs(v0.w)
; CHECK-NEXT:      q2 = vcmp.gt(v3.w,v0.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v7 = vsplat(r7)
; CHECK-NEXT:      r2 = #23
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v6.uw = vcl0(v5.uw)
; CHECK-NEXT:      v30 = vmux(q2,v7,v3)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v6.w = vadd(v6.w,v2.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v5.w = vasl(v5.w,v6.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v1.w = vadd(v5.w,v1.w)
; CHECK-NEXT:      v4 = vand(v5,v4)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v5.uw = vlsr(v5.uw,r6)
; CHECK-NEXT:      q0 = vcmp.eq(v4.w,v3.w)
; CHECK-NEXT:      q1 = vcmp.gt(v5.uw,v1.uw)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v1.uw = vlsr(v1.uw,r6)
; CHECK-NEXT:      v4 = vmux(q0,v3,v2)
; CHECK-NEXT:      v2 = vmux(q1,v2,v3)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v4.w = vadd(v1.w,v4.w)
; CHECK-NEXT:      v2.w = vsub(v2.w,v6.w)
; CHECK-NEXT:      q3 = vcmp.eq(v5.w,v1.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v28.uw = vlsr(v1.uw,r0)
; CHECK-NEXT:      v2.w = vadd(v2.w,v8.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v29.uw = vlsr(v4.uw,r0)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v2.w = vasl(v2.w,r2)
; CHECK-NEXT:      v1 = vmux(q3,v29,v28)
; CHECK-NEXT:      q3 = vcmp.eq(v0.w,v3.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v1 = vor(v30,v1)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v31 = vor(v1,v2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v0 = vmux(q3,v3,v31)
; CHECK-NEXT:      jumpr r31
; CHECK-NEXT:      vmem(r1+#0) = v0.new
; CHECK-NEXT:    }
  %v0 = load <32 x i8>, ptr %a0, align 128
  %v1 = sitofp <32 x i8> %v0 to <32 x float>
  store <32 x float> %v1, ptr %a1, align 128
  ret void
}


; s16 -> f16
; No widening
define void @s16f16_0(ptr %a0, ptr %a1) #0 {
; CHECK-LABEL: s16f16_0:
; CHECK:         .cfi_startproc
; CHECK-NEXT:    // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:      r6 = #1
; CHECK-NEXT:      r3:2 = combine(#64,#31)
; CHECK-NEXT:      v1.h = vabs(v0.h)
; CHECK-NEXT:      v0.cur = vmem(r0+#0)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v3.h = vsplat(r6)
; CHECK-NEXT:      v5.h = vsplat(r2)
; CHECK-NEXT:      v2 = vxor(v2,v2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v6.h = vsplat(r3)
; CHECK-NEXT:      r5:4 = combine(##32768,#5)
; CHECK-NEXT:      v4.uh = vcl0(v1.uh)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v8.h = vsplat(r5)
; CHECK-NEXT:      r2 = #10
; CHECK-NEXT:      v4.h = vadd(v4.h,v3.h)
; CHECK-NEXT:      q3 = vcmp.eq(v0.h,v2.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v1.h = vasl(v1.h,v4.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v7.h = vadd(v1.h,v5.h)
; CHECK-NEXT:      v6 = vand(v1,v6)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v1.uh = vlsr(v1.uh,r4)
; CHECK-NEXT:      q0 = vcmp.eq(v6.h,v2.h)
; CHECK-NEXT:      q1 = vcmp.gt(v1.uh,v7.uh)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v25.uh = vlsr(v7.uh,r4)
; CHECK-NEXT:      v26 = vmux(q0,v2,v3)
; CHECK-NEXT:      v3 = vmux(q1,v3,v2)
; CHECK-NEXT:      q1 = vcmp.gt(v2.h,v0.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v7.h = vadd(v25.h,v26.h)
; CHECK-NEXT:      v3.h = vadd(v3.h,v5.h)
; CHECK-NEXT:      q2 = vcmp.eq(v1.h,v25.h)
; CHECK-NEXT:      v30 = vmux(q1,v8,v2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v27.uh = vlsr(v25.uh,r6)
; CHECK-NEXT:      v28.h = vsub(v3.h,v4.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v29.uh = vlsr(v7.uh,r6)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v1.h = vasl(v28.h,r2)
; CHECK-NEXT:      v3 = vmux(q2,v29,v27)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v3 = vor(v30,v3)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v31 = vor(v3,v1)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v0 = vmux(q3,v2,v31)
; CHECK-NEXT:      jumpr r31
; CHECK-NEXT:      vmem(r1+#0) = v0.new
; CHECK-NEXT:    }
  %v0 = load <64 x i16>, ptr %a0, align 128
  %v1 = sitofp <64 x i16> %v0 to <64 x half>
  store <64 x half> %v1, ptr %a1, align 128
  ret void
}

; Widen input and result
define void @s16f16_1(ptr %a0, ptr %a1) #0 {
; CHECK-LABEL: s16f16_1:
; CHECK:         .cfi_startproc
; CHECK-NEXT:    // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:      r3:2 = combine(#31,#1)
; CHECK-NEXT:      r7 = #64
; CHECK-NEXT:      v1.h = vabs(v0.h)
; CHECK-NEXT:      v0.cur = vmem(r0+#0)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v2.h = vsplat(r2)
; CHECK-NEXT:      v5.h = vsplat(r3)
; CHECK-NEXT:      r6 = #5
; CHECK-NEXT:      v3 = vxor(v3,v3)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v6.h = vsplat(r7)
; CHECK-NEXT:      r4 = ##32768
; CHECK-NEXT:      v4.uh = vcl0(v1.uh)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v8.h = vsplat(r4)
; CHECK-NEXT:      r4 = #10
; CHECK-NEXT:      q2 = vcmp.gt(v3.h,v0.h)
; CHECK-NEXT:      v4.h = vadd(v4.h,v2.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v30 = vmux(q2,v8,v3)
; CHECK-NEXT:      q2 = vcmp.eq(v0.h,v3.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v1.h = vasl(v1.h,v4.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v7.h = vadd(v1.h,v5.h)
; CHECK-NEXT:      v6 = vand(v1,v6)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v1.uh = vlsr(v1.uh,r6)
; CHECK-NEXT:      q1 = vcmp.eq(v6.h,v3.h)
; CHECK-NEXT:      q0 = vcmp.gt(v1.uh,v7.uh)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v25.uh = vlsr(v7.uh,r6)
; CHECK-NEXT:      v26 = vmux(q1,v3,v2)
; CHECK-NEXT:      v2 = vmux(q0,v2,v3)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v7.h = vadd(v25.h,v26.h)
; CHECK-NEXT:      v2.h = vadd(v2.h,v5.h)
; CHECK-NEXT:      q3 = vcmp.eq(v1.h,v25.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v27.uh = vlsr(v25.uh,r2)
; CHECK-NEXT:      v28.h = vsub(v2.h,v4.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v29.uh = vlsr(v7.uh,r2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v1.h = vasl(v28.h,r4)
; CHECK-NEXT:      q3 = vsetq(r7)
; CHECK-NEXT:      v2 = vmux(q3,v29,v27)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v2 = vor(v30,v2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v31 = vor(v2,v1)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v0 = vmux(q2,v3,v31)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      jumpr r31
; CHECK-NEXT:      if (q3) vmem(r1+#0) = v0
; CHECK-NEXT:    }
  %v0 = load <32 x i16>, ptr %a0, align 128
  %v1 = sitofp <32 x i16> %v0 to <32 x half>
  store <32 x half> %v1, ptr %a1, align 128
  ret void
}


; s16 -> f32
; No widening
define void @s16f32_0(ptr %a0, ptr %a1) #0 {
; CHECK-LABEL: s16f32_0:
; CHECK:         .cfi_startproc
; CHECK-NEXT:    // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:      v0 = vmem(r0+#0)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      r0 = #1
; CHECK-NEXT:      r3:2 = combine(##255,#8)
; CHECK-NEXT:      v1:0.w = vunpack(v0.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v3 = vsplat(r0)
; CHECK-NEXT:      r7 = #512
; CHECK-NEXT:      v4.w = vabs(v0.w)
; CHECK-NEXT:      v6.w = vabs(v1.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v5 = vsplat(r3)
; CHECK-NEXT:      v9 = vsplat(r7)
; CHECK-NEXT:      r5 = #159
; CHECK-NEXT:      v2 = vxor(v2,v2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v13 = vsplat(r5)
; CHECK-NEXT:      r6 = ##-2147483648
; CHECK-NEXT:      v7.uw = vcl0(v4.uw)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v10 = vsplat(r6)
; CHECK-NEXT:      v8.uw = vcl0(v6.uw)
; CHECK-NEXT:      q0 = vcmp.gt(v2.w,v0.w)
; CHECK-NEXT:      v7.w = vadd(v7.w,v3.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      r4 = #23
; CHECK-NEXT:      v8.w = vadd(v8.w,v3.w)
; CHECK-NEXT:      v27 = vmux(q0,v10,v2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v4.w = vasl(v4.w,v7.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v6.w = vasl(v6.w,v8.w)
; CHECK-NEXT:      v11.w = vadd(v4.w,v5.w)
; CHECK-NEXT:      v12 = vand(v4,v9)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v5.w = vadd(v6.w,v5.w)
; CHECK-NEXT:      v9 = vand(v6,v9)
; CHECK-NEXT:      q1 = vcmp.eq(v12.w,v2.w)
; CHECK-NEXT:      q2 = vcmp.gt(v4.uw,v11.uw)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v22.uw = vlsr(v11.uw,r2)
; CHECK-NEXT:      q3 = vcmp.eq(v9.w,v2.w)
; CHECK-NEXT:      v23 = vmux(q1,v2,v3)
; CHECK-NEXT:      v14 = vmux(q2,v3,v2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v24.uw = vlsr(v5.uw,r2)
; CHECK-NEXT:      v11.w = vadd(v22.w,v23.w)
; CHECK-NEXT:      q2 = vcmp.gt(v6.uw,v5.uw)
; CHECK-NEXT:      v25 = vmux(q3,v2,v3)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v21.uw = vlsr(v4.uw,r2)
; CHECK-NEXT:      v5.w = vadd(v24.w,v25.w)
; CHECK-NEXT:      v3 = vmux(q2,v3,v2)
; CHECK-NEXT:      v7.w = vsub(v14.w,v7.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v6.uw = vlsr(v6.uw,r2)
; CHECK-NEXT:      v3.w = vsub(v3.w,v8.w)
; CHECK-NEXT:      q3 = vcmp.eq(v21.w,v22.w)
; CHECK-NEXT:      v7.w = vadd(v7.w,v13.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v4.uw = vlsr(v22.uw,r0)
; CHECK-NEXT:      v3.w = vadd(v3.w,v13.w)
; CHECK-NEXT:      q2 = vcmp.eq(v6.w,v24.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v11.uw = vlsr(v11.uw,r0)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v5.uw = vlsr(v5.uw,r0)
; CHECK-NEXT:      v4 = vmux(q3,v11,v4)
; CHECK-NEXT:      q3 = vcmp.gt(v2.w,v1.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v26.uw = vlsr(v24.uw,r0)
; CHECK-NEXT:      v28 = vmux(q3,v10,v2)
; CHECK-NEXT:      v4 = vor(v27,v4)
; CHECK-NEXT:      q3 = vcmp.eq(v0.w,v2.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v7.w = vasl(v7.w,r4)
; CHECK-NEXT:      v5 = vmux(q2,v5,v26)
; CHECK-NEXT:      q2 = vcmp.eq(v1.w,v2.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v3.w = vasl(v3.w,r4)
; CHECK-NEXT:      v5 = vor(v28,v5)
; CHECK-NEXT:      v29 = vor(v4,v7)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v3 = vor(v5,v3)
; CHECK-NEXT:      v31 = vmux(q3,v2,v29)
; CHECK-NEXT:      vmem(r1+#0) = v31.new
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v30 = vmux(q2,v2,v3)
; CHECK-NEXT:      jumpr r31
; CHECK-NEXT:      vmem(r1+#1) = v30.new
; CHECK-NEXT:    }
  %v0 = load <64 x i16>, ptr %a0, align 128
  %v1 = sitofp <64 x i16> %v0 to <64 x float>
  store <64 x float> %v1, ptr %a1, align 128
  ret void
}

; Widen input
define void @s16f32_1(ptr %a0, ptr %a1) #0 {
; CHECK-LABEL: s16f32_1:
; CHECK:         .cfi_startproc
; CHECK-NEXT:    // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:      v0 = vmem(r0+#0)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      r0 = #1
; CHECK-NEXT:      r2 = #255
; CHECK-NEXT:      v1:0.w = vunpack(v0.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v3 = vsplat(r0)
; CHECK-NEXT:      v4 = vsplat(r2)
; CHECK-NEXT:      r3 = #512
; CHECK-NEXT:      v2.w = vabs(v0.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v6 = vsplat(r3)
; CHECK-NEXT:      r7:6 = combine(##-2147483648,#8)
; CHECK-NEXT:      v1 = vxor(v1,v1)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      r4 = #159
; CHECK-NEXT:      v5.uw = vcl0(v2.uw)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v7 = vsplat(r4)
; CHECK-NEXT:      v29 = vsplat(r7)
; CHECK-NEXT:      q2 = vcmp.gt(v1.w,v0.w)
; CHECK-NEXT:      v5.w = vadd(v5.w,v3.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      r2 = #23
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v2.w = vasl(v2.w,v5.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v4.w = vadd(v2.w,v4.w)
; CHECK-NEXT:      v6 = vand(v2,v6)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v2.uw = vlsr(v2.uw,r6)
; CHECK-NEXT:      q0 = vcmp.eq(v6.w,v1.w)
; CHECK-NEXT:      q1 = vcmp.gt(v2.uw,v4.uw)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v4.uw = vlsr(v4.uw,r6)
; CHECK-NEXT:      v6 = vmux(q0,v1,v3)
; CHECK-NEXT:      v3 = vmux(q1,v3,v1)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v6.w = vadd(v4.w,v6.w)
; CHECK-NEXT:      v27.w = vsub(v3.w,v5.w)
; CHECK-NEXT:      q3 = vcmp.eq(v2.w,v4.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v28.uw = vlsr(v4.uw,r0)
; CHECK-NEXT:      v2.w = vadd(v27.w,v7.w)
; CHECK-NEXT:      v4 = vmux(q2,v29,v1)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v30.uw = vlsr(v6.uw,r0)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v2.w = vasl(v2.w,r2)
; CHECK-NEXT:      v3 = vmux(q3,v30,v28)
; CHECK-NEXT:      q3 = vcmp.eq(v0.w,v1.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v3 = vor(v4,v3)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v31 = vor(v3,v2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v0 = vmux(q3,v1,v31)
; CHECK-NEXT:      jumpr r31
; CHECK-NEXT:      vmem(r1+#0) = v0.new
; CHECK-NEXT:    }
  %v0 = load <32 x i16>, ptr %a0, align 128
  %v1 = sitofp <32 x i16> %v0 to <32 x float>
  store <32 x float> %v1, ptr %a1, align 128
  ret void
}


; s32 -> f16
; No widening
define void @s32f16_0(ptr %a0, ptr %a1) #0 {
; CHECK-LABEL: s32f16_0:
; CHECK:         .cfi_startproc
; CHECK-NEXT:    // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:      r3:2 = combine(#8,#1)
; CHECK-NEXT:      r6 = #255
; CHECK-NEXT:      v6.w = vabs(v1.w)
; CHECK-NEXT:      v1.cur = vmem(r0+#0)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v2 = vsplat(r2)
; CHECK-NEXT:      r4 = #512
; CHECK-NEXT:      v5.w = vabs(v0.w)
; CHECK-NEXT:      v0.cur = vmem(r0+#1)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v9 = vsplat(r4)
; CHECK-NEXT:      v8 = vsplat(r6)
; CHECK-NEXT:      v3.uw = vcl0(v6.uw)
; CHECK-NEXT:      v20 = vxor(v20,v20)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      r4 = #159
; CHECK-NEXT:      v4.uw = vcl0(v5.uw)
; CHECK-NEXT:      v3.w = vadd(v3.w,v2.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v27 = vsplat(r4)
; CHECK-NEXT:      r5 = ##-2147483648
; CHECK-NEXT:      v7.w = vadd(v4.w,v2.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v13 = vsplat(r5)
; CHECK-NEXT:      v6.w = vasl(v6.w,v3.w)
; CHECK-NEXT:      q0 = vcmp.gt(v20.w,v1.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v5.w = vasl(v5.w,v7.w)
; CHECK-NEXT:      v26 = vmux(q0,v13,v20)
; CHECK-NEXT:      v10.w = vadd(v6.w,v8.w)
; CHECK-NEXT:      v11 = vand(v6,v9)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v9 = vand(v5,v9)
; CHECK-NEXT:      q3 = vcmp.eq(v11.w,v20.w)
; CHECK-NEXT:      v8.w = vadd(v5.w,v8.w)
; CHECK-NEXT:      q1 = vcmp.gt(v6.uw,v10.uw)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v21.uw = vlsr(v10.uw,r3)
; CHECK-NEXT:      q2 = vcmp.eq(v9.w,v20.w)
; CHECK-NEXT:      v22 = vmux(q3,v20,v2)
; CHECK-NEXT:      q3 = vcmp.gt(v5.uw,v8.uw)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v8.uw = vlsr(v8.uw,r3)
; CHECK-NEXT:      v9.w = vadd(v21.w,v22.w)
; CHECK-NEXT:      v24 = vmux(q2,v20,v2)
; CHECK-NEXT:      v23 = vmux(q1,v2,v20)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v12.uw = vlsr(v6.uw,r3)
; CHECK-NEXT:      v2 = vmux(q3,v2,v20)
; CHECK-NEXT:      v25.w = vadd(v8.w,v24.w)
; CHECK-NEXT:      v3.w = vsub(v23.w,v3.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v5.uw = vlsr(v5.uw,r3)
; CHECK-NEXT:      v2.w = vsub(v2.w,v7.w)
; CHECK-NEXT:      q3 = vcmp.eq(v12.w,v21.w)
; CHECK-NEXT:      v3.w = vadd(v3.w,v27.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      r3 = #23
; CHECK-NEXT:      v6.uw = vlsr(v21.uw,r2)
; CHECK-NEXT:      q2 = vcmp.eq(v5.w,v8.w)
; CHECK-NEXT:      v2.w = vadd(v2.w,v27.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v9.uw = vlsr(v9.uw,r2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v28.uw = vlsr(v25.uw,r2)
; CHECK-NEXT:      v6 = vmux(q3,v9,v6)
; CHECK-NEXT:      q3 = vcmp.gt(v20.w,v0.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v29.uw = vlsr(v8.uw,r2)
; CHECK-NEXT:      v30 = vmux(q3,v13,v20)
; CHECK-NEXT:      v6 = vor(v26,v6)
; CHECK-NEXT:      q3 = vcmp.eq(v0.w,v20.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v3.w = vasl(v3.w,r3)
; CHECK-NEXT:      v5 = vmux(q2,v28,v29)
; CHECK-NEXT:      q2 = vcmp.eq(v1.w,v20.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v2.w = vasl(v2.w,r3)
; CHECK-NEXT:      v31 = vor(v30,v5)
; CHECK-NEXT:      v3 = vor(v6,v3)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v1 = vor(v31,v2)
; CHECK-NEXT:      v3 = vmux(q2,v20,v3)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v0 = vmux(q3,v20,v1)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v2.qf32 = vadd(v3.sf,v20.sf)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v3.qf32 = vadd(v0.sf,v20.sf)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v0.hf = v3:2.qf32
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v0.h = vdeal(v0.h)
; CHECK-NEXT:      jumpr r31
; CHECK-NEXT:      vmem(r1+#0) = v0.new
; CHECK-NEXT:    }
  %v0 = load <64 x i32>, ptr %a0, align 128
  %v1 = sitofp <64 x i32> %v0 to <64 x half>
  store <64 x half> %v1, ptr %a1, align 128
  ret void
}

; Widen result
define void @s32f16_1(ptr %a0, ptr %a1) #0 {
; CHECK-LABEL: s32f16_1:
; CHECK:         .cfi_startproc
; CHECK-NEXT:    // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:      r6 = #1
; CHECK-NEXT:      v1.w = vabs(v0.w)
; CHECK-NEXT:      v0.cur = vmem(r0+#0)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v2 = vsplat(r6)
; CHECK-NEXT:      r3:2 = combine(##255,#8)
; CHECK-NEXT:      r4 = #512
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v5 = vsplat(r3)
; CHECK-NEXT:      v6 = vsplat(r4)
; CHECK-NEXT:      v4.uw = vcl0(v1.uw)
; CHECK-NEXT:      v3 = vxor(v3,v3)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      r5 = #159
; CHECK-NEXT:      r4 = ##-2147483648
; CHECK-NEXT:      v4.w = vadd(v4.w,v2.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v28 = vsplat(r5)
; CHECK-NEXT:      v29 = vsplat(r4)
; CHECK-NEXT:      q3 = vcmp.gt(v3.w,v0.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      r3 = #23
; CHECK-NEXT:      v1.w = vasl(v1.w,v4.w)
; CHECK-NEXT:      v31 = vmux(q3,v29,v3)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v5.w = vadd(v1.w,v5.w)
; CHECK-NEXT:      v6 = vand(v1,v6)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v7.uw = vlsr(v1.uw,r2)
; CHECK-NEXT:      q0 = vcmp.eq(v6.w,v3.w)
; CHECK-NEXT:      q1 = vcmp.gt(v1.uw,v5.uw)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      r2 = #64
; CHECK-NEXT:      v1.uw = vlsr(v5.uw,r2)
; CHECK-NEXT:      v27 = vmux(q0,v3,v2)
; CHECK-NEXT:      v2 = vmux(q1,v2,v3)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      q3 = vsetq(r2)
; CHECK-NEXT:      v5.w = vadd(v1.w,v27.w)
; CHECK-NEXT:      v2.w = vsub(v2.w,v4.w)
; CHECK-NEXT:      q2 = vcmp.eq(v7.w,v1.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v1.uw = vlsr(v1.uw,r6)
; CHECK-NEXT:      v2.w = vadd(v2.w,v28.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v30.uw = vlsr(v5.uw,r6)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v2.w = vasl(v2.w,r3)
; CHECK-NEXT:      v1 = vmux(q2,v30,v1)
; CHECK-NEXT:      q2 = vcmp.eq(v0.w,v3.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v1 = vor(v31,v1)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v1.qf32 = vadd(v3.sf,v3.sf)
; CHECK-NEXT:      v0 = vor(v1,v2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v0 = vmux(q2,v3,v0)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v0.qf32 = vadd(v0.sf,v3.sf)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v0.hf = v1:0.qf32
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      v0.h = vdeal(v0.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:      jumpr r31
; CHECK-NEXT:      if (q3) vmem(r1+#0) = v0
; CHECK-NEXT:    }
  %v0 = load <32 x i32>, ptr %a0, align 128
  %v1 = sitofp <32 x i32> %v0 to <32 x half>
  store <32 x half> %v1, ptr %a1, align 128
  ret void
}

; s32 -> f32
; No widening
define void @s32f32_0(ptr %a0, ptr %a1) #0 {
; CHECK-LABEL: s32f32_0:
;
CHECK: .cfi_startproc 1271; CHECK-NEXT: // %bb.0: 1272; CHECK-NEXT: { 1273; CHECK-NEXT: r0 = #1 1274; CHECK-NEXT: r2 = #255 1275; CHECK-NEXT: v1.w = vabs(v0.w) 1276; CHECK-NEXT: v0.cur = vmem(r0+#0) 1277; CHECK-NEXT: } 1278; CHECK-NEXT: { 1279; CHECK-NEXT: v3 = vsplat(r0) 1280; CHECK-NEXT: v5 = vsplat(r2) 1281; CHECK-NEXT: r3 = #512 1282; CHECK-NEXT: v2 = vxor(v2,v2) 1283; CHECK-NEXT: } 1284; CHECK-NEXT: { 1285; CHECK-NEXT: v6 = vsplat(r3) 1286; CHECK-NEXT: r7:6 = combine(##-2147483648,#8) 1287; CHECK-NEXT: v4.uw = vcl0(v1.uw) 1288; CHECK-NEXT: } 1289; CHECK-NEXT: { 1290; CHECK-NEXT: r4 = #159 1291; CHECK-NEXT: v4.w = vadd(v4.w,v3.w) 1292; CHECK-NEXT: } 1293; CHECK-NEXT: { 1294; CHECK-NEXT: v7 = vsplat(r4) 1295; CHECK-NEXT: v29 = vsplat(r7) 1296; CHECK-NEXT: r2 = #23 1297; CHECK-NEXT: q2 = vcmp.gt(v2.w,v0.w) 1298; CHECK-NEXT: } 1299; CHECK-NEXT: { 1300; CHECK-NEXT: v1.w = vasl(v1.w,v4.w) 1301; CHECK-NEXT: } 1302; CHECK-NEXT: { 1303; CHECK-NEXT: v5.w = vadd(v1.w,v5.w) 1304; CHECK-NEXT: v6 = vand(v1,v6) 1305; CHECK-NEXT: } 1306; CHECK-NEXT: { 1307; CHECK-NEXT: v1.uw = vlsr(v1.uw,r6) 1308; CHECK-NEXT: q0 = vcmp.eq(v6.w,v2.w) 1309; CHECK-NEXT: q1 = vcmp.gt(v1.uw,v5.uw) 1310; CHECK-NEXT: } 1311; CHECK-NEXT: { 1312; CHECK-NEXT: v5.uw = vlsr(v5.uw,r6) 1313; CHECK-NEXT: v6 = vmux(q0,v2,v3) 1314; CHECK-NEXT: v3 = vmux(q1,v3,v2) 1315; CHECK-NEXT: } 1316; CHECK-NEXT: { 1317; CHECK-NEXT: v6.w = vadd(v5.w,v6.w) 1318; CHECK-NEXT: v27.w = vsub(v3.w,v4.w) 1319; CHECK-NEXT: q3 = vcmp.eq(v1.w,v5.w) 1320; CHECK-NEXT: v4 = vmux(q2,v29,v2) 1321; CHECK-NEXT: } 1322; CHECK-NEXT: { 1323; CHECK-NEXT: v28.uw = vlsr(v5.uw,r0) 1324; CHECK-NEXT: v1.w = vadd(v27.w,v7.w) 1325; CHECK-NEXT: } 1326; CHECK-NEXT: { 1327; CHECK-NEXT: v30.uw = vlsr(v6.uw,r0) 1328; CHECK-NEXT: } 1329; CHECK-NEXT: { 1330; CHECK-NEXT: v1.w = vasl(v1.w,r2) 1331; CHECK-NEXT: v3 = vmux(q3,v30,v28) 1332; CHECK-NEXT: q3 = vcmp.eq(v0.w,v2.w) 1333; CHECK-NEXT: } 1334; CHECK-NEXT: { 1335; CHECK-NEXT: v3 = vor(v4,v3) 1336; 
CHECK-NEXT: } 1337; CHECK-NEXT: { 1338; CHECK-NEXT: v31 = vor(v3,v1) 1339; CHECK-NEXT: } 1340; CHECK-NEXT: { 1341; CHECK-NEXT: v0 = vmux(q3,v2,v31) 1342; CHECK-NEXT: jumpr r31 1343; CHECK-NEXT: vmem(r1+#0) = v0.new 1344; CHECK-NEXT: } 1345 %v0 = load <32 x i32>, ptr %a0, align 128 1346 %v1 = sitofp <32 x i32> %v0 to <32 x float> 1347 store <32 x float> %v1, ptr %a1, align 128 1348 ret void 1349} 1350 1351; Widen input and result 1352define void @s32f32_1(ptr %a0, ptr %a1) #0 { 1353; CHECK-LABEL: s32f32_1: 1354; CHECK: .cfi_startproc 1355; CHECK-NEXT: // %bb.0: 1356; CHECK-NEXT: { 1357; CHECK-NEXT: r0 = #1 1358; CHECK-NEXT: r2 = #255 1359; CHECK-NEXT: v1.w = vabs(v0.w) 1360; CHECK-NEXT: v0.cur = vmem(r0+#0) 1361; CHECK-NEXT: } 1362; CHECK-NEXT: { 1363; CHECK-NEXT: v2 = vsplat(r0) 1364; CHECK-NEXT: v5 = vsplat(r2) 1365; CHECK-NEXT: r3 = #512 1366; CHECK-NEXT: v3 = vxor(v3,v3) 1367; CHECK-NEXT: } 1368; CHECK-NEXT: { 1369; CHECK-NEXT: v6 = vsplat(r3) 1370; CHECK-NEXT: r7:6 = combine(##-2147483648,#8) 1371; CHECK-NEXT: v4.uw = vcl0(v1.uw) 1372; CHECK-NEXT: } 1373; CHECK-NEXT: { 1374; CHECK-NEXT: r4 = #159 1375; CHECK-NEXT: v4.w = vadd(v4.w,v2.w) 1376; CHECK-NEXT: } 1377; CHECK-NEXT: { 1378; CHECK-NEXT: v7 = vsplat(r4) 1379; CHECK-NEXT: v29 = vsplat(r7) 1380; CHECK-NEXT: r3 = #23 1381; CHECK-NEXT: q3 = vcmp.gt(v3.w,v0.w) 1382; CHECK-NEXT: } 1383; CHECK-NEXT: { 1384; CHECK-NEXT: r2 = #64 1385; CHECK-NEXT: v1.w = vasl(v1.w,v4.w) 1386; CHECK-NEXT: } 1387; CHECK-NEXT: { 1388; CHECK-NEXT: v5.w = vadd(v1.w,v5.w) 1389; CHECK-NEXT: v6 = vand(v1,v6) 1390; CHECK-NEXT: } 1391; CHECK-NEXT: { 1392; CHECK-NEXT: v1.uw = vlsr(v1.uw,r6) 1393; CHECK-NEXT: q0 = vcmp.eq(v6.w,v3.w) 1394; CHECK-NEXT: q1 = vcmp.gt(v1.uw,v5.uw) 1395; CHECK-NEXT: } 1396; CHECK-NEXT: { 1397; CHECK-NEXT: v5.uw = vlsr(v5.uw,r6) 1398; CHECK-NEXT: v6 = vmux(q0,v3,v2) 1399; CHECK-NEXT: v2 = vmux(q1,v2,v3) 1400; CHECK-NEXT: } 1401; CHECK-NEXT: { 1402; CHECK-NEXT: v6.w = vadd(v5.w,v6.w) 1403; CHECK-NEXT: v27.w = 
vsub(v2.w,v4.w) 1404; CHECK-NEXT: q2 = vcmp.eq(v1.w,v5.w) 1405; CHECK-NEXT: v4 = vmux(q3,v29,v3) 1406; CHECK-NEXT: } 1407; CHECK-NEXT: { 1408; CHECK-NEXT: v28.uw = vlsr(v5.uw,r0) 1409; CHECK-NEXT: q3 = vsetq(r2) 1410; CHECK-NEXT: v1.w = vadd(v27.w,v7.w) 1411; CHECK-NEXT: } 1412; CHECK-NEXT: { 1413; CHECK-NEXT: v30.uw = vlsr(v6.uw,r0) 1414; CHECK-NEXT: } 1415; CHECK-NEXT: { 1416; CHECK-NEXT: v1.w = vasl(v1.w,r3) 1417; CHECK-NEXT: v2 = vmux(q2,v30,v28) 1418; CHECK-NEXT: q2 = vcmp.eq(v0.w,v3.w) 1419; CHECK-NEXT: } 1420; CHECK-NEXT: { 1421; CHECK-NEXT: v2 = vor(v4,v2) 1422; CHECK-NEXT: } 1423; CHECK-NEXT: { 1424; CHECK-NEXT: v31 = vor(v2,v1) 1425; CHECK-NEXT: } 1426; CHECK-NEXT: { 1427; CHECK-NEXT: v0 = vmux(q2,v3,v31) 1428; CHECK-NEXT: } 1429; CHECK-NEXT: { 1430; CHECK-NEXT: jumpr r31 1431; CHECK-NEXT: if (q3) vmem(r1+#0) = v0 1432; CHECK-NEXT: } 1433 %v0 = load <16 x i32>, ptr %a0, align 128 1434 %v1 = sitofp <16 x i32> %v0 to <16 x float> 1435 store <16 x float> %v1, ptr %a1, align 128 1436 ret void 1437} 1438 1439 1440; u8 -> f16 1441; No widening 1442define void @u8f16_0(ptr %a0, ptr %a1) #0 { 1443; CHECK-LABEL: u8f16_0: 1444; CHECK: .cfi_startproc 1445; CHECK-NEXT: // %bb.0: 1446; CHECK-NEXT: { 1447; CHECK-NEXT: v0 = vmem(r0+#0) 1448; CHECK-NEXT: } 1449; CHECK-NEXT: { 1450; CHECK-NEXT: r6 = #1 1451; CHECK-NEXT: r3:2 = combine(#31,#5) 1452; CHECK-NEXT: v1:0.uh = vunpack(v0.ub) 1453; CHECK-NEXT: } 1454; CHECK-NEXT: { 1455; CHECK-NEXT: v3.h = vsplat(r6) 1456; CHECK-NEXT: v4.h = vsplat(r3) 1457; CHECK-NEXT: r5 = #64 1458; CHECK-NEXT: v2 = vxor(v2,v2) 1459; CHECK-NEXT: } 1460; CHECK-NEXT: { 1461; CHECK-NEXT: v6.h = vsplat(r5) 1462; CHECK-NEXT: r4 = #10 1463; CHECK-NEXT: v5.uh = vcl0(v0.uh) 1464; CHECK-NEXT: } 1465; CHECK-NEXT: { 1466; CHECK-NEXT: v7.uh = vcl0(v1.uh) 1467; CHECK-NEXT: v5.h = vadd(v5.h,v3.h) 1468; CHECK-NEXT: } 1469; CHECK-NEXT: { 1470; CHECK-NEXT: v7.h = vadd(v7.h,v3.h) 1471; CHECK-NEXT: } 1472; CHECK-NEXT: { 1473; CHECK-NEXT: v8.h = vasl(v0.h,v5.h) 
1474; CHECK-NEXT: } 1475; CHECK-NEXT: { 1476; CHECK-NEXT: v11.h = vasl(v1.h,v7.h) 1477; CHECK-NEXT: v10 = vand(v8,v6) 1478; CHECK-NEXT: v9.h = vadd(v8.h,v4.h) 1479; CHECK-NEXT: } 1480; CHECK-NEXT: { 1481; CHECK-NEXT: v22.h = vadd(v11.h,v4.h) 1482; CHECK-NEXT: v6 = vand(v11,v6) 1483; CHECK-NEXT: q0 = vcmp.gt(v8.uh,v9.uh) 1484; CHECK-NEXT: q1 = vcmp.eq(v10.h,v2.h) 1485; CHECK-NEXT: } 1486; CHECK-NEXT: { 1487; CHECK-NEXT: v21.uh = vlsr(v8.uh,r2) 1488; CHECK-NEXT: q2 = vcmp.eq(v6.h,v2.h) 1489; CHECK-NEXT: q3 = vcmp.gt(v11.uh,v22.uh) 1490; CHECK-NEXT: v12 = vmux(q1,v2,v3) 1491; CHECK-NEXT: } 1492; CHECK-NEXT: { 1493; CHECK-NEXT: v9.uh = vlsr(v9.uh,r2) 1494; CHECK-NEXT: v13 = vmux(q2,v2,v3) 1495; CHECK-NEXT: v25 = vmux(q0,v3,v2) 1496; CHECK-NEXT: v3 = vmux(q3,v3,v2) 1497; CHECK-NEXT: } 1498; CHECK-NEXT: { 1499; CHECK-NEXT: v8.uh = vlsr(v22.uh,r2) 1500; CHECK-NEXT: v24.h = vadd(v9.h,v12.h) 1501; CHECK-NEXT: v3.h = vadd(v3.h,v4.h) 1502; CHECK-NEXT: v12.h = vadd(v25.h,v4.h) 1503; CHECK-NEXT: } 1504; CHECK-NEXT: { 1505; CHECK-NEXT: v23.uh = vlsr(v11.uh,r2) 1506; CHECK-NEXT: v13.h = vadd(v8.h,v13.h) 1507; CHECK-NEXT: v5.h = vsub(v12.h,v5.h) 1508; CHECK-NEXT: v3.h = vsub(v3.h,v7.h) 1509; CHECK-NEXT: } 1510; CHECK-NEXT: { 1511; CHECK-NEXT: v14.uh = vlsr(v9.uh,r6) 1512; CHECK-NEXT: q2 = vcmp.eq(v21.h,v9.h) 1513; CHECK-NEXT: q3 = vcmp.eq(v23.h,v8.h) 1514; CHECK-NEXT: } 1515; CHECK-NEXT: { 1516; CHECK-NEXT: v26.uh = vlsr(v24.uh,r6) 1517; CHECK-NEXT: } 1518; CHECK-NEXT: { 1519; CHECK-NEXT: v27.uh = vlsr(v13.uh,r6) 1520; CHECK-NEXT: v4 = vmux(q2,v26,v14) 1521; CHECK-NEXT: q2 = vcmp.eq(v1.h,v2.h) 1522; CHECK-NEXT: } 1523; CHECK-NEXT: { 1524; CHECK-NEXT: v28.uh = vlsr(v8.uh,r6) 1525; CHECK-NEXT: } 1526; CHECK-NEXT: { 1527; CHECK-NEXT: v5.h = vasl(v5.h,r4) 1528; CHECK-NEXT: v6 = vmux(q3,v27,v28) 1529; CHECK-NEXT: q3 = vcmp.eq(v0.h,v2.h) 1530; CHECK-NEXT: } 1531; CHECK-NEXT: { 1532; CHECK-NEXT: v3.h = vasl(v3.h,r4) 1533; CHECK-NEXT: v29 = vor(v4,v5) 1534; CHECK-NEXT: } 1535; CHECK-NEXT: 
{ 1536; CHECK-NEXT: v3 = vor(v6,v3) 1537; CHECK-NEXT: v31 = vmux(q3,v2,v29) 1538; CHECK-NEXT: vmem(r1+#0) = v31.new 1539; CHECK-NEXT: } 1540; CHECK-NEXT: { 1541; CHECK-NEXT: v30 = vmux(q2,v2,v3) 1542; CHECK-NEXT: jumpr r31 1543; CHECK-NEXT: vmem(r1+#1) = v30.new 1544; CHECK-NEXT: } 1545 %v0 = load <128 x i8>, ptr %a0, align 128 1546 %v1 = uitofp <128 x i8> %v0 to <128 x half> 1547 store <128 x half> %v1, ptr %a1, align 128 1548 ret void 1549} 1550 1551; Widen input 1552define void @u8f16_1(ptr %a0, ptr %a1) #0 { 1553; CHECK-LABEL: u8f16_1: 1554; CHECK: .cfi_startproc 1555; CHECK-NEXT: // %bb.0: 1556; CHECK-NEXT: { 1557; CHECK-NEXT: v0 = vmem(r0+#0) 1558; CHECK-NEXT: } 1559; CHECK-NEXT: { 1560; CHECK-NEXT: r6 = #1 1561; CHECK-NEXT: r3:2 = combine(#64,#31) 1562; CHECK-NEXT: v1:0.uh = vunpack(v0.ub) 1563; CHECK-NEXT: } 1564; CHECK-NEXT: { 1565; CHECK-NEXT: v1.h = vsplat(r6) 1566; CHECK-NEXT: v4.h = vsplat(r2) 1567; CHECK-NEXT: r5 = #5 1568; CHECK-NEXT: v2 = vxor(v2,v2) 1569; CHECK-NEXT: } 1570; CHECK-NEXT: { 1571; CHECK-NEXT: v5.h = vsplat(r3) 1572; CHECK-NEXT: r4 = #10 1573; CHECK-NEXT: v3.uh = vcl0(v0.uh) 1574; CHECK-NEXT: q3 = vcmp.eq(v0.h,v2.h) 1575; CHECK-NEXT: } 1576; CHECK-NEXT: { 1577; CHECK-NEXT: v3.h = vadd(v3.h,v1.h) 1578; CHECK-NEXT: } 1579; CHECK-NEXT: { 1580; CHECK-NEXT: v6.h = vasl(v0.h,v3.h) 1581; CHECK-NEXT: } 1582; CHECK-NEXT: { 1583; CHECK-NEXT: v7.h = vadd(v6.h,v4.h) 1584; CHECK-NEXT: v5 = vand(v6,v5) 1585; CHECK-NEXT: } 1586; CHECK-NEXT: { 1587; CHECK-NEXT: v6.uh = vlsr(v6.uh,r5) 1588; CHECK-NEXT: q0 = vcmp.gt(v6.uh,v7.uh) 1589; CHECK-NEXT: q1 = vcmp.eq(v5.h,v2.h) 1590; CHECK-NEXT: } 1591; CHECK-NEXT: { 1592; CHECK-NEXT: v26.uh = vlsr(v7.uh,r5) 1593; CHECK-NEXT: v27 = vmux(q1,v2,v1) 1594; CHECK-NEXT: v1 = vmux(q0,v1,v2) 1595; CHECK-NEXT: } 1596; CHECK-NEXT: { 1597; CHECK-NEXT: v1.h = vadd(v1.h,v4.h) 1598; CHECK-NEXT: v28.h = vadd(v26.h,v27.h) 1599; CHECK-NEXT: q2 = vcmp.eq(v6.h,v26.h) 1600; CHECK-NEXT: } 1601; CHECK-NEXT: { 1602; CHECK-NEXT: 
v29.uh = vlsr(v26.uh,r6) 1603; CHECK-NEXT: v1.h = vsub(v1.h,v3.h) 1604; CHECK-NEXT: } 1605; CHECK-NEXT: { 1606; CHECK-NEXT: v30.uh = vlsr(v28.uh,r6) 1607; CHECK-NEXT: } 1608; CHECK-NEXT: { 1609; CHECK-NEXT: v1.h = vasl(v1.h,r4) 1610; CHECK-NEXT: v3 = vmux(q2,v30,v29) 1611; CHECK-NEXT: } 1612; CHECK-NEXT: { 1613; CHECK-NEXT: v31 = vor(v3,v1) 1614; CHECK-NEXT: } 1615; CHECK-NEXT: { 1616; CHECK-NEXT: v0 = vmux(q3,v2,v31) 1617; CHECK-NEXT: jumpr r31 1618; CHECK-NEXT: vmem(r1+#0) = v0.new 1619; CHECK-NEXT: } 1620 %v0 = load <64 x i8>, ptr %a0, align 128 1621 %v1 = uitofp <64 x i8> %v0 to <64 x half> 1622 store <64 x half> %v1, ptr %a1, align 128 1623 ret void 1624} 1625 1626 1627; u8 -> f32 1628; No widening 1629define void @u8f32_0(ptr %a0, ptr %a1) #0 { 1630; CHECK-LABEL: u8f32_0: 1631; CHECK: .cfi_startproc 1632; CHECK-NEXT: // %bb.0: 1633; CHECK-NEXT: { 1634; CHECK-NEXT: r7 = #64 1635; CHECK-NEXT: r0 = #1 1636; CHECK-NEXT: r6 = #512 1637; CHECK-NEXT: v0 = vmem(r0+#0) 1638; CHECK-NEXT: } 1639; CHECK-NEXT: { 1640; CHECK-NEXT: v4 = vsplat(r0) 1641; CHECK-NEXT: r3:2 = combine(##255,#8) 1642; CHECK-NEXT: v1 = valign(v0,v0,r7) 1643; CHECK-NEXT: } 1644; CHECK-NEXT: { 1645; CHECK-NEXT: v15 = vsplat(r6) 1646; CHECK-NEXT: v6 = vsplat(r3) 1647; CHECK-NEXT: r5 = #159 1648; CHECK-NEXT: v3:2.uh = vunpack(v0.ub) 1649; CHECK-NEXT: } 1650; CHECK-NEXT: { 1651; CHECK-NEXT: r4 = #23 1652; CHECK-NEXT: v31:30.uh = vunpack(v1.ub) 1653; CHECK-NEXT: } 1654; CHECK-NEXT: { 1655; CHECK-NEXT: v3:2.uw = vunpack(v2.uh) 1656; CHECK-NEXT: } 1657; CHECK-NEXT: { 1658; CHECK-NEXT: v1:0.uw = vunpack(v30.uh) 1659; CHECK-NEXT: } 1660; CHECK-NEXT: { 1661; CHECK-NEXT: v5.uw = vcl0(v2.uw) 1662; CHECK-NEXT: } 1663; CHECK-NEXT: { 1664; CHECK-NEXT: v7.uw = vcl0(v0.uw) 1665; CHECK-NEXT: v5.w = vadd(v5.w,v4.w) 1666; CHECK-NEXT: } 1667; CHECK-NEXT: { 1668; CHECK-NEXT: v8.uw = vcl0(v3.uw) 1669; CHECK-NEXT: v11.w = vadd(v7.w,v4.w) 1670; CHECK-NEXT: v7 = vxor(v7,v7) 1671; CHECK-NEXT: } 1672; CHECK-NEXT: { 1673; 
CHECK-NEXT: v9.uw = vcl0(v1.uw) 1674; CHECK-NEXT: v10.w = vadd(v8.w,v4.w) 1675; CHECK-NEXT: } 1676; CHECK-NEXT: { 1677; CHECK-NEXT: v9 = vsplat(r5) 1678; CHECK-NEXT: v14.w = vasl(v0.w,v11.w) 1679; CHECK-NEXT: v8.w = vadd(v9.w,v4.w) 1680; CHECK-NEXT: } 1681; CHECK-NEXT: { 1682; CHECK-NEXT: v12.w = vasl(v2.w,v5.w) 1683; CHECK-NEXT: v24 = vand(v14,v15) 1684; CHECK-NEXT: v20.w = vadd(v14.w,v6.w) 1685; CHECK-NEXT: } 1686; CHECK-NEXT: { 1687; CHECK-NEXT: v13.w = vasl(v3.w,v10.w) 1688; CHECK-NEXT: v19 = vand(v12,v15) 1689; CHECK-NEXT: q3 = vcmp.eq(v24.w,v7.w) 1690; CHECK-NEXT: v18.w = vadd(v12.w,v6.w) 1691; CHECK-NEXT: } 1692; CHECK-NEXT: { 1693; CHECK-NEXT: v16.w = vasl(v1.w,v8.w) 1694; CHECK-NEXT: v23 = vand(v13,v15) 1695; CHECK-NEXT: v22.w = vadd(v13.w,v6.w) 1696; CHECK-NEXT: q0 = vcmp.gt(v14.uw,v20.uw) 1697; CHECK-NEXT: } 1698; CHECK-NEXT: { 1699; CHECK-NEXT: v6.w = vadd(v16.w,v6.w) 1700; CHECK-NEXT: v15 = vand(v16,v15) 1701; CHECK-NEXT: v30 = vmux(q3,v7,v4) 1702; CHECK-NEXT: q2 = vcmp.eq(v19.w,v7.w) 1703; CHECK-NEXT: } 1704; CHECK-NEXT: { 1705; CHECK-NEXT: v21.uw = vlsr(v14.uw,r2) 1706; CHECK-NEXT: q3 = vcmp.eq(v15.w,v7.w) 1707; CHECK-NEXT: v28 = vmux(q0,v4,v7) 1708; CHECK-NEXT: q1 = vcmp.eq(v23.w,v7.w) 1709; CHECK-NEXT: } 1710; CHECK-NEXT: { 1711; CHECK-NEXT: v14.uw = vlsr(v20.uw,r2) 1712; CHECK-NEXT: v26 = vmux(q3,v7,v4) 1713; CHECK-NEXT: v11.w = vsub(v28.w,v11.w) 1714; CHECK-NEXT: q3 = vcmp.gt(v13.uw,v22.uw) 1715; CHECK-NEXT: } 1716; CHECK-NEXT: { 1717; CHECK-NEXT: v15.uw = vlsr(v6.uw,r2) 1718; CHECK-NEXT: v20.w = vadd(v14.w,v30.w) 1719; CHECK-NEXT: v30 = vmux(q1,v7,v4) 1720; CHECK-NEXT: v31 = vmux(q2,v7,v4) 1721; CHECK-NEXT: } 1722; CHECK-NEXT: { 1723; CHECK-NEXT: v19.uw = vlsr(v18.uw,r2) 1724; CHECK-NEXT: v29.w = vadd(v15.w,v26.w) 1725; CHECK-NEXT: q1 = vcmp.gt(v12.uw,v18.uw) 1726; CHECK-NEXT: v11.w = vadd(v11.w,v9.w) 1727; CHECK-NEXT: } 1728; CHECK-NEXT: { 1729; CHECK-NEXT: v28.uw = vlsr(v22.uw,r2) 1730; CHECK-NEXT: v23.w = vadd(v19.w,v31.w) 1731; CHECK-NEXT: 
v22 = vmux(q3,v4,v7) 1732; CHECK-NEXT: q3 = vcmp.gt(v16.uw,v6.uw) 1733; CHECK-NEXT: } 1734; CHECK-NEXT: { 1735; CHECK-NEXT: v24.uw = vlsr(v29.uw,r0) 1736; CHECK-NEXT: v31.w = vadd(v28.w,v30.w) 1737; CHECK-NEXT: v30 = vmux(q1,v4,v7) 1738; CHECK-NEXT: v4 = vmux(q3,v4,v7) 1739; CHECK-NEXT: } 1740; CHECK-NEXT: { 1741; CHECK-NEXT: v17.uw = vlsr(v12.uw,r2) 1742; CHECK-NEXT: v5.w = vsub(v30.w,v5.w) 1743; CHECK-NEXT: v29.w = vsub(v22.w,v10.w) 1744; CHECK-NEXT: v4.w = vsub(v4.w,v8.w) 1745; CHECK-NEXT: } 1746; CHECK-NEXT: { 1747; CHECK-NEXT: v13.uw = vlsr(v13.uw,r2) 1748; CHECK-NEXT: v6.w = vadd(v29.w,v9.w) 1749; CHECK-NEXT: v5.w = vadd(v5.w,v9.w) 1750; CHECK-NEXT: q0 = vcmp.eq(v21.w,v14.w) 1751; CHECK-NEXT: } 1752; CHECK-NEXT: { 1753; CHECK-NEXT: v25.uw = vlsr(v16.uw,r2) 1754; CHECK-NEXT: q2 = vcmp.eq(v17.w,v19.w) 1755; CHECK-NEXT: q3 = vcmp.eq(v13.w,v28.w) 1756; CHECK-NEXT: v4.w = vadd(v4.w,v9.w) 1757; CHECK-NEXT: } 1758; CHECK-NEXT: { 1759; CHECK-NEXT: v21.uw = vlsr(v23.uw,r0) 1760; CHECK-NEXT: q1 = vcmp.eq(v25.w,v15.w) 1761; CHECK-NEXT: } 1762; CHECK-NEXT: { 1763; CHECK-NEXT: v23.uw = vlsr(v19.uw,r0) 1764; CHECK-NEXT: } 1765; CHECK-NEXT: { 1766; CHECK-NEXT: v31.uw = vlsr(v31.uw,r0) 1767; CHECK-NEXT: v23 = vmux(q2,v21,v23) 1768; CHECK-NEXT: q2 = vcmp.eq(v3.w,v7.w) 1769; CHECK-NEXT: } 1770; CHECK-NEXT: { 1771; CHECK-NEXT: v16.uw = vlsr(v28.uw,r0) 1772; CHECK-NEXT: } 1773; CHECK-NEXT: { 1774; CHECK-NEXT: v26.uw = vlsr(v15.uw,r0) 1775; CHECK-NEXT: v8 = vmux(q3,v31,v16) 1776; CHECK-NEXT: q3 = vcmp.eq(v2.w,v7.w) 1777; CHECK-NEXT: } 1778; CHECK-NEXT: { 1779; CHECK-NEXT: v6.w = vasl(v6.w,r4) 1780; CHECK-NEXT: v22 = vmux(q1,v24,v26) 1781; CHECK-NEXT: } 1782; CHECK-NEXT: { 1783; CHECK-NEXT: v5.w = vasl(v5.w,r4) 1784; CHECK-NEXT: v6 = vor(v8,v6) 1785; CHECK-NEXT: } 1786; CHECK-NEXT: { 1787; CHECK-NEXT: v27.uw = vlsr(v14.uw,r0) 1788; CHECK-NEXT: v25 = vor(v23,v5) 1789; CHECK-NEXT: v26 = vmux(q2,v7,v6) 1790; CHECK-NEXT: vmem(r1+#1) = v26.new 1791; CHECK-NEXT: } 1792; CHECK-NEXT: { 
1793; CHECK-NEXT: v20.uw = vlsr(v20.uw,r0) 1794; CHECK-NEXT: v28 = vmux(q3,v7,v25) 1795; CHECK-NEXT: q2 = vcmp.eq(v1.w,v7.w) 1796; CHECK-NEXT: vmem(r1+#0) = v28.new 1797; CHECK-NEXT: } 1798; CHECK-NEXT: { 1799; CHECK-NEXT: v11.w = vasl(v11.w,r4) 1800; CHECK-NEXT: v20 = vmux(q0,v20,v27) 1801; CHECK-NEXT: q3 = vcmp.eq(v0.w,v7.w) 1802; CHECK-NEXT: } 1803; CHECK-NEXT: { 1804; CHECK-NEXT: v24.w = vasl(v4.w,r4) 1805; CHECK-NEXT: v29 = vor(v20,v11) 1806; CHECK-NEXT: } 1807; CHECK-NEXT: { 1808; CHECK-NEXT: v27 = vor(v22,v24) 1809; CHECK-NEXT: v31 = vmux(q3,v7,v29) 1810; CHECK-NEXT: vmem(r1+#2) = v31.new 1811; CHECK-NEXT: } 1812; CHECK-NEXT: { 1813; CHECK-NEXT: v30 = vmux(q2,v7,v27) 1814; CHECK-NEXT: jumpr r31 1815; CHECK-NEXT: vmem(r1+#3) = v30.new 1816; CHECK-NEXT: } 1817 %v0 = load <128 x i8>, ptr %a0, align 128 1818 %v1 = uitofp <128 x i8> %v0 to <128 x float> 1819 store <128 x float> %v1, ptr %a1, align 128 1820 ret void 1821} 1822 1823; Widen input #1 1824define void @u8f32_1(ptr %a0, ptr %a1) #0 { 1825; CHECK-LABEL: u8f32_1: 1826; CHECK: .cfi_startproc 1827; CHECK-NEXT: // %bb.0: 1828; CHECK-NEXT: { 1829; CHECK-NEXT: r7 = #1 1830; CHECK-NEXT: r6 = #512 1831; CHECK-NEXT: v3:2.uh = vunpack(v0.ub) 1832; CHECK-NEXT: v0.cur = vmem(r0+#0) 1833; CHECK-NEXT: } 1834; CHECK-NEXT: { 1835; CHECK-NEXT: v1 = vsplat(r7) 1836; CHECK-NEXT: v8 = vsplat(r6) 1837; CHECK-NEXT: r3:2 = combine(##255,#8) 1838; CHECK-NEXT: } 1839; CHECK-NEXT: { 1840; CHECK-NEXT: v6 = vsplat(r3) 1841; CHECK-NEXT: r5 = #159 1842; CHECK-NEXT: v3:2.uw = vunpack(v2.uh) 1843; CHECK-NEXT: v21 = vxor(v21,v21) 1844; CHECK-NEXT: } 1845; CHECK-NEXT: { 1846; CHECK-NEXT: v13 = vsplat(r5) 1847; CHECK-NEXT: r4 = #23 1848; CHECK-NEXT: } 1849; CHECK-NEXT: { 1850; CHECK-NEXT: v4.uw = vcl0(v2.uw) 1851; CHECK-NEXT: } 1852; CHECK-NEXT: { 1853; CHECK-NEXT: v5.uw = vcl0(v3.uw) 1854; CHECK-NEXT: v4.w = vadd(v4.w,v1.w) 1855; CHECK-NEXT: } 1856; CHECK-NEXT: { 1857; CHECK-NEXT: v5.w = vadd(v5.w,v1.w) 1858; CHECK-NEXT: } 1859; 
CHECK-NEXT: { 1860; CHECK-NEXT: v7.w = vasl(v2.w,v4.w) 1861; CHECK-NEXT: } 1862; CHECK-NEXT: { 1863; CHECK-NEXT: v9.w = vasl(v3.w,v5.w) 1864; CHECK-NEXT: v11 = vand(v7,v8) 1865; CHECK-NEXT: v10.w = vadd(v7.w,v6.w) 1866; CHECK-NEXT: } 1867; CHECK-NEXT: { 1868; CHECK-NEXT: v6.w = vadd(v9.w,v6.w) 1869; CHECK-NEXT: q1 = vcmp.eq(v11.w,v21.w) 1870; CHECK-NEXT: v8 = vand(v9,v8) 1871; CHECK-NEXT: q0 = vcmp.gt(v7.uw,v10.uw) 1872; CHECK-NEXT: } 1873; CHECK-NEXT: { 1874; CHECK-NEXT: v22.uw = vlsr(v10.uw,r2) 1875; CHECK-NEXT: v24 = vmux(q1,v21,v1) 1876; CHECK-NEXT: q3 = vcmp.eq(v8.w,v21.w) 1877; CHECK-NEXT: q1 = vcmp.gt(v9.uw,v6.uw) 1878; CHECK-NEXT: } 1879; CHECK-NEXT: { 1880; CHECK-NEXT: v23.uw = vlsr(v6.uw,r2) 1881; CHECK-NEXT: v25 = vmux(q0,v1,v21) 1882; CHECK-NEXT: v27 = vmux(q3,v21,v1) 1883; CHECK-NEXT: v1 = vmux(q1,v1,v21) 1884; CHECK-NEXT: } 1885; CHECK-NEXT: { 1886; CHECK-NEXT: v4.w = vsub(v25.w,v4.w) 1887; CHECK-NEXT: v1.w = vsub(v1.w,v5.w) 1888; CHECK-NEXT: v10.w = vadd(v22.w,v24.w) 1889; CHECK-NEXT: v28.w = vadd(v23.w,v27.w) 1890; CHECK-NEXT: } 1891; CHECK-NEXT: { 1892; CHECK-NEXT: v12.uw = vlsr(v7.uw,r2) 1893; CHECK-NEXT: v4.w = vadd(v4.w,v13.w) 1894; CHECK-NEXT: v1.w = vadd(v1.w,v13.w) 1895; CHECK-NEXT: } 1896; CHECK-NEXT: { 1897; CHECK-NEXT: v26.uw = vlsr(v9.uw,r2) 1898; CHECK-NEXT: q2 = vcmp.eq(v12.w,v22.w) 1899; CHECK-NEXT: } 1900; CHECK-NEXT: { 1901; CHECK-NEXT: v11.uw = vlsr(v22.uw,r7) 1902; CHECK-NEXT: q3 = vcmp.eq(v26.w,v23.w) 1903; CHECK-NEXT: } 1904; CHECK-NEXT: { 1905; CHECK-NEXT: v30.uw = vlsr(v10.uw,r7) 1906; CHECK-NEXT: } 1907; CHECK-NEXT: { 1908; CHECK-NEXT: v29.uw = vlsr(v23.uw,r7) 1909; CHECK-NEXT: v5 = vmux(q2,v30,v11) 1910; CHECK-NEXT: q2 = vcmp.eq(v3.w,v21.w) 1911; CHECK-NEXT: } 1912; CHECK-NEXT: { 1913; CHECK-NEXT: v6.uw = vlsr(v28.uw,r7) 1914; CHECK-NEXT: } 1915; CHECK-NEXT: { 1916; CHECK-NEXT: v4.w = vasl(v4.w,r4) 1917; CHECK-NEXT: v6 = vmux(q3,v6,v29) 1918; CHECK-NEXT: q3 = vcmp.eq(v2.w,v21.w) 1919; CHECK-NEXT: } 1920; CHECK-NEXT: { 1921; 
CHECK-NEXT: v1.w = vasl(v1.w,r4) 1922; CHECK-NEXT: v31 = vor(v5,v4) 1923; CHECK-NEXT: } 1924; CHECK-NEXT: { 1925; CHECK-NEXT: v1 = vor(v6,v1) 1926; CHECK-NEXT: v0 = vmux(q3,v21,v31) 1927; CHECK-NEXT: vmem(r1+#0) = v0.new 1928; CHECK-NEXT: } 1929; CHECK-NEXT: { 1930; CHECK-NEXT: v1 = vmux(q2,v21,v1) 1931; CHECK-NEXT: jumpr r31 1932; CHECK-NEXT: vmem(r1+#1) = v1.new 1933; CHECK-NEXT: } 1934 %v0 = load <64 x i8>, ptr %a0, align 128 1935 %v1 = uitofp <64 x i8> %v0 to <64 x float> 1936 store <64 x float> %v1, ptr %a1, align 128 1937 ret void 1938} 1939 1940; Widen input #2 1941define void @u8f32_2(ptr %a0, ptr %a1) #0 { 1942; CHECK-LABEL: u8f32_2: 1943; CHECK: .cfi_startproc 1944; CHECK-NEXT: // %bb.0: 1945; CHECK-NEXT: { 1946; CHECK-NEXT: v0 = vmem(r0+#0) 1947; CHECK-NEXT: } 1948; CHECK-NEXT: { 1949; CHECK-NEXT: r6 = #1 1950; CHECK-NEXT: r3 = #512 1951; CHECK-NEXT: v1:0.uh = vunpack(v0.ub) 1952; CHECK-NEXT: } 1953; CHECK-NEXT: { 1954; CHECK-NEXT: v2 = vsplat(r6) 1955; CHECK-NEXT: v4 = vsplat(r3) 1956; CHECK-NEXT: r2 = #255 1957; CHECK-NEXT: v3 = vxor(v3,v3) 1958; CHECK-NEXT: } 1959; CHECK-NEXT: { 1960; CHECK-NEXT: r5:4 = combine(##159,#8) 1961; CHECK-NEXT: v1:0.uw = vunpack(v0.uh) 1962; CHECK-NEXT: } 1963; CHECK-NEXT: { 1964; CHECK-NEXT: v1 = vsplat(r2) 1965; CHECK-NEXT: v7 = vsplat(r5) 1966; CHECK-NEXT: q3 = vcmp.eq(v0.w,v3.w) 1967; CHECK-NEXT: } 1968; CHECK-NEXT: { 1969; CHECK-NEXT: v5.uw = vcl0(v0.uw) 1970; CHECK-NEXT: } 1971; CHECK-NEXT: { 1972; CHECK-NEXT: v5.w = vadd(v5.w,v2.w) 1973; CHECK-NEXT: } 1974; CHECK-NEXT: { 1975; CHECK-NEXT: v6.w = vasl(v0.w,v5.w) 1976; CHECK-NEXT: } 1977; CHECK-NEXT: { 1978; CHECK-NEXT: v1.w = vadd(v6.w,v1.w) 1979; CHECK-NEXT: v4 = vand(v6,v4) 1980; CHECK-NEXT: } 1981; CHECK-NEXT: { 1982; CHECK-NEXT: v6.uw = vlsr(v6.uw,r4) 1983; CHECK-NEXT: q0 = vcmp.gt(v6.uw,v1.uw) 1984; CHECK-NEXT: q1 = vcmp.eq(v4.w,v3.w) 1985; CHECK-NEXT: } 1986; CHECK-NEXT: { 1987; CHECK-NEXT: r4 = #23 1988; CHECK-NEXT: v1.uw = vlsr(v1.uw,r4) 1989; CHECK-NEXT: v4 = 
vmux(q1,v3,v2) 1990; CHECK-NEXT: v2 = vmux(q0,v2,v3) 1991; CHECK-NEXT: } 1992; CHECK-NEXT: { 1993; CHECK-NEXT: v2.w = vsub(v2.w,v5.w) 1994; CHECK-NEXT: v4.w = vadd(v1.w,v4.w) 1995; CHECK-NEXT: q2 = vcmp.eq(v6.w,v1.w) 1996; CHECK-NEXT: } 1997; CHECK-NEXT: { 1998; CHECK-NEXT: v29.uw = vlsr(v1.uw,r6) 1999; CHECK-NEXT: v2.w = vadd(v2.w,v7.w) 2000; CHECK-NEXT: } 2001; CHECK-NEXT: { 2002; CHECK-NEXT: v30.uw = vlsr(v4.uw,r6) 2003; CHECK-NEXT: } 2004; CHECK-NEXT: { 2005; CHECK-NEXT: v2.w = vasl(v2.w,r4) 2006; CHECK-NEXT: v1 = vmux(q2,v30,v29) 2007; CHECK-NEXT: } 2008; CHECK-NEXT: { 2009; CHECK-NEXT: v31 = vor(v1,v2) 2010; CHECK-NEXT: } 2011; CHECK-NEXT: { 2012; CHECK-NEXT: v0 = vmux(q3,v3,v31) 2013; CHECK-NEXT: jumpr r31 2014; CHECK-NEXT: vmem(r1+#0) = v0.new 2015; CHECK-NEXT: } 2016 %v0 = load <32 x i8>, ptr %a0, align 128 2017 %v1 = uitofp <32 x i8> %v0 to <32 x float> 2018 store <32 x float> %v1, ptr %a1, align 128 2019 ret void 2020} 2021 2022 2023; u16 -> f16 2024; No widening 2025define void @u16f16_0(ptr %a0, ptr %a1) #0 { 2026; CHECK-LABEL: u16f16_0: 2027; CHECK: .cfi_startproc 2028; CHECK-NEXT: // %bb.0: 2029; CHECK-NEXT: { 2030; CHECK-NEXT: r3:2 = combine(#64,#1) 2031; CHECK-NEXT: r5 = #31 2032; CHECK-NEXT: v1.uh = vcl0(v0.uh) 2033; CHECK-NEXT: v0.cur = vmem(r0+#0) 2034; CHECK-NEXT: } 2035; CHECK-NEXT: { 2036; CHECK-NEXT: v2.h = vsplat(r2) 2037; CHECK-NEXT: v5.h = vsplat(r3) 2038; CHECK-NEXT: r4 = #5 2039; CHECK-NEXT: v3 = vxor(v3,v3) 2040; CHECK-NEXT: } 2041; CHECK-NEXT: { 2042; CHECK-NEXT: v4.h = vsplat(r5) 2043; CHECK-NEXT: r3 = #10 2044; CHECK-NEXT: v1.h = vadd(v1.h,v2.h) 2045; CHECK-NEXT: q3 = vcmp.eq(v0.h,v3.h) 2046; CHECK-NEXT: } 2047; CHECK-NEXT: { 2048; CHECK-NEXT: v6.h = vasl(v0.h,v1.h) 2049; CHECK-NEXT: } 2050; CHECK-NEXT: { 2051; CHECK-NEXT: v7.h = vadd(v6.h,v4.h) 2052; CHECK-NEXT: v5 = vand(v6,v5) 2053; CHECK-NEXT: } 2054; CHECK-NEXT: { 2055; CHECK-NEXT: v6.uh = vlsr(v6.uh,r4) 2056; CHECK-NEXT: q0 = vcmp.eq(v5.h,v3.h) 2057; CHECK-NEXT: q1 = 
vcmp.gt(v6.uh,v7.uh) 2058; CHECK-NEXT: } 2059; CHECK-NEXT: { 2060; CHECK-NEXT: v26.uh = vlsr(v7.uh,r4) 2061; CHECK-NEXT: v27 = vmux(q0,v3,v2) 2062; CHECK-NEXT: v2 = vmux(q1,v2,v3) 2063; CHECK-NEXT: } 2064; CHECK-NEXT: { 2065; CHECK-NEXT: v2.h = vadd(v2.h,v4.h) 2066; CHECK-NEXT: v28.h = vadd(v26.h,v27.h) 2067; CHECK-NEXT: q2 = vcmp.eq(v6.h,v26.h) 2068; CHECK-NEXT: } 2069; CHECK-NEXT: { 2070; CHECK-NEXT: v29.uh = vlsr(v26.uh,r2) 2071; CHECK-NEXT: v1.h = vsub(v2.h,v1.h) 2072; CHECK-NEXT: } 2073; CHECK-NEXT: { 2074; CHECK-NEXT: v30.uh = vlsr(v28.uh,r2) 2075; CHECK-NEXT: } 2076; CHECK-NEXT: { 2077; CHECK-NEXT: v1.h = vasl(v1.h,r3) 2078; CHECK-NEXT: v2 = vmux(q2,v30,v29) 2079; CHECK-NEXT: } 2080; CHECK-NEXT: { 2081; CHECK-NEXT: v31 = vor(v2,v1) 2082; CHECK-NEXT: } 2083; CHECK-NEXT: { 2084; CHECK-NEXT: v0 = vmux(q3,v3,v31) 2085; CHECK-NEXT: jumpr r31 2086; CHECK-NEXT: vmem(r1+#0) = v0.new 2087; CHECK-NEXT: } 2088 %v0 = load <64 x i16>, ptr %a0, align 128 2089 %v1 = uitofp <64 x i16> %v0 to <64 x half> 2090 store <64 x half> %v1, ptr %a1, align 128 2091 ret void 2092} 2093 2094; Widen input and result 2095define void @u16f16_1(ptr %a0, ptr %a1) #0 { 2096; CHECK-LABEL: u16f16_1: 2097; CHECK: .cfi_startproc 2098; CHECK-NEXT: // %bb.0: 2099; CHECK-NEXT: { 2100; CHECK-NEXT: r3:2 = combine(#31,#1) 2101; CHECK-NEXT: r6 = #64 2102; CHECK-NEXT: v1.uh = vcl0(v0.uh) 2103; CHECK-NEXT: v0.cur = vmem(r0+#0) 2104; CHECK-NEXT: } 2105; CHECK-NEXT: { 2106; CHECK-NEXT: v2.h = vsplat(r2) 2107; CHECK-NEXT: v4.h = vsplat(r3) 2108; CHECK-NEXT: r5 = #5 2109; CHECK-NEXT: v3 = vxor(v3,v3) 2110; CHECK-NEXT: } 2111; CHECK-NEXT: { 2112; CHECK-NEXT: v5.h = vsplat(r6) 2113; CHECK-NEXT: r4 = #10 2114; CHECK-NEXT: v1.h = vadd(v1.h,v2.h) 2115; CHECK-NEXT: q2 = vcmp.eq(v0.h,v3.h) 2116; CHECK-NEXT: } 2117; CHECK-NEXT: { 2118; CHECK-NEXT: q3 = vsetq(r6) 2119; CHECK-NEXT: } 2120; CHECK-NEXT: { 2121; CHECK-NEXT: v6.h = vasl(v0.h,v1.h) 2122; CHECK-NEXT: } 2123; CHECK-NEXT: { 2124; CHECK-NEXT: v7.h = 
vadd(v6.h,v4.h) 2125; CHECK-NEXT: v5 = vand(v6,v5) 2126; CHECK-NEXT: } 2127; CHECK-NEXT: { 2128; CHECK-NEXT: v6.uh = vlsr(v6.uh,r5) 2129; CHECK-NEXT: q1 = vcmp.eq(v5.h,v3.h) 2130; CHECK-NEXT: q0 = vcmp.gt(v6.uh,v7.uh) 2131; CHECK-NEXT: } 2132; CHECK-NEXT: { 2133; CHECK-NEXT: v7.uh = vlsr(v7.uh,r5) 2134; CHECK-NEXT: v5 = vmux(q1,v3,v2) 2135; CHECK-NEXT: v2 = vmux(q0,v2,v3) 2136; CHECK-NEXT: } 2137; CHECK-NEXT: { 2138; CHECK-NEXT: v2.h = vadd(v2.h,v4.h) 2139; CHECK-NEXT: v28.h = vadd(v7.h,v5.h) 2140; CHECK-NEXT: q1 = vcmp.eq(v6.h,v7.h) 2141; CHECK-NEXT: } 2142; CHECK-NEXT: { 2143; CHECK-NEXT: v29.uh = vlsr(v7.uh,r2) 2144; CHECK-NEXT: v1.h = vsub(v2.h,v1.h) 2145; CHECK-NEXT: } 2146; CHECK-NEXT: { 2147; CHECK-NEXT: v30.uh = vlsr(v28.uh,r2) 2148; CHECK-NEXT: } 2149; CHECK-NEXT: { 2150; CHECK-NEXT: v1.h = vasl(v1.h,r4) 2151; CHECK-NEXT: v2 = vmux(q1,v30,v29) 2152; CHECK-NEXT: } 2153; CHECK-NEXT: { 2154; CHECK-NEXT: v31 = vor(v2,v1) 2155; CHECK-NEXT: } 2156; CHECK-NEXT: { 2157; CHECK-NEXT: v0 = vmux(q2,v3,v31) 2158; CHECK-NEXT: } 2159; CHECK-NEXT: { 2160; CHECK-NEXT: jumpr r31 2161; CHECK-NEXT: if (q3) vmem(r1+#0) = v0 2162; CHECK-NEXT: } 2163 %v0 = load <32 x i16>, ptr %a0, align 128 2164 %v1 = uitofp <32 x i16> %v0 to <32 x half> 2165 store <32 x half> %v1, ptr %a1, align 128 2166 ret void 2167} 2168 2169 2170; u16 -> f32 2171; No widening 2172define void @u16f32_0(ptr %a0, ptr %a1) #0 { 2173; CHECK-LABEL: u16f32_0: 2174; CHECK: .cfi_startproc 2175; CHECK-NEXT: // %bb.0: 2176; CHECK-NEXT: { 2177; CHECK-NEXT: v0 = vmem(r0+#0) 2178; CHECK-NEXT: } 2179; CHECK-NEXT: { 2180; CHECK-NEXT: r7 = #1 2181; CHECK-NEXT: r3:2 = combine(##255,#8) 2182; CHECK-NEXT: v1:0.uw = vunpack(v0.uh) 2183; CHECK-NEXT: } 2184; CHECK-NEXT: { 2185; CHECK-NEXT: v3 = vsplat(r7) 2186; CHECK-NEXT: v6 = vsplat(r3) 2187; CHECK-NEXT: r6 = #512 2188; CHECK-NEXT: v2 = vxor(v2,v2) 2189; CHECK-NEXT: } 2190; CHECK-NEXT: { 2191; CHECK-NEXT: v8 = vsplat(r6) 2192; CHECK-NEXT: r5 = #159 2193; CHECK-NEXT: r4 = #23 
2194; CHECK-NEXT: v4.uw = vcl0(v0.uw) 2195; CHECK-NEXT: } 2196; CHECK-NEXT: { 2197; CHECK-NEXT: v14 = vsplat(r5) 2198; CHECK-NEXT: v5.uw = vcl0(v1.uw) 2199; CHECK-NEXT: v4.w = vadd(v4.w,v3.w) 2200; CHECK-NEXT: } 2201; CHECK-NEXT: { 2202; CHECK-NEXT: v5.w = vadd(v5.w,v3.w) 2203; CHECK-NEXT: } 2204; CHECK-NEXT: { 2205; CHECK-NEXT: v7.w = vasl(v0.w,v4.w) 2206; CHECK-NEXT: } 2207; CHECK-NEXT: { 2208; CHECK-NEXT: v9.w = vasl(v1.w,v5.w) 2209; CHECK-NEXT: v10.w = vadd(v7.w,v6.w) 2210; CHECK-NEXT: v11 = vand(v7,v8) 2211; CHECK-NEXT: } 2212; CHECK-NEXT: { 2213; CHECK-NEXT: v6.w = vadd(v9.w,v6.w) 2214; CHECK-NEXT: v8 = vand(v9,v8) 2215; CHECK-NEXT: q1 = vcmp.eq(v11.w,v2.w) 2216; CHECK-NEXT: q0 = vcmp.gt(v7.uw,v10.uw) 2217; CHECK-NEXT: } 2218; CHECK-NEXT: { 2219; CHECK-NEXT: v19.uw = vlsr(v10.uw,r2) 2220; CHECK-NEXT: q2 = vcmp.eq(v8.w,v2.w) 2221; CHECK-NEXT: q3 = vcmp.gt(v9.uw,v6.uw) 2222; CHECK-NEXT: v20 = vmux(q1,v2,v3) 2223; CHECK-NEXT: } 2224; CHECK-NEXT: { 2225; CHECK-NEXT: v21.uw = vlsr(v6.uw,r2) 2226; CHECK-NEXT: v22 = vmux(q2,v2,v3) 2227; CHECK-NEXT: v25 = vmux(q0,v3,v2) 2228; CHECK-NEXT: v3 = vmux(q3,v3,v2) 2229; CHECK-NEXT: } 2230; CHECK-NEXT: { 2231; CHECK-NEXT: v4.w = vsub(v25.w,v4.w) 2232; CHECK-NEXT: v3.w = vsub(v3.w,v5.w) 2233; CHECK-NEXT: v23.w = vadd(v19.w,v20.w) 2234; CHECK-NEXT: v10.w = vadd(v21.w,v22.w) 2235; CHECK-NEXT: } 2236; CHECK-NEXT: { 2237; CHECK-NEXT: v12.uw = vlsr(v7.uw,r2) 2238; CHECK-NEXT: v4.w = vadd(v4.w,v14.w) 2239; CHECK-NEXT: v3.w = vadd(v3.w,v14.w) 2240; CHECK-NEXT: } 2241; CHECK-NEXT: { 2242; CHECK-NEXT: v24.uw = vlsr(v9.uw,r2) 2243; CHECK-NEXT: q2 = vcmp.eq(v12.w,v19.w) 2244; CHECK-NEXT: } 2245; CHECK-NEXT: { 2246; CHECK-NEXT: v13.uw = vlsr(v19.uw,r7) 2247; CHECK-NEXT: q3 = vcmp.eq(v24.w,v21.w) 2248; CHECK-NEXT: } 2249; CHECK-NEXT: { 2250; CHECK-NEXT: v26.uw = vlsr(v23.uw,r7) 2251; CHECK-NEXT: } 2252; CHECK-NEXT: { 2253; CHECK-NEXT: v27.uw = vlsr(v10.uw,r7) 2254; CHECK-NEXT: v5 = vmux(q2,v26,v13) 2255; CHECK-NEXT: q2 = 
vcmp.eq(v1.w,v2.w) 2256; CHECK-NEXT: } 2257; CHECK-NEXT: { 2258; CHECK-NEXT: v28.uw = vlsr(v21.uw,r7) 2259; CHECK-NEXT: } 2260; CHECK-NEXT: { 2261; CHECK-NEXT: v4.w = vasl(v4.w,r4) 2262; CHECK-NEXT: v6 = vmux(q3,v27,v28) 2263; CHECK-NEXT: q3 = vcmp.eq(v0.w,v2.w) 2264; CHECK-NEXT: } 2265; CHECK-NEXT: { 2266; CHECK-NEXT: v3.w = vasl(v3.w,r4) 2267; CHECK-NEXT: v29 = vor(v5,v4) 2268; CHECK-NEXT: } 2269; CHECK-NEXT: { 2270; CHECK-NEXT: v3 = vor(v6,v3) 2271; CHECK-NEXT: v31 = vmux(q3,v2,v29) 2272; CHECK-NEXT: vmem(r1+#0) = v31.new 2273; CHECK-NEXT: } 2274; CHECK-NEXT: { 2275; CHECK-NEXT: v30 = vmux(q2,v2,v3) 2276; CHECK-NEXT: jumpr r31 2277; CHECK-NEXT: vmem(r1+#1) = v30.new 2278; CHECK-NEXT: } 2279 %v0 = load <64 x i16>, ptr %a0, align 128 2280 %v1 = uitofp <64 x i16> %v0 to <64 x float> 2281 store <64 x float> %v1, ptr %a1, align 128 2282 ret void 2283} 2284 2285; Widen input 2286define void @u16f32_1(ptr %a0, ptr %a1) #0 { 2287; CHECK-LABEL: u16f32_1: 2288; CHECK: .cfi_startproc 2289; CHECK-NEXT: // %bb.0: 2290; CHECK-NEXT: { 2291; CHECK-NEXT: v0 = vmem(r0+#0) 2292; CHECK-NEXT: } 2293; CHECK-NEXT: { 2294; CHECK-NEXT: r6 = #1 2295; CHECK-NEXT: r2 = #255 2296; CHECK-NEXT: v1:0.uw = vunpack(v0.uh) 2297; CHECK-NEXT: } 2298; CHECK-NEXT: { 2299; CHECK-NEXT: v1 = vsplat(r6) 2300; CHECK-NEXT: v4 = vsplat(r2) 2301; CHECK-NEXT: r3 = #512 2302; CHECK-NEXT: v2 = vxor(v2,v2) 2303; CHECK-NEXT: } 2304; CHECK-NEXT: { 2305; CHECK-NEXT: v5 = vsplat(r3) 2306; CHECK-NEXT: r5:4 = combine(##159,#8) 2307; CHECK-NEXT: v3.uw = vcl0(v0.uw) 2308; CHECK-NEXT: } 2309; CHECK-NEXT: { 2310; CHECK-NEXT: v7 = vsplat(r5) 2311; CHECK-NEXT: q3 = vcmp.eq(v0.w,v2.w) 2312; CHECK-NEXT: v3.w = vadd(v3.w,v1.w) 2313; CHECK-NEXT: } 2314; CHECK-NEXT: { 2315; CHECK-NEXT: v6.w = vasl(v0.w,v3.w) 2316; CHECK-NEXT: } 2317; CHECK-NEXT: { 2318; CHECK-NEXT: v4.w = vadd(v6.w,v4.w) 2319; CHECK-NEXT: v5 = vand(v6,v5) 2320; CHECK-NEXT: } 2321; CHECK-NEXT: { 2322; CHECK-NEXT: v6.uw = vlsr(v6.uw,r4) 2323; CHECK-NEXT: q0 = 
vcmp.gt(v6.uw,v4.uw) 2324; CHECK-NEXT: q1 = vcmp.eq(v5.w,v2.w) 2325; CHECK-NEXT: } 2326; CHECK-NEXT: { 2327; CHECK-NEXT: r4 = #23 2328; CHECK-NEXT: v4.uw = vlsr(v4.uw,r4) 2329; CHECK-NEXT: v5 = vmux(q1,v2,v1) 2330; CHECK-NEXT: v1 = vmux(q0,v1,v2) 2331; CHECK-NEXT: } 2332; CHECK-NEXT: { 2333; CHECK-NEXT: v1.w = vsub(v1.w,v3.w) 2334; CHECK-NEXT: v29.w = vadd(v4.w,v5.w) 2335; CHECK-NEXT: q2 = vcmp.eq(v6.w,v4.w) 2336; CHECK-NEXT: } 2337; CHECK-NEXT: { 2338; CHECK-NEXT: v30.uw = vlsr(v4.uw,r6) 2339; CHECK-NEXT: v1.w = vadd(v1.w,v7.w) 2340; CHECK-NEXT: } 2341; CHECK-NEXT: { 2342; CHECK-NEXT: v3.uw = vlsr(v29.uw,r6) 2343; CHECK-NEXT: } 2344; CHECK-NEXT: { 2345; CHECK-NEXT: v1.w = vasl(v1.w,r4) 2346; CHECK-NEXT: v3 = vmux(q2,v3,v30) 2347; CHECK-NEXT: } 2348; CHECK-NEXT: { 2349; CHECK-NEXT: v31 = vor(v3,v1) 2350; CHECK-NEXT: } 2351; CHECK-NEXT: { 2352; CHECK-NEXT: v0 = vmux(q3,v2,v31) 2353; CHECK-NEXT: jumpr r31 2354; CHECK-NEXT: vmem(r1+#0) = v0.new 2355; CHECK-NEXT: } 2356 %v0 = load <32 x i16>, ptr %a0, align 128 2357 %v1 = uitofp <32 x i16> %v0 to <32 x float> 2358 store <32 x float> %v1, ptr %a1, align 128 2359 ret void 2360} 2361 2362 2363; u32 -> f16 2364; No widening 2365define void @u32f16_0(ptr %a0, ptr %a1) #0 { 2366; CHECK-LABEL: u32f16_0: 2367; CHECK: .cfi_startproc 2368; CHECK-NEXT: // %bb.0: 2369; CHECK-NEXT: { 2370; CHECK-NEXT: r3:2 = combine(#8,#1) 2371; CHECK-NEXT: r6 = #255 2372; CHECK-NEXT: v3.uw = vcl0(v0.uw) 2373; CHECK-NEXT: v0.cur = vmem(r0+#1) 2374; CHECK-NEXT: } 2375; CHECK-NEXT: { 2376; CHECK-NEXT: v2 = vsplat(r2) 2377; CHECK-NEXT: r4 = #512 2378; CHECK-NEXT: v4.uw = vcl0(v1.uw) 2379; CHECK-NEXT: v1.cur = vmem(r0+#0) 2380; CHECK-NEXT: } 2381; CHECK-NEXT: { 2382; CHECK-NEXT: v7 = vsplat(r4) 2383; CHECK-NEXT: v6 = vsplat(r6) 2384; CHECK-NEXT: v4.w = vadd(v4.w,v2.w) 2385; CHECK-NEXT: v3.w = vadd(v3.w,v2.w) 2386; CHECK-NEXT: } 2387; CHECK-NEXT: { 2388; CHECK-NEXT: r4 = #159 2389; CHECK-NEXT: v9 = vxor(v9,v9) 2390; CHECK-NEXT: } 2391; CHECK-NEXT: { 
2392; CHECK-NEXT: v10 = vsplat(r4) 2393; CHECK-NEXT: v5.w = vasl(v1.w,v4.w) 2394; CHECK-NEXT: } 2395; CHECK-NEXT: { 2396; CHECK-NEXT: v8.w = vasl(v0.w,v3.w) 2397; CHECK-NEXT: v11.w = vadd(v5.w,v6.w) 2398; CHECK-NEXT: v13 = vand(v5,v7) 2399; CHECK-NEXT: } 2400; CHECK-NEXT: { 2401; CHECK-NEXT: v6.w = vadd(v8.w,v6.w) 2402; CHECK-NEXT: v7 = vand(v8,v7) 2403; CHECK-NEXT: q1 = vcmp.gt(v5.uw,v11.uw) 2404; CHECK-NEXT: q2 = vcmp.eq(v13.w,v9.w) 2405; CHECK-NEXT: } 2406; CHECK-NEXT: { 2407; CHECK-NEXT: v27.uw = vlsr(v11.uw,r3) 2408; CHECK-NEXT: q3 = vcmp.gt(v8.uw,v6.uw) 2409; CHECK-NEXT: q0 = vcmp.eq(v7.w,v9.w) 2410; CHECK-NEXT: v28 = vmux(q2,v9,v2) 2411; CHECK-NEXT: } 2412; CHECK-NEXT: { 2413; CHECK-NEXT: v6.uw = vlsr(v6.uw,r3) 2414; CHECK-NEXT: v29 = vmux(q1,v2,v9) 2415; CHECK-NEXT: v30 = vmux(q3,v2,v9) 2416; CHECK-NEXT: v2 = vmux(q0,v9,v2) 2417; CHECK-NEXT: } 2418; CHECK-NEXT: { 2419; CHECK-NEXT: v4.w = vsub(v29.w,v4.w) 2420; CHECK-NEXT: v7.w = vadd(v27.w,v28.w) 2421; CHECK-NEXT: v3.w = vsub(v30.w,v3.w) 2422; CHECK-NEXT: v2.w = vadd(v6.w,v2.w) 2423; CHECK-NEXT: } 2424; CHECK-NEXT: { 2425; CHECK-NEXT: v12.uw = vlsr(v5.uw,r3) 2426; CHECK-NEXT: v4.w = vadd(v4.w,v10.w) 2427; CHECK-NEXT: v3.w = vadd(v3.w,v10.w) 2428; CHECK-NEXT: q2 = vcmp.eq(v1.w,v9.w) 2429; CHECK-NEXT: } 2430; CHECK-NEXT: { 2431; CHECK-NEXT: r3 = #23 2432; CHECK-NEXT: v14.uw = vlsr(v8.uw,r3) 2433; CHECK-NEXT: q3 = vcmp.eq(v12.w,v27.w) 2434; CHECK-NEXT: } 2435; CHECK-NEXT: { 2436; CHECK-NEXT: v5.uw = vlsr(v27.uw,r2) 2437; CHECK-NEXT: q1 = vcmp.eq(v14.w,v6.w) 2438; CHECK-NEXT: } 2439; CHECK-NEXT: { 2440; CHECK-NEXT: v7.uw = vlsr(v7.uw,r2) 2441; CHECK-NEXT: } 2442; CHECK-NEXT: { 2443; CHECK-NEXT: v2.uw = vlsr(v2.uw,r2) 2444; CHECK-NEXT: v5 = vmux(q3,v7,v5) 2445; CHECK-NEXT: q3 = vcmp.eq(v0.w,v9.w) 2446; CHECK-NEXT: } 2447; CHECK-NEXT: { 2448; CHECK-NEXT: v6.uw = vlsr(v6.uw,r2) 2449; CHECK-NEXT: } 2450; CHECK-NEXT: { 2451; CHECK-NEXT: v4.w = vasl(v4.w,r3) 2452; CHECK-NEXT: v31 = vmux(q1,v2,v6) 2453; CHECK-NEXT: } 
2454; CHECK-NEXT: { 2455; CHECK-NEXT: v2.w = vasl(v3.w,r3) 2456; CHECK-NEXT: v4 = vor(v5,v4) 2457; CHECK-NEXT: } 2458; CHECK-NEXT: { 2459; CHECK-NEXT: v1 = vor(v31,v2) 2460; CHECK-NEXT: v3 = vmux(q2,v9,v4) 2461; CHECK-NEXT: } 2462; CHECK-NEXT: { 2463; CHECK-NEXT: v0 = vmux(q3,v9,v1) 2464; CHECK-NEXT: } 2465; CHECK-NEXT: { 2466; CHECK-NEXT: v2.qf32 = vadd(v3.sf,v9.sf) 2467; CHECK-NEXT: } 2468; CHECK-NEXT: { 2469; CHECK-NEXT: v3.qf32 = vadd(v0.sf,v9.sf) 2470; CHECK-NEXT: } 2471; CHECK-NEXT: { 2472; CHECK-NEXT: v0.hf = v3:2.qf32 2473; CHECK-NEXT: } 2474; CHECK-NEXT: { 2475; CHECK-NEXT: v0.h = vdeal(v0.h) 2476; CHECK-NEXT: jumpr r31 2477; CHECK-NEXT: vmem(r1+#0) = v0.new 2478; CHECK-NEXT: } 2479 %v0 = load <64 x i32>, ptr %a0, align 128 2480 %v1 = uitofp <64 x i32> %v0 to <64 x half> 2481 store <64 x half> %v1, ptr %a1, align 128 2482 ret void 2483} 2484 2485; Widen result 2486define void @u32f16_1(ptr %a0, ptr %a1) #0 { 2487; CHECK-LABEL: u32f16_1: 2488; CHECK: .cfi_startproc 2489; CHECK-NEXT: // %bb.0: 2490; CHECK-NEXT: { 2491; CHECK-NEXT: r3:2 = combine(##512,#1) 2492; CHECK-NEXT: v1.uw = vcl0(v0.uw) 2493; CHECK-NEXT: v0.cur = vmem(r0+#0) 2494; CHECK-NEXT: } 2495; CHECK-NEXT: { 2496; CHECK-NEXT: v3 = vsplat(r2) 2497; CHECK-NEXT: v5 = vsplat(r3) 2498; CHECK-NEXT: r6 = #255 2499; CHECK-NEXT: v2 = vxor(v2,v2) 2500; CHECK-NEXT: } 2501; CHECK-NEXT: { 2502; CHECK-NEXT: v4 = vsplat(r6) 2503; CHECK-NEXT: r5 = #8 2504; CHECK-NEXT: r4 = #159 2505; CHECK-NEXT: v1.w = vadd(v1.w,v3.w) 2506; CHECK-NEXT: } 2507; CHECK-NEXT: { 2508; CHECK-NEXT: v7 = vsplat(r4) 2509; CHECK-NEXT: r3 = #23 2510; CHECK-NEXT: q2 = vcmp.eq(v0.w,v2.w) 2511; CHECK-NEXT: } 2512; CHECK-NEXT: { 2513; CHECK-NEXT: v6.w = vasl(v0.w,v1.w) 2514; CHECK-NEXT: } 2515; CHECK-NEXT: { 2516; CHECK-NEXT: v4.w = vadd(v6.w,v4.w) 2517; CHECK-NEXT: v5 = vand(v6,v5) 2518; CHECK-NEXT: } 2519; CHECK-NEXT: { 2520; CHECK-NEXT: v6.uw = vlsr(v6.uw,r5) 2521; CHECK-NEXT: q0 = vcmp.eq(v5.w,v2.w) 2522; CHECK-NEXT: q1 = 
vcmp.gt(v6.uw,v4.uw) 2523; CHECK-NEXT: } 2524; CHECK-NEXT: { 2525; CHECK-NEXT: v4.uw = vlsr(v4.uw,r5) 2526; CHECK-NEXT: v5 = vmux(q0,v2,v3) 2527; CHECK-NEXT: v3 = vmux(q1,v3,v2) 2528; CHECK-NEXT: } 2529; CHECK-NEXT: { 2530; CHECK-NEXT: v1.w = vsub(v3.w,v1.w) 2531; CHECK-NEXT: v30.w = vadd(v4.w,v5.w) 2532; CHECK-NEXT: q1 = vcmp.eq(v6.w,v4.w) 2533; CHECK-NEXT: } 2534; CHECK-NEXT: { 2535; CHECK-NEXT: v31.uw = vlsr(v4.uw,r2) 2536; CHECK-NEXT: v1.w = vadd(v1.w,v7.w) 2537; CHECK-NEXT: } 2538; CHECK-NEXT: { 2539; CHECK-NEXT: r2 = #64 2540; CHECK-NEXT: v3.uw = vlsr(v30.uw,r2) 2541; CHECK-NEXT: } 2542; CHECK-NEXT: { 2543; CHECK-NEXT: v1.w = vasl(v1.w,r3) 2544; CHECK-NEXT: q3 = vsetq(r2) 2545; CHECK-NEXT: v3 = vmux(q1,v3,v31) 2546; CHECK-NEXT: } 2547; CHECK-NEXT: { 2548; CHECK-NEXT: v1.qf32 = vadd(v2.sf,v2.sf) 2549; CHECK-NEXT: v0 = vor(v3,v1) 2550; CHECK-NEXT: } 2551; CHECK-NEXT: { 2552; CHECK-NEXT: v0 = vmux(q2,v2,v0) 2553; CHECK-NEXT: } 2554; CHECK-NEXT: { 2555; CHECK-NEXT: v0.qf32 = vadd(v0.sf,v2.sf) 2556; CHECK-NEXT: } 2557; CHECK-NEXT: { 2558; CHECK-NEXT: v0.hf = v1:0.qf32 2559; CHECK-NEXT: } 2560; CHECK-NEXT: { 2561; CHECK-NEXT: v0.h = vdeal(v0.h) 2562; CHECK-NEXT: } 2563; CHECK-NEXT: { 2564; CHECK-NEXT: jumpr r31 2565; CHECK-NEXT: if (q3) vmem(r1+#0) = v0 2566; CHECK-NEXT: } 2567 %v0 = load <32 x i32>, ptr %a0, align 128 2568 %v1 = uitofp <32 x i32> %v0 to <32 x half> 2569 store <32 x half> %v1, ptr %a1, align 128 2570 ret void 2571} 2572 2573; u32 -> f32 2574; No widening 2575define void @u32f32_0(ptr %a0, ptr %a1) #0 { 2576; CHECK-LABEL: u32f32_0: 2577; CHECK: .cfi_startproc 2578; CHECK-NEXT: // %bb.0: 2579; CHECK-NEXT: { 2580; CHECK-NEXT: r3:2 = combine(##512,#1) 2581; CHECK-NEXT: v1.uw = vcl0(v0.uw) 2582; CHECK-NEXT: v0.cur = vmem(r0+#0) 2583; CHECK-NEXT: } 2584; CHECK-NEXT: { 2585; CHECK-NEXT: v2 = vsplat(r2) 2586; CHECK-NEXT: v5 = vsplat(r3) 2587; CHECK-NEXT: r6 = #255 2588; CHECK-NEXT: v3 = vxor(v3,v3) 2589; CHECK-NEXT: } 2590; CHECK-NEXT: { 2591; CHECK-NEXT: 
v4 = vsplat(r6) 2592; CHECK-NEXT: r5 = #8 2593; CHECK-NEXT: r4 = #159 2594; CHECK-NEXT: v1.w = vadd(v1.w,v2.w) 2595; CHECK-NEXT: } 2596; CHECK-NEXT: { 2597; CHECK-NEXT: v7 = vsplat(r4) 2598; CHECK-NEXT: r3 = #23 2599; CHECK-NEXT: q3 = vcmp.eq(v0.w,v3.w) 2600; CHECK-NEXT: } 2601; CHECK-NEXT: { 2602; CHECK-NEXT: v6.w = vasl(v0.w,v1.w) 2603; CHECK-NEXT: } 2604; CHECK-NEXT: { 2605; CHECK-NEXT: v4.w = vadd(v6.w,v4.w) 2606; CHECK-NEXT: v5 = vand(v6,v5) 2607; CHECK-NEXT: } 2608; CHECK-NEXT: { 2609; CHECK-NEXT: v6.uw = vlsr(v6.uw,r5) 2610; CHECK-NEXT: q0 = vcmp.eq(v5.w,v3.w) 2611; CHECK-NEXT: q1 = vcmp.gt(v6.uw,v4.uw) 2612; CHECK-NEXT: } 2613; CHECK-NEXT: { 2614; CHECK-NEXT: v4.uw = vlsr(v4.uw,r5) 2615; CHECK-NEXT: v5 = vmux(q0,v3,v2) 2616; CHECK-NEXT: v2 = vmux(q1,v2,v3) 2617; CHECK-NEXT: } 2618; CHECK-NEXT: { 2619; CHECK-NEXT: v1.w = vsub(v2.w,v1.w) 2620; CHECK-NEXT: v29.w = vadd(v4.w,v5.w) 2621; CHECK-NEXT: q2 = vcmp.eq(v6.w,v4.w) 2622; CHECK-NEXT: } 2623; CHECK-NEXT: { 2624; CHECK-NEXT: v30.uw = vlsr(v4.uw,r2) 2625; CHECK-NEXT: v1.w = vadd(v1.w,v7.w) 2626; CHECK-NEXT: } 2627; CHECK-NEXT: { 2628; CHECK-NEXT: v2.uw = vlsr(v29.uw,r2) 2629; CHECK-NEXT: } 2630; CHECK-NEXT: { 2631; CHECK-NEXT: v1.w = vasl(v1.w,r3) 2632; CHECK-NEXT: v2 = vmux(q2,v2,v30) 2633; CHECK-NEXT: } 2634; CHECK-NEXT: { 2635; CHECK-NEXT: v31 = vor(v2,v1) 2636; CHECK-NEXT: } 2637; CHECK-NEXT: { 2638; CHECK-NEXT: v0 = vmux(q3,v3,v31) 2639; CHECK-NEXT: jumpr r31 2640; CHECK-NEXT: vmem(r1+#0) = v0.new 2641; CHECK-NEXT: } 2642 %v0 = load <32 x i32>, ptr %a0, align 128 2643 %v1 = uitofp <32 x i32> %v0 to <32 x float> 2644 store <32 x float> %v1, ptr %a1, align 128 2645 ret void 2646} 2647 2648; Widen input and result 2649define void @u32f32_1(ptr %a0, ptr %a1) #0 { 2650; CHECK-LABEL: u32f32_1: 2651; CHECK: .cfi_startproc 2652; CHECK-NEXT: // %bb.0: 2653; CHECK-NEXT: { 2654; CHECK-NEXT: r3:2 = combine(##512,#1) 2655; CHECK-NEXT: v1.uw = vcl0(v0.uw) 2656; CHECK-NEXT: v0.cur = vmem(r0+#0) 2657; CHECK-NEXT: } 
2658; CHECK-NEXT: { 2659; CHECK-NEXT: v2 = vsplat(r2) 2660; CHECK-NEXT: v5 = vsplat(r3) 2661; CHECK-NEXT: r6 = #255 2662; CHECK-NEXT: v3 = vxor(v3,v3) 2663; CHECK-NEXT: } 2664; CHECK-NEXT: { 2665; CHECK-NEXT: v4 = vsplat(r6) 2666; CHECK-NEXT: r5 = #8 2667; CHECK-NEXT: r4 = #159 2668; CHECK-NEXT: v1.w = vadd(v1.w,v2.w) 2669; CHECK-NEXT: } 2670; CHECK-NEXT: { 2671; CHECK-NEXT: v7 = vsplat(r4) 2672; CHECK-NEXT: r3 = #23 2673; CHECK-NEXT: q2 = vcmp.eq(v0.w,v3.w) 2674; CHECK-NEXT: } 2675; CHECK-NEXT: { 2676; CHECK-NEXT: v6.w = vasl(v0.w,v1.w) 2677; CHECK-NEXT: } 2678; CHECK-NEXT: { 2679; CHECK-NEXT: v4.w = vadd(v6.w,v4.w) 2680; CHECK-NEXT: v5 = vand(v6,v5) 2681; CHECK-NEXT: } 2682; CHECK-NEXT: { 2683; CHECK-NEXT: v6.uw = vlsr(v6.uw,r5) 2684; CHECK-NEXT: q0 = vcmp.eq(v5.w,v3.w) 2685; CHECK-NEXT: q1 = vcmp.gt(v6.uw,v4.uw) 2686; CHECK-NEXT: } 2687; CHECK-NEXT: { 2688; CHECK-NEXT: v4.uw = vlsr(v4.uw,r5) 2689; CHECK-NEXT: v5 = vmux(q0,v3,v2) 2690; CHECK-NEXT: v2 = vmux(q1,v2,v3) 2691; CHECK-NEXT: } 2692; CHECK-NEXT: { 2693; CHECK-NEXT: v1.w = vsub(v2.w,v1.w) 2694; CHECK-NEXT: v29.w = vadd(v4.w,v5.w) 2695; CHECK-NEXT: q1 = vcmp.eq(v6.w,v4.w) 2696; CHECK-NEXT: } 2697; CHECK-NEXT: { 2698; CHECK-NEXT: v30.uw = vlsr(v4.uw,r2) 2699; CHECK-NEXT: v1.w = vadd(v1.w,v7.w) 2700; CHECK-NEXT: } 2701; CHECK-NEXT: { 2702; CHECK-NEXT: r2 = #64 2703; CHECK-NEXT: v2.uw = vlsr(v29.uw,r2) 2704; CHECK-NEXT: } 2705; CHECK-NEXT: { 2706; CHECK-NEXT: v1.w = vasl(v1.w,r3) 2707; CHECK-NEXT: q3 = vsetq(r2) 2708; CHECK-NEXT: v2 = vmux(q1,v2,v30) 2709; CHECK-NEXT: } 2710; CHECK-NEXT: { 2711; CHECK-NEXT: v31 = vor(v2,v1) 2712; CHECK-NEXT: } 2713; CHECK-NEXT: { 2714; CHECK-NEXT: v0 = vmux(q2,v3,v31) 2715; CHECK-NEXT: } 2716; CHECK-NEXT: { 2717; CHECK-NEXT: jumpr r31 2718; CHECK-NEXT: if (q3) vmem(r1+#0) = v0 2719; CHECK-NEXT: } 2720 %v0 = load <16 x i32>, ptr %a0, align 128 2721 %v1 = uitofp <16 x i32> %v0 to <16 x float> 2722 store <16 x float> %v1, ptr %a1, align 128 2723 ret void 2724} 2725 2726 
2727attributes #0 = { "target-features"="+v68,+hvxv68,+hvx-length128b,+hvx-qfloat" } 2728 2729