; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a %s -o - | FileCheck %s

; Each function below truncates float (or a vector of floats) to bfloat with
; fptrunc and stores the result through an addrspace(1) pointer. The checked
; assembly repeats the same five-instruction pattern per element
; (v_bfe_u32 of bit 16, v_add3_u32 with 0x7fff, v_or_b32 with 0x400000,
; v_cmp_u_f32 of the value against itself, v_cndmask_b32 on vcc).
; NOTE(review): this looks like a round-to-nearest-even expansion with NaN
; quieting - confirm against the AMDGPU lowering before relying on it.
; For vectors, each pair of converted elements is combined into one dword by
; v_perm_b32 with selector 0x7060302.
; The assertion lines are generated; regenerate them with the script named in
; the NOTE line instead of editing them by hand.

; Scalar case: one conversion, stored with global_store_short_d16_hi.
define void @scalar(float %num, ptr addrspace(1) %p) {
; CHECK-LABEL: scalar:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_mov_b32_e32 v3, v2
; CHECK-NEXT:    v_mov_b32_e32 v2, v1
; CHECK-NEXT:    v_bfe_u32 v1, v0, 16, 1
; CHECK-NEXT:    s_movk_i32 s4, 0x7fff
; CHECK-NEXT:    v_add3_u32 v1, v1, v0, s4
; CHECK-NEXT:    v_or_b32_e32 v4, 0x400000, v0
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
; CHECK-NEXT:    v_cndmask_b32_e32 v0, v1, v4, vcc
; CHECK-NEXT:    global_store_short_d16_hi v[2:3], v0, off
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    s_setpc_b64 s[30:31]
entry:
  %conv = fptrunc float %num to bfloat
  store bfloat %conv, ptr addrspace(1) %p, align 8
  ret void
}

; <2 x float>: two conversions combined by one v_perm_b32, stored with a
; single global_store_dword.
define void @v2(<2 x float> %num, ptr addrspace(1) %p) {
; CHECK-LABEL: v2:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_bfe_u32 v4, v0, 16, 1
; CHECK-NEXT:    s_movk_i32 s4, 0x7fff
; CHECK-NEXT:    v_add3_u32 v4, v4, v0, s4
; CHECK-NEXT:    v_or_b32_e32 v5, 0x400000, v0
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
; CHECK-NEXT:    v_cndmask_b32_e32 v0, v4, v5, vcc
; CHECK-NEXT:    v_bfe_u32 v4, v1, 16, 1
; CHECK-NEXT:    v_add3_u32 v4, v4, v1, s4
; CHECK-NEXT:    v_or_b32_e32 v5, 0x400000, v1
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
; CHECK-NEXT:    v_cndmask_b32_e32 v1, v4, v5, vcc
; CHECK-NEXT:    s_mov_b32 s4, 0x7060302
; CHECK-NEXT:    v_perm_b32 v0, v1, v0, s4
; CHECK-NEXT:    global_store_dword v[2:3], v0, off
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    s_setpc_b64 s[30:31]
entry:
  %conv = fptrunc <2 x float> %num to <2 x bfloat>
  store <2 x bfloat> %conv, ptr addrspace(1) %p, align 8
  ret void
}

; <3 x float>: the first two elements are combined and stored as a dword; the
; third is stored separately with global_store_short_d16_hi at offset:4.
define void @v3(<3 x float> %num, ptr addrspace(1) %p) {
; CHECK-LABEL: v3:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_mov_b32_e32 v5, v4
; CHECK-NEXT:    v_mov_b32_e32 v4, v3
; CHECK-NEXT:    v_bfe_u32 v3, v0, 16, 1
; CHECK-NEXT:    s_movk_i32 s4, 0x7fff
; CHECK-NEXT:    v_add3_u32 v3, v3, v0, s4
; CHECK-NEXT:    v_or_b32_e32 v6, 0x400000, v0
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
; CHECK-NEXT:    v_cndmask_b32_e32 v0, v3, v6, vcc
; CHECK-NEXT:    v_bfe_u32 v3, v1, 16, 1
; CHECK-NEXT:    v_add3_u32 v3, v3, v1, s4
; CHECK-NEXT:    v_or_b32_e32 v6, 0x400000, v1
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
; CHECK-NEXT:    v_cndmask_b32_e32 v1, v3, v6, vcc
; CHECK-NEXT:    s_mov_b32 s5, 0x7060302
; CHECK-NEXT:    v_perm_b32 v0, v1, v0, s5
; CHECK-NEXT:    v_bfe_u32 v1, v2, 16, 1
; CHECK-NEXT:    v_add3_u32 v1, v1, v2, s4
; CHECK-NEXT:    v_or_b32_e32 v3, 0x400000, v2
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
; CHECK-NEXT:    global_store_short_d16_hi v[4:5], v1, off offset:4
; CHECK-NEXT:    global_store_dword v[4:5], v0, off
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    s_setpc_b64 s[30:31]
entry:
  %conv = fptrunc <3 x float> %num to <3 x bfloat>
  store <3 x bfloat> %conv, ptr addrspace(1) %p, align 8
  ret void
}

; <4 x float>: two combined dwords, stored with a single global_store_dwordx2.
define void @v4(<4 x float> %num, ptr addrspace(1) %p) {
; CHECK-LABEL: v4:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_bfe_u32 v6, v2, 16, 1
; CHECK-NEXT:    s_movk_i32 s4, 0x7fff
; CHECK-NEXT:    v_add3_u32 v6, v6, v2, s4
; CHECK-NEXT:    v_or_b32_e32 v7, 0x400000, v2
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
; CHECK-NEXT:    v_cndmask_b32_e32 v2, v6, v7, vcc
; CHECK-NEXT:    v_bfe_u32 v6, v3, 16, 1
; CHECK-NEXT:    v_add3_u32 v6, v6, v3, s4
; CHECK-NEXT:    v_or_b32_e32 v7, 0x400000, v3
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
; CHECK-NEXT:    v_cndmask_b32_e32 v3, v6, v7, vcc
; CHECK-NEXT:    s_mov_b32 s5, 0x7060302
; CHECK-NEXT:    v_perm_b32 v3, v3, v2, s5
; CHECK-NEXT:    v_bfe_u32 v2, v0, 16, 1
; CHECK-NEXT:    v_add3_u32 v2, v2, v0, s4
; CHECK-NEXT:    v_or_b32_e32 v6, 0x400000, v0
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
; CHECK-NEXT:    v_cndmask_b32_e32 v0, v2, v6, vcc
; CHECK-NEXT:    v_bfe_u32 v2, v1, 16, 1
; CHECK-NEXT:    v_add3_u32 v2, v2, v1, s4
; CHECK-NEXT:    v_or_b32_e32 v6, 0x400000, v1
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
; CHECK-NEXT:    v_cndmask_b32_e32 v1, v2, v6, vcc
; CHECK-NEXT:    v_perm_b32 v2, v1, v0, s5
; CHECK-NEXT:    global_store_dwordx2 v[4:5], v[2:3], off
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    s_setpc_b64 s[30:31]
entry:
  %conv = fptrunc <4 x float> %num to <4 x bfloat>
  store <4 x bfloat> %conv, ptr addrspace(1) %p, align 8
  ret void
}

; <8 x float>: four combined dwords, stored with a single
; global_store_dwordx4.
define void @v8(<8 x float> %num, ptr addrspace(1) %p) {
; CHECK-LABEL: v8:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_bfe_u32 v10, v6, 16, 1
; CHECK-NEXT:    s_movk_i32 s4, 0x7fff
; CHECK-NEXT:    v_add3_u32 v10, v10, v6, s4
; CHECK-NEXT:    v_or_b32_e32 v11, 0x400000, v6
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
; CHECK-NEXT:    v_cndmask_b32_e32 v6, v10, v11, vcc
; CHECK-NEXT:    v_bfe_u32 v10, v7, 16, 1
; CHECK-NEXT:    v_add3_u32 v10, v10, v7, s4
; CHECK-NEXT:    v_or_b32_e32 v11, 0x400000, v7
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
; CHECK-NEXT:    v_cndmask_b32_e32 v7, v10, v11, vcc
; CHECK-NEXT:    s_mov_b32 s5, 0x7060302
; CHECK-NEXT:    v_perm_b32 v7, v7, v6, s5
; CHECK-NEXT:    v_bfe_u32 v6, v4, 16, 1
; CHECK-NEXT:    v_add3_u32 v6, v6, v4, s4
; CHECK-NEXT:    v_or_b32_e32 v10, 0x400000, v4
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
; CHECK-NEXT:    v_cndmask_b32_e32 v4, v6, v10, vcc
; CHECK-NEXT:    v_bfe_u32 v6, v5, 16, 1
; CHECK-NEXT:    v_add3_u32 v6, v6, v5, s4
; CHECK-NEXT:    v_or_b32_e32 v10, 0x400000, v5
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
; CHECK-NEXT:    v_cndmask_b32_e32 v5, v6, v10, vcc
; CHECK-NEXT:    v_perm_b32 v6, v5, v4, s5
; CHECK-NEXT:    v_bfe_u32 v4, v2, 16, 1
; CHECK-NEXT:    v_add3_u32 v4, v4, v2, s4
; CHECK-NEXT:    v_or_b32_e32 v5, 0x400000, v2
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
; CHECK-NEXT:    v_cndmask_b32_e32 v2, v4, v5, vcc
; CHECK-NEXT:    v_bfe_u32 v4, v3, 16, 1
; CHECK-NEXT:    v_add3_u32 v4, v4, v3, s4
; CHECK-NEXT:    v_or_b32_e32 v5, 0x400000, v3
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
; CHECK-NEXT:    v_cndmask_b32_e32 v3, v4, v5, vcc
; CHECK-NEXT:    v_perm_b32 v5, v3, v2, s5
; CHECK-NEXT:    v_bfe_u32 v2, v0, 16, 1
; CHECK-NEXT:    v_add3_u32 v2, v2, v0, s4
; CHECK-NEXT:    v_or_b32_e32 v3, 0x400000, v0
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
; CHECK-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
; CHECK-NEXT:    v_bfe_u32 v2, v1, 16, 1
; CHECK-NEXT:    v_add3_u32 v2, v2, v1, s4
; CHECK-NEXT:    v_or_b32_e32 v3, 0x400000, v1
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
; CHECK-NEXT:    v_cndmask_b32_e32 v1, v2, v3, vcc
; CHECK-NEXT:    v_perm_b32 v4, v1, v0, s5
; CHECK-NEXT:    global_store_dwordx4 v[8:9], v[4:7], off
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    s_setpc_b64 s[30:31]
entry:
  %conv = fptrunc <8 x float> %num to <8 x bfloat>
  store <8 x bfloat> %conv, ptr addrspace(1) %p, align 8
  ret void
}

; <16 x float>: eight combined dwords, stored with two global_store_dwordx4
; instructions (offset:16 and offset 0).
define void @v16(<16 x float> %num, ptr addrspace(1) %p) {
; CHECK-LABEL: v16:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_bfe_u32 v18, v6, 16, 1
; CHECK-NEXT:    s_movk_i32 s4, 0x7fff
; CHECK-NEXT:    v_add3_u32 v18, v18, v6, s4
; CHECK-NEXT:    v_or_b32_e32 v19, 0x400000, v6
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
; CHECK-NEXT:    v_cndmask_b32_e32 v6, v18, v19, vcc
; CHECK-NEXT:    v_bfe_u32 v18, v7, 16, 1
; CHECK-NEXT:    v_add3_u32 v18, v18, v7, s4
; CHECK-NEXT:    v_or_b32_e32 v19, 0x400000, v7
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
; CHECK-NEXT:    v_cndmask_b32_e32 v7, v18, v19, vcc
; CHECK-NEXT:    s_mov_b32 s5, 0x7060302
; CHECK-NEXT:    v_perm_b32 v7, v7, v6, s5
; CHECK-NEXT:    v_bfe_u32 v6, v4, 16, 1
; CHECK-NEXT:    v_add3_u32 v6, v6, v4, s4
; CHECK-NEXT:    v_or_b32_e32 v18, 0x400000, v4
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
; CHECK-NEXT:    v_cndmask_b32_e32 v4, v6, v18, vcc
; CHECK-NEXT:    v_bfe_u32 v6, v5, 16, 1
; CHECK-NEXT:    v_add3_u32 v6, v6, v5, s4
; CHECK-NEXT:    v_or_b32_e32 v18, 0x400000, v5
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
; CHECK-NEXT:    v_cndmask_b32_e32 v5, v6, v18, vcc
; CHECK-NEXT:    v_perm_b32 v6, v5, v4, s5
; CHECK-NEXT:    v_bfe_u32 v4, v2, 16, 1
; CHECK-NEXT:    v_add3_u32 v4, v4, v2, s4
; CHECK-NEXT:    v_or_b32_e32 v5, 0x400000, v2
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
; CHECK-NEXT:    v_cndmask_b32_e32 v2, v4, v5, vcc
; CHECK-NEXT:    v_bfe_u32 v4, v3, 16, 1
; CHECK-NEXT:    v_add3_u32 v4, v4, v3, s4
; CHECK-NEXT:    v_or_b32_e32 v5, 0x400000, v3
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
; CHECK-NEXT:    v_cndmask_b32_e32 v3, v4, v5, vcc
; CHECK-NEXT:    v_perm_b32 v5, v3, v2, s5
; CHECK-NEXT:    v_bfe_u32 v2, v0, 16, 1
; CHECK-NEXT:    v_add3_u32 v2, v2, v0, s4
; CHECK-NEXT:    v_or_b32_e32 v3, 0x400000, v0
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
; CHECK-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
; CHECK-NEXT:    v_bfe_u32 v2, v1, 16, 1
; CHECK-NEXT:    v_add3_u32 v2, v2, v1, s4
; CHECK-NEXT:    v_or_b32_e32 v3, 0x400000, v1
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
; CHECK-NEXT:    v_cndmask_b32_e32 v1, v2, v3, vcc
; CHECK-NEXT:    v_perm_b32 v4, v1, v0, s5
; CHECK-NEXT:    v_bfe_u32 v0, v14, 16, 1
; CHECK-NEXT:    v_add3_u32 v0, v0, v14, s4
; CHECK-NEXT:    v_or_b32_e32 v1, 0x400000, v14
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v14, v14
; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
; CHECK-NEXT:    v_bfe_u32 v1, v15, 16, 1
; CHECK-NEXT:    v_add3_u32 v1, v1, v15, s4
; CHECK-NEXT:    v_or_b32_e32 v2, 0x400000, v15
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v15, v15
; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
; CHECK-NEXT:    v_perm_b32 v3, v1, v0, s5
; CHECK-NEXT:    v_bfe_u32 v0, v12, 16, 1
; CHECK-NEXT:    v_add3_u32 v0, v0, v12, s4
; CHECK-NEXT:    v_or_b32_e32 v1, 0x400000, v12
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v12, v12
; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
; CHECK-NEXT:    v_bfe_u32 v1, v13, 16, 1
; CHECK-NEXT:    v_add3_u32 v1, v1, v13, s4
; CHECK-NEXT:    v_or_b32_e32 v2, 0x400000, v13
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v13, v13
; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
; CHECK-NEXT:    v_perm_b32 v2, v1, v0, s5
; CHECK-NEXT:    v_bfe_u32 v0, v10, 16, 1
; CHECK-NEXT:    v_add3_u32 v0, v0, v10, s4
; CHECK-NEXT:    v_or_b32_e32 v1, 0x400000, v10
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v10, v10
; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
; CHECK-NEXT:    v_bfe_u32 v1, v11, 16, 1
; CHECK-NEXT:    v_add3_u32 v1, v1, v11, s4
; CHECK-NEXT:    v_or_b32_e32 v10, 0x400000, v11
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v11, v11
; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v10, vcc
; CHECK-NEXT:    v_perm_b32 v1, v1, v0, s5
; CHECK-NEXT:    v_bfe_u32 v0, v8, 16, 1
; CHECK-NEXT:    v_add3_u32 v0, v0, v8, s4
; CHECK-NEXT:    v_or_b32_e32 v10, 0x400000, v8
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v8, v8
; CHECK-NEXT:    v_bfe_u32 v8, v9, 16, 1
; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
; CHECK-NEXT:    v_add3_u32 v8, v8, v9, s4
; CHECK-NEXT:    v_or_b32_e32 v10, 0x400000, v9
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v9, v9
; CHECK-NEXT:    v_cndmask_b32_e32 v8, v8, v10, vcc
; CHECK-NEXT:    v_perm_b32 v0, v8, v0, s5
; CHECK-NEXT:    global_store_dwordx4 v[16:17], v[0:3], off offset:16
; CHECK-NEXT:    global_store_dwordx4 v[16:17], v[4:7], off
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    s_setpc_b64 s[30:31]
entry:
  %conv = fptrunc <16 x float> %num to <16 x bfloat>
  store <16 x bfloat> %conv, ptr addrspace(1) %p, align 8
  ret void
}

; <32 x float>: v31, v32 and v33 are first loaded from the stack
; (buffer_load_dword relative to s32); v31 is then converted as the last
; element and v[32:33] is used as the store address. The sixteen combined
; dwords are stored with four global_store_dwordx4 instructions.
define void @v32(<32 x float> %num, ptr addrspace(1) %p) {
; CHECK-LABEL: v32:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:8
; CHECK-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:4
; CHECK-NEXT:    buffer_load_dword v31, off, s[0:3], s32
; CHECK-NEXT:    v_bfe_u32 v34, v6, 16, 1
; CHECK-NEXT:    s_movk_i32 s4, 0x7fff
; CHECK-NEXT:    v_add3_u32 v34, v34, v6, s4
; CHECK-NEXT:    v_or_b32_e32 v35, 0x400000, v6
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v6, v6
; CHECK-NEXT:    v_cndmask_b32_e32 v6, v34, v35, vcc
; CHECK-NEXT:    v_bfe_u32 v34, v7, 16, 1
; CHECK-NEXT:    v_add3_u32 v34, v34, v7, s4
; CHECK-NEXT:    v_or_b32_e32 v35, 0x400000, v7
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v7, v7
; CHECK-NEXT:    v_cndmask_b32_e32 v7, v34, v35, vcc
; CHECK-NEXT:    s_mov_b32 s5, 0x7060302
; CHECK-NEXT:    v_perm_b32 v7, v7, v6, s5
; CHECK-NEXT:    v_bfe_u32 v6, v4, 16, 1
; CHECK-NEXT:    v_add3_u32 v6, v6, v4, s4
; CHECK-NEXT:    v_or_b32_e32 v34, 0x400000, v4
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v4, v4
; CHECK-NEXT:    v_cndmask_b32_e32 v4, v6, v34, vcc
; CHECK-NEXT:    v_bfe_u32 v6, v5, 16, 1
; CHECK-NEXT:    v_add3_u32 v6, v6, v5, s4
; CHECK-NEXT:    v_or_b32_e32 v34, 0x400000, v5
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v5, v5
; CHECK-NEXT:    v_cndmask_b32_e32 v5, v6, v34, vcc
; CHECK-NEXT:    v_perm_b32 v6, v5, v4, s5
; CHECK-NEXT:    v_bfe_u32 v4, v2, 16, 1
; CHECK-NEXT:    v_add3_u32 v4, v4, v2, s4
; CHECK-NEXT:    v_or_b32_e32 v5, 0x400000, v2
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
; CHECK-NEXT:    v_cndmask_b32_e32 v2, v4, v5, vcc
; CHECK-NEXT:    v_bfe_u32 v4, v3, 16, 1
; CHECK-NEXT:    v_add3_u32 v4, v4, v3, s4
; CHECK-NEXT:    v_or_b32_e32 v5, 0x400000, v3
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v3, v3
; CHECK-NEXT:    v_cndmask_b32_e32 v3, v4, v5, vcc
; CHECK-NEXT:    v_perm_b32 v5, v3, v2, s5
; CHECK-NEXT:    v_bfe_u32 v2, v0, 16, 1
; CHECK-NEXT:    v_add3_u32 v2, v2, v0, s4
; CHECK-NEXT:    v_or_b32_e32 v3, 0x400000, v0
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
; CHECK-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
; CHECK-NEXT:    v_bfe_u32 v2, v1, 16, 1
; CHECK-NEXT:    v_add3_u32 v2, v2, v1, s4
; CHECK-NEXT:    v_or_b32_e32 v3, 0x400000, v1
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
; CHECK-NEXT:    v_cndmask_b32_e32 v1, v2, v3, vcc
; CHECK-NEXT:    v_perm_b32 v4, v1, v0, s5
; CHECK-NEXT:    v_bfe_u32 v0, v14, 16, 1
; CHECK-NEXT:    v_add3_u32 v0, v0, v14, s4
; CHECK-NEXT:    v_or_b32_e32 v1, 0x400000, v14
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v14, v14
; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
; CHECK-NEXT:    v_bfe_u32 v1, v15, 16, 1
; CHECK-NEXT:    v_add3_u32 v1, v1, v15, s4
; CHECK-NEXT:    v_or_b32_e32 v2, 0x400000, v15
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v15, v15
; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
; CHECK-NEXT:    v_perm_b32 v3, v1, v0, s5
; CHECK-NEXT:    v_bfe_u32 v0, v12, 16, 1
; CHECK-NEXT:    v_add3_u32 v0, v0, v12, s4
; CHECK-NEXT:    v_or_b32_e32 v1, 0x400000, v12
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v12, v12
; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
; CHECK-NEXT:    v_bfe_u32 v1, v13, 16, 1
; CHECK-NEXT:    v_add3_u32 v1, v1, v13, s4
; CHECK-NEXT:    v_or_b32_e32 v2, 0x400000, v13
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v13, v13
; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
; CHECK-NEXT:    v_perm_b32 v2, v1, v0, s5
; CHECK-NEXT:    v_bfe_u32 v0, v10, 16, 1
; CHECK-NEXT:    v_add3_u32 v0, v0, v10, s4
; CHECK-NEXT:    v_or_b32_e32 v1, 0x400000, v10
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v10, v10
; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
; CHECK-NEXT:    v_bfe_u32 v1, v11, 16, 1
; CHECK-NEXT:    v_add3_u32 v1, v1, v11, s4
; CHECK-NEXT:    v_or_b32_e32 v10, 0x400000, v11
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v11, v11
; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v10, vcc
; CHECK-NEXT:    v_perm_b32 v1, v1, v0, s5
; CHECK-NEXT:    v_bfe_u32 v0, v8, 16, 1
; CHECK-NEXT:    v_add3_u32 v0, v0, v8, s4
; CHECK-NEXT:    v_or_b32_e32 v10, 0x400000, v8
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v8, v8
; CHECK-NEXT:    v_bfe_u32 v8, v9, 16, 1
; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
; CHECK-NEXT:    v_add3_u32 v8, v8, v9, s4
; CHECK-NEXT:    v_or_b32_e32 v10, 0x400000, v9
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v9, v9
; CHECK-NEXT:    v_cndmask_b32_e32 v8, v8, v10, vcc
; CHECK-NEXT:    v_perm_b32 v0, v8, v0, s5
; CHECK-NEXT:    v_bfe_u32 v8, v22, 16, 1
; CHECK-NEXT:    v_add3_u32 v8, v8, v22, s4
; CHECK-NEXT:    v_or_b32_e32 v9, 0x400000, v22
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v22, v22
; CHECK-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc
; CHECK-NEXT:    v_bfe_u32 v9, v23, 16, 1
; CHECK-NEXT:    v_add3_u32 v9, v9, v23, s4
; CHECK-NEXT:    v_or_b32_e32 v10, 0x400000, v23
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v23, v23
; CHECK-NEXT:    v_cndmask_b32_e32 v9, v9, v10, vcc
; CHECK-NEXT:    v_perm_b32 v11, v9, v8, s5
; CHECK-NEXT:    v_bfe_u32 v8, v20, 16, 1
; CHECK-NEXT:    v_add3_u32 v8, v8, v20, s4
; CHECK-NEXT:    v_or_b32_e32 v9, 0x400000, v20
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v20, v20
; CHECK-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc
; CHECK-NEXT:    v_bfe_u32 v9, v21, 16, 1
; CHECK-NEXT:    v_add3_u32 v9, v9, v21, s4
; CHECK-NEXT:    v_or_b32_e32 v10, 0x400000, v21
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v21, v21
; CHECK-NEXT:    v_cndmask_b32_e32 v9, v9, v10, vcc
; CHECK-NEXT:    v_perm_b32 v10, v9, v8, s5
; CHECK-NEXT:    v_bfe_u32 v8, v18, 16, 1
; CHECK-NEXT:    v_add3_u32 v8, v8, v18, s4
; CHECK-NEXT:    v_or_b32_e32 v9, 0x400000, v18
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v18, v18
; CHECK-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc
; CHECK-NEXT:    v_bfe_u32 v9, v19, 16, 1
; CHECK-NEXT:    v_add3_u32 v9, v9, v19, s4
; CHECK-NEXT:    v_or_b32_e32 v12, 0x400000, v19
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v19, v19
; CHECK-NEXT:    v_cndmask_b32_e32 v9, v9, v12, vcc
; CHECK-NEXT:    v_perm_b32 v9, v9, v8, s5
; CHECK-NEXT:    v_bfe_u32 v8, v16, 16, 1
; CHECK-NEXT:    v_add3_u32 v8, v8, v16, s4
; CHECK-NEXT:    v_or_b32_e32 v12, 0x400000, v16
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v16, v16
; CHECK-NEXT:    v_cndmask_b32_e32 v8, v8, v12, vcc
; CHECK-NEXT:    v_bfe_u32 v12, v17, 16, 1
; CHECK-NEXT:    v_add3_u32 v12, v12, v17, s4
; CHECK-NEXT:    v_or_b32_e32 v13, 0x400000, v17
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v17, v17
; CHECK-NEXT:    v_cndmask_b32_e32 v12, v12, v13, vcc
; CHECK-NEXT:    v_perm_b32 v8, v12, v8, s5
; CHECK-NEXT:    v_bfe_u32 v12, v30, 16, 1
; CHECK-NEXT:    v_add3_u32 v12, v12, v30, s4
; CHECK-NEXT:    v_or_b32_e32 v13, 0x400000, v30
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v30, v30
; CHECK-NEXT:    v_cndmask_b32_e32 v12, v12, v13, vcc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_bfe_u32 v13, v31, 16, 1
; CHECK-NEXT:    v_add3_u32 v13, v13, v31, s4
; CHECK-NEXT:    v_or_b32_e32 v14, 0x400000, v31
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v31, v31
; CHECK-NEXT:    v_cndmask_b32_e32 v13, v13, v14, vcc
; CHECK-NEXT:    v_perm_b32 v15, v13, v12, s5
; CHECK-NEXT:    v_bfe_u32 v12, v28, 16, 1
; CHECK-NEXT:    v_add3_u32 v12, v12, v28, s4
; CHECK-NEXT:    v_or_b32_e32 v13, 0x400000, v28
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v28, v28
; CHECK-NEXT:    v_cndmask_b32_e32 v12, v12, v13, vcc
; CHECK-NEXT:    v_bfe_u32 v13, v29, 16, 1
; CHECK-NEXT:    v_add3_u32 v13, v13, v29, s4
; CHECK-NEXT:    v_or_b32_e32 v14, 0x400000, v29
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v29, v29
; CHECK-NEXT:    v_cndmask_b32_e32 v13, v13, v14, vcc
; CHECK-NEXT:    v_perm_b32 v14, v13, v12, s5
; CHECK-NEXT:    v_bfe_u32 v12, v26, 16, 1
; CHECK-NEXT:    v_add3_u32 v12, v12, v26, s4
; CHECK-NEXT:    v_or_b32_e32 v13, 0x400000, v26
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v26, v26
; CHECK-NEXT:    v_cndmask_b32_e32 v12, v12, v13, vcc
; CHECK-NEXT:    v_bfe_u32 v13, v27, 16, 1
; CHECK-NEXT:    v_add3_u32 v13, v13, v27, s4
; CHECK-NEXT:    v_or_b32_e32 v16, 0x400000, v27
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v27, v27
; CHECK-NEXT:    v_cndmask_b32_e32 v13, v13, v16, vcc
; CHECK-NEXT:    v_perm_b32 v13, v13, v12, s5
; CHECK-NEXT:    v_bfe_u32 v12, v24, 16, 1
; CHECK-NEXT:    v_add3_u32 v12, v12, v24, s4
; CHECK-NEXT:    v_or_b32_e32 v16, 0x400000, v24
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v24, v24
; CHECK-NEXT:    v_cndmask_b32_e32 v12, v12, v16, vcc
; CHECK-NEXT:    v_bfe_u32 v16, v25, 16, 1
; CHECK-NEXT:    v_add3_u32 v16, v16, v25, s4
; CHECK-NEXT:    v_or_b32_e32 v17, 0x400000, v25
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v25, v25
; CHECK-NEXT:    v_cndmask_b32_e32 v16, v16, v17, vcc
; CHECK-NEXT:    v_perm_b32 v12, v16, v12, s5
; CHECK-NEXT:    global_store_dwordx4 v[32:33], v[12:15], off offset:48
; CHECK-NEXT:    global_store_dwordx4 v[32:33], v[8:11], off offset:32
; CHECK-NEXT:    global_store_dwordx4 v[32:33], v[0:3], off offset:16
; CHECK-NEXT:    global_store_dwordx4 v[32:33], v[4:7], off
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    s_setpc_b64 s[30:31]
entry:
  %conv = fptrunc <32 x float> %num to <32 x bfloat>
  store <32 x bfloat> %conv, ptr addrspace(1) %p, align 8
  ret void
}