1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GPRIDX %s 3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s 4; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s 5; RUN: not --crash llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERR %s 6 7; FIXME: Need constant bus fixup pre-gfx10 for movrel 8; ERR: Bad machine code: VOP* instruction violates constant bus restriction 9 10define amdgpu_ps <8 x i32> @dyn_insertelement_v8i32_s_s_s(<8 x i32> inreg %vec, i32 inreg %val, i32 inreg %idx) { 11; GPRIDX-LABEL: dyn_insertelement_v8i32_s_s_s: 12; GPRIDX: ; %bb.0: ; %entry 13; GPRIDX-NEXT: s_cmp_eq_u32 s11, 0 14; GPRIDX-NEXT: s_cselect_b32 s0, s10, s2 15; GPRIDX-NEXT: s_cmp_eq_u32 s11, 1 16; GPRIDX-NEXT: s_cselect_b32 s1, s10, s3 17; GPRIDX-NEXT: s_cmp_eq_u32 s11, 2 18; GPRIDX-NEXT: s_cselect_b32 s2, s10, s4 19; GPRIDX-NEXT: s_cmp_eq_u32 s11, 3 20; GPRIDX-NEXT: s_cselect_b32 s3, s10, s5 21; GPRIDX-NEXT: s_cmp_eq_u32 s11, 4 22; GPRIDX-NEXT: s_cselect_b32 s4, s10, s6 23; GPRIDX-NEXT: s_cmp_eq_u32 s11, 5 24; GPRIDX-NEXT: s_cselect_b32 s5, s10, s7 25; GPRIDX-NEXT: s_cmp_eq_u32 s11, 6 26; GPRIDX-NEXT: s_cselect_b32 s6, s10, s8 27; GPRIDX-NEXT: s_cmp_eq_u32 s11, 7 28; GPRIDX-NEXT: s_cselect_b32 s7, s10, s9 29; GPRIDX-NEXT: ; return to shader part epilog 30; 31; GFX10PLUS-LABEL: dyn_insertelement_v8i32_s_s_s: 32; GFX10PLUS: ; %bb.0: ; %entry 33; GFX10PLUS-NEXT: s_mov_b32 s0, s2 34; GFX10PLUS-NEXT: s_mov_b32 m0, s11 35; GFX10PLUS-NEXT: s_mov_b32 s1, s3 36; GFX10PLUS-NEXT: s_mov_b32 s2, s4 37; GFX10PLUS-NEXT: s_mov_b32 s3, s5 38; GFX10PLUS-NEXT: s_mov_b32 s4, s6 39; GFX10PLUS-NEXT: s_mov_b32 s5, s7 40; GFX10PLUS-NEXT: s_mov_b32 s6, s8 41; GFX10PLUS-NEXT: s_mov_b32 s7, s9 42; GFX10PLUS-NEXT: s_movreld_b32 s0, s10 43; GFX10PLUS-NEXT: ; return to shader part epilog 44entry: 45 %insert = insertelement <8 x i32> %vec, i32 %val, i32 %idx 46 ret <8 x i32> %insert 47} 48 49define amdgpu_ps <8 x ptr addrspace(3)> @dyn_insertelement_v8p3i8_s_s_s(<8 x ptr addrspace(3)> inreg %vec, ptr addrspace(3) inreg %val, i32 inreg %idx) { 50; GPRIDX-LABEL: dyn_insertelement_v8p3i8_s_s_s: 51; GPRIDX: ; %bb.0: ; %entry 52; GPRIDX-NEXT: s_cmp_eq_u32 s11, 0 53; GPRIDX-NEXT: s_cselect_b32 s0, s10, s2 54; GPRIDX-NEXT: s_cmp_eq_u32 s11, 1 55; GPRIDX-NEXT: s_cselect_b32 s1, s10, s3 56; GPRIDX-NEXT: s_cmp_eq_u32 s11, 2 57; GPRIDX-NEXT: s_cselect_b32 s2, s10, s4 58; GPRIDX-NEXT: s_cmp_eq_u32 s11, 3 59; GPRIDX-NEXT: s_cselect_b32 s3, s10, s5 60; GPRIDX-NEXT: s_cmp_eq_u32 s11, 4 61; GPRIDX-NEXT: s_cselect_b32 s4, s10, s6 62; GPRIDX-NEXT: s_cmp_eq_u32 s11, 5 63; GPRIDX-NEXT: s_cselect_b32 s5, s10, s7 64; GPRIDX-NEXT: s_cmp_eq_u32 s11, 6 65; GPRIDX-NEXT: s_cselect_b32 s6, s10, s8 66; GPRIDX-NEXT: s_cmp_eq_u32 s11, 7 67; GPRIDX-NEXT: s_cselect_b32 s7, s10, s9 68; GPRIDX-NEXT: ; return to shader part epilog 69; 70; GFX10PLUS-LABEL: dyn_insertelement_v8p3i8_s_s_s: 71; GFX10PLUS: ; %bb.0: ; %entry 72; GFX10PLUS-NEXT: s_mov_b32 s0, s2 73; GFX10PLUS-NEXT: s_mov_b32 m0, s11 74; GFX10PLUS-NEXT: s_mov_b32 s1, s3 75; GFX10PLUS-NEXT: s_mov_b32 s2, s4 76; GFX10PLUS-NEXT: s_mov_b32 s3, s5 77; GFX10PLUS-NEXT: s_mov_b32 s4, s6 78; GFX10PLUS-NEXT: s_mov_b32 s5, s7 79; GFX10PLUS-NEXT: s_mov_b32 s6, s8 80; GFX10PLUS-NEXT: s_mov_b32 s7, s9 81; GFX10PLUS-NEXT: s_movreld_b32 s0, s10 82; GFX10PLUS-NEXT: ; return to shader part epilog 83entry: 84 %insert = insertelement <8 x ptr addrspace(3)> %vec, ptr addrspace(3) %val, i32 %idx 85 ret <8 x ptr addrspace(3)> %insert 86} 87 88define <8 x float> @dyn_insertelement_v8f32_const_s_v_v(float %val, i32 %idx) { 89; GPRIDX-LABEL: dyn_insertelement_v8f32_const_s_v_v: 90; GPRIDX: ; %bb.0: ; %entry 91; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 92; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 93; GPRIDX-NEXT: v_cndmask_b32_e32 v8, 1.0, v0, vcc 94; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 95; GPRIDX-NEXT: v_mov_b32_e32 v2, 0x40400000 96; GPRIDX-NEXT: v_cndmask_b32_e32 v9, 2.0, v0, vcc 97; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v1 98; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc 99; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v1 100; GPRIDX-NEXT: v_mov_b32_e32 v4, 0x40a00000 101; GPRIDX-NEXT: v_cndmask_b32_e32 v3, 4.0, v0, vcc 102; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v1 103; GPRIDX-NEXT: v_mov_b32_e32 v5, 0x40c00000 104; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc 105; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v1 106; GPRIDX-NEXT: v_mov_b32_e32 v6, 0x40e00000 107; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v0, vcc 108; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v1 109; GPRIDX-NEXT: v_mov_b32_e32 v7, 0x41000000 110; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v0, vcc 111; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v1 112; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v7, v0, vcc 113; GPRIDX-NEXT: v_mov_b32_e32 v0, v8 114; GPRIDX-NEXT: v_mov_b32_e32 v1, v9 115; GPRIDX-NEXT: s_setpc_b64 s[30:31] 116; 117; GFX10-LABEL: dyn_insertelement_v8f32_const_s_v_v: 118; GFX10: ; %bb.0: ; %entry 119; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 120; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 121; GFX10-NEXT: v_cndmask_b32_e32 v8, 1.0, v0, vcc_lo 122; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1 123; GFX10-NEXT: v_cndmask_b32_e32 v9, 2.0, v0, vcc_lo 124; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1 125; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x40400000, v0, vcc_lo 126; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1 127; GFX10-NEXT: v_cndmask_b32_e32 v3, 4.0, v0, vcc_lo 128; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v1 129; GFX10-NEXT: v_cndmask_b32_e32 v4, 0x40a00000, v0, vcc_lo 130; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v1 131; GFX10-NEXT: v_cndmask_b32_e32 v5, 0x40c00000, v0, vcc_lo 132; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v1 133; GFX10-NEXT: v_cndmask_b32_e32 v6, 0x40e00000, v0, vcc_lo 134; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v1 135; GFX10-NEXT: v_mov_b32_e32 v1, v9 136; GFX10-NEXT: v_cndmask_b32_e32 v7, 0x41000000, v0, vcc_lo 137; GFX10-NEXT: v_mov_b32_e32 v0, v8 138; GFX10-NEXT: s_setpc_b64 s[30:31] 139; 140; GFX11-LABEL: dyn_insertelement_v8f32_const_s_v_v: 141; GFX11: ; %bb.0: ; %entry 142; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 143; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 144; GFX11-NEXT: v_cndmask_b32_e32 v8, 1.0, v0, vcc_lo 145; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1 146; GFX11-NEXT: v_cndmask_b32_e32 v9, 2.0, v0, vcc_lo 147; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1 148; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x40400000, v0, vcc_lo 149; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1 150; GFX11-NEXT: v_cndmask_b32_e32 v3, 4.0, v0, vcc_lo 151; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v1 152; GFX11-NEXT: v_cndmask_b32_e32 v4, 0x40a00000, v0, vcc_lo 153; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v1 154; GFX11-NEXT: v_cndmask_b32_e32 v5, 0x40c00000, v0, vcc_lo 155; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v1 156; GFX11-NEXT: v_cndmask_b32_e32 v6, 0x40e00000, v0, vcc_lo 157; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v1 158; GFX11-NEXT: v_mov_b32_e32 v1, v9 159; GFX11-NEXT: v_dual_cndmask_b32 v7, 0x41000000, v0 :: v_dual_mov_b32 v0, v8 160; GFX11-NEXT: s_setpc_b64 s[30:31] 161entry: 162 %insert = insertelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, float %val, i32 %idx 163 ret <8 x float> %insert 164} 165 166define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_s_s_v(<8 x float> inreg %vec, float inreg %val, i32 %idx) { 167; GPRIDX-LABEL: dyn_insertelement_v8f32_s_s_v: 168; GPRIDX: ; %bb.0: ; %entry 169; GPRIDX-NEXT: v_mov_b32_e32 v1, s2 170; GPRIDX-NEXT: v_mov_b32_e32 v10, s10 171; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 172; GPRIDX-NEXT: v_mov_b32_e32 v2, s3 173; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v1, v10, vcc 174; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 175; GPRIDX-NEXT: v_mov_b32_e32 v3, s4 176; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v2, v10, vcc 177; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 178; GPRIDX-NEXT: v_mov_b32_e32 v4, s5 179; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v3, v10, vcc 180; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 181; GPRIDX-NEXT: v_mov_b32_e32 v5, s6 182; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v4, v10, vcc 183; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 184; GPRIDX-NEXT: v_mov_b32_e32 v6, s7 185; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v5, v10, vcc 186; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 187; GPRIDX-NEXT: v_mov_b32_e32 v7, s8 188; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v6, v10, vcc 189; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0 190; GPRIDX-NEXT: v_mov_b32_e32 v9, s9 191; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v7, v10, vcc 192; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0 193; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v9, v10, vcc 194; GPRIDX-NEXT: v_mov_b32_e32 v0, v8 195; GPRIDX-NEXT: ; return to shader part epilog 196; 197; GFX10-LABEL: dyn_insertelement_v8f32_s_s_v: 198; GFX10: ; %bb.0: ; %entry 199; GFX10-NEXT: v_mov_b32_e32 v7, s10 200; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 201; GFX10-NEXT: v_cndmask_b32_e32 v8, s2, v7, vcc_lo 202; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 203; GFX10-NEXT: v_cndmask_b32_e32 v1, s3, v7, vcc_lo 204; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 205; GFX10-NEXT: v_cndmask_b32_e32 v2, s4, v7, vcc_lo 206; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 207; GFX10-NEXT: v_cndmask_b32_e32 v3, s5, v7, vcc_lo 208; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 209; GFX10-NEXT: v_cndmask_b32_e32 v4, s6, v7, vcc_lo 210; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 211; GFX10-NEXT: v_cndmask_b32_e32 v5, s7, v7, vcc_lo 212; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 213; GFX10-NEXT: v_cndmask_b32_e32 v6, s8, v7, vcc_lo 214; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0 215; GFX10-NEXT: v_mov_b32_e32 v0, v8 216; GFX10-NEXT: v_cndmask_b32_e32 v7, s9, v7, vcc_lo 217; GFX10-NEXT: ; return to shader part epilog 218; 219; GFX11-LABEL: dyn_insertelement_v8f32_s_s_v: 220; GFX11: ; %bb.0: ; %entry 221; GFX11-NEXT: v_mov_b32_e32 v7, s10 222; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 223; GFX11-NEXT: v_cndmask_b32_e32 v8, s2, v7, vcc_lo 224; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 225; GFX11-NEXT: v_cndmask_b32_e32 v1, s3, v7, vcc_lo 226; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 227; GFX11-NEXT: v_cndmask_b32_e32 v2, s4, v7, vcc_lo 228; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 229; GFX11-NEXT: v_cndmask_b32_e32 v3, s5, v7, vcc_lo 230; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 231; GFX11-NEXT: v_cndmask_b32_e32 v4, s6, v7, vcc_lo 232; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 233; GFX11-NEXT: v_cndmask_b32_e32 v5, s7, v7, vcc_lo 234; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 235; GFX11-NEXT: v_cndmask_b32_e32 v6, s8, v7, vcc_lo 236; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0 237; GFX11-NEXT: v_dual_mov_b32 v0, v8 :: v_dual_cndmask_b32 v7, s9, v7 238; GFX11-NEXT: ; return to shader part epilog 239entry: 240 %insert = insertelement <8 x float> %vec, float %val, i32 %idx 241 ret <8 x float> %insert 242} 243 244define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_s_v_s(<8 x float> inreg %vec, float %val, i32 inreg %idx) { 245; GPRIDX-LABEL: dyn_insertelement_v8f32_s_v_s: 246; GPRIDX: ; %bb.0: ; %entry 247; GPRIDX-NEXT: v_mov_b32_e32 v1, s2 248; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s10, 0 249; GPRIDX-NEXT: v_mov_b32_e32 v2, s3 250; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v1, v0, vcc 251; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s10, 1 252; GPRIDX-NEXT: v_mov_b32_e32 v3, s4 253; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v2, v0, vcc 254; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s10, 2 255; GPRIDX-NEXT: v_mov_b32_e32 v4, s5 256; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v3, v0, vcc 257; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s10, 3 258; GPRIDX-NEXT: v_mov_b32_e32 v5, s6 259; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v4, v0, vcc 260; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s10, 4 261; GPRIDX-NEXT: v_mov_b32_e32 v6, s7 262; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v5, v0, vcc 263; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s10, 5 264; GPRIDX-NEXT: v_mov_b32_e32 v7, s8 265; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v6, v0, vcc 266; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s10, 6 267; GPRIDX-NEXT: v_mov_b32_e32 v9, s9 268; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v7, v0, vcc 269; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s10, 7 270; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v9, v0, vcc 271; GPRIDX-NEXT: v_mov_b32_e32 v0, v8 272; GPRIDX-NEXT: ; return to shader part epilog 273; 274; GFX10-LABEL: dyn_insertelement_v8f32_s_v_s: 275; GFX10: ; %bb.0: ; %entry 276; GFX10-NEXT: s_mov_b32 s0, s2 277; GFX10-NEXT: s_mov_b32 s1, s3 278; GFX10-NEXT: s_mov_b32 s2, s4 279; GFX10-NEXT: s_mov_b32 s3, s5 280; GFX10-NEXT: s_mov_b32 s4, s6 281; GFX10-NEXT: s_mov_b32 s5, s7 282; GFX10-NEXT: s_mov_b32 s6, s8 283; GFX10-NEXT: s_mov_b32 s7, s9 284; GFX10-NEXT: v_mov_b32_e32 v8, v0 285; GFX10-NEXT: v_mov_b32_e32 v0, s0 286; GFX10-NEXT: s_mov_b32 m0, s10 287; GFX10-NEXT: v_mov_b32_e32 v1, s1 288; GFX10-NEXT: v_mov_b32_e32 v2, s2 289; GFX10-NEXT: v_mov_b32_e32 v3, s3 290; GFX10-NEXT: v_mov_b32_e32 v4, s4 291; GFX10-NEXT: v_mov_b32_e32 v5, s5 292; GFX10-NEXT: v_mov_b32_e32 v6, s6 293; GFX10-NEXT: v_mov_b32_e32 v7, s7 294; GFX10-NEXT: v_movreld_b32_e32 v0, v8 295; GFX10-NEXT: ; return to shader part epilog 296; 297; GFX11-LABEL: dyn_insertelement_v8f32_s_v_s: 298; GFX11: ; %bb.0: ; %entry 299; GFX11-NEXT: s_mov_b32 s0, s2 300; GFX11-NEXT: s_mov_b32 s1, s3 301; GFX11-NEXT: s_mov_b32 s2, s4 302; GFX11-NEXT: s_mov_b32 s3, s5 303; GFX11-NEXT: s_mov_b32 s4, s6 304; GFX11-NEXT: s_mov_b32 s5, s7 305; GFX11-NEXT: s_mov_b32 s6, s8 306; GFX11-NEXT: s_mov_b32 s7, s9 307; GFX11-NEXT: v_mov_b32_e32 v8, v0 308; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3 309; GFX11-NEXT: s_mov_b32 m0, s10 310; GFX11-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2 311; GFX11-NEXT: v_dual_mov_b32 v5, s5 :: v_dual_mov_b32 v4, s4 312; GFX11-NEXT: v_dual_mov_b32 v7, s7 :: v_dual_mov_b32 v6, s6 313; GFX11-NEXT: v_movreld_b32_e32 v0, v8 314; GFX11-NEXT: ; return to shader part epilog 315entry: 316 %insert = insertelement <8 x float> %vec, float %val, i32 %idx 317 ret <8 x float> %insert 318} 319 320define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_s_s(<8 x float> %vec, float inreg %val, i32 inreg %idx) { 321; GPRIDX-LABEL: dyn_insertelement_v8f32_v_s_s: 322; GPRIDX: ; %bb.0: ; %entry 323; GPRIDX-NEXT: v_mov_b32_e32 v8, s2 324; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s3, 0 325; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc 326; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s3, 1 327; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc 328; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s3, 2 329; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc 330; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s3, 3 331; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc 332; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s3, 4 333; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc 334; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s3, 5 335; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v8, vcc 336; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s3, 6 337; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v8, vcc 338; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s3, 7 339; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v7, v8, vcc 340; GPRIDX-NEXT: ; return to shader part epilog 341; 342; GFX10PLUS-LABEL: dyn_insertelement_v8f32_v_s_s: 343; GFX10PLUS: ; %bb.0: ; %entry 344; GFX10PLUS-NEXT: s_mov_b32 m0, s3 345; GFX10PLUS-NEXT: v_movreld_b32_e32 v0, s2 346; GFX10PLUS-NEXT: ; return to shader part epilog 347entry: 348 %insert = insertelement <8 x float> %vec, float %val, i32 %idx 349 ret <8 x float> %insert 350} 351 352define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_s_v_v(<8 x float> inreg %vec, float %val, i32 %idx) { 353; GPRIDX-LABEL: dyn_insertelement_v8f32_s_v_v: 354; GPRIDX: ; %bb.0: ; %entry 355; GPRIDX-NEXT: v_mov_b32_e32 v2, s2 356; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 357; GPRIDX-NEXT: v_mov_b32_e32 v3, s3 358; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v2, v0, vcc 359; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 360; GPRIDX-NEXT: v_mov_b32_e32 v4, s4 361; GPRIDX-NEXT: v_cndmask_b32_e32 v9, v3, v0, vcc 362; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v1 363; GPRIDX-NEXT: v_mov_b32_e32 v5, s5 364; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v4, v0, vcc 365; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v1 366; GPRIDX-NEXT: v_mov_b32_e32 v6, s6 367; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v5, v0, vcc 368; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v1 369; GPRIDX-NEXT: v_mov_b32_e32 v7, s7 370; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v6, v0, vcc 371; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v1 372; GPRIDX-NEXT: v_mov_b32_e32 v10, s8 373; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v7, v0, vcc 374; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v1 375; GPRIDX-NEXT: v_mov_b32_e32 v11, s9 376; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v10, v0, vcc 377; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v1 378; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v11, v0, vcc 379; GPRIDX-NEXT: v_mov_b32_e32 v0, v8 380; GPRIDX-NEXT: v_mov_b32_e32 v1, v9 381; GPRIDX-NEXT: ; return to shader part epilog 382; 383; GFX10-LABEL: dyn_insertelement_v8f32_s_v_v: 384; GFX10: ; %bb.0: ; %entry 385; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 386; GFX10-NEXT: v_cndmask_b32_e32 v8, s2, v0, vcc_lo 387; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1 388; GFX10-NEXT: v_cndmask_b32_e32 v9, s3, v0, vcc_lo 389; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1 390; GFX10-NEXT: v_cndmask_b32_e32 v2, s4, v0, vcc_lo 391; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1 392; GFX10-NEXT: v_cndmask_b32_e32 v3, s5, v0, vcc_lo 393; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v1 394; GFX10-NEXT: v_cndmask_b32_e32 v4, s6, v0, vcc_lo 395; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v1 396; GFX10-NEXT: v_cndmask_b32_e32 v5, s7, v0, vcc_lo 397; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v1 398; GFX10-NEXT: v_cndmask_b32_e32 v6, s8, v0, vcc_lo 399; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v1 400; GFX10-NEXT: v_mov_b32_e32 v1, v9 401; GFX10-NEXT: v_cndmask_b32_e32 v7, s9, v0, vcc_lo 402; GFX10-NEXT: v_mov_b32_e32 v0, v8 403; GFX10-NEXT: ; return to shader part epilog 404; 405; GFX11-LABEL: dyn_insertelement_v8f32_s_v_v: 406; GFX11: ; %bb.0: ; %entry 407; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 408; GFX11-NEXT: v_cndmask_b32_e32 v8, s2, v0, vcc_lo 409; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1 410; GFX11-NEXT: v_cndmask_b32_e32 v9, s3, v0, vcc_lo 411; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1 412; GFX11-NEXT: v_cndmask_b32_e32 v2, s4, v0, vcc_lo 413; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1 414; GFX11-NEXT: v_cndmask_b32_e32 v3, s5, v0, vcc_lo 415; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v1 416; GFX11-NEXT: v_cndmask_b32_e32 v4, s6, v0, vcc_lo 417; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v1 418; GFX11-NEXT: v_cndmask_b32_e32 v5, s7, v0, vcc_lo 419; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v1 420; GFX11-NEXT: v_cndmask_b32_e32 v6, s8, v0, vcc_lo 421; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v1 422; GFX11-NEXT: v_mov_b32_e32 v1, v9 423; GFX11-NEXT: v_dual_cndmask_b32 v7, s9, v0 :: v_dual_mov_b32 v0, v8 424; GFX11-NEXT: ; return to shader part epilog 425entry: 426 %insert = insertelement <8 x float> %vec, float %val, i32 %idx 427 ret <8 x float> %insert 428} 429 430define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_s_v(<8 x float> %vec, float inreg %val, i32 %idx) { 431; GPRIDX-LABEL: dyn_insertelement_v8f32_v_s_v: 432; GPRIDX: ; %bb.0: ; %entry 433; GPRIDX-NEXT: v_mov_b32_e32 v9, s2 434; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v8 435; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc 436; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8 437; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc 438; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v8 439; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v9, vcc 440; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v8 441; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v9, vcc 442; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v8 443; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v9, vcc 444; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v8 445; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc 446; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v8 447; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v9, vcc 448; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v8 449; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v7, v9, vcc 450; GPRIDX-NEXT: ; return to shader part epilog 451; 452; GFX10PLUS-LABEL: dyn_insertelement_v8f32_v_s_v: 453; GFX10PLUS: ; %bb.0: ; %entry 454; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v8 455; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc_lo 456; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v8 457; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s2, vcc_lo 458; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v8 459; GFX10PLUS-NEXT: v_cndmask_b32_e64 v2, v2, s2, vcc_lo 460; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v8 461; GFX10PLUS-NEXT: v_cndmask_b32_e64 v3, v3, s2, vcc_lo 462; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v8 463; GFX10PLUS-NEXT: v_cndmask_b32_e64 v4, v4, s2, vcc_lo 464; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v8 465; GFX10PLUS-NEXT: v_cndmask_b32_e64 v5, v5, s2, vcc_lo 466; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v8 467; GFX10PLUS-NEXT: v_cndmask_b32_e64 v6, v6, s2, vcc_lo 468; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v8 469; GFX10PLUS-NEXT: v_cndmask_b32_e64 v7, v7, s2, vcc_lo 470; GFX10PLUS-NEXT: ; return to shader part epilog 471entry: 472 %insert = insertelement <8 x float> %vec, float %val, i32 %idx 473 ret <8 x float> %insert 474} 475 476define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_v_s(<8 x float> %vec, float %val, i32 inreg %idx) { 477; GPRIDX-LABEL: dyn_insertelement_v8f32_v_v_s: 478; GPRIDX: ; %bb.0: ; %entry 479; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 0 480; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc 481; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 1 482; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc 483; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 2 484; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc 485; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 3 486; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc 487; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 4 488; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc 489; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 5 490; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v8, vcc 491; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 6 492; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v8, vcc 493; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 7 494; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v7, v8, vcc 495; GPRIDX-NEXT: ; return to shader part epilog 496; 497; GFX10PLUS-LABEL: dyn_insertelement_v8f32_v_v_s: 498; GFX10PLUS: ; %bb.0: ; %entry 499; GFX10PLUS-NEXT: s_mov_b32 m0, s2 500; GFX10PLUS-NEXT: v_movreld_b32_e32 v0, v8 501; GFX10PLUS-NEXT: ; return to shader part epilog 502entry: 503 %insert = insertelement <8 x float> %vec, float %val, i32 %idx 504 ret <8 x float> %insert 505} 506 507define amdgpu_ps <8 x float> @dyn_insertelement_v8p3i8_v_v_s(<8 x ptr addrspace(3)> %vec, ptr addrspace(3) %val, i32 inreg %idx) { 508; GPRIDX-LABEL: dyn_insertelement_v8p3i8_v_v_s: 509; GPRIDX: ; %bb.0: ; %entry 510; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 0 511; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc 512; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 1 513; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc 514; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 2 515; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc 516; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 3 517; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc 518; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 4 519; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc 520; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 5 521; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v8, vcc 522; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 6 523; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v8, vcc 524; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 7 525; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v7, v8, vcc 526; GPRIDX-NEXT: ; return to shader part epilog 527; 528; GFX10PLUS-LABEL: dyn_insertelement_v8p3i8_v_v_s: 529; GFX10PLUS: ; %bb.0: ; %entry 530; GFX10PLUS-NEXT: s_mov_b32 m0, s2 531; GFX10PLUS-NEXT: v_movreld_b32_e32 v0, v8 532; GFX10PLUS-NEXT: ; return to shader part epilog 533entry: 534 %insert = insertelement <8 x ptr addrspace(3)> %vec, ptr addrspace(3) %val, i32 %idx 535 %cast.0 = ptrtoint <8 x ptr addrspace(3)> %insert to <8 x i32> 536 %cast.1 = bitcast <8 x i32> %cast.0 to <8 x float> 537 ret <8 x float> %cast.1 538} 539 540define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_v_v(<8 x float> %vec, float %val, i32 %idx) { 541; GPRIDX-LABEL: dyn_insertelement_v8f32_v_v_v: 542; GPRIDX: ; %bb.0: ; %entry 543; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9 544; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc 545; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v9 546; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc 547; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v9 548; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc 549; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v9 550; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc 551; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v9 552; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc 553; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v9 554; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v8, vcc 555; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v9 556; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v8, vcc 557; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v9 558; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v7, v8, vcc 559; GPRIDX-NEXT: ; return to shader part epilog 560; 561; GFX10PLUS-LABEL: dyn_insertelement_v8f32_v_v_v: 562; GFX10PLUS: ; %bb.0: ; %entry 563; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v9 564; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo 565; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v9 566; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc_lo 567; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v9 568; GFX10PLUS-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc_lo 569; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v9 570; GFX10PLUS-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc_lo 571; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v9 572; GFX10PLUS-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc_lo 573; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v9 574; GFX10PLUS-NEXT: v_cndmask_b32_e32 v5, v5, v8, vcc_lo 575; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v9 576; GFX10PLUS-NEXT: v_cndmask_b32_e32 v6, v6, v8, vcc_lo 577; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v9 578; GFX10PLUS-NEXT: v_cndmask_b32_e32 v7, v7, v8, vcc_lo 579; GFX10PLUS-NEXT: ; return to shader part epilog 580entry: 581 %insert = insertelement <8 x float> %vec, float %val, i32 %idx 582 ret <8 x float> %insert 583} 584 585define amdgpu_ps <8 x i64> @dyn_insertelement_v8i64_s_s_s(<8 x i64> inreg %vec, i64 inreg %val, i32 inreg %idx) { 586; GPRIDX-LABEL: dyn_insertelement_v8i64_s_s_s: 587; GPRIDX: ; %bb.0: ; %entry 588; GPRIDX-NEXT: s_mov_b32 s0, s2 589; GPRIDX-NEXT: s_mov_b32 s1, s3 590; GPRIDX-NEXT: s_mov_b32 s2, s4 591; GPRIDX-NEXT: s_mov_b32 s3, s5 592; GPRIDX-NEXT: s_mov_b32 s4, s6 593; GPRIDX-NEXT: s_mov_b32 s5, s7 594; GPRIDX-NEXT: s_mov_b32 s6, s8 595; GPRIDX-NEXT: s_mov_b32 s7, s9 596; GPRIDX-NEXT: s_mov_b32 s8, s10 597; GPRIDX-NEXT: s_mov_b32 s9, s11 598; GPRIDX-NEXT: s_mov_b32 s10, s12 599; GPRIDX-NEXT: s_mov_b32 s11, s13 600; GPRIDX-NEXT: s_mov_b32 s12, s14 601; GPRIDX-NEXT: s_mov_b32 s13, s15 602; GPRIDX-NEXT: s_mov_b32 s14, s16 603; GPRIDX-NEXT: s_mov_b32 s15, s17 604; GPRIDX-NEXT: s_mov_b32 m0, s20 605; GPRIDX-NEXT: s_nop 0 606; GPRIDX-NEXT: s_movreld_b64 s[0:1], s[18:19] 607; GPRIDX-NEXT: ; return to shader part epilog 608; 609; GFX10PLUS-LABEL: dyn_insertelement_v8i64_s_s_s: 610; GFX10PLUS: ; %bb.0: ; %entry 611; GFX10PLUS-NEXT: s_mov_b32 s0, s2 612; GFX10PLUS-NEXT: s_mov_b32 s1, s3 613; GFX10PLUS-NEXT: s_mov_b32 m0, s20 614; GFX10PLUS-NEXT: s_mov_b32 s2, s4 615; GFX10PLUS-NEXT: s_mov_b32 s3, s5 616; GFX10PLUS-NEXT: s_mov_b32 s4, s6 617; GFX10PLUS-NEXT: s_mov_b32 s5, s7 618; GFX10PLUS-NEXT: s_mov_b32 s6, s8 619; GFX10PLUS-NEXT: s_mov_b32 s7, s9 620; GFX10PLUS-NEXT: s_mov_b32 s8, s10 621; GFX10PLUS-NEXT: s_mov_b32 s9, s11 622; GFX10PLUS-NEXT: s_mov_b32 s10, s12 623; GFX10PLUS-NEXT: s_mov_b32 s11, s13 624; GFX10PLUS-NEXT: s_mov_b32 s12, s14 625; GFX10PLUS-NEXT: s_mov_b32 s13, s15 626; GFX10PLUS-NEXT: s_mov_b32 s14, s16 627; GFX10PLUS-NEXT: s_mov_b32 s15, s17 628; GFX10PLUS-NEXT: s_movreld_b64 s[0:1], s[18:19] 629; GFX10PLUS-NEXT: ; return to shader part epilog 630entry: 631 %insert = insertelement <8 x i64> %vec, i64 %val, i32 %idx 632 ret <8 x i64> %insert 633} 634 635define amdgpu_ps <8 x ptr addrspace(1)> @dyn_insertelement_v8p1i8_s_s_s(<8 x ptr addrspace(1)> inreg %vec, ptr addrspace(1) inreg %val, i32 inreg %idx) { 636; GPRIDX-LABEL: dyn_insertelement_v8p1i8_s_s_s: 637; GPRIDX: ; %bb.0: ; %entry 638; GPRIDX-NEXT: s_mov_b32 s0, s2 639; GPRIDX-NEXT: s_mov_b32 s1, s3 640; GPRIDX-NEXT: s_mov_b32 s2, s4 641; GPRIDX-NEXT: s_mov_b32 s3, s5 642; GPRIDX-NEXT: s_mov_b32 s4, s6 643; GPRIDX-NEXT: s_mov_b32 s5, s7 644; GPRIDX-NEXT: s_mov_b32 s6, s8 645; GPRIDX-NEXT: s_mov_b32 s7, s9 646; GPRIDX-NEXT: s_mov_b32 s8, s10 647; GPRIDX-NEXT: s_mov_b32 s9, s11 648; GPRIDX-NEXT: s_mov_b32 s10, s12 649; GPRIDX-NEXT: s_mov_b32 s11, s13 650; GPRIDX-NEXT: s_mov_b32 s12, s14 651; GPRIDX-NEXT: s_mov_b32 s13, s15 652; GPRIDX-NEXT: s_mov_b32 s14, s16 653; GPRIDX-NEXT: s_mov_b32 s15, s17 654; GPRIDX-NEXT: s_mov_b32 m0, s20 655; GPRIDX-NEXT: s_nop 0 656; GPRIDX-NEXT: s_movreld_b64 s[0:1], s[18:19] 657; GPRIDX-NEXT: ; return to shader part epilog 658; 659; GFX10PLUS-LABEL: dyn_insertelement_v8p1i8_s_s_s: 660; GFX10PLUS: ; %bb.0: ; %entry 661; GFX10PLUS-NEXT: s_mov_b32 s0, s2 662; GFX10PLUS-NEXT: s_mov_b32 s1, s3 663; GFX10PLUS-NEXT: s_mov_b32 m0, s20 664; GFX10PLUS-NEXT: s_mov_b32 s2, s4 665; GFX10PLUS-NEXT: s_mov_b32 s3, s5 666; GFX10PLUS-NEXT: s_mov_b32 s4, s6 667; GFX10PLUS-NEXT: s_mov_b32 s5, s7 668; GFX10PLUS-NEXT: s_mov_b32 s6, s8 669; GFX10PLUS-NEXT: s_mov_b32 s7, s9 670; GFX10PLUS-NEXT: s_mov_b32 s8, s10 671; GFX10PLUS-NEXT: s_mov_b32 s9, s11 672; GFX10PLUS-NEXT: s_mov_b32 s10, s12 673; GFX10PLUS-NEXT: s_mov_b32 s11, s13 674; GFX10PLUS-NEXT: s_mov_b32 s12, s14 675; GFX10PLUS-NEXT: s_mov_b32 s13, s15 676; GFX10PLUS-NEXT: s_mov_b32 s14, s16 677; GFX10PLUS-NEXT: s_mov_b32 s15, s17 678; GFX10PLUS-NEXT: s_movreld_b64 s[0:1], s[18:19] 679; GFX10PLUS-NEXT: ; return to shader part epilog 680entry: 681 %insert = insertelement <8 x ptr addrspace(1)> %vec, ptr addrspace(1) %val, i32 %idx 682 ret <8 x ptr addrspace(1)> %insert 683} 684 685define void @dyn_insertelement_v8f64_const_s_v_v(double %val, i32 %idx) { 686; GPRIDX-LABEL: dyn_insertelement_v8f64_const_s_v_v: 687; GPRIDX: ; %bb.0: ; %entry 688; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 689; GPRIDX-NEXT: s_mov_b32 s18, 0 690; GPRIDX-NEXT: s_mov_b32 s16, 0 691; GPRIDX-NEXT: s_mov_b32 s14, 0 692; GPRIDX-NEXT: s_mov_b32 s12, 0 693; GPRIDX-NEXT: s_mov_b32 s8, 0 694; GPRIDX-NEXT: s_mov_b64 s[4:5], 1.0 695; GPRIDX-NEXT: s_mov_b32 s19, 0x40200000 696; GPRIDX-NEXT: s_mov_b32 s17, 0x401c0000 697; GPRIDX-NEXT: s_mov_b32 s15, 0x40180000 698; GPRIDX-NEXT: s_mov_b32 s13, 0x40140000 699; GPRIDX-NEXT: s_mov_b64 s[10:11], 4.0 700; GPRIDX-NEXT: s_mov_b32 s9, 0x40080000 701; GPRIDX-NEXT: s_mov_b64 s[6:7], 2.0 702; GPRIDX-NEXT: v_mov_b32_e32 v3, s4 703; GPRIDX-NEXT: v_mov_b32_e32 v4, s5 704; GPRIDX-NEXT: v_mov_b32_e32 v5, s6 705; GPRIDX-NEXT: v_mov_b32_e32 v6, s7 706; GPRIDX-NEXT: v_mov_b32_e32 v7, s8 707; GPRIDX-NEXT: v_mov_b32_e32 v8, s9 708; GPRIDX-NEXT: v_mov_b32_e32 v9, s10 709; GPRIDX-NEXT: v_mov_b32_e32 v10, s11 710; GPRIDX-NEXT: v_mov_b32_e32 v11, s12 711; GPRIDX-NEXT: v_mov_b32_e32 v12, s13 712; GPRIDX-NEXT: v_mov_b32_e32 v13, s14 713; GPRIDX-NEXT: v_mov_b32_e32 v14, s15 714; GPRIDX-NEXT: v_mov_b32_e32 v15, s16 715; GPRIDX-NEXT: v_mov_b32_e32 v16, s17 716; GPRIDX-NEXT: v_mov_b32_e32 v17, s18 717; GPRIDX-NEXT: v_mov_b32_e32 v18, s19 718; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 719; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[16:17], 0, v2 720; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[4:5], 2, v2 721; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[6:7], 3, v2 722; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[8:9], 4, v2 723; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[10:11], 5, v2 724; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[12:13], 6, v2 725; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[14:15], 7, v2 726; GPRIDX-NEXT: v_cndmask_b32_e64 v3, v3, v0, s[16:17] 727; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v0, vcc 728; GPRIDX-NEXT: v_cndmask_b32_e64 v4, v4, v1, s[16:17] 729; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v1, vcc 730; GPRIDX-NEXT: v_cndmask_b32_e64 v7, v7, v0, s[4:5] 731; GPRIDX-NEXT: v_cndmask_b32_e64 v9, v9, v0, s[6:7] 732; GPRIDX-NEXT: v_cndmask_b32_e64 v11, v11, v0, s[8:9] 733; GPRIDX-NEXT: v_cndmask_b32_e64 v13, v13, v0, s[10:11] 734; GPRIDX-NEXT: v_cndmask_b32_e64 v15, v15, v0, s[12:13] 735; GPRIDX-NEXT: v_cndmask_b32_e64 v17, v17, v0, s[14:15] 736; GPRIDX-NEXT: v_cndmask_b32_e64 v8, v8, v1, s[4:5] 737; GPRIDX-NEXT: v_cndmask_b32_e64 v10, v10, v1, s[6:7] 738; GPRIDX-NEXT: v_cndmask_b32_e64 v12, v12, v1, s[8:9] 739; GPRIDX-NEXT: v_cndmask_b32_e64 v14, v14, v1, s[10:11] 740; GPRIDX-NEXT: v_cndmask_b32_e64 v16, v16, v1, s[12:13] 741; GPRIDX-NEXT: v_cndmask_b32_e64 v18, v18, v1, s[14:15] 742; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[3:6], off 743; GPRIDX-NEXT: s_waitcnt vmcnt(0) 744; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[7:10], off 745; GPRIDX-NEXT: s_waitcnt vmcnt(0) 746; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[11:14], off 747; GPRIDX-NEXT: s_waitcnt vmcnt(0) 748; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[15:18], off 749; GPRIDX-NEXT: s_waitcnt vmcnt(0) 750; GPRIDX-NEXT: s_setpc_b64 s[30:31] 751; 752; GFX10-LABEL: dyn_insertelement_v8f64_const_s_v_v: 753; GFX10: ; %bb.0: ; %entry 754; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 755; GFX10-NEXT: s_mov_b64 s[4:5], 1.0 756; GFX10-NEXT: s_mov_b32 s18, 0 757; GFX10-NEXT: s_mov_b32 s16, 0 758; GFX10-NEXT: s_mov_b32 s14, 0 759; GFX10-NEXT: s_mov_b32 s12, 0 760; GFX10-NEXT: s_mov_b32 s8, 0 761; GFX10-NEXT: s_mov_b32 s19, 0x40200000 762; GFX10-NEXT: s_mov_b32 s17, 0x401c0000 763; GFX10-NEXT: s_mov_b32 s15, 0x40180000 764; GFX10-NEXT: s_mov_b32 s13, 0x40140000 765; GFX10-NEXT: s_mov_b64 s[10:11], 4.0 766; GFX10-NEXT: s_mov_b32 s9, 0x40080000 767; GFX10-NEXT: s_mov_b64 s[6:7], 2.0 768; GFX10-NEXT: v_mov_b32_e32 v3, s4 769; GFX10-NEXT: v_mov_b32_e32 v4, s5 770; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v2 771; GFX10-NEXT: v_mov_b32_e32 v5, s6 772; GFX10-NEXT: v_mov_b32_e32 v6, s7 773; GFX10-NEXT: v_mov_b32_e32 v7, s8 774; GFX10-NEXT: v_mov_b32_e32 v8, s9 775; GFX10-NEXT: v_mov_b32_e32 v9, s10 776; GFX10-NEXT: v_mov_b32_e32 v10, s11 777; GFX10-NEXT: v_mov_b32_e32 v11, s12 778; GFX10-NEXT: v_mov_b32_e32 v12, s13 779; GFX10-NEXT: v_mov_b32_e32 v13, s14 780; GFX10-NEXT: v_mov_b32_e32 v14, s15 781; GFX10-NEXT: v_mov_b32_e32 v15, s16 782; GFX10-NEXT: v_mov_b32_e32 v16, s17 783; GFX10-NEXT: v_mov_b32_e32 v17, s18 784; GFX10-NEXT: v_mov_b32_e32 v18, s19 785; GFX10-NEXT: v_cmp_eq_u32_e64 s4, 1, v2 786; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v0, vcc_lo 787; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v1, vcc_lo 788; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v2 789; GFX10-NEXT: v_cmp_eq_u32_e64 s5, 7, v2 790; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v0, s4 791; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, v1, s4 792; GFX10-NEXT: v_cmp_eq_u32_e64 s4, 3, v2 793; GFX10-NEXT: v_cndmask_b32_e32 v7, v7, v0, vcc_lo 794; GFX10-NEXT: v_cndmask_b32_e32 v8, v8, v1, vcc_lo 795; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v2 796; GFX10-NEXT: v_cndmask_b32_e64 v17, v17, v0, s5 797; GFX10-NEXT: v_cndmask_b32_e64 v9, v9, v0, s4 798; GFX10-NEXT: v_cndmask_b32_e64 v10, v10, v1, s4 799; GFX10-NEXT: v_cmp_eq_u32_e64 s4, 5, v2 800; GFX10-NEXT: v_cndmask_b32_e32 v11, v11, v0, vcc_lo 801; GFX10-NEXT: v_cndmask_b32_e32 v12, v12, v1, vcc_lo 802; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v2 803; GFX10-NEXT: v_cndmask_b32_e64 v18, v18, v1, s5 804; GFX10-NEXT: v_cndmask_b32_e64 v13, v13, v0, s4 805; GFX10-NEXT: v_cndmask_b32_e64 v14, v14, v1, s4 806; GFX10-NEXT: v_cndmask_b32_e32 v15, v15, v0, vcc_lo 807; GFX10-NEXT: v_cndmask_b32_e32 v16, v16, v1, vcc_lo 808; GFX10-NEXT: global_store_dwordx4 v[0:1], v[3:6], off 809; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 810; GFX10-NEXT: global_store_dwordx4 v[0:1], v[7:10], off 811; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 812; GFX10-NEXT: global_store_dwordx4 v[0:1], v[11:14], off 813; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 814; GFX10-NEXT: global_store_dwordx4 v[0:1], v[15:18], off 815; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 816; GFX10-NEXT: s_setpc_b64 s[30:31] 817; 818; GFX11-LABEL: dyn_insertelement_v8f64_const_s_v_v: 819; GFX11: ; %bb.0: ; %entry 820; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 821; GFX11-NEXT: s_mov_b32 s14, 0 822; GFX11-NEXT: s_mov_b32 s15, 0x40200000 823; GFX11-NEXT: s_mov_b32 s12, 0 824; GFX11-NEXT: s_mov_b32 s10, 0 825; GFX11-NEXT: s_mov_b32 s8, 0 826; GFX11-NEXT: s_mov_b32 s4, 0 827; GFX11-NEXT: s_mov_b64 s[0:1], 1.0 828; GFX11-NEXT: s_mov_b32 s13, 0x401c0000 829; GFX11-NEXT: s_mov_b32 s11, 0x40180000 830; GFX11-NEXT: s_mov_b32 s9, 0x40140000 831; GFX11-NEXT: s_mov_b64 s[6:7], 4.0 832; GFX11-NEXT: s_mov_b32 s5, 0x40080000 833; GFX11-NEXT: s_mov_b64 s[2:3], 2.0 834; GFX11-NEXT: v_dual_mov_b32 v18, s15 :: v_dual_mov_b32 v17, s14 835; GFX11-NEXT: v_dual_mov_b32 v4, s1 :: v_dual_mov_b32 v3, s0 836; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v2 837; GFX11-NEXT: v_dual_mov_b32 v16, s13 :: v_dual_mov_b32 v15, s12 838; GFX11-NEXT: v_dual_mov_b32 v14, s11 :: v_dual_mov_b32 v13, s10 839; GFX11-NEXT: v_dual_mov_b32 v12, s9 :: v_dual_mov_b32 v11, s8 840; GFX11-NEXT: v_dual_mov_b32 v10, s7 :: v_dual_mov_b32 v9, s6 841; GFX11-NEXT: v_dual_mov_b32 v8, s5 :: v_dual_mov_b32 v7, s4 842; GFX11-NEXT: v_dual_mov_b32 v6, s3 :: v_dual_mov_b32 v5, s2 843; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 1, v2 844; GFX11-NEXT: v_dual_cndmask_b32 v3, v3, v0 :: v_dual_cndmask_b32 v4, v4, v1 845; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v2 846; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 7, v2 847; GFX11-NEXT: v_cndmask_b32_e64 v5, v5, v0, s0 848; GFX11-NEXT: v_cndmask_b32_e64 v6, v6, v1, s0 849; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 3, v2 850; GFX11-NEXT: v_dual_cndmask_b32 v7, v7, v0 :: v_dual_cndmask_b32 v8, v8, v1 851; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v2 852; GFX11-NEXT: v_cndmask_b32_e64 v17, v17, v0, s1 853; GFX11-NEXT: v_cndmask_b32_e64 v9, v9, v0, s0 854; GFX11-NEXT: v_cndmask_b32_e64 v10, v10, v1, s0 855; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 5, v2 856; GFX11-NEXT: v_dual_cndmask_b32 v11, v11, v0 :: v_dual_cndmask_b32 v12, v12, v1 857; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v2 858; GFX11-NEXT: v_cndmask_b32_e64 v18, v18, v1, s1 859; GFX11-NEXT: v_cndmask_b32_e64 v13, v13, v0, s0 860; GFX11-NEXT: v_cndmask_b32_e64 v14, v14, v1, s0 861; GFX11-NEXT: v_dual_cndmask_b32 v15, v15, v0 :: v_dual_cndmask_b32 v16, v16, v1 862; GFX11-NEXT: global_store_b128 v[0:1], v[3:6], off dlc 863; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 864; GFX11-NEXT: global_store_b128 v[0:1], v[7:10], off dlc 865; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 866; GFX11-NEXT: global_store_b128 v[0:1], v[11:14], off dlc 867; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 868; GFX11-NEXT: global_store_b128 v[0:1], v[15:18], off dlc 869; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 870; GFX11-NEXT: s_setpc_b64 s[30:31] 871entry: 872 %insert = insertelement <8 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0>, double %val, i32 %idx 873 %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1> 874 %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3> 875 %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5> 876 %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7> 877 store volatile <2 x double> %vec.0, ptr addrspace(1) undef 878 store volatile <2 x double> %vec.1, ptr addrspace(1) undef 879 store volatile <2 x double> %vec.2, ptr addrspace(1) undef 880 store volatile <2 x double> %vec.3, ptr addrspace(1) undef 881 ret void 882} 883 884define amdgpu_ps void @dyn_insertelement_v8f64_s_s_v(<8 x double> inreg %vec, double inreg %val, i32 %idx) { 885; GPRIDX-LABEL: dyn_insertelement_v8f64_s_s_v: 886; GPRIDX: ; %bb.0: ; %entry 887; GPRIDX-NEXT: s_mov_b32 s1, s3 888; GPRIDX-NEXT: s_mov_b32 s3, s5 889; GPRIDX-NEXT: s_mov_b32 s5, s7 890; GPRIDX-NEXT: s_mov_b32 s7, s9 891; GPRIDX-NEXT: s_mov_b32 s9, s11 892; GPRIDX-NEXT: s_mov_b32 s11, s13 893; GPRIDX-NEXT: s_mov_b32 s13, s15 894; GPRIDX-NEXT: s_mov_b32 s15, s17 895; GPRIDX-NEXT: s_mov_b32 s0, s2 896; GPRIDX-NEXT: s_mov_b32 s2, s4 897; GPRIDX-NEXT: s_mov_b32 s4, s6 898; GPRIDX-NEXT: s_mov_b32 s6, s8 899; GPRIDX-NEXT: s_mov_b32 s8, s10 900; GPRIDX-NEXT: s_mov_b32 s10, s12 901; GPRIDX-NEXT: s_mov_b32 s12, s14 902; GPRIDX-NEXT: s_mov_b32 s14, s16 903; GPRIDX-NEXT: v_mov_b32_e32 v16, s15 904; GPRIDX-NEXT: v_mov_b32_e32 v15, s14 905; GPRIDX-NEXT: v_mov_b32_e32 v14, s13 906; GPRIDX-NEXT: v_mov_b32_e32 v13, s12 907; GPRIDX-NEXT: v_mov_b32_e32 v12, s11 908; GPRIDX-NEXT: v_mov_b32_e32 v11, s10 909; GPRIDX-NEXT: v_mov_b32_e32 v10, s9 910; GPRIDX-NEXT: v_mov_b32_e32 v9, s8 911; GPRIDX-NEXT: v_mov_b32_e32 v8, s7 912; GPRIDX-NEXT: v_mov_b32_e32 v7, s6 913; GPRIDX-NEXT: v_mov_b32_e32 v6, s5 914; GPRIDX-NEXT: v_mov_b32_e32 v5, s4 915; GPRIDX-NEXT: v_mov_b32_e32 v4, s3 916; GPRIDX-NEXT: v_mov_b32_e32 v3, s2 917; GPRIDX-NEXT: v_mov_b32_e32 v2, s1 918; GPRIDX-NEXT: v_mov_b32_e32 v1, s0 919; GPRIDX-NEXT: v_mov_b32_e32 v17, s18 920; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 921; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[0:1], 2, v0 922; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v0 923; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[4:5], 4, v0 924; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[6:7], 5, v0 925; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[8:9], 6, v0 926; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[10:11], 7, v0 927; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[12:13], 0, v0 928; GPRIDX-NEXT: v_mov_b32_e32 v0, s19 929; GPRIDX-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[12:13] 930; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v17, vcc 931; GPRIDX-NEXT: v_cndmask_b32_e64 v2, v2, v0, s[12:13] 932; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc 933; GPRIDX-NEXT: v_cndmask_b32_e64 v5, v5, v17, s[0:1] 934; GPRIDX-NEXT: v_cndmask_b32_e64 v7, v7, v17, s[2:3] 935; GPRIDX-NEXT: v_cndmask_b32_e64 v9, v9, v17, s[4:5] 936; GPRIDX-NEXT: v_cndmask_b32_e64 v11, v11, v17, s[6:7] 937; GPRIDX-NEXT: v_cndmask_b32_e64 v13, v13, v17, s[8:9] 938; GPRIDX-NEXT: v_cndmask_b32_e64 v15, v15, v17, s[10:11] 939; GPRIDX-NEXT: v_cndmask_b32_e64 v6, v6, v0, s[0:1] 940; GPRIDX-NEXT: v_cndmask_b32_e64 v8, v8, v0, s[2:3] 941; GPRIDX-NEXT: v_cndmask_b32_e64 v10, v10, v0, s[4:5] 942; GPRIDX-NEXT: v_cndmask_b32_e64 v12, v12, v0, s[6:7] 943; GPRIDX-NEXT: v_cndmask_b32_e64 v14, v14, v0, s[8:9] 944; GPRIDX-NEXT: v_cndmask_b32_e64 v16, v16, v0, s[10:11] 945; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[1:4], off 946; GPRIDX-NEXT: s_waitcnt vmcnt(0) 947; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[5:8], off 948; GPRIDX-NEXT: s_waitcnt vmcnt(0) 949; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[9:12], off 950; GPRIDX-NEXT: s_waitcnt vmcnt(0) 951; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[13:16], off 952; GPRIDX-NEXT: s_waitcnt vmcnt(0) 953; GPRIDX-NEXT: s_endpgm 954; 955; GFX10-LABEL: dyn_insertelement_v8f64_s_s_v: 956; GFX10: ; %bb.0: ; %entry 957; GFX10-NEXT: s_mov_b32 s1, s3 958; GFX10-NEXT: s_mov_b32 s3, s5 959; GFX10-NEXT: s_mov_b32 s5, s7 960; GFX10-NEXT: s_mov_b32 s7, s9 961; GFX10-NEXT: s_mov_b32 s9, s11 962; GFX10-NEXT: s_mov_b32 s11, s13 963; GFX10-NEXT: s_mov_b32 s13, s15 964; GFX10-NEXT: s_mov_b32 s15, s17 965; GFX10-NEXT: s_mov_b32 s0, s2 966; GFX10-NEXT: s_mov_b32 s2, s4 967; GFX10-NEXT: s_mov_b32 s4, s6 968; GFX10-NEXT: s_mov_b32 s6, s8 969; GFX10-NEXT: s_mov_b32 s8, s10 970; GFX10-NEXT: s_mov_b32 s10, s12 971; GFX10-NEXT: s_mov_b32 s12, s14 972; GFX10-NEXT: s_mov_b32 s14, s16 973; GFX10-NEXT: v_mov_b32_e32 v16, s15 974; GFX10-NEXT: v_mov_b32_e32 v2, s1 975; GFX10-NEXT: v_mov_b32_e32 v1, s0 976; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 977; GFX10-NEXT: v_mov_b32_e32 v15, s14 978; GFX10-NEXT: v_mov_b32_e32 v14, s13 979; GFX10-NEXT: v_mov_b32_e32 v13, s12 980; GFX10-NEXT: v_mov_b32_e32 v12, s11 981; GFX10-NEXT: v_mov_b32_e32 v11, s10 982; GFX10-NEXT: v_mov_b32_e32 v10, s9 983; GFX10-NEXT: v_mov_b32_e32 v9, s8 984; GFX10-NEXT: v_mov_b32_e32 v8, s7 985; GFX10-NEXT: v_mov_b32_e32 v7, s6 986; GFX10-NEXT: v_mov_b32_e32 v6, s5 987; GFX10-NEXT: v_mov_b32_e32 v5, s4 988; GFX10-NEXT: v_mov_b32_e32 v4, s3 989; GFX10-NEXT: v_mov_b32_e32 v3, s2 990; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 1, v0 991; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s18, vcc_lo 992; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s19, vcc_lo 993; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 994; GFX10-NEXT: v_cmp_eq_u32_e64 s1, 7, v0 995; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, s18, s0 996; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, s19, s0 997; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 3, v0 998; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, s18, vcc_lo 999; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, s19, vcc_lo 1000; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 1001; GFX10-NEXT: v_cndmask_b32_e64 v15, v15, s18, s1 1002; GFX10-NEXT: v_cndmask_b32_e64 v7, v7, s18, s0 1003; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, s19, s0 1004; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 5, v0 1005; GFX10-NEXT: v_cndmask_b32_e64 v9, v9, s18, vcc_lo 1006; GFX10-NEXT: v_cndmask_b32_e64 v10, v10, s19, vcc_lo 1007; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 1008; GFX10-NEXT: v_cndmask_b32_e64 v16, v16, s19, s1 1009; GFX10-NEXT: v_cndmask_b32_e64 v11, v11, s18, s0 1010; GFX10-NEXT: v_cndmask_b32_e64 v12, v12, s19, s0 1011; GFX10-NEXT: v_cndmask_b32_e64 v13, v13, s18, vcc_lo 1012; GFX10-NEXT: v_cndmask_b32_e64 v14, v14, s19, vcc_lo 1013; GFX10-NEXT: global_store_dwordx4 v[0:1], v[1:4], off 1014; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1015; GFX10-NEXT: global_store_dwordx4 v[0:1], v[5:8], off 1016; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1017; GFX10-NEXT: global_store_dwordx4 v[0:1], v[9:12], off 1018; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1019; GFX10-NEXT: global_store_dwordx4 v[0:1], v[13:16], off 1020; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1021; GFX10-NEXT: s_endpgm 1022; 1023; GFX11-LABEL: dyn_insertelement_v8f64_s_s_v: 1024; GFX11: ; %bb.0: ; %entry 1025; GFX11-NEXT: s_mov_b32 s1, s3 1026; GFX11-NEXT: s_mov_b32 s3, s5 1027; GFX11-NEXT: s_mov_b32 s5, s7 1028; GFX11-NEXT: s_mov_b32 s7, s9 1029; GFX11-NEXT: s_mov_b32 s9, s11 1030; GFX11-NEXT: s_mov_b32 s11, s13 1031; GFX11-NEXT: s_mov_b32 s13, s15 1032; GFX11-NEXT: s_mov_b32 s15, s17 1033; GFX11-NEXT: s_mov_b32 s0, s2 1034; GFX11-NEXT: s_mov_b32 s2, s4 1035; GFX11-NEXT: s_mov_b32 s4, s6 1036; GFX11-NEXT: s_mov_b32 s6, s8 1037; GFX11-NEXT: s_mov_b32 s8, s10 1038; GFX11-NEXT: s_mov_b32 s10, s12 1039; GFX11-NEXT: s_mov_b32 s12, s14 1040; GFX11-NEXT: s_mov_b32 s14, s16 1041; GFX11-NEXT: v_dual_mov_b32 v16, s15 :: v_dual_mov_b32 v15, s14 1042; GFX11-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0 1043; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 1044; GFX11-NEXT: v_dual_mov_b32 v14, s13 :: v_dual_mov_b32 v13, s12 1045; GFX11-NEXT: v_dual_mov_b32 v12, s11 :: v_dual_mov_b32 v11, s10 1046; GFX11-NEXT: v_dual_mov_b32 v10, s9 :: v_dual_mov_b32 v9, s8 1047; GFX11-NEXT: v_dual_mov_b32 v8, s7 :: v_dual_mov_b32 v7, s6 1048; GFX11-NEXT: v_dual_mov_b32 v6, s5 :: v_dual_mov_b32 v5, s4 1049; GFX11-NEXT: v_dual_mov_b32 v4, s3 :: v_dual_mov_b32 v3, s2 1050; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 1, v0 1051; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s18, vcc_lo 1052; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s19, vcc_lo 1053; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 1054; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 7, v0 1055; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, s18, s0 1056; GFX11-NEXT: v_cndmask_b32_e64 v4, v4, s19, s0 1057; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 3, v0 1058; GFX11-NEXT: v_cndmask_b32_e64 v5, v5, s18, vcc_lo 1059; GFX11-NEXT: v_cndmask_b32_e64 v6, v6, s19, vcc_lo 1060; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 1061; GFX11-NEXT: v_cndmask_b32_e64 v15, v15, s18, s1 1062; GFX11-NEXT: v_cndmask_b32_e64 v7, v7, s18, s0 1063; GFX11-NEXT: v_cndmask_b32_e64 v8, v8, s19, s0 1064; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 5, v0 1065; GFX11-NEXT: v_cndmask_b32_e64 v9, v9, s18, vcc_lo 1066; GFX11-NEXT: v_cndmask_b32_e64 v10, v10, s19, vcc_lo 1067; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 1068; GFX11-NEXT: v_cndmask_b32_e64 v16, v16, s19, s1 1069; GFX11-NEXT: v_cndmask_b32_e64 v11, v11, s18, s0 1070; GFX11-NEXT: v_cndmask_b32_e64 v12, v12, s19, s0 1071; GFX11-NEXT: v_cndmask_b32_e64 v13, v13, s18, vcc_lo 1072; GFX11-NEXT: v_cndmask_b32_e64 v14, v14, s19, vcc_lo 1073; GFX11-NEXT: global_store_b128 v[0:1], v[1:4], off dlc 1074; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1075; GFX11-NEXT: global_store_b128 v[0:1], v[5:8], off dlc 1076; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1077; GFX11-NEXT: global_store_b128 v[0:1], v[9:12], off dlc 1078; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1079; GFX11-NEXT: global_store_b128 v[0:1], v[13:16], off dlc 1080; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1081; GFX11-NEXT: s_endpgm 1082entry: 1083 %insert = insertelement <8 x double> %vec, double %val, i32 %idx 1084 %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1> 1085 %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3> 1086 %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5> 1087 %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7> 1088 store volatile <2 x double> %vec.0, ptr addrspace(1) undef 1089 store volatile <2 x double> %vec.1, ptr addrspace(1) undef 1090 store volatile <2 x double> %vec.2, ptr addrspace(1) undef 1091 store volatile <2 x double> %vec.3, ptr addrspace(1) undef 1092 ret void 1093} 1094 1095define amdgpu_ps void @dyn_insertelement_v8f64_s_v_s(<8 x double> inreg %vec, double %val, i32 inreg %idx) { 1096; GPRIDX-LABEL: dyn_insertelement_v8f64_s_v_s: 1097; GPRIDX: ; %bb.0: ; %entry 1098; GPRIDX-NEXT: s_mov_b32 s1, s3 1099; GPRIDX-NEXT: s_mov_b32 s3, s5 1100; GPRIDX-NEXT: s_mov_b32 s5, s7 1101; GPRIDX-NEXT: s_mov_b32 s7, s9 1102; GPRIDX-NEXT: s_mov_b32 s9, s11 1103; GPRIDX-NEXT: s_mov_b32 s11, s13 1104; GPRIDX-NEXT: s_mov_b32 s13, s15 1105; GPRIDX-NEXT: s_mov_b32 s15, s17 1106; GPRIDX-NEXT: s_mov_b32 s0, s2 1107; GPRIDX-NEXT: s_mov_b32 s2, s4 1108; GPRIDX-NEXT: s_mov_b32 s4, s6 1109; GPRIDX-NEXT: s_mov_b32 s6, s8 1110; GPRIDX-NEXT: s_mov_b32 s8, s10 1111; GPRIDX-NEXT: s_mov_b32 s10, s12 1112; GPRIDX-NEXT: s_mov_b32 s12, s14 1113; GPRIDX-NEXT: s_mov_b32 s14, s16 1114; GPRIDX-NEXT: v_mov_b32_e32 v17, s15 1115; GPRIDX-NEXT: v_mov_b32_e32 v16, s14 1116; GPRIDX-NEXT: v_mov_b32_e32 v15, s13 1117; GPRIDX-NEXT: v_mov_b32_e32 v14, s12 1118; GPRIDX-NEXT: v_mov_b32_e32 v13, s11 1119; GPRIDX-NEXT: v_mov_b32_e32 v12, s10 1120; GPRIDX-NEXT: v_mov_b32_e32 v11, s9 1121; GPRIDX-NEXT: v_mov_b32_e32 v10, s8 1122; GPRIDX-NEXT: v_mov_b32_e32 v9, s7 1123; GPRIDX-NEXT: v_mov_b32_e32 v8, s6 1124; GPRIDX-NEXT: v_mov_b32_e32 v7, s5 1125; GPRIDX-NEXT: v_mov_b32_e32 v6, s4 1126; GPRIDX-NEXT: v_mov_b32_e32 v5, s3 1127; GPRIDX-NEXT: v_mov_b32_e32 v4, s2 1128; GPRIDX-NEXT: v_mov_b32_e32 v3, s1 1129; GPRIDX-NEXT: v_mov_b32_e32 v2, s0 1130; GPRIDX-NEXT: s_lshl_b32 s0, s18, 1 1131; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST) 1132; GPRIDX-NEXT: v_mov_b32_e32 v2, v0 1133; GPRIDX-NEXT: v_mov_b32_e32 v3, v1 1134; GPRIDX-NEXT: s_set_gpr_idx_off 1135; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[2:5], off 1136; GPRIDX-NEXT: s_waitcnt vmcnt(0) 1137; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[6:9], off 1138; GPRIDX-NEXT: s_waitcnt vmcnt(0) 1139; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[10:13], off 1140; GPRIDX-NEXT: s_waitcnt vmcnt(0) 1141; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[14:17], off 1142; GPRIDX-NEXT: s_waitcnt vmcnt(0) 1143; GPRIDX-NEXT: s_endpgm 1144; 1145; GFX10-LABEL: dyn_insertelement_v8f64_s_v_s: 1146; GFX10: ; %bb.0: ; %entry 1147; GFX10-NEXT: s_mov_b32 s1, s3 1148; GFX10-NEXT: s_mov_b32 s3, s5 1149; GFX10-NEXT: s_mov_b32 s5, s7 1150; GFX10-NEXT: s_mov_b32 s7, s9 1151; GFX10-NEXT: s_mov_b32 s9, s11 1152; GFX10-NEXT: s_mov_b32 s11, s13 1153; GFX10-NEXT: s_mov_b32 s13, s15 1154; GFX10-NEXT: s_mov_b32 s15, s17 1155; GFX10-NEXT: s_mov_b32 s0, s2 1156; GFX10-NEXT: s_mov_b32 s2, s4 1157; GFX10-NEXT: s_mov_b32 s4, s6 1158; GFX10-NEXT: s_mov_b32 s6, s8 1159; GFX10-NEXT: s_mov_b32 s8, s10 1160; GFX10-NEXT: s_mov_b32 s10, s12 1161; GFX10-NEXT: s_mov_b32 s12, s14 1162; GFX10-NEXT: s_mov_b32 s14, s16 1163; GFX10-NEXT: v_mov_b32_e32 v17, s15 1164; GFX10-NEXT: v_mov_b32_e32 v2, s0 1165; GFX10-NEXT: s_lshl_b32 m0, s18, 1 1166; GFX10-NEXT: v_mov_b32_e32 v16, s14 1167; GFX10-NEXT: v_mov_b32_e32 v15, s13 1168; GFX10-NEXT: v_mov_b32_e32 v14, s12 1169; GFX10-NEXT: v_mov_b32_e32 v13, s11 1170; GFX10-NEXT: v_mov_b32_e32 v12, s10 1171; GFX10-NEXT: v_mov_b32_e32 v11, s9 1172; GFX10-NEXT: v_mov_b32_e32 v10, s8 1173; GFX10-NEXT: v_mov_b32_e32 v9, s7 1174; GFX10-NEXT: v_mov_b32_e32 v8, s6 1175; GFX10-NEXT: v_mov_b32_e32 v7, s5 1176; GFX10-NEXT: v_mov_b32_e32 v6, s4 1177; GFX10-NEXT: v_mov_b32_e32 v5, s3 1178; GFX10-NEXT: v_mov_b32_e32 v4, s2 1179; GFX10-NEXT: v_mov_b32_e32 v3, s1 1180; GFX10-NEXT: v_movreld_b32_e32 v2, v0 1181; GFX10-NEXT: v_movreld_b32_e32 v3, v1 1182; GFX10-NEXT: global_store_dwordx4 v[0:1], v[2:5], off 1183; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1184; GFX10-NEXT: global_store_dwordx4 v[0:1], v[6:9], off 1185; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1186; GFX10-NEXT: global_store_dwordx4 v[0:1], v[10:13], off 1187; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1188; GFX10-NEXT: global_store_dwordx4 v[0:1], v[14:17], off 1189; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1190; GFX10-NEXT: s_endpgm 1191; 1192; GFX11-LABEL: dyn_insertelement_v8f64_s_v_s: 1193; GFX11: ; %bb.0: ; %entry 1194; GFX11-NEXT: s_mov_b32 s1, s3 1195; GFX11-NEXT: s_mov_b32 s3, s5 1196; GFX11-NEXT: s_mov_b32 s5, s7 1197; GFX11-NEXT: s_mov_b32 s7, s9 1198; GFX11-NEXT: s_mov_b32 s9, s11 1199; GFX11-NEXT: s_mov_b32 s11, s13 1200; GFX11-NEXT: s_mov_b32 s13, s15 1201; GFX11-NEXT: s_mov_b32 s15, s17 1202; GFX11-NEXT: s_mov_b32 s0, s2 1203; GFX11-NEXT: s_mov_b32 s2, s4 1204; GFX11-NEXT: s_mov_b32 s4, s6 1205; GFX11-NEXT: s_mov_b32 s6, s8 1206; GFX11-NEXT: s_mov_b32 s8, s10 1207; GFX11-NEXT: s_mov_b32 s10, s12 1208; GFX11-NEXT: s_mov_b32 s12, s14 1209; GFX11-NEXT: s_mov_b32 s14, s16 1210; GFX11-NEXT: v_dual_mov_b32 v17, s15 :: v_dual_mov_b32 v16, s14 1211; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 1212; GFX11-NEXT: s_lshl_b32 m0, s18, 1 1213; GFX11-NEXT: v_dual_mov_b32 v15, s13 :: v_dual_mov_b32 v14, s12 1214; GFX11-NEXT: v_dual_mov_b32 v13, s11 :: v_dual_mov_b32 v12, s10 1215; GFX11-NEXT: v_dual_mov_b32 v11, s9 :: v_dual_mov_b32 v10, s8 1216; GFX11-NEXT: v_dual_mov_b32 v9, s7 :: v_dual_mov_b32 v8, s6 1217; GFX11-NEXT: v_dual_mov_b32 v7, s5 :: v_dual_mov_b32 v6, s4 1218; GFX11-NEXT: v_dual_mov_b32 v5, s3 :: v_dual_mov_b32 v4, s2 1219; GFX11-NEXT: v_movreld_b32_e32 v2, v0 1220; GFX11-NEXT: v_movreld_b32_e32 v3, v1 1221; GFX11-NEXT: global_store_b128 v[0:1], v[2:5], off dlc 1222; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1223; GFX11-NEXT: global_store_b128 v[0:1], v[6:9], off dlc 1224; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1225; GFX11-NEXT: global_store_b128 v[0:1], v[10:13], off dlc 1226; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1227; GFX11-NEXT: global_store_b128 v[0:1], v[14:17], off dlc 1228; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1229; GFX11-NEXT: s_endpgm 1230entry: 1231 %insert = insertelement <8 x double> %vec, double %val, i32 %idx 1232 %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1> 1233 %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3> 1234 %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5> 1235 %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7> 1236 store volatile <2 x double> %vec.0, ptr addrspace(1) undef 1237 store volatile <2 x double> %vec.1, ptr addrspace(1) undef 1238 store volatile <2 x double> %vec.2, ptr addrspace(1) undef 1239 store volatile <2 x double> %vec.3, ptr addrspace(1) undef 1240 ret void 1241} 1242 1243define amdgpu_ps void @dyn_insertelement_v8f64_v_s_s(<8 x double> %vec, double inreg %val, i32 inreg %idx) { 1244; GPRIDX-LABEL: dyn_insertelement_v8f64_v_s_s: 1245; GPRIDX: ; %bb.0: ; %entry 1246; GPRIDX-NEXT: s_lshl_b32 s0, s4, 1 1247; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST) 1248; GPRIDX-NEXT: v_mov_b32_e32 v0, s2 1249; GPRIDX-NEXT: v_mov_b32_e32 v1, s3 1250; GPRIDX-NEXT: s_set_gpr_idx_off 1251; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[0:3], off 1252; GPRIDX-NEXT: s_waitcnt vmcnt(0) 1253; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[4:7], off 1254; GPRIDX-NEXT: s_waitcnt vmcnt(0) 1255; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[8:11], off 1256; GPRIDX-NEXT: s_waitcnt vmcnt(0) 1257; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[12:15], off 1258; GPRIDX-NEXT: s_waitcnt vmcnt(0) 1259; GPRIDX-NEXT: s_endpgm 1260; 1261; GFX10-LABEL: dyn_insertelement_v8f64_v_s_s: 1262; GFX10: ; %bb.0: ; %entry 1263; GFX10-NEXT: s_lshl_b32 m0, s4, 1 1264; GFX10-NEXT: v_movreld_b32_e32 v0, s2 1265; GFX10-NEXT: v_movreld_b32_e32 v1, s3 1266; GFX10-NEXT: global_store_dwordx4 v[0:1], v[0:3], off 1267; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1268; GFX10-NEXT: global_store_dwordx4 v[0:1], v[4:7], off 1269; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1270; GFX10-NEXT: global_store_dwordx4 v[0:1], v[8:11], off 1271; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1272; GFX10-NEXT: global_store_dwordx4 v[0:1], v[12:15], off 1273; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1274; GFX10-NEXT: s_endpgm 1275; 1276; GFX11-LABEL: dyn_insertelement_v8f64_v_s_s: 1277; GFX11: ; %bb.0: ; %entry 1278; GFX11-NEXT: s_lshl_b32 m0, s4, 1 1279; GFX11-NEXT: v_movreld_b32_e32 v0, s2 1280; GFX11-NEXT: v_movreld_b32_e32 v1, s3 1281; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off dlc 1282; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1283; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off dlc 1284; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1285; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off dlc 1286; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1287; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off dlc 1288; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1289; GFX11-NEXT: s_endpgm 1290entry: 1291 %insert = insertelement <8 x double> %vec, double %val, i32 %idx 1292 %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1> 1293 %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3> 1294 %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5> 1295 %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7> 1296 store volatile <2 x double> %vec.0, ptr addrspace(1) undef 1297 store volatile <2 x double> %vec.1, ptr addrspace(1) undef 1298 store volatile <2 x double> %vec.2, ptr addrspace(1) undef 1299 store volatile <2 x double> %vec.3, ptr addrspace(1) undef 1300 ret void 1301} 1302 1303define amdgpu_ps void @dyn_insertelement_v8f64_s_v_v(<8 x double> inreg %vec, double %val, i32 %idx) { 1304; GPRIDX-LABEL: dyn_insertelement_v8f64_s_v_v: 1305; GPRIDX: ; %bb.0: ; %entry 1306; GPRIDX-NEXT: s_mov_b32 s1, s3 1307; GPRIDX-NEXT: s_mov_b32 s3, s5 1308; GPRIDX-NEXT: s_mov_b32 s5, s7 1309; GPRIDX-NEXT: s_mov_b32 s7, s9 1310; GPRIDX-NEXT: s_mov_b32 s9, s11 1311; GPRIDX-NEXT: s_mov_b32 s11, s13 1312; GPRIDX-NEXT: s_mov_b32 s13, s15 1313; GPRIDX-NEXT: s_mov_b32 s15, s17 1314; GPRIDX-NEXT: s_mov_b32 s0, s2 1315; GPRIDX-NEXT: s_mov_b32 s2, s4 1316; GPRIDX-NEXT: s_mov_b32 s4, s6 1317; GPRIDX-NEXT: s_mov_b32 s6, s8 1318; GPRIDX-NEXT: s_mov_b32 s8, s10 1319; GPRIDX-NEXT: s_mov_b32 s10, s12 1320; GPRIDX-NEXT: s_mov_b32 s12, s14 1321; GPRIDX-NEXT: s_mov_b32 s14, s16 1322; GPRIDX-NEXT: v_mov_b32_e32 v18, s15 1323; GPRIDX-NEXT: v_mov_b32_e32 v17, s14 1324; GPRIDX-NEXT: v_mov_b32_e32 v16, s13 1325; GPRIDX-NEXT: v_mov_b32_e32 v15, s12 1326; GPRIDX-NEXT: v_mov_b32_e32 v14, s11 1327; GPRIDX-NEXT: v_mov_b32_e32 v13, s10 1328; GPRIDX-NEXT: v_mov_b32_e32 v12, s9 1329; GPRIDX-NEXT: v_mov_b32_e32 v11, s8 1330; GPRIDX-NEXT: v_mov_b32_e32 v10, s7 1331; GPRIDX-NEXT: v_mov_b32_e32 v9, s6 1332; GPRIDX-NEXT: v_mov_b32_e32 v8, s5 1333; GPRIDX-NEXT: v_mov_b32_e32 v7, s4 1334; GPRIDX-NEXT: v_mov_b32_e32 v6, s3 1335; GPRIDX-NEXT: v_mov_b32_e32 v5, s2 1336; GPRIDX-NEXT: v_mov_b32_e32 v4, s1 1337; GPRIDX-NEXT: v_mov_b32_e32 v3, s0 1338; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 1339; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[12:13], 0, v2 1340; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[0:1], 2, v2 1341; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v2 1342; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[4:5], 4, v2 1343; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[6:7], 5, v2 1344; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[8:9], 6, v2 1345; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[10:11], 7, v2 1346; GPRIDX-NEXT: v_cndmask_b32_e64 v3, v3, v0, s[12:13] 1347; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v0, vcc 1348; GPRIDX-NEXT: v_cndmask_b32_e64 v4, v4, v1, s[12:13] 1349; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v1, vcc 1350; GPRIDX-NEXT: v_cndmask_b32_e64 v7, v7, v0, s[0:1] 1351; GPRIDX-NEXT: v_cndmask_b32_e64 v9, v9, v0, s[2:3] 1352; GPRIDX-NEXT: v_cndmask_b32_e64 v11, v11, v0, s[4:5] 1353; GPRIDX-NEXT: v_cndmask_b32_e64 v13, v13, v0, s[6:7] 1354; GPRIDX-NEXT: v_cndmask_b32_e64 v15, v15, v0, s[8:9] 1355; GPRIDX-NEXT: v_cndmask_b32_e64 v17, v17, v0, s[10:11] 1356; GPRIDX-NEXT: v_cndmask_b32_e64 v8, v8, v1, s[0:1] 1357; GPRIDX-NEXT: v_cndmask_b32_e64 v10, v10, v1, s[2:3] 1358; GPRIDX-NEXT: v_cndmask_b32_e64 v12, v12, v1, s[4:5] 1359; GPRIDX-NEXT: v_cndmask_b32_e64 v14, v14, v1, s[6:7] 1360; GPRIDX-NEXT: v_cndmask_b32_e64 v16, v16, v1, s[8:9] 1361; GPRIDX-NEXT: v_cndmask_b32_e64 v18, v18, v1, s[10:11] 1362; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[3:6], off 1363; GPRIDX-NEXT: s_waitcnt vmcnt(0) 1364; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[7:10], off 1365; GPRIDX-NEXT: s_waitcnt vmcnt(0) 1366; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[11:14], off 1367; GPRIDX-NEXT: s_waitcnt vmcnt(0) 1368; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[15:18], off 1369; GPRIDX-NEXT: s_waitcnt vmcnt(0) 1370; GPRIDX-NEXT: s_endpgm 1371; 1372; GFX10-LABEL: dyn_insertelement_v8f64_s_v_v: 1373; GFX10: ; %bb.0: ; %entry 1374; GFX10-NEXT: s_mov_b32 s1, s3 1375; GFX10-NEXT: s_mov_b32 s3, s5 1376; GFX10-NEXT: s_mov_b32 s5, s7 1377; GFX10-NEXT: s_mov_b32 s7, s9 1378; GFX10-NEXT: s_mov_b32 s9, s11 1379; GFX10-NEXT: s_mov_b32 s11, s13 1380; GFX10-NEXT: s_mov_b32 s13, s15 1381; GFX10-NEXT: s_mov_b32 s15, s17 1382; GFX10-NEXT: s_mov_b32 s0, s2 1383; GFX10-NEXT: s_mov_b32 s2, s4 1384; GFX10-NEXT: s_mov_b32 s4, s6 1385; GFX10-NEXT: s_mov_b32 s6, s8 1386; GFX10-NEXT: s_mov_b32 s8, s10 1387; GFX10-NEXT: s_mov_b32 s10, s12 1388; GFX10-NEXT: s_mov_b32 s12, s14 1389; GFX10-NEXT: s_mov_b32 s14, s16 1390; GFX10-NEXT: v_mov_b32_e32 v18, s15 1391; GFX10-NEXT: v_mov_b32_e32 v4, s1 1392; GFX10-NEXT: v_mov_b32_e32 v3, s0 1393; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v2 1394; GFX10-NEXT: v_mov_b32_e32 v17, s14 1395; GFX10-NEXT: v_mov_b32_e32 v16, s13 1396; GFX10-NEXT: v_mov_b32_e32 v15, s12 1397; GFX10-NEXT: v_mov_b32_e32 v14, s11 1398; GFX10-NEXT: v_mov_b32_e32 v13, s10 1399; GFX10-NEXT: v_mov_b32_e32 v12, s9 1400; GFX10-NEXT: v_mov_b32_e32 v11, s8 1401; GFX10-NEXT: v_mov_b32_e32 v10, s7 1402; GFX10-NEXT: v_mov_b32_e32 v9, s6 1403; GFX10-NEXT: v_mov_b32_e32 v8, s5 1404; GFX10-NEXT: v_mov_b32_e32 v7, s4 1405; GFX10-NEXT: v_mov_b32_e32 v6, s3 1406; GFX10-NEXT: v_mov_b32_e32 v5, s2 1407; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 1, v2 1408; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v0, vcc_lo 1409; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v1, vcc_lo 1410; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v2 1411; GFX10-NEXT: v_cmp_eq_u32_e64 s1, 7, v2 1412; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v0, s0 1413; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, v1, s0 1414; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 3, v2 1415; GFX10-NEXT: v_cndmask_b32_e32 v7, v7, v0, vcc_lo 1416; GFX10-NEXT: v_cndmask_b32_e32 v8, v8, v1, vcc_lo 1417; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v2 1418; GFX10-NEXT: v_cndmask_b32_e64 v17, v17, v0, s1 1419; GFX10-NEXT: v_cndmask_b32_e64 v9, v9, v0, s0 1420; GFX10-NEXT: v_cndmask_b32_e64 v10, v10, v1, s0 1421; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 5, v2 1422; GFX10-NEXT: v_cndmask_b32_e32 v11, v11, v0, vcc_lo 1423; GFX10-NEXT: v_cndmask_b32_e32 v12, v12, v1, vcc_lo 1424; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v2 1425; GFX10-NEXT: v_cndmask_b32_e64 v18, v18, v1, s1 1426; GFX10-NEXT: v_cndmask_b32_e64 v13, v13, v0, s0 1427; GFX10-NEXT: v_cndmask_b32_e64 v14, v14, v1, s0 1428; GFX10-NEXT: v_cndmask_b32_e32 v15, v15, v0, vcc_lo 1429; GFX10-NEXT: v_cndmask_b32_e32 v16, v16, v1, vcc_lo 1430; GFX10-NEXT: global_store_dwordx4 v[0:1], v[3:6], off 1431; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1432; GFX10-NEXT: global_store_dwordx4 v[0:1], v[7:10], off 1433; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1434; GFX10-NEXT: global_store_dwordx4 v[0:1], v[11:14], off 1435; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1436; GFX10-NEXT: global_store_dwordx4 v[0:1], v[15:18], off 1437; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1438; GFX10-NEXT: s_endpgm 1439; 1440; GFX11-LABEL: dyn_insertelement_v8f64_s_v_v: 1441; GFX11: ; %bb.0: ; %entry 1442; GFX11-NEXT: s_mov_b32 s1, s3 1443; GFX11-NEXT: s_mov_b32 s3, s5 1444; GFX11-NEXT: s_mov_b32 s5, s7 1445; GFX11-NEXT: s_mov_b32 s7, s9 1446; GFX11-NEXT: s_mov_b32 s9, s11 1447; GFX11-NEXT: s_mov_b32 s11, s13 1448; GFX11-NEXT: s_mov_b32 s13, s15 1449; GFX11-NEXT: s_mov_b32 s15, s17 1450; GFX11-NEXT: s_mov_b32 s0, s2 1451; GFX11-NEXT: s_mov_b32 s2, s4 1452; GFX11-NEXT: s_mov_b32 s4, s6 1453; GFX11-NEXT: s_mov_b32 s6, s8 1454; GFX11-NEXT: s_mov_b32 s8, s10 1455; GFX11-NEXT: s_mov_b32 s10, s12 1456; GFX11-NEXT: s_mov_b32 s12, s14 1457; GFX11-NEXT: s_mov_b32 s14, s16 1458; GFX11-NEXT: v_dual_mov_b32 v18, s15 :: v_dual_mov_b32 v17, s14 1459; GFX11-NEXT: v_dual_mov_b32 v4, s1 :: v_dual_mov_b32 v3, s0 1460; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v2 1461; GFX11-NEXT: v_dual_mov_b32 v16, s13 :: v_dual_mov_b32 v15, s12 1462; GFX11-NEXT: v_dual_mov_b32 v14, s11 :: v_dual_mov_b32 v13, s10 1463; GFX11-NEXT: v_dual_mov_b32 v12, s9 :: v_dual_mov_b32 v11, s8 1464; GFX11-NEXT: v_dual_mov_b32 v10, s7 :: v_dual_mov_b32 v9, s6 1465; GFX11-NEXT: v_dual_mov_b32 v8, s5 :: v_dual_mov_b32 v7, s4 1466; GFX11-NEXT: v_dual_mov_b32 v6, s3 :: v_dual_mov_b32 v5, s2 1467; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 1, v2 1468; GFX11-NEXT: v_dual_cndmask_b32 v3, v3, v0 :: v_dual_cndmask_b32 v4, v4, v1 1469; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v2 1470; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 7, v2 1471; GFX11-NEXT: v_cndmask_b32_e64 v5, v5, v0, s0 1472; GFX11-NEXT: v_cndmask_b32_e64 v6, v6, v1, s0 1473; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 3, v2 1474; GFX11-NEXT: v_dual_cndmask_b32 v7, v7, v0 :: v_dual_cndmask_b32 v8, v8, v1 1475; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v2 1476; GFX11-NEXT: v_cndmask_b32_e64 v17, v17, v0, s1 1477; GFX11-NEXT: v_cndmask_b32_e64 v9, v9, v0, s0 1478; GFX11-NEXT: v_cndmask_b32_e64 v10, v10, v1, s0 1479; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 5, v2 1480; GFX11-NEXT: v_dual_cndmask_b32 v11, v11, v0 :: v_dual_cndmask_b32 v12, v12, v1 1481; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v2 1482; GFX11-NEXT: v_cndmask_b32_e64 v18, v18, v1, s1 1483; GFX11-NEXT: v_cndmask_b32_e64 v13, v13, v0, s0 1484; GFX11-NEXT: v_cndmask_b32_e64 v14, v14, v1, s0 1485; GFX11-NEXT: v_dual_cndmask_b32 v15, v15, v0 :: v_dual_cndmask_b32 v16, v16, v1 1486; GFX11-NEXT: global_store_b128 v[0:1], v[3:6], off dlc 1487; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1488; GFX11-NEXT: global_store_b128 v[0:1], v[7:10], off dlc 1489; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1490; GFX11-NEXT: global_store_b128 v[0:1], v[11:14], off dlc 1491; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1492; GFX11-NEXT: global_store_b128 v[0:1], v[15:18], off dlc 1493; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1494; GFX11-NEXT: s_endpgm 1495entry: 1496 %insert = insertelement <8 x double> %vec, double %val, i32 %idx 1497 %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1> 1498 %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3> 1499 %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5> 1500 %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7> 1501 store volatile <2 x double> %vec.0, ptr addrspace(1) undef 1502 store volatile <2 x double> %vec.1, ptr addrspace(1) undef 1503 store volatile <2 x double> %vec.2, ptr addrspace(1) undef 1504 store volatile <2 x double> %vec.3, ptr addrspace(1) undef 1505 ret void 1506} 1507 1508define amdgpu_ps void @dyn_insertelement_v8f64_v_s_v(<8 x double> %vec, double inreg %val, i32 %idx) { 1509; GPRIDX-LABEL: dyn_insertelement_v8f64_v_s_v: 1510; GPRIDX: ; %bb.0: ; %entry 1511; GPRIDX-NEXT: v_mov_b32_e32 v17, s2 1512; GPRIDX-NEXT: v_mov_b32_e32 v18, s3 1513; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v16 1514; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v17, vcc 1515; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v18, vcc 1516; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v16 1517; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v17, vcc 1518; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v18, vcc 1519; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v16 1520; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v17, vcc 1521; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v18, vcc 1522; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v16 1523; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v17, vcc 1524; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v7, v18, vcc 1525; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v16 1526; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v8, v17, vcc 1527; GPRIDX-NEXT: v_cndmask_b32_e32 v9, v9, v18, vcc 1528; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v16 1529; GPRIDX-NEXT: v_cndmask_b32_e32 v10, v10, v17, vcc 1530; GPRIDX-NEXT: v_cndmask_b32_e32 v11, v11, v18, vcc 1531; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v16 1532; GPRIDX-NEXT: v_cndmask_b32_e32 v12, v12, v17, vcc 1533; GPRIDX-NEXT: v_cndmask_b32_e32 v13, v13, v18, vcc 1534; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v16 1535; GPRIDX-NEXT: v_cndmask_b32_e32 v14, v14, v17, vcc 1536; GPRIDX-NEXT: v_cndmask_b32_e32 v15, v15, v18, vcc 1537; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[0:3], off 1538; GPRIDX-NEXT: s_waitcnt vmcnt(0) 1539; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[4:7], off 1540; GPRIDX-NEXT: s_waitcnt vmcnt(0) 1541; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[8:11], off 1542; GPRIDX-NEXT: s_waitcnt vmcnt(0) 1543; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[12:15], off 1544; GPRIDX-NEXT: s_waitcnt vmcnt(0) 1545; GPRIDX-NEXT: s_endpgm 1546; 1547; GFX10-LABEL: dyn_insertelement_v8f64_v_s_v: 1548; GFX10: ; %bb.0: ; %entry 1549; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v16 1550; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 1, v16 1551; GFX10-NEXT: v_cmp_eq_u32_e64 s1, 7, v16 1552; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc_lo 1553; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc_lo 1554; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v16 1555; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s2, s0 1556; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, s3, s0 1557; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 3, v16 1558; GFX10-NEXT: v_cndmask_b32_e64 v14, v14, s2, s1 1559; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, s2, vcc_lo 1560; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, s3, vcc_lo 1561; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v16 1562; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, s2, s0 1563; GFX10-NEXT: v_cndmask_b32_e64 v7, v7, s3, s0 1564; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 5, v16 1565; GFX10-NEXT: v_cndmask_b32_e64 v15, v15, s3, s1 1566; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, s2, vcc_lo 1567; GFX10-NEXT: v_cndmask_b32_e64 v9, v9, s3, vcc_lo 1568; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v16 1569; GFX10-NEXT: v_cndmask_b32_e64 v10, v10, s2, s0 1570; GFX10-NEXT: v_cndmask_b32_e64 v11, v11, s3, s0 1571; GFX10-NEXT: v_cndmask_b32_e64 v12, v12, s2, vcc_lo 1572; GFX10-NEXT: v_cndmask_b32_e64 v13, v13, s3, vcc_lo 1573; GFX10-NEXT: global_store_dwordx4 v[0:1], v[0:3], off 1574; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1575; GFX10-NEXT: global_store_dwordx4 v[0:1], v[4:7], off 1576; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1577; GFX10-NEXT: global_store_dwordx4 v[0:1], v[8:11], off 1578; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1579; GFX10-NEXT: global_store_dwordx4 v[0:1], v[12:15], off 1580; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1581; GFX10-NEXT: s_endpgm 1582; 1583; GFX11-LABEL: dyn_insertelement_v8f64_v_s_v: 1584; GFX11: ; %bb.0: ; %entry 1585; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v16 1586; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 1, v16 1587; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 7, v16 1588; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc_lo 1589; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc_lo 1590; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v16 1591; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s2, s0 1592; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, s3, s0 1593; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 3, v16 1594; GFX11-NEXT: v_cndmask_b32_e64 v14, v14, s2, s1 1595; GFX11-NEXT: v_cndmask_b32_e64 v4, v4, s2, vcc_lo 1596; GFX11-NEXT: v_cndmask_b32_e64 v5, v5, s3, vcc_lo 1597; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v16 1598; GFX11-NEXT: v_cndmask_b32_e64 v6, v6, s2, s0 1599; GFX11-NEXT: v_cndmask_b32_e64 v7, v7, s3, s0 1600; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 5, v16 1601; GFX11-NEXT: v_cndmask_b32_e64 v15, v15, s3, s1 1602; GFX11-NEXT: v_cndmask_b32_e64 v8, v8, s2, vcc_lo 1603; GFX11-NEXT: v_cndmask_b32_e64 v9, v9, s3, vcc_lo 1604; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v16 1605; GFX11-NEXT: v_cndmask_b32_e64 v10, v10, s2, s0 1606; GFX11-NEXT: v_cndmask_b32_e64 v11, v11, s3, s0 1607; GFX11-NEXT: v_cndmask_b32_e64 v12, v12, s2, vcc_lo 1608; GFX11-NEXT: v_cndmask_b32_e64 v13, v13, s3, vcc_lo 1609; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off dlc 1610; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1611; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off dlc 1612; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1613; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off dlc 1614; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1615; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off dlc 1616; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1617; GFX11-NEXT: s_endpgm 1618entry: 1619 %insert = insertelement <8 x double> %vec, double %val, i32 %idx 1620 %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1> 1621 %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3> 1622 %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5> 1623 %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7> 1624 store volatile <2 x double> %vec.0, ptr addrspace(1) undef 1625 store volatile <2 x double> %vec.1, ptr addrspace(1) undef 1626 store volatile <2 x double> %vec.2, ptr addrspace(1) undef 1627 store volatile <2 x double> %vec.3, ptr addrspace(1) undef 1628 ret void 1629} 1630 1631define amdgpu_ps void @dyn_insertelement_v8f64_v_v_s(<8 x double> %vec, double %val, i32 inreg %idx) { 1632; GPRIDX-LABEL: dyn_insertelement_v8f64_v_v_s: 1633; GPRIDX: ; %bb.0: ; %entry 1634; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1 1635; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST) 1636; GPRIDX-NEXT: v_mov_b32_e32 v0, v16 1637; GPRIDX-NEXT: v_mov_b32_e32 v1, v17 1638; GPRIDX-NEXT: s_set_gpr_idx_off 1639; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[0:3], off 1640; GPRIDX-NEXT: s_waitcnt vmcnt(0) 1641; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[4:7], off 1642; GPRIDX-NEXT: s_waitcnt vmcnt(0) 1643; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[8:11], off 1644; GPRIDX-NEXT: s_waitcnt vmcnt(0) 1645; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[12:15], off 1646; GPRIDX-NEXT: s_waitcnt vmcnt(0) 1647; GPRIDX-NEXT: s_endpgm 1648; 1649; GFX10-LABEL: dyn_insertelement_v8f64_v_v_s: 1650; GFX10: ; %bb.0: ; %entry 1651; GFX10-NEXT: s_lshl_b32 m0, s2, 1 1652; GFX10-NEXT: v_movreld_b32_e32 v0, v16 1653; GFX10-NEXT: v_movreld_b32_e32 v1, v17 1654; GFX10-NEXT: global_store_dwordx4 v[0:1], v[0:3], off 1655; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1656; GFX10-NEXT: global_store_dwordx4 v[0:1], v[4:7], off 1657; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1658; GFX10-NEXT: global_store_dwordx4 v[0:1], v[8:11], off 1659; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1660; GFX10-NEXT: global_store_dwordx4 v[0:1], v[12:15], off 1661; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1662; GFX10-NEXT: s_endpgm 1663; 1664; GFX11-LABEL: dyn_insertelement_v8f64_v_v_s: 1665; GFX11: ; %bb.0: ; %entry 1666; GFX11-NEXT: s_lshl_b32 m0, s2, 1 1667; GFX11-NEXT: v_movreld_b32_e32 v0, v16 1668; GFX11-NEXT: v_movreld_b32_e32 v1, v17 1669; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off dlc 1670; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1671; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off dlc 1672; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1673; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off dlc 1674; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1675; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off dlc 1676; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1677; GFX11-NEXT: s_endpgm 1678entry: 1679 %insert = insertelement <8 x double> %vec, double %val, i32 %idx 1680 %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1> 1681 %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3> 1682 %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5> 1683 %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7> 1684 store volatile <2 x double> %vec.0, ptr addrspace(1) undef 1685 store volatile <2 x double> %vec.1, ptr addrspace(1) undef 1686 store volatile <2 x double> %vec.2, ptr addrspace(1) undef 1687 store volatile <2 x double> %vec.3, ptr addrspace(1) undef 1688 ret void 1689} 1690 1691define amdgpu_ps void @dyn_insertelement_v8f64_v_v_v(<8 x double> %vec, double %val, i32 %idx) { 1692; GPRIDX-LABEL: dyn_insertelement_v8f64_v_v_v: 1693; GPRIDX: ; %bb.0: ; %entry 1694; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v18 1695; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc 1696; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc 1697; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v18 1698; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v16, vcc 1699; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v17, vcc 1700; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v18 1701; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v16, vcc 1702; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v17, vcc 1703; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v18 1704; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v16, vcc 1705; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v7, v17, vcc 1706; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v18 1707; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v8, v16, vcc 1708; GPRIDX-NEXT: v_cndmask_b32_e32 v9, v9, v17, vcc 1709; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v18 1710; GPRIDX-NEXT: v_cndmask_b32_e32 v10, v10, v16, vcc 1711; GPRIDX-NEXT: v_cndmask_b32_e32 v11, v11, v17, vcc 1712; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v18 1713; GPRIDX-NEXT: v_cndmask_b32_e32 v12, v12, v16, vcc 1714; GPRIDX-NEXT: v_cndmask_b32_e32 v13, v13, v17, vcc 1715; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v18 1716; GPRIDX-NEXT: v_cndmask_b32_e32 v14, v14, v16, vcc 1717; GPRIDX-NEXT: v_cndmask_b32_e32 v15, v15, v17, vcc 1718; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[0:3], off 1719; GPRIDX-NEXT: s_waitcnt vmcnt(0) 1720; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[4:7], off 1721; GPRIDX-NEXT: s_waitcnt vmcnt(0) 1722; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[8:11], off 1723; GPRIDX-NEXT: s_waitcnt vmcnt(0) 1724; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[12:15], off 1725; GPRIDX-NEXT: s_waitcnt vmcnt(0) 1726; GPRIDX-NEXT: s_endpgm 1727; 1728; GFX10-LABEL: dyn_insertelement_v8f64_v_v_v: 1729; GFX10: ; %bb.0: ; %entry 1730; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v18 1731; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 1, v18 1732; GFX10-NEXT: v_cmp_eq_u32_e64 s1, 7, v18 1733; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc_lo 1734; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc_lo 1735; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v18 1736; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v16, s0 1737; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v17, s0 1738; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 3, v18 1739; GFX10-NEXT: v_cndmask_b32_e64 v14, v14, v16, s1 1740; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v16, vcc_lo 1741; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v17, vcc_lo 1742; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v18 1743; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, v16, s0 1744; GFX10-NEXT: v_cndmask_b32_e64 v7, v7, v17, s0 1745; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 5, v18 1746; GFX10-NEXT: v_cndmask_b32_e64 v15, v15, v17, s1 1747; GFX10-NEXT: v_cndmask_b32_e32 v8, v8, v16, vcc_lo 1748; GFX10-NEXT: v_cndmask_b32_e32 v9, v9, v17, vcc_lo 1749; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v18 1750; GFX10-NEXT: v_cndmask_b32_e64 v10, v10, v16, s0 1751; GFX10-NEXT: v_cndmask_b32_e64 v11, v11, v17, s0 1752; GFX10-NEXT: v_cndmask_b32_e32 v12, v12, v16, vcc_lo 1753; GFX10-NEXT: v_cndmask_b32_e32 v13, v13, v17, vcc_lo 1754; GFX10-NEXT: global_store_dwordx4 v[0:1], v[0:3], off 1755; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1756; GFX10-NEXT: global_store_dwordx4 v[0:1], v[4:7], off 1757; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1758; GFX10-NEXT: global_store_dwordx4 v[0:1], v[8:11], off 1759; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1760; GFX10-NEXT: global_store_dwordx4 v[0:1], v[12:15], off 1761; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1762; GFX10-NEXT: s_endpgm 1763; 1764; GFX11-LABEL: dyn_insertelement_v8f64_v_v_v: 1765; GFX11: ; %bb.0: ; %entry 1766; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v18 1767; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 1, v18 1768; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 7, v18 1769; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v16 :: v_dual_cndmask_b32 v1, v1, v17 1770; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v18 1771; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v16, s0 1772; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v17, s0 1773; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 3, v18 1774; GFX11-NEXT: v_cndmask_b32_e64 v14, v14, v16, s1 1775; GFX11-NEXT: v_dual_cndmask_b32 v4, v4, v16 :: v_dual_cndmask_b32 v5, v5, v17 1776; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v18 1777; GFX11-NEXT: v_cndmask_b32_e64 v6, v6, v16, s0 1778; GFX11-NEXT: v_cndmask_b32_e64 v7, v7, v17, s0 1779; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 5, v18 1780; GFX11-NEXT: v_cndmask_b32_e64 v15, v15, v17, s1 1781; GFX11-NEXT: v_dual_cndmask_b32 v8, v8, v16 :: v_dual_cndmask_b32 v9, v9, v17 1782; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v18 1783; GFX11-NEXT: v_cndmask_b32_e64 v10, v10, v16, s0 1784; GFX11-NEXT: v_cndmask_b32_e64 v11, v11, v17, s0 1785; GFX11-NEXT: v_dual_cndmask_b32 v12, v12, v16 :: v_dual_cndmask_b32 v13, v13, v17 1786; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off dlc 1787; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1788; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off dlc 1789; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1790; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off dlc 1791; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1792; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off dlc 1793; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1794; GFX11-NEXT: s_endpgm 1795entry: 1796 %insert = insertelement <8 x double> %vec, double %val, i32 %idx 1797 %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1> 1798 %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3> 1799 %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5> 1800 %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7> 1801 store volatile <2 x double> %vec.0, ptr addrspace(1) undef 1802 store volatile <2 x double> %vec.1, ptr addrspace(1) undef 1803 store volatile <2 x double> %vec.2, ptr addrspace(1) undef 1804 store volatile <2 x double> %vec.3, ptr addrspace(1) undef 1805 ret void 1806} 1807 1808define amdgpu_ps <3 x i32> @dyn_insertelement_v3i32_s_s_s(<3 x i32> inreg %vec, i32 inreg %val, i32 inreg %idx) { 1809; GPRIDX-LABEL: dyn_insertelement_v3i32_s_s_s: 1810; GPRIDX: ; %bb.0: ; %entry 1811; GPRIDX-NEXT: s_cmp_eq_u32 s6, 0 1812; GPRIDX-NEXT: s_cselect_b32 s0, s5, s2 1813; GPRIDX-NEXT: s_cmp_eq_u32 s6, 1 1814; GPRIDX-NEXT: s_cselect_b32 s1, s5, s3 1815; GPRIDX-NEXT: s_cmp_eq_u32 s6, 2 1816; GPRIDX-NEXT: s_cselect_b32 s2, s5, s4 1817; GPRIDX-NEXT: ; return to shader part epilog 1818; 1819; GFX10PLUS-LABEL: dyn_insertelement_v3i32_s_s_s: 1820; GFX10PLUS: ; %bb.0: ; %entry 1821; GFX10PLUS-NEXT: s_cmp_eq_u32 s6, 0 1822; GFX10PLUS-NEXT: s_cselect_b32 s0, s5, s2 1823; GFX10PLUS-NEXT: s_cmp_eq_u32 s6, 1 1824; GFX10PLUS-NEXT: s_cselect_b32 s1, s5, s3 1825; GFX10PLUS-NEXT: s_cmp_eq_u32 s6, 2 1826; GFX10PLUS-NEXT: s_cselect_b32 s2, s5, s4 1827; GFX10PLUS-NEXT: ; return to shader part epilog 1828entry: 1829 %insert = insertelement <3 x i32> %vec, i32 %val, i32 %idx 1830 ret <3 x i32> %insert 1831} 1832 1833define amdgpu_ps <3 x float> @dyn_insertelement_v3i32_v_v_s(<3 x float> %vec, float %val, i32 inreg %idx) { 1834; GPRIDX-LABEL: dyn_insertelement_v3i32_v_v_s: 1835; GPRIDX: ; %bb.0: ; %entry 1836; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 0 1837; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 1838; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 1 1839; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 1840; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 2 1841; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 1842; GPRIDX-NEXT: ; return to shader part epilog 1843; 1844; GFX10PLUS-LABEL: dyn_insertelement_v3i32_v_v_s: 1845; GFX10PLUS: ; %bb.0: ; %entry 1846; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 0 1847; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo 1848; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 1 1849; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo 1850; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 2 1851; GFX10PLUS-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo 1852; GFX10PLUS-NEXT: ; return to shader part epilog 1853entry: 1854 %insert = insertelement <3 x float> %vec, float %val, i32 %idx 1855 ret <3 x float> %insert 1856} 1857 1858define amdgpu_ps <5 x i32> @dyn_insertelement_v5i32_s_s_s(<5 x i32> inreg %vec, i32 inreg %val, i32 inreg %idx) { 1859; GPRIDX-LABEL: dyn_insertelement_v5i32_s_s_s: 1860; GPRIDX: ; %bb.0: ; %entry 1861; GPRIDX-NEXT: s_cmp_eq_u32 s8, 0 1862; GPRIDX-NEXT: s_cselect_b32 s0, s7, s2 1863; GPRIDX-NEXT: s_cmp_eq_u32 s8, 1 1864; GPRIDX-NEXT: s_cselect_b32 s1, s7, s3 1865; GPRIDX-NEXT: s_cmp_eq_u32 s8, 2 1866; GPRIDX-NEXT: s_cselect_b32 s2, s7, s4 1867; GPRIDX-NEXT: s_cmp_eq_u32 s8, 3 1868; GPRIDX-NEXT: s_cselect_b32 s3, s7, s5 1869; GPRIDX-NEXT: s_cmp_eq_u32 s8, 4 1870; GPRIDX-NEXT: s_cselect_b32 s4, s7, s6 1871; GPRIDX-NEXT: ; return to shader part epilog 1872; 1873; GFX10PLUS-LABEL: dyn_insertelement_v5i32_s_s_s: 1874; GFX10PLUS: ; %bb.0: ; %entry 1875; GFX10PLUS-NEXT: s_cmp_eq_u32 s8, 0 1876; GFX10PLUS-NEXT: s_cselect_b32 s0, s7, s2 1877; GFX10PLUS-NEXT: s_cmp_eq_u32 s8, 1 1878; GFX10PLUS-NEXT: s_cselect_b32 s1, s7, s3 1879; GFX10PLUS-NEXT: s_cmp_eq_u32 s8, 2 1880; GFX10PLUS-NEXT: s_cselect_b32 s2, s7, s4 1881; GFX10PLUS-NEXT: s_cmp_eq_u32 s8, 3 1882; GFX10PLUS-NEXT: s_cselect_b32 s3, s7, s5 1883; GFX10PLUS-NEXT: s_cmp_eq_u32 s8, 4 1884; GFX10PLUS-NEXT: s_cselect_b32 s4, s7, s6 1885; GFX10PLUS-NEXT: ; return to shader part epilog 1886entry: 1887 %insert = insertelement <5 x i32> %vec, i32 %val, i32 %idx 1888 ret <5 x i32> %insert 1889} 1890 1891define amdgpu_ps <5 x float> @dyn_insertelement_v5i32_v_v_s(<5 x float> %vec, float %val, i32 inreg %idx) { 1892; GPRIDX-LABEL: dyn_insertelement_v5i32_v_v_s: 1893; GPRIDX: ; %bb.0: ; %entry 1894; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 0 1895; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc 1896; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 1 1897; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 1898; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 2 1899; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc 1900; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 3 1901; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc 1902; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 4 1903; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc 1904; GPRIDX-NEXT: ; return to shader part epilog 1905; 1906; GFX10PLUS-LABEL: dyn_insertelement_v5i32_v_v_s: 1907; GFX10PLUS: ; %bb.0: ; %entry 1908; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 0 1909; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo 1910; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 1 1911; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo 1912; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 2 1913; GFX10PLUS-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc_lo 1914; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 3 1915; GFX10PLUS-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc_lo 1916; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 4 1917; GFX10PLUS-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo 1918; GFX10PLUS-NEXT: ; return to shader part epilog 1919entry: 1920 %insert = insertelement <5 x float> %vec, float %val, i32 %idx 1921 ret <5 x float> %insert 1922} 1923 1924define amdgpu_ps <32 x i32> @dyn_insertelement_v32i32_s_s_s(<32 x i32> inreg %vec, i32 inreg %val, i32 inreg %idx) { 1925; GPRIDX-LABEL: dyn_insertelement_v32i32_s_s_s: 1926; GPRIDX: ; %bb.0: ; %entry 1927; GPRIDX-NEXT: s_mov_b32 s0, s2 1928; GPRIDX-NEXT: s_mov_b32 s1, s3 1929; GPRIDX-NEXT: s_mov_b32 s2, s4 1930; GPRIDX-NEXT: s_mov_b32 s3, s5 1931; GPRIDX-NEXT: s_mov_b32 s4, s6 1932; GPRIDX-NEXT: s_mov_b32 s5, s7 1933; GPRIDX-NEXT: s_mov_b32 s6, s8 1934; GPRIDX-NEXT: s_mov_b32 s7, s9 1935; GPRIDX-NEXT: s_mov_b32 s8, s10 1936; GPRIDX-NEXT: s_mov_b32 s9, s11 1937; GPRIDX-NEXT: s_mov_b32 s10, s12 1938; GPRIDX-NEXT: s_mov_b32 s11, s13 1939; GPRIDX-NEXT: s_mov_b32 s12, s14 1940; GPRIDX-NEXT: s_mov_b32 s13, s15 1941; GPRIDX-NEXT: s_mov_b32 s14, s16 1942; GPRIDX-NEXT: s_mov_b32 s15, s17 1943; GPRIDX-NEXT: s_mov_b32 s16, s18 1944; GPRIDX-NEXT: s_mov_b32 s17, s19 1945; GPRIDX-NEXT: s_mov_b32 s18, s20 1946; GPRIDX-NEXT: s_mov_b32 s19, s21 1947; GPRIDX-NEXT: s_mov_b32 s20, s22 1948; GPRIDX-NEXT: s_mov_b32 s21, s23 1949; GPRIDX-NEXT: s_mov_b32 s22, s24 1950; GPRIDX-NEXT: s_mov_b32 s23, s25 1951; GPRIDX-NEXT: s_mov_b32 s24, s26 1952; GPRIDX-NEXT: s_mov_b32 s25, s27 1953; GPRIDX-NEXT: s_mov_b32 s26, s28 1954; GPRIDX-NEXT: s_mov_b32 s27, s29 1955; GPRIDX-NEXT: s_mov_b32 s28, s30 1956; GPRIDX-NEXT: s_mov_b32 s29, s31 1957; GPRIDX-NEXT: s_mov_b32 s31, s33 1958; GPRIDX-NEXT: s_mov_b32 s30, s32 1959; GPRIDX-NEXT: s_mov_b32 m0, s35 1960; GPRIDX-NEXT: s_nop 0 1961; GPRIDX-NEXT: s_movreld_b32 s0, s34 1962; GPRIDX-NEXT: ; return to shader part epilog 1963; 1964; GFX10PLUS-LABEL: dyn_insertelement_v32i32_s_s_s: 1965; GFX10PLUS: ; %bb.0: ; %entry 1966; GFX10PLUS-NEXT: s_mov_b32 s0, s2 1967; GFX10PLUS-NEXT: s_mov_b32 m0, s35 1968; GFX10PLUS-NEXT: s_mov_b32 s1, s3 1969; GFX10PLUS-NEXT: s_mov_b32 s2, s4 1970; GFX10PLUS-NEXT: s_mov_b32 s3, s5 1971; GFX10PLUS-NEXT: s_mov_b32 s4, s6 1972; GFX10PLUS-NEXT: s_mov_b32 s5, s7 1973; GFX10PLUS-NEXT: s_mov_b32 s6, s8 1974; GFX10PLUS-NEXT: s_mov_b32 s7, s9 1975; GFX10PLUS-NEXT: s_mov_b32 s8, s10 1976; GFX10PLUS-NEXT: s_mov_b32 s9, s11 1977; GFX10PLUS-NEXT: s_mov_b32 s10, s12 1978; GFX10PLUS-NEXT: s_mov_b32 s11, s13 1979; GFX10PLUS-NEXT: s_mov_b32 s12, s14 1980; GFX10PLUS-NEXT: s_mov_b32 s13, s15 1981; GFX10PLUS-NEXT: s_mov_b32 s14, s16 1982; GFX10PLUS-NEXT: s_mov_b32 s15, s17 1983; GFX10PLUS-NEXT: s_mov_b32 s16, s18 1984; GFX10PLUS-NEXT: s_mov_b32 s17, s19 1985; GFX10PLUS-NEXT: s_mov_b32 s18, s20 1986; GFX10PLUS-NEXT: s_mov_b32 s19, s21 1987; GFX10PLUS-NEXT: s_mov_b32 s20, s22 1988; GFX10PLUS-NEXT: s_mov_b32 s21, s23 1989; GFX10PLUS-NEXT: s_mov_b32 s22, s24 1990; GFX10PLUS-NEXT: s_mov_b32 s23, s25 1991; GFX10PLUS-NEXT: s_mov_b32 s24, s26 1992; GFX10PLUS-NEXT: s_mov_b32 s25, s27 1993; GFX10PLUS-NEXT: s_mov_b32 s26, s28 1994; GFX10PLUS-NEXT: s_mov_b32 s27, s29 1995; GFX10PLUS-NEXT: s_mov_b32 s28, s30 1996; GFX10PLUS-NEXT: s_mov_b32 s29, s31 1997; GFX10PLUS-NEXT: s_mov_b32 s31, s33 1998; GFX10PLUS-NEXT: s_mov_b32 s30, s32 1999; GFX10PLUS-NEXT: s_movreld_b32 s0, s34 2000; GFX10PLUS-NEXT: ; return to shader part epilog 2001entry: 2002 %insert = insertelement <32 x i32> %vec, i32 %val, i32 %idx 2003 ret <32 x i32> %insert 2004} 2005 2006define amdgpu_ps <32 x float> @dyn_insertelement_v32i32_v_v_s(<32 x float> %vec, float %val, i32 inreg %idx) { 2007; GPRIDX-LABEL: dyn_insertelement_v32i32_v_v_s: 2008; GPRIDX: ; %bb.0: ; %entry 2009; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(DST) 2010; GPRIDX-NEXT: v_mov_b32_e32 v0, v32 2011; GPRIDX-NEXT: s_set_gpr_idx_off 2012; GPRIDX-NEXT: ; return to shader part epilog 2013; 2014; GFX10PLUS-LABEL: dyn_insertelement_v32i32_v_v_s: 2015; GFX10PLUS: ; %bb.0: ; %entry 2016; GFX10PLUS-NEXT: s_mov_b32 m0, s2 2017; GFX10PLUS-NEXT: v_movreld_b32_e32 v0, v32 2018; GFX10PLUS-NEXT: ; return to shader part epilog 2019entry: 2020 %insert = insertelement <32 x float> %vec, float %val, i32 %idx 2021 ret <32 x float> %insert 2022} 2023 2024define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_s_s_s_add_1(<8 x float> inreg %vec, float inreg %val, i32 inreg %idx) { 2025; GPRIDX-LABEL: dyn_insertelement_v8f32_s_s_s_add_1: 2026; GPRIDX: ; %bb.0: ; %entry 2027; GPRIDX-NEXT: s_add_i32 s11, s11, 1 2028; GPRIDX-NEXT: s_cmp_eq_u32 s11, 0 2029; GPRIDX-NEXT: s_cselect_b32 s0, s10, s2 2030; GPRIDX-NEXT: s_cmp_eq_u32 s11, 1 2031; GPRIDX-NEXT: s_cselect_b32 s1, s10, s3 2032; GPRIDX-NEXT: s_cmp_eq_u32 s11, 2 2033; GPRIDX-NEXT: s_cselect_b32 s2, s10, s4 2034; GPRIDX-NEXT: s_cmp_eq_u32 s11, 3 2035; GPRIDX-NEXT: s_cselect_b32 s3, s10, s5 2036; GPRIDX-NEXT: s_cmp_eq_u32 s11, 4 2037; GPRIDX-NEXT: s_cselect_b32 s4, s10, s6 2038; GPRIDX-NEXT: s_cmp_eq_u32 s11, 5 2039; GPRIDX-NEXT: s_cselect_b32 s5, s10, s7 2040; GPRIDX-NEXT: s_cmp_eq_u32 s11, 6 2041; GPRIDX-NEXT: s_cselect_b32 s6, s10, s8 2042; GPRIDX-NEXT: s_cmp_eq_u32 s11, 7 2043; GPRIDX-NEXT: s_cselect_b32 s7, s10, s9 2044; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 2045; GPRIDX-NEXT: v_mov_b32_e32 v1, s1 2046; GPRIDX-NEXT: v_mov_b32_e32 v2, s2 2047; GPRIDX-NEXT: v_mov_b32_e32 v3, s3 2048; GPRIDX-NEXT: v_mov_b32_e32 v4, s4 2049; GPRIDX-NEXT: v_mov_b32_e32 v5, s5 2050; GPRIDX-NEXT: v_mov_b32_e32 v6, s6 2051; GPRIDX-NEXT: v_mov_b32_e32 v7, s7 2052; GPRIDX-NEXT: ; return to shader part epilog 2053; 2054; GFX10-LABEL: dyn_insertelement_v8f32_s_s_s_add_1: 2055; GFX10: ; %bb.0: ; %entry 2056; GFX10-NEXT: s_mov_b32 s1, s3 2057; GFX10-NEXT: s_mov_b32 m0, s11 2058; GFX10-NEXT: s_mov_b32 s0, s2 2059; GFX10-NEXT: s_mov_b32 s2, s4 2060; GFX10-NEXT: s_mov_b32 s3, s5 2061; GFX10-NEXT: s_mov_b32 s4, s6 2062; GFX10-NEXT: s_mov_b32 s5, s7 2063; GFX10-NEXT: s_mov_b32 s6, s8 2064; GFX10-NEXT: s_mov_b32 s7, s9 2065; GFX10-NEXT: s_movreld_b32 s1, s10 2066; GFX10-NEXT: v_mov_b32_e32 v0, s0 2067; GFX10-NEXT: v_mov_b32_e32 v1, s1 2068; GFX10-NEXT: v_mov_b32_e32 v2, s2 2069; GFX10-NEXT: v_mov_b32_e32 v3, s3 2070; GFX10-NEXT: v_mov_b32_e32 v4, s4 2071; GFX10-NEXT: v_mov_b32_e32 v5, s5 2072; GFX10-NEXT: v_mov_b32_e32 v6, s6 2073; GFX10-NEXT: v_mov_b32_e32 v7, s7 2074; GFX10-NEXT: ; return to shader part epilog 2075; 2076; GFX11-LABEL: dyn_insertelement_v8f32_s_s_s_add_1: 2077; GFX11: ; %bb.0: ; %entry 2078; GFX11-NEXT: s_mov_b32 s1, s3 2079; GFX11-NEXT: s_mov_b32 m0, s11 2080; GFX11-NEXT: s_mov_b32 s0, s2 2081; GFX11-NEXT: s_mov_b32 s2, s4 2082; GFX11-NEXT: s_mov_b32 s3, s5 2083; GFX11-NEXT: s_mov_b32 s4, s6 2084; GFX11-NEXT: s_mov_b32 s5, s7 2085; GFX11-NEXT: s_mov_b32 s6, s8 2086; GFX11-NEXT: s_mov_b32 s7, s9 2087; GFX11-NEXT: s_movreld_b32 s1, s10 2088; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 2089; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 2090; GFX11-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 2091; GFX11-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 2092; GFX11-NEXT: ; return to shader part epilog 2093entry: 2094 %idx.add = add i32 %idx, 1 2095 %insert = insertelement <8 x float> %vec, float %val, i32 %idx.add 2096 ret <8 x float> %insert 2097} 2098 2099define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_s_s_s_add_7(<8 x float> inreg %vec, float inreg %val, i32 inreg %idx) { 2100; GPRIDX-LABEL: dyn_insertelement_v8f32_s_s_s_add_7: 2101; GPRIDX: ; %bb.0: ; %entry 2102; GPRIDX-NEXT: s_add_i32 s11, s11, 7 2103; GPRIDX-NEXT: s_cmp_eq_u32 s11, 0 2104; GPRIDX-NEXT: s_cselect_b32 s0, s10, s2 2105; GPRIDX-NEXT: s_cmp_eq_u32 s11, 1 2106; GPRIDX-NEXT: s_cselect_b32 s1, s10, s3 2107; GPRIDX-NEXT: s_cmp_eq_u32 s11, 2 2108; GPRIDX-NEXT: s_cselect_b32 s2, s10, s4 2109; GPRIDX-NEXT: s_cmp_eq_u32 s11, 3 2110; GPRIDX-NEXT: s_cselect_b32 s3, s10, s5 2111; GPRIDX-NEXT: s_cmp_eq_u32 s11, 4 2112; GPRIDX-NEXT: s_cselect_b32 s4, s10, s6 2113; GPRIDX-NEXT: s_cmp_eq_u32 s11, 5 2114; GPRIDX-NEXT: s_cselect_b32 s5, s10, s7 2115; GPRIDX-NEXT: s_cmp_eq_u32 s11, 6 2116; GPRIDX-NEXT: s_cselect_b32 s6, s10, s8 2117; GPRIDX-NEXT: s_cmp_eq_u32 s11, 7 2118; GPRIDX-NEXT: s_cselect_b32 s7, s10, s9 2119; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 2120; GPRIDX-NEXT: v_mov_b32_e32 v1, s1 2121; GPRIDX-NEXT: v_mov_b32_e32 v2, s2 2122; GPRIDX-NEXT: v_mov_b32_e32 v3, s3 2123; GPRIDX-NEXT: v_mov_b32_e32 v4, s4 2124; GPRIDX-NEXT: v_mov_b32_e32 v5, s5 2125; GPRIDX-NEXT: v_mov_b32_e32 v6, s6 2126; GPRIDX-NEXT: v_mov_b32_e32 v7, s7 2127; GPRIDX-NEXT: ; return to shader part epilog 2128; 2129; GFX10-LABEL: dyn_insertelement_v8f32_s_s_s_add_7: 2130; GFX10: ; %bb.0: ; %entry 2131; GFX10-NEXT: s_mov_b32 s1, s3 2132; GFX10-NEXT: s_mov_b32 s3, s5 2133; GFX10-NEXT: s_mov_b32 s5, s7 2134; GFX10-NEXT: s_mov_b32 s7, s9 2135; GFX10-NEXT: s_mov_b32 m0, s11 2136; GFX10-NEXT: s_mov_b32 s0, s2 2137; GFX10-NEXT: s_mov_b32 s2, s4 2138; GFX10-NEXT: s_mov_b32 s4, s6 2139; GFX10-NEXT: s_mov_b32 s6, s8 2140; GFX10-NEXT: s_movreld_b32 s7, s10 2141; GFX10-NEXT: v_mov_b32_e32 v0, s0 2142; GFX10-NEXT: v_mov_b32_e32 v1, s1 2143; GFX10-NEXT: v_mov_b32_e32 v2, s2 2144; GFX10-NEXT: v_mov_b32_e32 v3, s3 2145; GFX10-NEXT: v_mov_b32_e32 v4, s4 2146; GFX10-NEXT: v_mov_b32_e32 v5, s5 2147; GFX10-NEXT: v_mov_b32_e32 v6, s6 2148; GFX10-NEXT: v_mov_b32_e32 v7, s7 2149; GFX10-NEXT: ; return to shader part epilog 2150; 2151; GFX11-LABEL: dyn_insertelement_v8f32_s_s_s_add_7: 2152; GFX11: ; %bb.0: ; %entry 2153; GFX11-NEXT: s_mov_b32 s1, s3 2154; GFX11-NEXT: s_mov_b32 s3, s5 2155; GFX11-NEXT: s_mov_b32 s5, s7 2156; GFX11-NEXT: s_mov_b32 s7, s9 2157; GFX11-NEXT: s_mov_b32 m0, s11 2158; GFX11-NEXT: s_mov_b32 s0, s2 2159; GFX11-NEXT: s_mov_b32 s2, s4 2160; GFX11-NEXT: s_mov_b32 s4, s6 2161; GFX11-NEXT: s_mov_b32 s6, s8 2162; GFX11-NEXT: s_movreld_b32 s7, s10 2163; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 2164; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 2165; GFX11-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 2166; GFX11-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 2167; GFX11-NEXT: ; return to shader part epilog 2168entry: 2169 %idx.add = add i32 %idx, 7 2170 %insert = insertelement <8 x float> %vec, float %val, i32 %idx.add 2171 ret <8 x float> %insert 2172} 2173 2174define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_v_v_add_1(<8 x float> %vec, float %val, i32 %idx) { 2175; GPRIDX-LABEL: dyn_insertelement_v8f32_v_v_v_add_1: 2176; GPRIDX: ; %bb.0: ; %entry 2177; GPRIDX-NEXT: v_add_u32_e32 v9, 1, v9 2178; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9 2179; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc 2180; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v9 2181; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc 2182; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v9 2183; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc 2184; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v9 2185; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc 2186; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v9 2187; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc 2188; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v9 2189; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v8, vcc 2190; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v9 2191; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v8, vcc 2192; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v9 2193; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v7, v8, vcc 2194; GPRIDX-NEXT: ; return to shader part epilog 2195; 2196; GFX10PLUS-LABEL: dyn_insertelement_v8f32_v_v_v_add_1: 2197; GFX10PLUS: ; %bb.0: ; %entry 2198; GFX10PLUS-NEXT: v_add_nc_u32_e32 v9, 1, v9 2199; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v9 2200; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo 2201; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v9 2202; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc_lo 2203; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v9 2204; GFX10PLUS-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc_lo 2205; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v9 2206; GFX10PLUS-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc_lo 2207; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v9 2208; GFX10PLUS-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc_lo 2209; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v9 2210; GFX10PLUS-NEXT: v_cndmask_b32_e32 v5, v5, v8, vcc_lo 2211; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v9 2212; GFX10PLUS-NEXT: v_cndmask_b32_e32 v6, v6, v8, vcc_lo 2213; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v9 2214; GFX10PLUS-NEXT: v_cndmask_b32_e32 v7, v7, v8, vcc_lo 2215; GFX10PLUS-NEXT: ; return to shader part epilog 2216entry: 2217 %idx.add = add i32 %idx, 1 2218 %insert = insertelement <8 x float> %vec, float %val, i32 %idx.add 2219 ret <8 x float> %insert 2220} 2221 2222define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_v_v_add_7(<8 x float> %vec, float %val, i32 %idx) { 2223; GPRIDX-LABEL: dyn_insertelement_v8f32_v_v_v_add_7: 2224; GPRIDX: ; %bb.0: ; %entry 2225; GPRIDX-NEXT: v_add_u32_e32 v9, 7, v9 2226; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9 2227; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc 2228; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v9 2229; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc 2230; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v9 2231; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc 2232; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v9 2233; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc 2234; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v9 2235; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc 2236; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v9 2237; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v8, vcc 2238; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v9 2239; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v8, vcc 2240; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v9 2241; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v7, v8, vcc 2242; GPRIDX-NEXT: ; return to shader part epilog 2243; 2244; GFX10PLUS-LABEL: dyn_insertelement_v8f32_v_v_v_add_7: 2245; GFX10PLUS: ; %bb.0: ; %entry 2246; GFX10PLUS-NEXT: v_add_nc_u32_e32 v9, 7, v9 2247; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v9 2248; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo 2249; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v9 2250; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc_lo 2251; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v9 2252; GFX10PLUS-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc_lo 2253; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v9 2254; GFX10PLUS-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc_lo 2255; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v9 2256; GFX10PLUS-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc_lo 2257; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v9 2258; GFX10PLUS-NEXT: v_cndmask_b32_e32 v5, v5, v8, vcc_lo 2259; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v9 2260; GFX10PLUS-NEXT: v_cndmask_b32_e32 v6, v6, v8, vcc_lo 2261; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v9 2262; GFX10PLUS-NEXT: v_cndmask_b32_e32 v7, v7, v8, vcc_lo 2263; GFX10PLUS-NEXT: ; return to shader part epilog 2264entry: 2265 %idx.add = add i32 %idx, 7 2266 %insert = insertelement <8 x float> %vec, float %val, i32 %idx.add 2267 ret <8 x float> %insert 2268} 2269 2270define amdgpu_ps void @dyn_insertelement_v8f64_s_s_s_add_1(<8 x double> inreg %vec, double inreg %val, i32 inreg %idx) { 2271; GPRIDX-LABEL: dyn_insertelement_v8f64_s_s_s_add_1: 2272; GPRIDX: ; %bb.0: ; %entry 2273; GPRIDX-NEXT: s_mov_b32 s0, s2 2274; GPRIDX-NEXT: s_mov_b32 s1, s3 2275; GPRIDX-NEXT: s_mov_b32 s2, s4 2276; GPRIDX-NEXT: s_mov_b32 s3, s5 2277; GPRIDX-NEXT: s_mov_b32 s4, s6 2278; GPRIDX-NEXT: s_mov_b32 s5, s7 2279; GPRIDX-NEXT: s_mov_b32 s6, s8 2280; GPRIDX-NEXT: s_mov_b32 s7, s9 2281; GPRIDX-NEXT: s_mov_b32 s8, s10 2282; GPRIDX-NEXT: s_mov_b32 s9, s11 2283; GPRIDX-NEXT: s_mov_b32 s10, s12 2284; GPRIDX-NEXT: s_mov_b32 s11, s13 2285; GPRIDX-NEXT: s_mov_b32 s12, s14 2286; GPRIDX-NEXT: s_mov_b32 s13, s15 2287; GPRIDX-NEXT: s_mov_b32 s14, s16 2288; GPRIDX-NEXT: s_mov_b32 s15, s17 2289; GPRIDX-NEXT: s_mov_b32 m0, s20 2290; GPRIDX-NEXT: s_nop 0 2291; GPRIDX-NEXT: s_movreld_b64 s[2:3], s[18:19] 2292; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 2293; GPRIDX-NEXT: v_mov_b32_e32 v1, s1 2294; GPRIDX-NEXT: v_mov_b32_e32 v2, s2 2295; GPRIDX-NEXT: v_mov_b32_e32 v3, s3 2296; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[0:3], off 2297; GPRIDX-NEXT: s_waitcnt vmcnt(0) 2298; GPRIDX-NEXT: v_mov_b32_e32 v0, s4 2299; GPRIDX-NEXT: v_mov_b32_e32 v1, s5 2300; GPRIDX-NEXT: v_mov_b32_e32 v2, s6 2301; GPRIDX-NEXT: v_mov_b32_e32 v3, s7 2302; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[0:3], off 2303; GPRIDX-NEXT: s_waitcnt vmcnt(0) 2304; GPRIDX-NEXT: v_mov_b32_e32 v0, s8 2305; GPRIDX-NEXT: v_mov_b32_e32 v1, s9 2306; GPRIDX-NEXT: v_mov_b32_e32 v2, s10 2307; GPRIDX-NEXT: v_mov_b32_e32 v3, s11 2308; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[0:3], off 2309; GPRIDX-NEXT: s_waitcnt vmcnt(0) 2310; GPRIDX-NEXT: v_mov_b32_e32 v0, s12 2311; GPRIDX-NEXT: v_mov_b32_e32 v1, s13 2312; GPRIDX-NEXT: v_mov_b32_e32 v2, s14 2313; GPRIDX-NEXT: v_mov_b32_e32 v3, s15 2314; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[0:3], off 2315; GPRIDX-NEXT: s_waitcnt vmcnt(0) 2316; GPRIDX-NEXT: s_endpgm 2317; 2318; GFX10-LABEL: dyn_insertelement_v8f64_s_s_s_add_1: 2319; GFX10: ; %bb.0: ; %entry 2320; GFX10-NEXT: s_mov_b32 s0, s2 2321; GFX10-NEXT: s_mov_b32 s1, s3 2322; GFX10-NEXT: s_mov_b32 s2, s4 2323; GFX10-NEXT: s_mov_b32 s3, s5 2324; GFX10-NEXT: s_mov_b32 m0, s20 2325; GFX10-NEXT: s_mov_b32 s4, s6 2326; GFX10-NEXT: s_mov_b32 s5, s7 2327; GFX10-NEXT: s_mov_b32 s6, s8 2328; GFX10-NEXT: s_mov_b32 s7, s9 2329; GFX10-NEXT: s_mov_b32 s8, s10 2330; GFX10-NEXT: s_mov_b32 s9, s11 2331; GFX10-NEXT: s_mov_b32 s10, s12 2332; GFX10-NEXT: s_mov_b32 s11, s13 2333; GFX10-NEXT: s_mov_b32 s12, s14 2334; GFX10-NEXT: s_mov_b32 s13, s15 2335; GFX10-NEXT: s_mov_b32 s14, s16 2336; GFX10-NEXT: s_mov_b32 s15, s17 2337; GFX10-NEXT: s_movreld_b64 s[2:3], s[18:19] 2338; GFX10-NEXT: v_mov_b32_e32 v0, s0 2339; GFX10-NEXT: v_mov_b32_e32 v1, s1 2340; GFX10-NEXT: v_mov_b32_e32 v2, s2 2341; GFX10-NEXT: v_mov_b32_e32 v3, s3 2342; GFX10-NEXT: v_mov_b32_e32 v4, s4 2343; GFX10-NEXT: v_mov_b32_e32 v5, s5 2344; GFX10-NEXT: v_mov_b32_e32 v6, s6 2345; GFX10-NEXT: v_mov_b32_e32 v7, s7 2346; GFX10-NEXT: v_mov_b32_e32 v8, s8 2347; GFX10-NEXT: v_mov_b32_e32 v9, s9 2348; GFX10-NEXT: v_mov_b32_e32 v10, s10 2349; GFX10-NEXT: v_mov_b32_e32 v11, s11 2350; GFX10-NEXT: v_mov_b32_e32 v12, s12 2351; GFX10-NEXT: v_mov_b32_e32 v13, s13 2352; GFX10-NEXT: v_mov_b32_e32 v14, s14 2353; GFX10-NEXT: v_mov_b32_e32 v15, s15 2354; GFX10-NEXT: global_store_dwordx4 v[0:1], v[0:3], off 2355; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2356; GFX10-NEXT: global_store_dwordx4 v[0:1], v[4:7], off 2357; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2358; GFX10-NEXT: global_store_dwordx4 v[0:1], v[8:11], off 2359; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2360; GFX10-NEXT: global_store_dwordx4 v[0:1], v[12:15], off 2361; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2362; GFX10-NEXT: s_endpgm 2363; 2364; GFX11-LABEL: dyn_insertelement_v8f64_s_s_s_add_1: 2365; GFX11: ; %bb.0: ; %entry 2366; GFX11-NEXT: s_mov_b32 s0, s2 2367; GFX11-NEXT: s_mov_b32 s1, s3 2368; GFX11-NEXT: s_mov_b32 s2, s4 2369; GFX11-NEXT: s_mov_b32 s3, s5 2370; GFX11-NEXT: s_mov_b32 m0, s20 2371; GFX11-NEXT: s_mov_b32 s4, s6 2372; GFX11-NEXT: s_mov_b32 s5, s7 2373; GFX11-NEXT: s_mov_b32 s6, s8 2374; GFX11-NEXT: s_mov_b32 s7, s9 2375; GFX11-NEXT: s_mov_b32 s8, s10 2376; GFX11-NEXT: s_mov_b32 s9, s11 2377; GFX11-NEXT: s_mov_b32 s10, s12 2378; GFX11-NEXT: s_mov_b32 s11, s13 2379; GFX11-NEXT: s_mov_b32 s12, s14 2380; GFX11-NEXT: s_mov_b32 s13, s15 2381; GFX11-NEXT: s_mov_b32 s14, s16 2382; GFX11-NEXT: s_mov_b32 s15, s17 2383; GFX11-NEXT: s_movreld_b64 s[2:3], s[18:19] 2384; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 2385; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 2386; GFX11-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 2387; GFX11-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 2388; GFX11-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 2389; GFX11-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 2390; GFX11-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 2391; GFX11-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 2392; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off dlc 2393; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2394; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off dlc 2395; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2396; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off dlc 2397; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2398; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off dlc 2399; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2400; GFX11-NEXT: s_endpgm 2401entry: 2402 %idx.add = add i32 %idx, 1 2403 %insert = insertelement <8 x double> %vec, double %val, i32 %idx.add 2404 %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1> 2405 %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3> 2406 %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5> 2407 %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7> 2408 store volatile <2 x double> %vec.0, ptr addrspace(1) undef 2409 store volatile <2 x double> %vec.1, ptr addrspace(1) undef 2410 store volatile <2 x double> %vec.2, ptr addrspace(1) undef 2411 store volatile <2 x double> %vec.3, ptr addrspace(1) undef 2412 ret void 2413} 2414 2415define amdgpu_ps void @dyn_insertelement_v8f64_v_v_v_add_1(<8 x double> %vec, double %val, i32 %idx) { 2416; GPRIDX-LABEL: dyn_insertelement_v8f64_v_v_v_add_1: 2417; GPRIDX: ; %bb.0: ; %entry 2418; GPRIDX-NEXT: v_add_u32_e32 v18, 1, v18 2419; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v18 2420; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc 2421; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc 2422; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v18 2423; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v16, vcc 2424; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v17, vcc 2425; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v18 2426; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v16, vcc 2427; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v17, vcc 2428; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v18 2429; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v16, vcc 2430; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v7, v17, vcc 2431; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v18 2432; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v8, v16, vcc 2433; GPRIDX-NEXT: v_cndmask_b32_e32 v9, v9, v17, vcc 2434; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v18 2435; GPRIDX-NEXT: v_cndmask_b32_e32 v10, v10, v16, vcc 2436; GPRIDX-NEXT: v_cndmask_b32_e32 v11, v11, v17, vcc 2437; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v18 2438; GPRIDX-NEXT: v_cndmask_b32_e32 v12, v12, v16, vcc 2439; GPRIDX-NEXT: v_cndmask_b32_e32 v13, v13, v17, vcc 2440; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v18 2441; GPRIDX-NEXT: v_cndmask_b32_e32 v14, v14, v16, vcc 2442; GPRIDX-NEXT: v_cndmask_b32_e32 v15, v15, v17, vcc 2443; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[0:3], off 2444; GPRIDX-NEXT: s_waitcnt vmcnt(0) 2445; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[4:7], off 2446; GPRIDX-NEXT: s_waitcnt vmcnt(0) 2447; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[8:11], off 2448; GPRIDX-NEXT: s_waitcnt vmcnt(0) 2449; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[12:15], off 2450; GPRIDX-NEXT: s_waitcnt vmcnt(0) 2451; GPRIDX-NEXT: s_endpgm 2452; 2453; GFX10-LABEL: dyn_insertelement_v8f64_v_v_v_add_1: 2454; GFX10: ; %bb.0: ; %entry 2455; GFX10-NEXT: v_add_nc_u32_e32 v18, 1, v18 2456; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v18 2457; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 1, v18 2458; GFX10-NEXT: v_cmp_eq_u32_e64 s1, 7, v18 2459; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc_lo 2460; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc_lo 2461; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v18 2462; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v16, s0 2463; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v17, s0 2464; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 3, v18 2465; GFX10-NEXT: v_cndmask_b32_e64 v14, v14, v16, s1 2466; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v16, vcc_lo 2467; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v17, vcc_lo 2468; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v18 2469; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, v16, s0 2470; GFX10-NEXT: v_cndmask_b32_e64 v7, v7, v17, s0 2471; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 5, v18 2472; GFX10-NEXT: v_cndmask_b32_e64 v15, v15, v17, s1 2473; GFX10-NEXT: v_cndmask_b32_e32 v8, v8, v16, vcc_lo 2474; GFX10-NEXT: v_cndmask_b32_e32 v9, v9, v17, vcc_lo 2475; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v18 2476; GFX10-NEXT: v_cndmask_b32_e64 v10, v10, v16, s0 2477; GFX10-NEXT: v_cndmask_b32_e64 v11, v11, v17, s0 2478; GFX10-NEXT: v_cndmask_b32_e32 v12, v12, v16, vcc_lo 2479; GFX10-NEXT: v_cndmask_b32_e32 v13, v13, v17, vcc_lo 2480; GFX10-NEXT: global_store_dwordx4 v[0:1], v[0:3], off 2481; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2482; GFX10-NEXT: global_store_dwordx4 v[0:1], v[4:7], off 2483; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2484; GFX10-NEXT: global_store_dwordx4 v[0:1], v[8:11], off 2485; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2486; GFX10-NEXT: global_store_dwordx4 v[0:1], v[12:15], off 2487; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2488; GFX10-NEXT: s_endpgm 2489; 2490; GFX11-LABEL: dyn_insertelement_v8f64_v_v_v_add_1: 2491; GFX11: ; %bb.0: ; %entry 2492; GFX11-NEXT: v_add_nc_u32_e32 v18, 1, v18 2493; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v18 2494; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc_lo 2495; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 1, v18 2496; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc_lo 2497; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v18 2498; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 7, v18 2499; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v16, s0 2500; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v17, s0 2501; GFX11-NEXT: v_cndmask_b32_e32 v5, v5, v17, vcc_lo 2502; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 3, v18 2503; GFX11-NEXT: v_cndmask_b32_e32 v4, v4, v16, vcc_lo 2504; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v18 2505; GFX11-NEXT: v_cndmask_b32_e64 v14, v14, v16, s1 2506; GFX11-NEXT: v_cndmask_b32_e64 v15, v15, v17, s1 2507; GFX11-NEXT: v_cndmask_b32_e64 v6, v6, v16, s0 2508; GFX11-NEXT: v_cndmask_b32_e64 v7, v7, v17, s0 2509; GFX11-NEXT: v_cndmask_b32_e32 v9, v9, v17, vcc_lo 2510; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 5, v18 2511; GFX11-NEXT: v_cndmask_b32_e32 v8, v8, v16, vcc_lo 2512; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v18 2513; GFX11-NEXT: v_cndmask_b32_e64 v10, v10, v16, s0 2514; GFX11-NEXT: v_cndmask_b32_e64 v11, v11, v17, s0 2515; GFX11-NEXT: v_dual_cndmask_b32 v13, v13, v17 :: v_dual_cndmask_b32 v12, v12, v16 2516; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off dlc 2517; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2518; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off dlc 2519; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2520; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off dlc 2521; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2522; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off dlc 2523; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2524; GFX11-NEXT: s_endpgm 2525entry: 2526 %idx.add = add i32 %idx, 1 2527 %insert = insertelement <8 x double> %vec, double %val, i32 %idx.add 2528 %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1> 2529 %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3> 2530 %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5> 2531 %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7> 2532 store volatile <2 x double> %vec.0, ptr addrspace(1) undef 2533 store volatile <2 x double> %vec.1, ptr addrspace(1) undef 2534 store volatile <2 x double> %vec.2, ptr addrspace(1) undef 2535 store volatile <2 x double> %vec.3, ptr addrspace(1) undef 2536 ret void 2537} 2538 2539define amdgpu_ps <9 x float> @dyn_insertelement_v9f32_s_v_s(<9 x float> inreg %vec, float %val, i32 inreg %idx) { 2540; GPRIDX-LABEL: dyn_insertelement_v9f32_s_v_s: 2541; GPRIDX: ; %bb.0: ; %entry 2542; GPRIDX-NEXT: s_mov_b32 s0, s2 2543; GPRIDX-NEXT: s_mov_b32 s1, s3 2544; GPRIDX-NEXT: s_mov_b32 s2, s4 2545; GPRIDX-NEXT: s_mov_b32 s3, s5 2546; GPRIDX-NEXT: s_mov_b32 s4, s6 2547; GPRIDX-NEXT: s_mov_b32 s5, s7 2548; GPRIDX-NEXT: s_mov_b32 s6, s8 2549; GPRIDX-NEXT: s_mov_b32 s7, s9 2550; GPRIDX-NEXT: s_mov_b32 s8, s10 2551; GPRIDX-NEXT: v_mov_b32_e32 v9, v0 2552; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 2553; GPRIDX-NEXT: v_mov_b32_e32 v1, s1 2554; GPRIDX-NEXT: v_mov_b32_e32 v2, s2 2555; GPRIDX-NEXT: v_mov_b32_e32 v3, s3 2556; GPRIDX-NEXT: v_mov_b32_e32 v4, s4 2557; GPRIDX-NEXT: v_mov_b32_e32 v5, s5 2558; GPRIDX-NEXT: v_mov_b32_e32 v6, s6 2559; GPRIDX-NEXT: v_mov_b32_e32 v7, s7 2560; GPRIDX-NEXT: v_mov_b32_e32 v8, s8 2561; GPRIDX-NEXT: s_set_gpr_idx_on s11, gpr_idx(DST) 2562; GPRIDX-NEXT: v_mov_b32_e32 v0, v9 2563; GPRIDX-NEXT: s_set_gpr_idx_off 2564; GPRIDX-NEXT: ; return to shader part epilog 2565; 2566; GFX10-LABEL: dyn_insertelement_v9f32_s_v_s: 2567; GFX10: ; %bb.0: ; %entry 2568; GFX10-NEXT: s_mov_b32 s0, s2 2569; GFX10-NEXT: s_mov_b32 s1, s3 2570; GFX10-NEXT: s_mov_b32 s2, s4 2571; GFX10-NEXT: s_mov_b32 s3, s5 2572; GFX10-NEXT: s_mov_b32 s4, s6 2573; GFX10-NEXT: s_mov_b32 s5, s7 2574; GFX10-NEXT: s_mov_b32 s6, s8 2575; GFX10-NEXT: s_mov_b32 s7, s9 2576; GFX10-NEXT: s_mov_b32 s8, s10 2577; GFX10-NEXT: v_mov_b32_e32 v9, v0 2578; GFX10-NEXT: v_mov_b32_e32 v0, s0 2579; GFX10-NEXT: s_mov_b32 m0, s11 2580; GFX10-NEXT: v_mov_b32_e32 v1, s1 2581; GFX10-NEXT: v_mov_b32_e32 v2, s2 2582; GFX10-NEXT: v_mov_b32_e32 v3, s3 2583; GFX10-NEXT: v_mov_b32_e32 v4, s4 2584; GFX10-NEXT: v_mov_b32_e32 v5, s5 2585; GFX10-NEXT: v_mov_b32_e32 v6, s6 2586; GFX10-NEXT: v_mov_b32_e32 v7, s7 2587; GFX10-NEXT: v_mov_b32_e32 v8, s8 2588; GFX10-NEXT: v_movreld_b32_e32 v0, v9 2589; GFX10-NEXT: ; return to shader part epilog 2590; 2591; GFX11-LABEL: dyn_insertelement_v9f32_s_v_s: 2592; GFX11: ; %bb.0: ; %entry 2593; GFX11-NEXT: s_mov_b32 s0, s2 2594; GFX11-NEXT: s_mov_b32 s1, s3 2595; GFX11-NEXT: s_mov_b32 s2, s4 2596; GFX11-NEXT: s_mov_b32 s3, s5 2597; GFX11-NEXT: s_mov_b32 s4, s6 2598; GFX11-NEXT: s_mov_b32 s5, s7 2599; GFX11-NEXT: s_mov_b32 s6, s8 2600; GFX11-NEXT: s_mov_b32 s7, s9 2601; GFX11-NEXT: s_mov_b32 s8, s10 2602; GFX11-NEXT: v_dual_mov_b32 v9, v0 :: v_dual_mov_b32 v0, s0 2603; GFX11-NEXT: s_mov_b32 m0, s11 2604; GFX11-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2 2605; GFX11-NEXT: v_dual_mov_b32 v3, s3 :: v_dual_mov_b32 v4, s4 2606; GFX11-NEXT: v_dual_mov_b32 v5, s5 :: v_dual_mov_b32 v6, s6 2607; GFX11-NEXT: v_dual_mov_b32 v7, s7 :: v_dual_mov_b32 v8, s8 2608; GFX11-NEXT: v_movreld_b32_e32 v0, v9 2609; GFX11-NEXT: ; return to shader part epilog 2610entry: 2611 %insert = insertelement <9 x float> %vec, float %val, i32 %idx 2612 ret <9 x float> %insert 2613} 2614 2615define amdgpu_ps <9 x float> @dyn_insertelement_v9f32_s_v_v(<9 x float> inreg %vec, float %val, i32 %idx) { 2616; GPRIDX-LABEL: dyn_insertelement_v9f32_s_v_v: 2617; GPRIDX: ; %bb.0: ; %entry 2618; GPRIDX-NEXT: v_mov_b32_e32 v2, s2 2619; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 2620; GPRIDX-NEXT: v_mov_b32_e32 v3, s3 2621; GPRIDX-NEXT: v_cndmask_b32_e32 v10, v2, v0, vcc 2622; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 2623; GPRIDX-NEXT: v_mov_b32_e32 v4, s4 2624; GPRIDX-NEXT: v_cndmask_b32_e32 v9, v3, v0, vcc 2625; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v1 2626; GPRIDX-NEXT: v_mov_b32_e32 v5, s5 2627; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v4, v0, vcc 2628; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v1 2629; GPRIDX-NEXT: v_mov_b32_e32 v6, s6 2630; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v5, v0, vcc 2631; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v1 2632; GPRIDX-NEXT: v_mov_b32_e32 v7, s7 2633; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v6, v0, vcc 2634; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v1 2635; GPRIDX-NEXT: v_mov_b32_e32 v8, s8 2636; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v7, v0, vcc 2637; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v1 2638; GPRIDX-NEXT: v_mov_b32_e32 v11, s9 2639; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v8, v0, vcc 2640; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v1 2641; GPRIDX-NEXT: v_mov_b32_e32 v12, s10 2642; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v11, v0, vcc 2643; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 8, v1 2644; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v12, v0, vcc 2645; GPRIDX-NEXT: v_mov_b32_e32 v0, v10 2646; GPRIDX-NEXT: v_mov_b32_e32 v1, v9 2647; GPRIDX-NEXT: ; return to shader part epilog 2648; 2649; GFX10-LABEL: dyn_insertelement_v9f32_s_v_v: 2650; GFX10: ; %bb.0: ; %entry 2651; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 2652; GFX10-NEXT: v_cndmask_b32_e32 v10, s2, v0, vcc_lo 2653; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1 2654; GFX10-NEXT: v_cndmask_b32_e32 v9, s3, v0, vcc_lo 2655; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1 2656; GFX10-NEXT: v_cndmask_b32_e32 v2, s4, v0, vcc_lo 2657; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1 2658; GFX10-NEXT: v_cndmask_b32_e32 v3, s5, v0, vcc_lo 2659; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v1 2660; GFX10-NEXT: v_cndmask_b32_e32 v4, s6, v0, vcc_lo 2661; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v1 2662; GFX10-NEXT: v_cndmask_b32_e32 v5, s7, v0, vcc_lo 2663; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v1 2664; GFX10-NEXT: v_cndmask_b32_e32 v6, s8, v0, vcc_lo 2665; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v1 2666; GFX10-NEXT: v_cndmask_b32_e32 v7, s9, v0, vcc_lo 2667; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v1 2668; GFX10-NEXT: v_mov_b32_e32 v1, v9 2669; GFX10-NEXT: v_cndmask_b32_e32 v8, s10, v0, vcc_lo 2670; GFX10-NEXT: v_mov_b32_e32 v0, v10 2671; GFX10-NEXT: ; return to shader part epilog 2672; 2673; GFX11-LABEL: dyn_insertelement_v9f32_s_v_v: 2674; GFX11: ; %bb.0: ; %entry 2675; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 2676; GFX11-NEXT: v_cndmask_b32_e32 v10, s2, v0, vcc_lo 2677; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1 2678; GFX11-NEXT: v_cndmask_b32_e32 v9, s3, v0, vcc_lo 2679; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1 2680; GFX11-NEXT: v_cndmask_b32_e32 v2, s4, v0, vcc_lo 2681; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1 2682; GFX11-NEXT: v_cndmask_b32_e32 v3, s5, v0, vcc_lo 2683; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v1 2684; GFX11-NEXT: v_cndmask_b32_e32 v4, s6, v0, vcc_lo 2685; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v1 2686; GFX11-NEXT: v_cndmask_b32_e32 v5, s7, v0, vcc_lo 2687; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v1 2688; GFX11-NEXT: v_cndmask_b32_e32 v6, s8, v0, vcc_lo 2689; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v1 2690; GFX11-NEXT: v_cndmask_b32_e32 v7, s9, v0, vcc_lo 2691; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v1 2692; GFX11-NEXT: v_dual_mov_b32 v1, v9 :: v_dual_cndmask_b32 v8, s10, v0 2693; GFX11-NEXT: v_mov_b32_e32 v0, v10 2694; GFX11-NEXT: ; return to shader part epilog 2695entry: 2696 %insert = insertelement <9 x float> %vec, float %val, i32 %idx 2697 ret <9 x float> %insert 2698} 2699 2700define amdgpu_ps <9 x float> @dyn_insertelement_v9f32_v_v_s(<9 x float> %vec, float %val, i32 inreg %idx) { 2701; GPRIDX-LABEL: dyn_insertelement_v9f32_v_v_s: 2702; GPRIDX: ; %bb.0: ; %entry 2703; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(DST) 2704; GPRIDX-NEXT: v_mov_b32_e32 v0, v9 2705; GPRIDX-NEXT: s_set_gpr_idx_off 2706; GPRIDX-NEXT: ; return to shader part epilog 2707; 2708; GFX10PLUS-LABEL: dyn_insertelement_v9f32_v_v_s: 2709; GFX10PLUS: ; %bb.0: ; %entry 2710; GFX10PLUS-NEXT: s_mov_b32 m0, s2 2711; GFX10PLUS-NEXT: v_movreld_b32_e32 v0, v9 2712; GFX10PLUS-NEXT: ; return to shader part epilog 2713entry: 2714 %insert = insertelement <9 x float> %vec, float %val, i32 %idx 2715 ret <9 x float> %insert 2716} 2717 2718define amdgpu_ps <9 x float> @dyn_insertelement_v9f32_v_v_v(<9 x float> %vec, float %val, i32 %idx) { 2719; GPRIDX-LABEL: dyn_insertelement_v9f32_v_v_v: 2720; GPRIDX: ; %bb.0: ; %entry 2721; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v10 2722; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc 2723; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v10 2724; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc 2725; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v10 2726; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v9, vcc 2727; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v10 2728; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v9, vcc 2729; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v10 2730; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v9, vcc 2731; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v10 2732; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc 2733; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v10 2734; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v9, vcc 2735; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v10 2736; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v7, v9, vcc 2737; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 8, v10 2738; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v8, v9, vcc 2739; GPRIDX-NEXT: ; return to shader part epilog 2740; 2741; GFX10PLUS-LABEL: dyn_insertelement_v9f32_v_v_v: 2742; GFX10PLUS: ; %bb.0: ; %entry 2743; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v10 2744; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc_lo 2745; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v10 2746; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc_lo 2747; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v10 2748; GFX10PLUS-NEXT: v_cndmask_b32_e32 v2, v2, v9, vcc_lo 2749; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v10 2750; GFX10PLUS-NEXT: v_cndmask_b32_e32 v3, v3, v9, vcc_lo 2751; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v10 2752; GFX10PLUS-NEXT: v_cndmask_b32_e32 v4, v4, v9, vcc_lo 2753; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v10 2754; GFX10PLUS-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc_lo 2755; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v10 2756; GFX10PLUS-NEXT: v_cndmask_b32_e32 v6, v6, v9, vcc_lo 2757; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v10 2758; GFX10PLUS-NEXT: v_cndmask_b32_e32 v7, v7, v9, vcc_lo 2759; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v10 2760; GFX10PLUS-NEXT: v_cndmask_b32_e32 v8, v8, v9, vcc_lo 2761; GFX10PLUS-NEXT: ; return to shader part epilog 2762entry: 2763 %insert = insertelement <9 x float> %vec, float %val, i32 %idx 2764 ret <9 x float> %insert 2765} 2766 2767define amdgpu_ps <10 x float> @dyn_insertelement_v10f32_s_v_s(<10 x float> inreg %vec, float %val, i32 inreg %idx) { 2768; GPRIDX-LABEL: dyn_insertelement_v10f32_s_v_s: 2769; GPRIDX: ; %bb.0: ; %entry 2770; GPRIDX-NEXT: s_mov_b32 s0, s2 2771; GPRIDX-NEXT: s_mov_b32 s1, s3 2772; GPRIDX-NEXT: s_mov_b32 s2, s4 2773; GPRIDX-NEXT: s_mov_b32 s3, s5 2774; GPRIDX-NEXT: s_mov_b32 s4, s6 2775; GPRIDX-NEXT: s_mov_b32 s5, s7 2776; GPRIDX-NEXT: s_mov_b32 s6, s8 2777; GPRIDX-NEXT: s_mov_b32 s7, s9 2778; GPRIDX-NEXT: s_mov_b32 s8, s10 2779; GPRIDX-NEXT: s_mov_b32 s9, s11 2780; GPRIDX-NEXT: v_mov_b32_e32 v10, v0 2781; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 2782; GPRIDX-NEXT: v_mov_b32_e32 v1, s1 2783; GPRIDX-NEXT: v_mov_b32_e32 v2, s2 2784; GPRIDX-NEXT: v_mov_b32_e32 v3, s3 2785; GPRIDX-NEXT: v_mov_b32_e32 v4, s4 2786; GPRIDX-NEXT: v_mov_b32_e32 v5, s5 2787; GPRIDX-NEXT: v_mov_b32_e32 v6, s6 2788; GPRIDX-NEXT: v_mov_b32_e32 v7, s7 2789; GPRIDX-NEXT: v_mov_b32_e32 v8, s8 2790; GPRIDX-NEXT: v_mov_b32_e32 v9, s9 2791; GPRIDX-NEXT: s_set_gpr_idx_on s12, gpr_idx(DST) 2792; GPRIDX-NEXT: v_mov_b32_e32 v0, v10 2793; GPRIDX-NEXT: s_set_gpr_idx_off 2794; GPRIDX-NEXT: ; return to shader part epilog 2795; 2796; GFX10-LABEL: dyn_insertelement_v10f32_s_v_s: 2797; GFX10: ; %bb.0: ; %entry 2798; GFX10-NEXT: s_mov_b32 s0, s2 2799; GFX10-NEXT: s_mov_b32 s1, s3 2800; GFX10-NEXT: s_mov_b32 s2, s4 2801; GFX10-NEXT: s_mov_b32 s3, s5 2802; GFX10-NEXT: s_mov_b32 s4, s6 2803; GFX10-NEXT: s_mov_b32 s5, s7 2804; GFX10-NEXT: s_mov_b32 s6, s8 2805; GFX10-NEXT: s_mov_b32 s7, s9 2806; GFX10-NEXT: s_mov_b32 s8, s10 2807; GFX10-NEXT: s_mov_b32 s9, s11 2808; GFX10-NEXT: v_mov_b32_e32 v10, v0 2809; GFX10-NEXT: v_mov_b32_e32 v0, s0 2810; GFX10-NEXT: s_mov_b32 m0, s12 2811; GFX10-NEXT: v_mov_b32_e32 v1, s1 2812; GFX10-NEXT: v_mov_b32_e32 v2, s2 2813; GFX10-NEXT: v_mov_b32_e32 v3, s3 2814; GFX10-NEXT: v_mov_b32_e32 v4, s4 2815; GFX10-NEXT: v_mov_b32_e32 v5, s5 2816; GFX10-NEXT: v_mov_b32_e32 v6, s6 2817; GFX10-NEXT: v_mov_b32_e32 v7, s7 2818; GFX10-NEXT: v_mov_b32_e32 v8, s8 2819; GFX10-NEXT: v_mov_b32_e32 v9, s9 2820; GFX10-NEXT: v_movreld_b32_e32 v0, v10 2821; GFX10-NEXT: ; return to shader part epilog 2822; 2823; GFX11-LABEL: dyn_insertelement_v10f32_s_v_s: 2824; GFX11: ; %bb.0: ; %entry 2825; GFX11-NEXT: s_mov_b32 s0, s2 2826; GFX11-NEXT: s_mov_b32 s1, s3 2827; GFX11-NEXT: s_mov_b32 s2, s4 2828; GFX11-NEXT: s_mov_b32 s3, s5 2829; GFX11-NEXT: s_mov_b32 s4, s6 2830; GFX11-NEXT: s_mov_b32 s5, s7 2831; GFX11-NEXT: s_mov_b32 s6, s8 2832; GFX11-NEXT: s_mov_b32 s7, s9 2833; GFX11-NEXT: s_mov_b32 s8, s10 2834; GFX11-NEXT: s_mov_b32 s9, s11 2835; GFX11-NEXT: v_mov_b32_e32 v10, v0 2836; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3 2837; GFX11-NEXT: s_mov_b32 m0, s12 2838; GFX11-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2 2839; GFX11-NEXT: v_dual_mov_b32 v5, s5 :: v_dual_mov_b32 v4, s4 2840; GFX11-NEXT: v_dual_mov_b32 v7, s7 :: v_dual_mov_b32 v6, s6 2841; GFX11-NEXT: v_dual_mov_b32 v9, s9 :: v_dual_mov_b32 v8, s8 2842; GFX11-NEXT: v_movreld_b32_e32 v0, v10 2843; GFX11-NEXT: ; return to shader part epilog 2844entry: 2845 %insert = insertelement <10 x float> %vec, float %val, i32 %idx 2846 ret <10 x float> %insert 2847} 2848 2849define amdgpu_ps <10 x float> @dyn_insertelement_v10f32_s_v_v(<10 x float> inreg %vec, float %val, i32 %idx) { 2850; GPRIDX-LABEL: dyn_insertelement_v10f32_s_v_v: 2851; GPRIDX: ; %bb.0: ; %entry 2852; GPRIDX-NEXT: v_mov_b32_e32 v2, s2 2853; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 2854; GPRIDX-NEXT: v_mov_b32_e32 v3, s3 2855; GPRIDX-NEXT: v_cndmask_b32_e32 v10, v2, v0, vcc 2856; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 2857; GPRIDX-NEXT: v_mov_b32_e32 v4, s4 2858; GPRIDX-NEXT: v_cndmask_b32_e32 v11, v3, v0, vcc 2859; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v1 2860; GPRIDX-NEXT: v_mov_b32_e32 v5, s5 2861; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v4, v0, vcc 2862; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v1 2863; GPRIDX-NEXT: v_mov_b32_e32 v6, s6 2864; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v5, v0, vcc 2865; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v1 2866; GPRIDX-NEXT: v_mov_b32_e32 v7, s7 2867; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v6, v0, vcc 2868; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v1 2869; GPRIDX-NEXT: v_mov_b32_e32 v8, s8 2870; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v7, v0, vcc 2871; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v1 2872; GPRIDX-NEXT: v_mov_b32_e32 v9, s9 2873; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v8, v0, vcc 2874; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v1 2875; GPRIDX-NEXT: v_mov_b32_e32 v12, s10 2876; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v9, v0, vcc 2877; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 8, v1 2878; GPRIDX-NEXT: v_mov_b32_e32 v13, s11 2879; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v12, v0, vcc 2880; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 9, v1 2881; GPRIDX-NEXT: v_cndmask_b32_e32 v9, v13, v0, vcc 2882; GPRIDX-NEXT: v_mov_b32_e32 v0, v10 2883; GPRIDX-NEXT: v_mov_b32_e32 v1, v11 2884; GPRIDX-NEXT: ; return to shader part epilog 2885; 2886; GFX10-LABEL: dyn_insertelement_v10f32_s_v_v: 2887; GFX10: ; %bb.0: ; %entry 2888; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 2889; GFX10-NEXT: v_cndmask_b32_e32 v10, s2, v0, vcc_lo 2890; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1 2891; GFX10-NEXT: v_cndmask_b32_e32 v11, s3, v0, vcc_lo 2892; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1 2893; GFX10-NEXT: v_cndmask_b32_e32 v2, s4, v0, vcc_lo 2894; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1 2895; GFX10-NEXT: v_cndmask_b32_e32 v3, s5, v0, vcc_lo 2896; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v1 2897; GFX10-NEXT: v_cndmask_b32_e32 v4, s6, v0, vcc_lo 2898; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v1 2899; GFX10-NEXT: v_cndmask_b32_e32 v5, s7, v0, vcc_lo 2900; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v1 2901; GFX10-NEXT: v_cndmask_b32_e32 v6, s8, v0, vcc_lo 2902; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v1 2903; GFX10-NEXT: v_cndmask_b32_e32 v7, s9, v0, vcc_lo 2904; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v1 2905; GFX10-NEXT: v_cndmask_b32_e32 v8, s10, v0, vcc_lo 2906; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v1 2907; GFX10-NEXT: v_mov_b32_e32 v1, v11 2908; GFX10-NEXT: v_cndmask_b32_e32 v9, s11, v0, vcc_lo 2909; GFX10-NEXT: v_mov_b32_e32 v0, v10 2910; GFX10-NEXT: ; return to shader part epilog 2911; 2912; GFX11-LABEL: dyn_insertelement_v10f32_s_v_v: 2913; GFX11: ; %bb.0: ; %entry 2914; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 2915; GFX11-NEXT: v_cndmask_b32_e32 v10, s2, v0, vcc_lo 2916; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1 2917; GFX11-NEXT: v_cndmask_b32_e32 v11, s3, v0, vcc_lo 2918; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1 2919; GFX11-NEXT: v_cndmask_b32_e32 v2, s4, v0, vcc_lo 2920; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1 2921; GFX11-NEXT: v_cndmask_b32_e32 v3, s5, v0, vcc_lo 2922; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v1 2923; GFX11-NEXT: v_cndmask_b32_e32 v4, s6, v0, vcc_lo 2924; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v1 2925; GFX11-NEXT: v_cndmask_b32_e32 v5, s7, v0, vcc_lo 2926; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v1 2927; GFX11-NEXT: v_cndmask_b32_e32 v6, s8, v0, vcc_lo 2928; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v1 2929; GFX11-NEXT: v_cndmask_b32_e32 v7, s9, v0, vcc_lo 2930; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v1 2931; GFX11-NEXT: v_cndmask_b32_e32 v8, s10, v0, vcc_lo 2932; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v1 2933; GFX11-NEXT: v_mov_b32_e32 v1, v11 2934; GFX11-NEXT: v_dual_cndmask_b32 v9, s11, v0 :: v_dual_mov_b32 v0, v10 2935; GFX11-NEXT: ; return to shader part epilog 2936entry: 2937 %insert = insertelement <10 x float> %vec, float %val, i32 %idx 2938 ret <10 x float> %insert 2939} 2940 2941define amdgpu_ps <10 x float> @dyn_insertelement_v10f32_v_v_s(<10 x float> %vec, float %val, i32 inreg %idx) { 2942; GPRIDX-LABEL: dyn_insertelement_v10f32_v_v_s: 2943; GPRIDX: ; %bb.0: ; %entry 2944; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(DST) 2945; GPRIDX-NEXT: v_mov_b32_e32 v0, v10 2946; GPRIDX-NEXT: s_set_gpr_idx_off 2947; GPRIDX-NEXT: ; return to shader part epilog 2948; 2949; GFX10PLUS-LABEL: dyn_insertelement_v10f32_v_v_s: 2950; GFX10PLUS: ; %bb.0: ; %entry 2951; GFX10PLUS-NEXT: s_mov_b32 m0, s2 2952; GFX10PLUS-NEXT: v_movreld_b32_e32 v0, v10 2953; GFX10PLUS-NEXT: ; return to shader part epilog 2954entry: 2955 %insert = insertelement <10 x float> %vec, float %val, i32 %idx 2956 ret <10 x float> %insert 2957} 2958 2959define amdgpu_ps <10 x float> @dyn_insertelement_v10f32_v_v_v(<10 x float> %vec, float %val, i32 %idx) { 2960; GPRIDX-LABEL: dyn_insertelement_v10f32_v_v_v: 2961; GPRIDX: ; %bb.0: ; %entry 2962; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v11 2963; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc 2964; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v11 2965; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v10, vcc 2966; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v11 2967; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc 2968; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v11 2969; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v10, vcc 2970; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v11 2971; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc 2972; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v11 2973; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v10, vcc 2974; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v11 2975; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc 2976; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v11 2977; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v7, v10, vcc 2978; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 8, v11 2979; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v8, v10, vcc 2980; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 9, v11 2981; GPRIDX-NEXT: v_cndmask_b32_e32 v9, v9, v10, vcc 2982; GPRIDX-NEXT: ; return to shader part epilog 2983; 2984; GFX10PLUS-LABEL: dyn_insertelement_v10f32_v_v_v: 2985; GFX10PLUS: ; %bb.0: ; %entry 2986; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v11 2987; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo 2988; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v11 2989; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, v1, v10, vcc_lo 2990; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v11 2991; GFX10PLUS-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc_lo 2992; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v11 2993; GFX10PLUS-NEXT: v_cndmask_b32_e32 v3, v3, v10, vcc_lo 2994; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v11 2995; GFX10PLUS-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc_lo 2996; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v11 2997; GFX10PLUS-NEXT: v_cndmask_b32_e32 v5, v5, v10, vcc_lo 2998; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v11 2999; GFX10PLUS-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc_lo 3000; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v11 3001; GFX10PLUS-NEXT: v_cndmask_b32_e32 v7, v7, v10, vcc_lo 3002; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v11 3003; GFX10PLUS-NEXT: v_cndmask_b32_e32 v8, v8, v10, vcc_lo 3004; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v11 3005; GFX10PLUS-NEXT: v_cndmask_b32_e32 v9, v9, v10, vcc_lo 3006; GFX10PLUS-NEXT: ; return to shader part epilog 3007entry: 3008 %insert = insertelement <10 x float> %vec, float %val, i32 %idx 3009 ret <10 x float> %insert 3010} 3011 3012define amdgpu_ps <11 x float> @dyn_insertelement_v11f32_s_v_s(<11 x float> inreg %vec, float %val, i32 inreg %idx) { 3013; GPRIDX-LABEL: dyn_insertelement_v11f32_s_v_s: 3014; GPRIDX: ; %bb.0: ; %entry 3015; GPRIDX-NEXT: s_mov_b32 s0, s2 3016; GPRIDX-NEXT: s_mov_b32 s1, s3 3017; GPRIDX-NEXT: s_mov_b32 s2, s4 3018; GPRIDX-NEXT: s_mov_b32 s3, s5 3019; GPRIDX-NEXT: s_mov_b32 s4, s6 3020; GPRIDX-NEXT: s_mov_b32 s5, s7 3021; GPRIDX-NEXT: s_mov_b32 s6, s8 3022; GPRIDX-NEXT: s_mov_b32 s7, s9 3023; GPRIDX-NEXT: s_mov_b32 s8, s10 3024; GPRIDX-NEXT: s_mov_b32 s9, s11 3025; GPRIDX-NEXT: s_mov_b32 s10, s12 3026; GPRIDX-NEXT: v_mov_b32_e32 v11, v0 3027; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 3028; GPRIDX-NEXT: v_mov_b32_e32 v1, s1 3029; GPRIDX-NEXT: v_mov_b32_e32 v2, s2 3030; GPRIDX-NEXT: v_mov_b32_e32 v3, s3 3031; GPRIDX-NEXT: v_mov_b32_e32 v4, s4 3032; GPRIDX-NEXT: v_mov_b32_e32 v5, s5 3033; GPRIDX-NEXT: v_mov_b32_e32 v6, s6 3034; GPRIDX-NEXT: v_mov_b32_e32 v7, s7 3035; GPRIDX-NEXT: v_mov_b32_e32 v8, s8 3036; GPRIDX-NEXT: v_mov_b32_e32 v9, s9 3037; GPRIDX-NEXT: v_mov_b32_e32 v10, s10 3038; GPRIDX-NEXT: s_set_gpr_idx_on s13, gpr_idx(DST) 3039; GPRIDX-NEXT: v_mov_b32_e32 v0, v11 3040; GPRIDX-NEXT: s_set_gpr_idx_off 3041; GPRIDX-NEXT: ; return to shader part epilog 3042; 3043; GFX10-LABEL: dyn_insertelement_v11f32_s_v_s: 3044; GFX10: ; %bb.0: ; %entry 3045; GFX10-NEXT: s_mov_b32 s0, s2 3046; GFX10-NEXT: s_mov_b32 s1, s3 3047; GFX10-NEXT: s_mov_b32 s2, s4 3048; GFX10-NEXT: s_mov_b32 s3, s5 3049; GFX10-NEXT: s_mov_b32 s4, s6 3050; GFX10-NEXT: s_mov_b32 s5, s7 3051; GFX10-NEXT: s_mov_b32 s6, s8 3052; GFX10-NEXT: s_mov_b32 s7, s9 3053; GFX10-NEXT: s_mov_b32 s8, s10 3054; GFX10-NEXT: s_mov_b32 s9, s11 3055; GFX10-NEXT: s_mov_b32 s10, s12 3056; GFX10-NEXT: v_mov_b32_e32 v11, v0 3057; GFX10-NEXT: v_mov_b32_e32 v0, s0 3058; GFX10-NEXT: s_mov_b32 m0, s13 3059; GFX10-NEXT: v_mov_b32_e32 v1, s1 3060; GFX10-NEXT: v_mov_b32_e32 v2, s2 3061; GFX10-NEXT: v_mov_b32_e32 v3, s3 3062; GFX10-NEXT: v_mov_b32_e32 v4, s4 3063; GFX10-NEXT: v_mov_b32_e32 v5, s5 3064; GFX10-NEXT: v_mov_b32_e32 v6, s6 3065; GFX10-NEXT: v_mov_b32_e32 v7, s7 3066; GFX10-NEXT: v_mov_b32_e32 v8, s8 3067; GFX10-NEXT: v_mov_b32_e32 v9, s9 3068; GFX10-NEXT: v_mov_b32_e32 v10, s10 3069; GFX10-NEXT: v_movreld_b32_e32 v0, v11 3070; GFX10-NEXT: ; return to shader part epilog 3071; 3072; GFX11-LABEL: dyn_insertelement_v11f32_s_v_s: 3073; GFX11: ; %bb.0: ; %entry 3074; GFX11-NEXT: s_mov_b32 s0, s2 3075; GFX11-NEXT: s_mov_b32 s1, s3 3076; GFX11-NEXT: s_mov_b32 s2, s4 3077; GFX11-NEXT: s_mov_b32 s3, s5 3078; GFX11-NEXT: s_mov_b32 s4, s6 3079; GFX11-NEXT: s_mov_b32 s5, s7 3080; GFX11-NEXT: s_mov_b32 s6, s8 3081; GFX11-NEXT: s_mov_b32 s7, s9 3082; GFX11-NEXT: s_mov_b32 s8, s10 3083; GFX11-NEXT: s_mov_b32 s9, s11 3084; GFX11-NEXT: s_mov_b32 s10, s12 3085; GFX11-NEXT: v_dual_mov_b32 v11, v0 :: v_dual_mov_b32 v0, s0 3086; GFX11-NEXT: s_mov_b32 m0, s13 3087; GFX11-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2 3088; GFX11-NEXT: v_dual_mov_b32 v3, s3 :: v_dual_mov_b32 v4, s4 3089; GFX11-NEXT: v_dual_mov_b32 v5, s5 :: v_dual_mov_b32 v6, s6 3090; GFX11-NEXT: v_dual_mov_b32 v7, s7 :: v_dual_mov_b32 v8, s8 3091; GFX11-NEXT: v_dual_mov_b32 v9, s9 :: v_dual_mov_b32 v10, s10 3092; GFX11-NEXT: v_movreld_b32_e32 v0, v11 3093; GFX11-NEXT: ; return to shader part epilog 3094entry: 3095 %insert = insertelement <11 x float> %vec, float %val, i32 %idx 3096 ret <11 x float> %insert 3097} 3098 3099define amdgpu_ps <11 x float> @dyn_insertelement_v11f32_s_v_v(<11 x float> inreg %vec, float %val, i32 %idx) { 3100; GPRIDX-LABEL: dyn_insertelement_v11f32_s_v_v: 3101; GPRIDX: ; %bb.0: ; %entry 3102; GPRIDX-NEXT: v_mov_b32_e32 v2, s2 3103; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 3104; GPRIDX-NEXT: v_mov_b32_e32 v3, s3 3105; GPRIDX-NEXT: v_cndmask_b32_e32 v12, v2, v0, vcc 3106; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 3107; GPRIDX-NEXT: v_mov_b32_e32 v4, s4 3108; GPRIDX-NEXT: v_cndmask_b32_e32 v11, v3, v0, vcc 3109; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v1 3110; GPRIDX-NEXT: v_mov_b32_e32 v5, s5 3111; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v4, v0, vcc 3112; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v1 3113; GPRIDX-NEXT: v_mov_b32_e32 v6, s6 3114; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v5, v0, vcc 3115; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v1 3116; GPRIDX-NEXT: v_mov_b32_e32 v7, s7 3117; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v6, v0, vcc 3118; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v1 3119; GPRIDX-NEXT: v_mov_b32_e32 v8, s8 3120; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v7, v0, vcc 3121; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v1 3122; GPRIDX-NEXT: v_mov_b32_e32 v9, s9 3123; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v8, v0, vcc 3124; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v1 3125; GPRIDX-NEXT: v_mov_b32_e32 v10, s10 3126; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v9, v0, vcc 3127; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 8, v1 3128; GPRIDX-NEXT: v_mov_b32_e32 v13, s11 3129; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v10, v0, vcc 3130; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 9, v1 3131; GPRIDX-NEXT: v_mov_b32_e32 v14, s12 3132; GPRIDX-NEXT: v_cndmask_b32_e32 v9, v13, v0, vcc 3133; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 10, v1 3134; GPRIDX-NEXT: v_cndmask_b32_e32 v10, v14, v0, vcc 3135; GPRIDX-NEXT: v_mov_b32_e32 v0, v12 3136; GPRIDX-NEXT: v_mov_b32_e32 v1, v11 3137; GPRIDX-NEXT: ; return to shader part epilog 3138; 3139; GFX10-LABEL: dyn_insertelement_v11f32_s_v_v: 3140; GFX10: ; %bb.0: ; %entry 3141; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 3142; GFX10-NEXT: v_cndmask_b32_e32 v12, s2, v0, vcc_lo 3143; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1 3144; GFX10-NEXT: v_cndmask_b32_e32 v11, s3, v0, vcc_lo 3145; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1 3146; GFX10-NEXT: v_cndmask_b32_e32 v2, s4, v0, vcc_lo 3147; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1 3148; GFX10-NEXT: v_cndmask_b32_e32 v3, s5, v0, vcc_lo 3149; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v1 3150; GFX10-NEXT: v_cndmask_b32_e32 v4, s6, v0, vcc_lo 3151; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v1 3152; GFX10-NEXT: v_cndmask_b32_e32 v5, s7, v0, vcc_lo 3153; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v1 3154; GFX10-NEXT: v_cndmask_b32_e32 v6, s8, v0, vcc_lo 3155; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v1 3156; GFX10-NEXT: v_cndmask_b32_e32 v7, s9, v0, vcc_lo 3157; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v1 3158; GFX10-NEXT: v_cndmask_b32_e32 v8, s10, v0, vcc_lo 3159; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v1 3160; GFX10-NEXT: v_cndmask_b32_e32 v9, s11, v0, vcc_lo 3161; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v1 3162; GFX10-NEXT: v_mov_b32_e32 v1, v11 3163; GFX10-NEXT: v_cndmask_b32_e32 v10, s12, v0, vcc_lo 3164; GFX10-NEXT: v_mov_b32_e32 v0, v12 3165; GFX10-NEXT: ; return to shader part epilog 3166; 3167; GFX11-LABEL: dyn_insertelement_v11f32_s_v_v: 3168; GFX11: ; %bb.0: ; %entry 3169; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 3170; GFX11-NEXT: v_cndmask_b32_e32 v12, s2, v0, vcc_lo 3171; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1 3172; GFX11-NEXT: v_cndmask_b32_e32 v11, s3, v0, vcc_lo 3173; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1 3174; GFX11-NEXT: v_cndmask_b32_e32 v2, s4, v0, vcc_lo 3175; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1 3176; GFX11-NEXT: v_cndmask_b32_e32 v3, s5, v0, vcc_lo 3177; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v1 3178; GFX11-NEXT: v_cndmask_b32_e32 v4, s6, v0, vcc_lo 3179; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v1 3180; GFX11-NEXT: v_cndmask_b32_e32 v5, s7, v0, vcc_lo 3181; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v1 3182; GFX11-NEXT: v_cndmask_b32_e32 v6, s8, v0, vcc_lo 3183; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v1 3184; GFX11-NEXT: v_cndmask_b32_e32 v7, s9, v0, vcc_lo 3185; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v1 3186; GFX11-NEXT: v_cndmask_b32_e32 v8, s10, v0, vcc_lo 3187; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v1 3188; GFX11-NEXT: v_cndmask_b32_e32 v9, s11, v0, vcc_lo 3189; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v1 3190; GFX11-NEXT: v_dual_mov_b32 v1, v11 :: v_dual_cndmask_b32 v10, s12, v0 3191; GFX11-NEXT: v_mov_b32_e32 v0, v12 3192; GFX11-NEXT: ; return to shader part epilog 3193entry: 3194 %insert = insertelement <11 x float> %vec, float %val, i32 %idx 3195 ret <11 x float> %insert 3196} 3197 3198define amdgpu_ps <11 x float> @dyn_insertelement_v11f32_v_v_s(<11 x float> %vec, float %val, i32 inreg %idx) { 3199; GPRIDX-LABEL: dyn_insertelement_v11f32_v_v_s: 3200; GPRIDX: ; %bb.0: ; %entry 3201; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(DST) 3202; GPRIDX-NEXT: v_mov_b32_e32 v0, v11 3203; GPRIDX-NEXT: s_set_gpr_idx_off 3204; GPRIDX-NEXT: ; return to shader part epilog 3205; 3206; GFX10PLUS-LABEL: dyn_insertelement_v11f32_v_v_s: 3207; GFX10PLUS: ; %bb.0: ; %entry 3208; GFX10PLUS-NEXT: s_mov_b32 m0, s2 3209; GFX10PLUS-NEXT: v_movreld_b32_e32 v0, v11 3210; GFX10PLUS-NEXT: ; return to shader part epilog 3211entry: 3212 %insert = insertelement <11 x float> %vec, float %val, i32 %idx 3213 ret <11 x float> %insert 3214} 3215 3216define amdgpu_ps <11 x float> @dyn_insertelement_v11f32_v_v_v(<11 x float> %vec, float %val, i32 %idx) { 3217; GPRIDX-LABEL: dyn_insertelement_v11f32_v_v_v: 3218; GPRIDX: ; %bb.0: ; %entry 3219; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v12 3220; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc 3221; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v12 3222; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc 3223; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v12 3224; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v11, vcc 3225; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v12 3226; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc 3227; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v12 3228; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v11, vcc 3229; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v12 3230; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v11, vcc 3231; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v12 3232; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v11, vcc 3233; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v12 3234; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v7, v11, vcc 3235; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 8, v12 3236; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v8, v11, vcc 3237; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 9, v12 3238; GPRIDX-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc 3239; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 10, v12 3240; GPRIDX-NEXT: v_cndmask_b32_e32 v10, v10, v11, vcc 3241; GPRIDX-NEXT: ; return to shader part epilog 3242; 3243; GFX10PLUS-LABEL: dyn_insertelement_v11f32_v_v_v: 3244; GFX10PLUS: ; %bb.0: ; %entry 3245; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v12 3246; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc_lo 3247; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v12 3248; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc_lo 3249; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v12 3250; GFX10PLUS-NEXT: v_cndmask_b32_e32 v2, v2, v11, vcc_lo 3251; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v12 3252; GFX10PLUS-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc_lo 3253; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v12 3254; GFX10PLUS-NEXT: v_cndmask_b32_e32 v4, v4, v11, vcc_lo 3255; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v12 3256; GFX10PLUS-NEXT: v_cndmask_b32_e32 v5, v5, v11, vcc_lo 3257; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v12 3258; GFX10PLUS-NEXT: v_cndmask_b32_e32 v6, v6, v11, vcc_lo 3259; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v12 3260; GFX10PLUS-NEXT: v_cndmask_b32_e32 v7, v7, v11, vcc_lo 3261; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v12 3262; GFX10PLUS-NEXT: v_cndmask_b32_e32 v8, v8, v11, vcc_lo 3263; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v12 3264; GFX10PLUS-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc_lo 3265; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v12 3266; GFX10PLUS-NEXT: v_cndmask_b32_e32 v10, v10, v11, vcc_lo 3267; GFX10PLUS-NEXT: ; return to shader part epilog 3268entry: 3269 %insert = insertelement <11 x float> %vec, float %val, i32 %idx 3270 ret <11 x float> %insert 3271} 3272 3273define amdgpu_ps <12 x float> @dyn_insertelement_v12f32_s_v_s(<12 x float> inreg %vec, float %val, i32 inreg %idx) { 3274; GPRIDX-LABEL: dyn_insertelement_v12f32_s_v_s: 3275; GPRIDX: ; %bb.0: ; %entry 3276; GPRIDX-NEXT: s_mov_b32 s0, s2 3277; GPRIDX-NEXT: s_mov_b32 s1, s3 3278; GPRIDX-NEXT: s_mov_b32 s2, s4 3279; GPRIDX-NEXT: s_mov_b32 s3, s5 3280; GPRIDX-NEXT: s_mov_b32 s4, s6 3281; GPRIDX-NEXT: s_mov_b32 s5, s7 3282; GPRIDX-NEXT: s_mov_b32 s6, s8 3283; GPRIDX-NEXT: s_mov_b32 s7, s9 3284; GPRIDX-NEXT: s_mov_b32 s8, s10 3285; GPRIDX-NEXT: s_mov_b32 s9, s11 3286; GPRIDX-NEXT: s_mov_b32 s10, s12 3287; GPRIDX-NEXT: s_mov_b32 s11, s13 3288; GPRIDX-NEXT: v_mov_b32_e32 v12, v0 3289; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 3290; GPRIDX-NEXT: v_mov_b32_e32 v1, s1 3291; GPRIDX-NEXT: v_mov_b32_e32 v2, s2 3292; GPRIDX-NEXT: v_mov_b32_e32 v3, s3 3293; GPRIDX-NEXT: v_mov_b32_e32 v4, s4 3294; GPRIDX-NEXT: v_mov_b32_e32 v5, s5 3295; GPRIDX-NEXT: v_mov_b32_e32 v6, s6 3296; GPRIDX-NEXT: v_mov_b32_e32 v7, s7 3297; GPRIDX-NEXT: v_mov_b32_e32 v8, s8 3298; GPRIDX-NEXT: v_mov_b32_e32 v9, s9 3299; GPRIDX-NEXT: v_mov_b32_e32 v10, s10 3300; GPRIDX-NEXT: v_mov_b32_e32 v11, s11 3301; GPRIDX-NEXT: s_set_gpr_idx_on s14, gpr_idx(DST) 3302; GPRIDX-NEXT: v_mov_b32_e32 v0, v12 3303; GPRIDX-NEXT: s_set_gpr_idx_off 3304; GPRIDX-NEXT: ; return to shader part epilog 3305; 3306; GFX10-LABEL: dyn_insertelement_v12f32_s_v_s: 3307; GFX10: ; %bb.0: ; %entry 3308; GFX10-NEXT: s_mov_b32 s0, s2 3309; GFX10-NEXT: s_mov_b32 s1, s3 3310; GFX10-NEXT: s_mov_b32 s2, s4 3311; GFX10-NEXT: s_mov_b32 s3, s5 3312; GFX10-NEXT: s_mov_b32 s4, s6 3313; GFX10-NEXT: s_mov_b32 s5, s7 3314; GFX10-NEXT: s_mov_b32 s6, s8 3315; GFX10-NEXT: s_mov_b32 s7, s9 3316; GFX10-NEXT: s_mov_b32 s8, s10 3317; GFX10-NEXT: s_mov_b32 s9, s11 3318; GFX10-NEXT: s_mov_b32 s10, s12 3319; GFX10-NEXT: s_mov_b32 s11, s13 3320; GFX10-NEXT: v_mov_b32_e32 v12, v0 3321; GFX10-NEXT: v_mov_b32_e32 v0, s0 3322; GFX10-NEXT: s_mov_b32 m0, s14 3323; GFX10-NEXT: v_mov_b32_e32 v1, s1 3324; GFX10-NEXT: v_mov_b32_e32 v2, s2 3325; GFX10-NEXT: v_mov_b32_e32 v3, s3 3326; GFX10-NEXT: v_mov_b32_e32 v4, s4 3327; GFX10-NEXT: v_mov_b32_e32 v5, s5 3328; GFX10-NEXT: v_mov_b32_e32 v6, s6 3329; GFX10-NEXT: v_mov_b32_e32 v7, s7 3330; GFX10-NEXT: v_mov_b32_e32 v8, s8 3331; GFX10-NEXT: v_mov_b32_e32 v9, s9 3332; GFX10-NEXT: v_mov_b32_e32 v10, s10 3333; GFX10-NEXT: v_mov_b32_e32 v11, s11 3334; GFX10-NEXT: v_movreld_b32_e32 v0, v12 3335; GFX10-NEXT: ; return to shader part epilog 3336; 3337; GFX11-LABEL: dyn_insertelement_v12f32_s_v_s: 3338; GFX11: ; %bb.0: ; %entry 3339; GFX11-NEXT: s_mov_b32 s0, s2 3340; GFX11-NEXT: s_mov_b32 s1, s3 3341; GFX11-NEXT: s_mov_b32 s2, s4 3342; GFX11-NEXT: s_mov_b32 s3, s5 3343; GFX11-NEXT: s_mov_b32 s4, s6 3344; GFX11-NEXT: s_mov_b32 s5, s7 3345; GFX11-NEXT: s_mov_b32 s6, s8 3346; GFX11-NEXT: s_mov_b32 s7, s9 3347; GFX11-NEXT: s_mov_b32 s8, s10 3348; GFX11-NEXT: s_mov_b32 s9, s11 3349; GFX11-NEXT: s_mov_b32 s10, s12 3350; GFX11-NEXT: s_mov_b32 s11, s13 3351; GFX11-NEXT: v_mov_b32_e32 v12, v0 3352; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3 3353; GFX11-NEXT: s_mov_b32 m0, s14 3354; GFX11-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2 3355; GFX11-NEXT: v_dual_mov_b32 v5, s5 :: v_dual_mov_b32 v4, s4 3356; GFX11-NEXT: v_dual_mov_b32 v7, s7 :: v_dual_mov_b32 v6, s6 3357; GFX11-NEXT: v_dual_mov_b32 v9, s9 :: v_dual_mov_b32 v8, s8 3358; GFX11-NEXT: v_dual_mov_b32 v11, s11 :: v_dual_mov_b32 v10, s10 3359; GFX11-NEXT: v_movreld_b32_e32 v0, v12 3360; GFX11-NEXT: ; return to shader part epilog 3361entry: 3362 %insert = insertelement <12 x float> %vec, float %val, i32 %idx 3363 ret <12 x float> %insert 3364} 3365 3366define amdgpu_ps <12 x float> @dyn_insertelement_v12f32_s_v_v(<12 x float> inreg %vec, float %val, i32 %idx) { 3367; GPRIDX-LABEL: dyn_insertelement_v12f32_s_v_v: 3368; GPRIDX: ; %bb.0: ; %entry 3369; GPRIDX-NEXT: v_mov_b32_e32 v2, s2 3370; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 3371; GPRIDX-NEXT: v_mov_b32_e32 v3, s3 3372; GPRIDX-NEXT: v_cndmask_b32_e32 v12, v2, v0, vcc 3373; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 3374; GPRIDX-NEXT: v_mov_b32_e32 v4, s4 3375; GPRIDX-NEXT: v_cndmask_b32_e32 v13, v3, v0, vcc 3376; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v1 3377; GPRIDX-NEXT: v_mov_b32_e32 v5, s5 3378; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v4, v0, vcc 3379; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v1 3380; GPRIDX-NEXT: v_mov_b32_e32 v6, s6 3381; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v5, v0, vcc 3382; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v1 3383; GPRIDX-NEXT: v_mov_b32_e32 v7, s7 3384; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v6, v0, vcc 3385; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v1 3386; GPRIDX-NEXT: v_mov_b32_e32 v8, s8 3387; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v7, v0, vcc 3388; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v1 3389; GPRIDX-NEXT: v_mov_b32_e32 v9, s9 3390; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v8, v0, vcc 3391; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v1 3392; GPRIDX-NEXT: v_mov_b32_e32 v10, s10 3393; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v9, v0, vcc 3394; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 8, v1 3395; GPRIDX-NEXT: v_mov_b32_e32 v11, s11 3396; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v10, v0, vcc 3397; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 9, v1 3398; GPRIDX-NEXT: v_mov_b32_e32 v14, s12 3399; GPRIDX-NEXT: v_cndmask_b32_e32 v9, v11, v0, vcc 3400; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 10, v1 3401; GPRIDX-NEXT: v_mov_b32_e32 v15, s13 3402; GPRIDX-NEXT: v_cndmask_b32_e32 v10, v14, v0, vcc 3403; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 11, v1 3404; GPRIDX-NEXT: v_cndmask_b32_e32 v11, v15, v0, vcc 3405; GPRIDX-NEXT: v_mov_b32_e32 v0, v12 3406; GPRIDX-NEXT: v_mov_b32_e32 v1, v13 3407; GPRIDX-NEXT: ; return to shader part epilog 3408; 3409; GFX10-LABEL: dyn_insertelement_v12f32_s_v_v: 3410; GFX10: ; %bb.0: ; %entry 3411; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 3412; GFX10-NEXT: v_cndmask_b32_e32 v12, s2, v0, vcc_lo 3413; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1 3414; GFX10-NEXT: v_cndmask_b32_e32 v13, s3, v0, vcc_lo 3415; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1 3416; GFX10-NEXT: v_cndmask_b32_e32 v2, s4, v0, vcc_lo 3417; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1 3418; GFX10-NEXT: v_cndmask_b32_e32 v3, s5, v0, vcc_lo 3419; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v1 3420; GFX10-NEXT: v_cndmask_b32_e32 v4, s6, v0, vcc_lo 3421; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v1 3422; GFX10-NEXT: v_cndmask_b32_e32 v5, s7, v0, vcc_lo 3423; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v1 3424; GFX10-NEXT: v_cndmask_b32_e32 v6, s8, v0, vcc_lo 3425; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v1 3426; GFX10-NEXT: v_cndmask_b32_e32 v7, s9, v0, vcc_lo 3427; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v1 3428; GFX10-NEXT: v_cndmask_b32_e32 v8, s10, v0, vcc_lo 3429; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v1 3430; GFX10-NEXT: v_cndmask_b32_e32 v9, s11, v0, vcc_lo 3431; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v1 3432; GFX10-NEXT: v_cndmask_b32_e32 v10, s12, v0, vcc_lo 3433; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v1 3434; GFX10-NEXT: v_mov_b32_e32 v1, v13 3435; GFX10-NEXT: v_cndmask_b32_e32 v11, s13, v0, vcc_lo 3436; GFX10-NEXT: v_mov_b32_e32 v0, v12 3437; GFX10-NEXT: ; return to shader part epilog 3438; 3439; GFX11-LABEL: dyn_insertelement_v12f32_s_v_v: 3440; GFX11: ; %bb.0: ; %entry 3441; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 3442; GFX11-NEXT: v_cndmask_b32_e32 v12, s2, v0, vcc_lo 3443; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1 3444; GFX11-NEXT: v_cndmask_b32_e32 v13, s3, v0, vcc_lo 3445; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1 3446; GFX11-NEXT: v_cndmask_b32_e32 v2, s4, v0, vcc_lo 3447; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1 3448; GFX11-NEXT: v_cndmask_b32_e32 v3, s5, v0, vcc_lo 3449; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v1 3450; GFX11-NEXT: v_cndmask_b32_e32 v4, s6, v0, vcc_lo 3451; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v1 3452; GFX11-NEXT: v_cndmask_b32_e32 v5, s7, v0, vcc_lo 3453; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v1 3454; GFX11-NEXT: v_cndmask_b32_e32 v6, s8, v0, vcc_lo 3455; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v1 3456; GFX11-NEXT: v_cndmask_b32_e32 v7, s9, v0, vcc_lo 3457; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v1 3458; GFX11-NEXT: v_cndmask_b32_e32 v8, s10, v0, vcc_lo 3459; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v1 3460; GFX11-NEXT: v_cndmask_b32_e32 v9, s11, v0, vcc_lo 3461; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v1 3462; GFX11-NEXT: v_cndmask_b32_e32 v10, s12, v0, vcc_lo 3463; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v1 3464; GFX11-NEXT: v_mov_b32_e32 v1, v13 3465; GFX11-NEXT: v_dual_cndmask_b32 v11, s13, v0 :: v_dual_mov_b32 v0, v12 3466; GFX11-NEXT: ; return to shader part epilog 3467entry: 3468 %insert = insertelement <12 x float> %vec, float %val, i32 %idx 3469 ret <12 x float> %insert 3470} 3471 3472define amdgpu_ps <12 x float> @dyn_insertelement_v12f32_v_v_s(<12 x float> %vec, float %val, i32 inreg %idx) { 3473; GPRIDX-LABEL: dyn_insertelement_v12f32_v_v_s: 3474; GPRIDX: ; %bb.0: ; %entry 3475; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(DST) 3476; GPRIDX-NEXT: v_mov_b32_e32 v0, v12 3477; GPRIDX-NEXT: s_set_gpr_idx_off 3478; GPRIDX-NEXT: ; return to shader part epilog 3479; 3480; GFX10PLUS-LABEL: dyn_insertelement_v12f32_v_v_s: 3481; GFX10PLUS: ; %bb.0: ; %entry 3482; GFX10PLUS-NEXT: s_mov_b32 m0, s2 3483; GFX10PLUS-NEXT: v_movreld_b32_e32 v0, v12 3484; GFX10PLUS-NEXT: ; return to shader part epilog 3485entry: 3486 %insert = insertelement <12 x float> %vec, float %val, i32 %idx 3487 ret <12 x float> %insert 3488} 3489 3490define amdgpu_ps <12 x float> @dyn_insertelement_v12f32_v_v_v(<12 x float> %vec, float %val, i32 %idx) { 3491; GPRIDX-LABEL: dyn_insertelement_v12f32_v_v_v: 3492; GPRIDX: ; %bb.0: ; %entry 3493; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v13 3494; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc 3495; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v13 3496; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v12, vcc 3497; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v13 3498; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v12, vcc 3499; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v13 3500; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v12, vcc 3501; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v13 3502; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v12, vcc 3503; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v13 3504; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v12, vcc 3505; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v13 3506; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v12, vcc 3507; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v13 3508; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v7, v12, vcc 3509; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 8, v13 3510; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v8, v12, vcc 3511; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 9, v13 3512; GPRIDX-NEXT: v_cndmask_b32_e32 v9, v9, v12, vcc 3513; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 10, v13 3514; GPRIDX-NEXT: v_cndmask_b32_e32 v10, v10, v12, vcc 3515; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 11, v13 3516; GPRIDX-NEXT: v_cndmask_b32_e32 v11, v11, v12, vcc 3517; GPRIDX-NEXT: ; return to shader part epilog 3518; 3519; GFX10PLUS-LABEL: dyn_insertelement_v12f32_v_v_v: 3520; GFX10PLUS: ; %bb.0: ; %entry 3521; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v13 3522; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo 3523; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v13 3524; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, v1, v12, vcc_lo 3525; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v13 3526; GFX10PLUS-NEXT: v_cndmask_b32_e32 v2, v2, v12, vcc_lo 3527; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v13 3528; GFX10PLUS-NEXT: v_cndmask_b32_e32 v3, v3, v12, vcc_lo 3529; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v13 3530; GFX10PLUS-NEXT: v_cndmask_b32_e32 v4, v4, v12, vcc_lo 3531; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v13 3532; GFX10PLUS-NEXT: v_cndmask_b32_e32 v5, v5, v12, vcc_lo 3533; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v13 3534; GFX10PLUS-NEXT: v_cndmask_b32_e32 v6, v6, v12, vcc_lo 3535; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v13 3536; GFX10PLUS-NEXT: v_cndmask_b32_e32 v7, v7, v12, vcc_lo 3537; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v13 3538; GFX10PLUS-NEXT: v_cndmask_b32_e32 v8, v8, v12, vcc_lo 3539; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v13 3540; GFX10PLUS-NEXT: v_cndmask_b32_e32 v9, v9, v12, vcc_lo 3541; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v13 3542; GFX10PLUS-NEXT: v_cndmask_b32_e32 v10, v10, v12, vcc_lo 3543; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v13 3544; GFX10PLUS-NEXT: v_cndmask_b32_e32 v11, v11, v12, vcc_lo 3545; GFX10PLUS-NEXT: ; return to shader part epilog 3546entry: 3547 %insert = insertelement <12 x float> %vec, float %val, i32 %idx 3548 ret <12 x float> %insert 3549} 3550 3551define amdgpu_ps <16 x i32> @dyn_insertelement_v16i32_s_s_s(<16 x i32> inreg %vec, i32 inreg %val, i32 inreg %idx) { 3552; GPRIDX-LABEL: dyn_insertelement_v16i32_s_s_s: 3553; GPRIDX: ; %bb.0: ; %entry 3554; GPRIDX-NEXT: s_mov_b32 s0, s2 3555; GPRIDX-NEXT: s_mov_b32 s1, s3 3556; GPRIDX-NEXT: s_mov_b32 s2, s4 3557; GPRIDX-NEXT: s_mov_b32 s3, s5 3558; GPRIDX-NEXT: s_mov_b32 s4, s6 3559; GPRIDX-NEXT: s_mov_b32 s5, s7 3560; GPRIDX-NEXT: s_mov_b32 s6, s8 3561; GPRIDX-NEXT: s_mov_b32 s7, s9 3562; GPRIDX-NEXT: s_mov_b32 s8, s10 3563; GPRIDX-NEXT: s_mov_b32 s9, s11 3564; GPRIDX-NEXT: s_mov_b32 s10, s12 3565; GPRIDX-NEXT: s_mov_b32 s11, s13 3566; GPRIDX-NEXT: s_mov_b32 s12, s14 3567; GPRIDX-NEXT: s_mov_b32 s13, s15 3568; GPRIDX-NEXT: s_mov_b32 s14, s16 3569; GPRIDX-NEXT: s_mov_b32 s15, s17 3570; GPRIDX-NEXT: s_mov_b32 m0, s19 3571; GPRIDX-NEXT: s_nop 0 3572; GPRIDX-NEXT: s_movreld_b32 s0, s18 3573; GPRIDX-NEXT: ; return to shader part epilog 3574; 3575; GFX10PLUS-LABEL: dyn_insertelement_v16i32_s_s_s: 3576; GFX10PLUS: ; %bb.0: ; %entry 3577; GFX10PLUS-NEXT: s_mov_b32 s0, s2 3578; GFX10PLUS-NEXT: s_mov_b32 m0, s19 3579; GFX10PLUS-NEXT: s_mov_b32 s1, s3 3580; GFX10PLUS-NEXT: s_mov_b32 s2, s4 3581; GFX10PLUS-NEXT: s_mov_b32 s3, s5 3582; GFX10PLUS-NEXT: s_mov_b32 s4, s6 3583; GFX10PLUS-NEXT: s_mov_b32 s5, s7 3584; GFX10PLUS-NEXT: s_mov_b32 s6, s8 3585; GFX10PLUS-NEXT: s_mov_b32 s7, s9 3586; GFX10PLUS-NEXT: s_mov_b32 s8, s10 3587; GFX10PLUS-NEXT: s_mov_b32 s9, s11 3588; GFX10PLUS-NEXT: s_mov_b32 s10, s12 3589; GFX10PLUS-NEXT: s_mov_b32 s11, s13 3590; GFX10PLUS-NEXT: s_mov_b32 s12, s14 3591; GFX10PLUS-NEXT: s_mov_b32 s13, s15 3592; GFX10PLUS-NEXT: s_mov_b32 s14, s16 3593; GFX10PLUS-NEXT: s_mov_b32 s15, s17 3594; GFX10PLUS-NEXT: s_movreld_b32 s0, s18 3595; GFX10PLUS-NEXT: ; return to shader part epilog 3596entry: 3597 %insert = insertelement <16 x i32> %vec, i32 %val, i32 %idx 3598 ret <16 x i32> %insert 3599} 3600 3601define amdgpu_ps <16 x float> @dyn_insertelement_v16f32_s_s_s(<16 x float> inreg %vec, float inreg %val, i32 inreg %idx) { 3602; GPRIDX-LABEL: dyn_insertelement_v16f32_s_s_s: 3603; GPRIDX: ; %bb.0: ; %entry 3604; GPRIDX-NEXT: s_mov_b32 s0, s2 3605; GPRIDX-NEXT: s_mov_b32 s1, s3 3606; GPRIDX-NEXT: s_mov_b32 s2, s4 3607; GPRIDX-NEXT: s_mov_b32 s3, s5 3608; GPRIDX-NEXT: s_mov_b32 s4, s6 3609; GPRIDX-NEXT: s_mov_b32 s5, s7 3610; GPRIDX-NEXT: s_mov_b32 s6, s8 3611; GPRIDX-NEXT: s_mov_b32 s7, s9 3612; GPRIDX-NEXT: s_mov_b32 s8, s10 3613; GPRIDX-NEXT: s_mov_b32 s9, s11 3614; GPRIDX-NEXT: s_mov_b32 s10, s12 3615; GPRIDX-NEXT: s_mov_b32 s11, s13 3616; GPRIDX-NEXT: s_mov_b32 s12, s14 3617; GPRIDX-NEXT: s_mov_b32 s13, s15 3618; GPRIDX-NEXT: s_mov_b32 s14, s16 3619; GPRIDX-NEXT: s_mov_b32 s15, s17 3620; GPRIDX-NEXT: s_mov_b32 m0, s19 3621; GPRIDX-NEXT: s_nop 0 3622; GPRIDX-NEXT: s_movreld_b32 s0, s18 3623; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 3624; GPRIDX-NEXT: v_mov_b32_e32 v1, s1 3625; GPRIDX-NEXT: v_mov_b32_e32 v2, s2 3626; GPRIDX-NEXT: v_mov_b32_e32 v3, s3 3627; GPRIDX-NEXT: v_mov_b32_e32 v4, s4 3628; GPRIDX-NEXT: v_mov_b32_e32 v5, s5 3629; GPRIDX-NEXT: v_mov_b32_e32 v6, s6 3630; GPRIDX-NEXT: v_mov_b32_e32 v7, s7 3631; GPRIDX-NEXT: v_mov_b32_e32 v8, s8 3632; GPRIDX-NEXT: v_mov_b32_e32 v9, s9 3633; GPRIDX-NEXT: v_mov_b32_e32 v10, s10 3634; GPRIDX-NEXT: v_mov_b32_e32 v11, s11 3635; GPRIDX-NEXT: v_mov_b32_e32 v12, s12 3636; GPRIDX-NEXT: v_mov_b32_e32 v13, s13 3637; GPRIDX-NEXT: v_mov_b32_e32 v14, s14 3638; GPRIDX-NEXT: v_mov_b32_e32 v15, s15 3639; GPRIDX-NEXT: ; return to shader part epilog 3640; 3641; GFX10-LABEL: dyn_insertelement_v16f32_s_s_s: 3642; GFX10: ; %bb.0: ; %entry 3643; GFX10-NEXT: s_mov_b32 s0, s2 3644; GFX10-NEXT: s_mov_b32 m0, s19 3645; GFX10-NEXT: s_mov_b32 s1, s3 3646; GFX10-NEXT: s_mov_b32 s2, s4 3647; GFX10-NEXT: s_mov_b32 s3, s5 3648; GFX10-NEXT: s_mov_b32 s4, s6 3649; GFX10-NEXT: s_mov_b32 s5, s7 3650; GFX10-NEXT: s_mov_b32 s6, s8 3651; GFX10-NEXT: s_mov_b32 s7, s9 3652; GFX10-NEXT: s_mov_b32 s8, s10 3653; GFX10-NEXT: s_mov_b32 s9, s11 3654; GFX10-NEXT: s_mov_b32 s10, s12 3655; GFX10-NEXT: s_mov_b32 s11, s13 3656; GFX10-NEXT: s_mov_b32 s12, s14 3657; GFX10-NEXT: s_mov_b32 s13, s15 3658; GFX10-NEXT: s_mov_b32 s14, s16 3659; GFX10-NEXT: s_mov_b32 s15, s17 3660; GFX10-NEXT: s_movreld_b32 s0, s18 3661; GFX10-NEXT: v_mov_b32_e32 v0, s0 3662; GFX10-NEXT: v_mov_b32_e32 v1, s1 3663; GFX10-NEXT: v_mov_b32_e32 v2, s2 3664; GFX10-NEXT: v_mov_b32_e32 v3, s3 3665; GFX10-NEXT: v_mov_b32_e32 v4, s4 3666; GFX10-NEXT: v_mov_b32_e32 v5, s5 3667; GFX10-NEXT: v_mov_b32_e32 v6, s6 3668; GFX10-NEXT: v_mov_b32_e32 v7, s7 3669; GFX10-NEXT: v_mov_b32_e32 v8, s8 3670; GFX10-NEXT: v_mov_b32_e32 v9, s9 3671; GFX10-NEXT: v_mov_b32_e32 v10, s10 3672; GFX10-NEXT: v_mov_b32_e32 v11, s11 3673; GFX10-NEXT: v_mov_b32_e32 v12, s12 3674; GFX10-NEXT: v_mov_b32_e32 v13, s13 3675; GFX10-NEXT: v_mov_b32_e32 v14, s14 3676; GFX10-NEXT: v_mov_b32_e32 v15, s15 3677; GFX10-NEXT: ; return to shader part epilog 3678; 3679; GFX11-LABEL: dyn_insertelement_v16f32_s_s_s: 3680; GFX11: ; %bb.0: ; %entry 3681; GFX11-NEXT: s_mov_b32 s0, s2 3682; GFX11-NEXT: s_mov_b32 m0, s19 3683; GFX11-NEXT: s_mov_b32 s1, s3 3684; GFX11-NEXT: s_mov_b32 s2, s4 3685; GFX11-NEXT: s_mov_b32 s3, s5 3686; GFX11-NEXT: s_mov_b32 s4, s6 3687; GFX11-NEXT: s_mov_b32 s5, s7 3688; GFX11-NEXT: s_mov_b32 s6, s8 3689; GFX11-NEXT: s_mov_b32 s7, s9 3690; GFX11-NEXT: s_mov_b32 s8, s10 3691; GFX11-NEXT: s_mov_b32 s9, s11 3692; GFX11-NEXT: s_mov_b32 s10, s12 3693; GFX11-NEXT: s_mov_b32 s11, s13 3694; GFX11-NEXT: s_mov_b32 s12, s14 3695; GFX11-NEXT: s_mov_b32 s13, s15 3696; GFX11-NEXT: s_mov_b32 s14, s16 3697; GFX11-NEXT: s_mov_b32 s15, s17 3698; GFX11-NEXT: s_movreld_b32 s0, s18 3699; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 3700; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 3701; GFX11-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 3702; GFX11-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 3703; GFX11-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 3704; GFX11-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 3705; GFX11-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 3706; GFX11-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 3707; GFX11-NEXT: ; return to shader part epilog 3708entry: 3709 %insert = insertelement <16 x float> %vec, float %val, i32 %idx 3710 ret <16 x float> %insert 3711} 3712 3713define amdgpu_ps <32 x float> @dyn_insertelement_v32f32_s_s_s(<32 x float> inreg %vec, float inreg %val, i32 inreg %idx) { 3714; GPRIDX-LABEL: dyn_insertelement_v32f32_s_s_s: 3715; GPRIDX: ; %bb.0: ; %entry 3716; GPRIDX-NEXT: s_mov_b32 s0, s2 3717; GPRIDX-NEXT: s_mov_b32 s1, s3 3718; GPRIDX-NEXT: s_mov_b32 s2, s4 3719; GPRIDX-NEXT: s_mov_b32 s3, s5 3720; GPRIDX-NEXT: s_mov_b32 s4, s6 3721; GPRIDX-NEXT: s_mov_b32 s5, s7 3722; GPRIDX-NEXT: s_mov_b32 s6, s8 3723; GPRIDX-NEXT: s_mov_b32 s7, s9 3724; GPRIDX-NEXT: s_mov_b32 s8, s10 3725; GPRIDX-NEXT: s_mov_b32 s9, s11 3726; GPRIDX-NEXT: s_mov_b32 s10, s12 3727; GPRIDX-NEXT: s_mov_b32 s11, s13 3728; GPRIDX-NEXT: s_mov_b32 s12, s14 3729; GPRIDX-NEXT: s_mov_b32 s13, s15 3730; GPRIDX-NEXT: s_mov_b32 s14, s16 3731; GPRIDX-NEXT: s_mov_b32 s15, s17 3732; GPRIDX-NEXT: s_mov_b32 s16, s18 3733; GPRIDX-NEXT: s_mov_b32 s17, s19 3734; GPRIDX-NEXT: s_mov_b32 s18, s20 3735; GPRIDX-NEXT: s_mov_b32 s19, s21 3736; GPRIDX-NEXT: s_mov_b32 s20, s22 3737; GPRIDX-NEXT: s_mov_b32 s21, s23 3738; GPRIDX-NEXT: s_mov_b32 s22, s24 3739; GPRIDX-NEXT: s_mov_b32 s23, s25 3740; GPRIDX-NEXT: s_mov_b32 s24, s26 3741; GPRIDX-NEXT: s_mov_b32 s25, s27 3742; GPRIDX-NEXT: s_mov_b32 s26, s28 3743; GPRIDX-NEXT: s_mov_b32 s27, s29 3744; GPRIDX-NEXT: s_mov_b32 s28, s30 3745; GPRIDX-NEXT: s_mov_b32 s29, s31 3746; GPRIDX-NEXT: s_mov_b32 s31, s33 3747; GPRIDX-NEXT: s_mov_b32 s30, s32 3748; GPRIDX-NEXT: s_mov_b32 m0, s35 3749; GPRIDX-NEXT: s_nop 0 3750; GPRIDX-NEXT: s_movreld_b32 s0, s34 3751; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 3752; GPRIDX-NEXT: v_mov_b32_e32 v1, s1 3753; GPRIDX-NEXT: v_mov_b32_e32 v2, s2 3754; GPRIDX-NEXT: v_mov_b32_e32 v3, s3 3755; GPRIDX-NEXT: v_mov_b32_e32 v4, s4 3756; GPRIDX-NEXT: v_mov_b32_e32 v5, s5 3757; GPRIDX-NEXT: v_mov_b32_e32 v6, s6 3758; GPRIDX-NEXT: v_mov_b32_e32 v7, s7 3759; GPRIDX-NEXT: v_mov_b32_e32 v8, s8 3760; GPRIDX-NEXT: v_mov_b32_e32 v9, s9 3761; GPRIDX-NEXT: v_mov_b32_e32 v10, s10 3762; GPRIDX-NEXT: v_mov_b32_e32 v11, s11 3763; GPRIDX-NEXT: v_mov_b32_e32 v12, s12 3764; GPRIDX-NEXT: v_mov_b32_e32 v13, s13 3765; GPRIDX-NEXT: v_mov_b32_e32 v14, s14 3766; GPRIDX-NEXT: v_mov_b32_e32 v15, s15 3767; GPRIDX-NEXT: v_mov_b32_e32 v16, s16 3768; GPRIDX-NEXT: v_mov_b32_e32 v17, s17 3769; GPRIDX-NEXT: v_mov_b32_e32 v18, s18 3770; GPRIDX-NEXT: v_mov_b32_e32 v19, s19 3771; GPRIDX-NEXT: v_mov_b32_e32 v20, s20 3772; GPRIDX-NEXT: v_mov_b32_e32 v21, s21 3773; GPRIDX-NEXT: v_mov_b32_e32 v22, s22 3774; GPRIDX-NEXT: v_mov_b32_e32 v23, s23 3775; GPRIDX-NEXT: v_mov_b32_e32 v24, s24 3776; GPRIDX-NEXT: v_mov_b32_e32 v25, s25 3777; GPRIDX-NEXT: v_mov_b32_e32 v26, s26 3778; GPRIDX-NEXT: v_mov_b32_e32 v27, s27 3779; GPRIDX-NEXT: v_mov_b32_e32 v28, s28 3780; GPRIDX-NEXT: v_mov_b32_e32 v29, s29 3781; GPRIDX-NEXT: v_mov_b32_e32 v30, s30 3782; GPRIDX-NEXT: v_mov_b32_e32 v31, s31 3783; GPRIDX-NEXT: ; return to shader part epilog 3784; 3785; GFX10-LABEL: dyn_insertelement_v32f32_s_s_s: 3786; GFX10: ; %bb.0: ; %entry 3787; GFX10-NEXT: s_mov_b32 s0, s2 3788; GFX10-NEXT: s_mov_b32 m0, s35 3789; GFX10-NEXT: s_mov_b32 s1, s3 3790; GFX10-NEXT: s_mov_b32 s2, s4 3791; GFX10-NEXT: s_mov_b32 s3, s5 3792; GFX10-NEXT: s_mov_b32 s4, s6 3793; GFX10-NEXT: s_mov_b32 s5, s7 3794; GFX10-NEXT: s_mov_b32 s6, s8 3795; GFX10-NEXT: s_mov_b32 s7, s9 3796; GFX10-NEXT: s_mov_b32 s8, s10 3797; GFX10-NEXT: s_mov_b32 s9, s11 3798; GFX10-NEXT: s_mov_b32 s10, s12 3799; GFX10-NEXT: s_mov_b32 s11, s13 3800; GFX10-NEXT: s_mov_b32 s12, s14 3801; GFX10-NEXT: s_mov_b32 s13, s15 3802; GFX10-NEXT: s_mov_b32 s14, s16 3803; GFX10-NEXT: s_mov_b32 s15, s17 3804; GFX10-NEXT: s_mov_b32 s16, s18 3805; GFX10-NEXT: s_mov_b32 s17, s19 3806; GFX10-NEXT: s_mov_b32 s18, s20 3807; GFX10-NEXT: s_mov_b32 s19, s21 3808; GFX10-NEXT: s_mov_b32 s20, s22 3809; GFX10-NEXT: s_mov_b32 s21, s23 3810; GFX10-NEXT: s_mov_b32 s22, s24 3811; GFX10-NEXT: s_mov_b32 s23, s25 3812; GFX10-NEXT: s_mov_b32 s24, s26 3813; GFX10-NEXT: s_mov_b32 s25, s27 3814; GFX10-NEXT: s_mov_b32 s26, s28 3815; GFX10-NEXT: s_mov_b32 s27, s29 3816; GFX10-NEXT: s_mov_b32 s28, s30 3817; GFX10-NEXT: s_mov_b32 s29, s31 3818; GFX10-NEXT: s_mov_b32 s31, s33 3819; GFX10-NEXT: s_mov_b32 s30, s32 3820; GFX10-NEXT: s_movreld_b32 s0, s34 3821; GFX10-NEXT: v_mov_b32_e32 v0, s0 3822; GFX10-NEXT: v_mov_b32_e32 v1, s1 3823; GFX10-NEXT: v_mov_b32_e32 v2, s2 3824; GFX10-NEXT: v_mov_b32_e32 v3, s3 3825; GFX10-NEXT: v_mov_b32_e32 v4, s4 3826; GFX10-NEXT: v_mov_b32_e32 v5, s5 3827; GFX10-NEXT: v_mov_b32_e32 v6, s6 3828; GFX10-NEXT: v_mov_b32_e32 v7, s7 3829; GFX10-NEXT: v_mov_b32_e32 v8, s8 3830; GFX10-NEXT: v_mov_b32_e32 v9, s9 3831; GFX10-NEXT: v_mov_b32_e32 v10, s10 3832; GFX10-NEXT: v_mov_b32_e32 v11, s11 3833; GFX10-NEXT: v_mov_b32_e32 v12, s12 3834; GFX10-NEXT: v_mov_b32_e32 v13, s13 3835; GFX10-NEXT: v_mov_b32_e32 v14, s14 3836; GFX10-NEXT: v_mov_b32_e32 v15, s15 3837; GFX10-NEXT: v_mov_b32_e32 v16, s16 3838; GFX10-NEXT: v_mov_b32_e32 v17, s17 3839; GFX10-NEXT: v_mov_b32_e32 v18, s18 3840; GFX10-NEXT: v_mov_b32_e32 v19, s19 3841; GFX10-NEXT: v_mov_b32_e32 v20, s20 3842; GFX10-NEXT: v_mov_b32_e32 v21, s21 3843; GFX10-NEXT: v_mov_b32_e32 v22, s22 3844; GFX10-NEXT: v_mov_b32_e32 v23, s23 3845; GFX10-NEXT: v_mov_b32_e32 v24, s24 3846; GFX10-NEXT: v_mov_b32_e32 v25, s25 3847; GFX10-NEXT: v_mov_b32_e32 v26, s26 3848; GFX10-NEXT: v_mov_b32_e32 v27, s27 3849; GFX10-NEXT: v_mov_b32_e32 v28, s28 3850; GFX10-NEXT: v_mov_b32_e32 v29, s29 3851; GFX10-NEXT: v_mov_b32_e32 v30, s30 3852; GFX10-NEXT: v_mov_b32_e32 v31, s31 3853; GFX10-NEXT: ; return to shader part epilog 3854; 3855; GFX11-LABEL: dyn_insertelement_v32f32_s_s_s: 3856; GFX11: ; %bb.0: ; %entry 3857; GFX11-NEXT: s_mov_b32 s0, s2 3858; GFX11-NEXT: s_mov_b32 m0, s35 3859; GFX11-NEXT: s_mov_b32 s1, s3 3860; GFX11-NEXT: s_mov_b32 s2, s4 3861; GFX11-NEXT: s_mov_b32 s3, s5 3862; GFX11-NEXT: s_mov_b32 s4, s6 3863; GFX11-NEXT: s_mov_b32 s5, s7 3864; GFX11-NEXT: s_mov_b32 s6, s8 3865; GFX11-NEXT: s_mov_b32 s7, s9 3866; GFX11-NEXT: s_mov_b32 s8, s10 3867; GFX11-NEXT: s_mov_b32 s9, s11 3868; GFX11-NEXT: s_mov_b32 s10, s12 3869; GFX11-NEXT: s_mov_b32 s11, s13 3870; GFX11-NEXT: s_mov_b32 s12, s14 3871; GFX11-NEXT: s_mov_b32 s13, s15 3872; GFX11-NEXT: s_mov_b32 s14, s16 3873; GFX11-NEXT: s_mov_b32 s15, s17 3874; GFX11-NEXT: s_mov_b32 s16, s18 3875; GFX11-NEXT: s_mov_b32 s17, s19 3876; GFX11-NEXT: s_mov_b32 s18, s20 3877; GFX11-NEXT: s_mov_b32 s19, s21 3878; GFX11-NEXT: s_mov_b32 s20, s22 3879; GFX11-NEXT: s_mov_b32 s21, s23 3880; GFX11-NEXT: s_mov_b32 s22, s24 3881; GFX11-NEXT: s_mov_b32 s23, s25 3882; GFX11-NEXT: s_mov_b32 s24, s26 3883; GFX11-NEXT: s_mov_b32 s25, s27 3884; GFX11-NEXT: s_mov_b32 s26, s28 3885; GFX11-NEXT: s_mov_b32 s27, s29 3886; GFX11-NEXT: s_mov_b32 s28, s30 3887; GFX11-NEXT: s_mov_b32 s29, s31 3888; GFX11-NEXT: s_mov_b32 s31, s33 3889; GFX11-NEXT: s_mov_b32 s30, s32 3890; GFX11-NEXT: s_movreld_b32 s0, s34 3891; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 3892; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 3893; GFX11-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 3894; GFX11-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 3895; GFX11-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 3896; GFX11-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 3897; GFX11-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 3898; GFX11-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 3899; GFX11-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17 3900; GFX11-NEXT: v_dual_mov_b32 v18, s18 :: v_dual_mov_b32 v19, s19 3901; GFX11-NEXT: v_dual_mov_b32 v20, s20 :: v_dual_mov_b32 v21, s21 3902; GFX11-NEXT: v_dual_mov_b32 v22, s22 :: v_dual_mov_b32 v23, s23 3903; GFX11-NEXT: v_dual_mov_b32 v24, s24 :: v_dual_mov_b32 v25, s25 3904; GFX11-NEXT: v_dual_mov_b32 v26, s26 :: v_dual_mov_b32 v27, s27 3905; GFX11-NEXT: v_dual_mov_b32 v28, s28 :: v_dual_mov_b32 v29, s29 3906; GFX11-NEXT: v_dual_mov_b32 v30, s30 :: v_dual_mov_b32 v31, s31 3907; GFX11-NEXT: ; return to shader part epilog 3908entry: 3909 %insert = insertelement <32 x float> %vec, float %val, i32 %idx 3910 ret <32 x float> %insert 3911} 3912 3913define amdgpu_ps <16 x i64> @dyn_insertelement_v16i64_s_s_s(<16 x i64> inreg %vec, i64 inreg %val, i32 inreg %idx) { 3914; GPRIDX-LABEL: dyn_insertelement_v16i64_s_s_s: 3915; GPRIDX: ; %bb.0: ; %entry 3916; GPRIDX-NEXT: s_mov_b32 s0, s2 3917; GPRIDX-NEXT: s_mov_b32 s1, s3 3918; GPRIDX-NEXT: s_mov_b32 s2, s4 3919; GPRIDX-NEXT: s_mov_b32 s3, s5 3920; GPRIDX-NEXT: s_mov_b32 s4, s6 3921; GPRIDX-NEXT: s_mov_b32 s5, s7 3922; GPRIDX-NEXT: s_mov_b32 s6, s8 3923; GPRIDX-NEXT: s_mov_b32 s7, s9 3924; GPRIDX-NEXT: s_mov_b32 s8, s10 3925; GPRIDX-NEXT: s_mov_b32 s9, s11 3926; GPRIDX-NEXT: s_mov_b32 s10, s12 3927; GPRIDX-NEXT: s_mov_b32 s11, s13 3928; GPRIDX-NEXT: s_mov_b32 s12, s14 3929; GPRIDX-NEXT: s_mov_b32 s13, s15 3930; GPRIDX-NEXT: s_mov_b32 s14, s16 3931; GPRIDX-NEXT: s_mov_b32 s15, s17 3932; GPRIDX-NEXT: s_mov_b32 s16, s18 3933; GPRIDX-NEXT: s_mov_b32 s17, s19 3934; GPRIDX-NEXT: s_mov_b32 s18, s20 3935; GPRIDX-NEXT: s_mov_b32 s19, s21 3936; GPRIDX-NEXT: s_mov_b32 s20, s22 3937; GPRIDX-NEXT: s_mov_b32 s21, s23 3938; GPRIDX-NEXT: s_mov_b32 s22, s24 3939; GPRIDX-NEXT: s_mov_b32 s23, s25 3940; GPRIDX-NEXT: s_mov_b32 s24, s26 3941; GPRIDX-NEXT: s_mov_b32 s25, s27 3942; GPRIDX-NEXT: s_mov_b32 s26, s28 3943; GPRIDX-NEXT: s_mov_b32 s27, s29 3944; GPRIDX-NEXT: s_mov_b32 s28, s30 3945; GPRIDX-NEXT: s_mov_b32 s29, s31 3946; GPRIDX-NEXT: s_mov_b32 s31, s33 3947; GPRIDX-NEXT: s_mov_b32 s30, s32 3948; GPRIDX-NEXT: s_mov_b32 m0, s36 3949; GPRIDX-NEXT: s_nop 0 3950; GPRIDX-NEXT: s_movreld_b64 s[0:1], s[34:35] 3951; GPRIDX-NEXT: ; return to shader part epilog 3952; 3953; GFX10PLUS-LABEL: dyn_insertelement_v16i64_s_s_s: 3954; GFX10PLUS: ; %bb.0: ; %entry 3955; GFX10PLUS-NEXT: s_mov_b32 s0, s2 3956; GFX10PLUS-NEXT: s_mov_b32 s1, s3 3957; GFX10PLUS-NEXT: s_mov_b32 m0, s36 3958; GFX10PLUS-NEXT: s_mov_b32 s2, s4 3959; GFX10PLUS-NEXT: s_mov_b32 s3, s5 3960; GFX10PLUS-NEXT: s_mov_b32 s4, s6 3961; GFX10PLUS-NEXT: s_mov_b32 s5, s7 3962; GFX10PLUS-NEXT: s_mov_b32 s6, s8 3963; GFX10PLUS-NEXT: s_mov_b32 s7, s9 3964; GFX10PLUS-NEXT: s_mov_b32 s8, s10 3965; GFX10PLUS-NEXT: s_mov_b32 s9, s11 3966; GFX10PLUS-NEXT: s_mov_b32 s10, s12 3967; GFX10PLUS-NEXT: s_mov_b32 s11, s13 3968; GFX10PLUS-NEXT: s_mov_b32 s12, s14 3969; GFX10PLUS-NEXT: s_mov_b32 s13, s15 3970; GFX10PLUS-NEXT: s_mov_b32 s14, s16 3971; GFX10PLUS-NEXT: s_mov_b32 s15, s17 3972; GFX10PLUS-NEXT: s_mov_b32 s16, s18 3973; GFX10PLUS-NEXT: s_mov_b32 s17, s19 3974; GFX10PLUS-NEXT: s_mov_b32 s18, s20 3975; GFX10PLUS-NEXT: s_mov_b32 s19, s21 3976; GFX10PLUS-NEXT: s_mov_b32 s20, s22 3977; GFX10PLUS-NEXT: s_mov_b32 s21, s23 3978; GFX10PLUS-NEXT: s_mov_b32 s22, s24 3979; GFX10PLUS-NEXT: s_mov_b32 s23, s25 3980; GFX10PLUS-NEXT: s_mov_b32 s24, s26 3981; GFX10PLUS-NEXT: s_mov_b32 s25, s27 3982; GFX10PLUS-NEXT: s_mov_b32 s26, s28 3983; GFX10PLUS-NEXT: s_mov_b32 s27, s29 3984; GFX10PLUS-NEXT: s_mov_b32 s28, s30 3985; GFX10PLUS-NEXT: s_mov_b32 s29, s31 3986; GFX10PLUS-NEXT: s_mov_b32 s31, s33 3987; GFX10PLUS-NEXT: s_mov_b32 s30, s32 3988; GFX10PLUS-NEXT: s_movreld_b64 s[0:1], s[34:35] 3989; GFX10PLUS-NEXT: ; return to shader part epilog 3990entry: 3991 %insert = insertelement <16 x i64> %vec, i64 %val, i32 %idx 3992 ret <16 x i64> %insert 3993} 3994 3995define amdgpu_ps <16 x double> @dyn_insertelement_v16f64_s_s_s(<16 x double> inreg %vec, double inreg %val, i32 inreg %idx) { 3996; GPRIDX-LABEL: dyn_insertelement_v16f64_s_s_s: 3997; GPRIDX: ; %bb.0: ; %entry 3998; GPRIDX-NEXT: s_mov_b32 s0, s2 3999; GPRIDX-NEXT: s_mov_b32 s1, s3 4000; GPRIDX-NEXT: s_mov_b32 s2, s4 4001; GPRIDX-NEXT: s_mov_b32 s3, s5 4002; GPRIDX-NEXT: s_mov_b32 s4, s6 4003; GPRIDX-NEXT: s_mov_b32 s5, s7 4004; GPRIDX-NEXT: s_mov_b32 s6, s8 4005; GPRIDX-NEXT: s_mov_b32 s7, s9 4006; GPRIDX-NEXT: s_mov_b32 s8, s10 4007; GPRIDX-NEXT: s_mov_b32 s9, s11 4008; GPRIDX-NEXT: s_mov_b32 s10, s12 4009; GPRIDX-NEXT: s_mov_b32 s11, s13 4010; GPRIDX-NEXT: s_mov_b32 s12, s14 4011; GPRIDX-NEXT: s_mov_b32 s13, s15 4012; GPRIDX-NEXT: s_mov_b32 s14, s16 4013; GPRIDX-NEXT: s_mov_b32 s15, s17 4014; GPRIDX-NEXT: s_mov_b32 s16, s18 4015; GPRIDX-NEXT: s_mov_b32 s17, s19 4016; GPRIDX-NEXT: s_mov_b32 s18, s20 4017; GPRIDX-NEXT: s_mov_b32 s19, s21 4018; GPRIDX-NEXT: s_mov_b32 s20, s22 4019; GPRIDX-NEXT: s_mov_b32 s21, s23 4020; GPRIDX-NEXT: s_mov_b32 s22, s24 4021; GPRIDX-NEXT: s_mov_b32 s23, s25 4022; GPRIDX-NEXT: s_mov_b32 s24, s26 4023; GPRIDX-NEXT: s_mov_b32 s25, s27 4024; GPRIDX-NEXT: s_mov_b32 s26, s28 4025; GPRIDX-NEXT: s_mov_b32 s27, s29 4026; GPRIDX-NEXT: s_mov_b32 s28, s30 4027; GPRIDX-NEXT: s_mov_b32 s29, s31 4028; GPRIDX-NEXT: s_mov_b32 s31, s33 4029; GPRIDX-NEXT: s_mov_b32 s30, s32 4030; GPRIDX-NEXT: s_mov_b32 m0, s36 4031; GPRIDX-NEXT: s_nop 0 4032; GPRIDX-NEXT: s_movreld_b64 s[0:1], s[34:35] 4033; GPRIDX-NEXT: ; return to shader part epilog 4034; 4035; GFX10PLUS-LABEL: dyn_insertelement_v16f64_s_s_s: 4036; GFX10PLUS: ; %bb.0: ; %entry 4037; GFX10PLUS-NEXT: s_mov_b32 s0, s2 4038; GFX10PLUS-NEXT: s_mov_b32 s1, s3 4039; GFX10PLUS-NEXT: s_mov_b32 m0, s36 4040; GFX10PLUS-NEXT: s_mov_b32 s2, s4 4041; GFX10PLUS-NEXT: s_mov_b32 s3, s5 4042; GFX10PLUS-NEXT: s_mov_b32 s4, s6 4043; GFX10PLUS-NEXT: s_mov_b32 s5, s7 4044; GFX10PLUS-NEXT: s_mov_b32 s6, s8 4045; GFX10PLUS-NEXT: s_mov_b32 s7, s9 4046; GFX10PLUS-NEXT: s_mov_b32 s8, s10 4047; GFX10PLUS-NEXT: s_mov_b32 s9, s11 4048; GFX10PLUS-NEXT: s_mov_b32 s10, s12 4049; GFX10PLUS-NEXT: s_mov_b32 s11, s13 4050; GFX10PLUS-NEXT: s_mov_b32 s12, s14 4051; GFX10PLUS-NEXT: s_mov_b32 s13, s15 4052; GFX10PLUS-NEXT: s_mov_b32 s14, s16 4053; GFX10PLUS-NEXT: s_mov_b32 s15, s17 4054; GFX10PLUS-NEXT: s_mov_b32 s16, s18 4055; GFX10PLUS-NEXT: s_mov_b32 s17, s19 4056; GFX10PLUS-NEXT: s_mov_b32 s18, s20 4057; GFX10PLUS-NEXT: s_mov_b32 s19, s21 4058; GFX10PLUS-NEXT: s_mov_b32 s20, s22 4059; GFX10PLUS-NEXT: s_mov_b32 s21, s23 4060; GFX10PLUS-NEXT: s_mov_b32 s22, s24 4061; GFX10PLUS-NEXT: s_mov_b32 s23, s25 4062; GFX10PLUS-NEXT: s_mov_b32 s24, s26 4063; GFX10PLUS-NEXT: s_mov_b32 s25, s27 4064; GFX10PLUS-NEXT: s_mov_b32 s26, s28 4065; GFX10PLUS-NEXT: s_mov_b32 s27, s29 4066; GFX10PLUS-NEXT: s_mov_b32 s28, s30 4067; GFX10PLUS-NEXT: s_mov_b32 s29, s31 4068; GFX10PLUS-NEXT: s_mov_b32 s31, s33 4069; GFX10PLUS-NEXT: s_mov_b32 s30, s32 4070; GFX10PLUS-NEXT: s_movreld_b64 s[0:1], s[34:35] 4071; GFX10PLUS-NEXT: ; return to shader part epilog 4072entry: 4073 %insert = insertelement <16 x double> %vec, double %val, i32 %idx 4074 ret <16 x double> %insert 4075} 4076 4077define amdgpu_ps <16 x i32> @dyn_insertelement_v16i32_s_v_s(<16 x i32> inreg %vec, i32 %val, i32 inreg %idx) { 4078; GPRIDX-LABEL: dyn_insertelement_v16i32_s_v_s: 4079; GPRIDX: ; %bb.0: ; %entry 4080; GPRIDX-NEXT: s_mov_b32 s1, s3 4081; GPRIDX-NEXT: s_mov_b32 s3, s5 4082; GPRIDX-NEXT: s_mov_b32 s5, s7 4083; GPRIDX-NEXT: s_mov_b32 s7, s9 4084; GPRIDX-NEXT: s_mov_b32 s9, s11 4085; GPRIDX-NEXT: s_mov_b32 s11, s13 4086; GPRIDX-NEXT: s_mov_b32 s13, s15 4087; GPRIDX-NEXT: s_mov_b32 s15, s17 4088; GPRIDX-NEXT: s_mov_b32 s0, s2 4089; GPRIDX-NEXT: s_mov_b32 s2, s4 4090; GPRIDX-NEXT: s_mov_b32 s4, s6 4091; GPRIDX-NEXT: s_mov_b32 s6, s8 4092; GPRIDX-NEXT: s_mov_b32 s8, s10 4093; GPRIDX-NEXT: s_mov_b32 s10, s12 4094; GPRIDX-NEXT: s_mov_b32 s12, s14 4095; GPRIDX-NEXT: s_mov_b32 s14, s16 4096; GPRIDX-NEXT: v_mov_b32_e32 v16, s15 4097; GPRIDX-NEXT: v_mov_b32_e32 v15, s14 4098; GPRIDX-NEXT: v_mov_b32_e32 v14, s13 4099; GPRIDX-NEXT: v_mov_b32_e32 v13, s12 4100; GPRIDX-NEXT: v_mov_b32_e32 v12, s11 4101; GPRIDX-NEXT: v_mov_b32_e32 v11, s10 4102; GPRIDX-NEXT: v_mov_b32_e32 v10, s9 4103; GPRIDX-NEXT: v_mov_b32_e32 v9, s8 4104; GPRIDX-NEXT: v_mov_b32_e32 v8, s7 4105; GPRIDX-NEXT: v_mov_b32_e32 v7, s6 4106; GPRIDX-NEXT: v_mov_b32_e32 v6, s5 4107; GPRIDX-NEXT: v_mov_b32_e32 v5, s4 4108; GPRIDX-NEXT: v_mov_b32_e32 v4, s3 4109; GPRIDX-NEXT: v_mov_b32_e32 v3, s2 4110; GPRIDX-NEXT: v_mov_b32_e32 v2, s1 4111; GPRIDX-NEXT: v_mov_b32_e32 v1, s0 4112; GPRIDX-NEXT: s_set_gpr_idx_on s18, gpr_idx(DST) 4113; GPRIDX-NEXT: v_mov_b32_e32 v1, v0 4114; GPRIDX-NEXT: s_set_gpr_idx_off 4115; GPRIDX-NEXT: v_readfirstlane_b32 s0, v1 4116; GPRIDX-NEXT: v_readfirstlane_b32 s1, v2 4117; GPRIDX-NEXT: v_readfirstlane_b32 s2, v3 4118; GPRIDX-NEXT: v_readfirstlane_b32 s3, v4 4119; GPRIDX-NEXT: v_readfirstlane_b32 s4, v5 4120; GPRIDX-NEXT: v_readfirstlane_b32 s5, v6 4121; GPRIDX-NEXT: v_readfirstlane_b32 s6, v7 4122; GPRIDX-NEXT: v_readfirstlane_b32 s7, v8 4123; GPRIDX-NEXT: v_readfirstlane_b32 s8, v9 4124; GPRIDX-NEXT: v_readfirstlane_b32 s9, v10 4125; GPRIDX-NEXT: v_readfirstlane_b32 s10, v11 4126; GPRIDX-NEXT: v_readfirstlane_b32 s11, v12 4127; GPRIDX-NEXT: v_readfirstlane_b32 s12, v13 4128; GPRIDX-NEXT: v_readfirstlane_b32 s13, v14 4129; GPRIDX-NEXT: v_readfirstlane_b32 s14, v15 4130; GPRIDX-NEXT: v_readfirstlane_b32 s15, v16 4131; GPRIDX-NEXT: ; return to shader part epilog 4132; 4133; GFX10-LABEL: dyn_insertelement_v16i32_s_v_s: 4134; GFX10: ; %bb.0: ; %entry 4135; GFX10-NEXT: s_mov_b32 s1, s3 4136; GFX10-NEXT: s_mov_b32 s3, s5 4137; GFX10-NEXT: s_mov_b32 s5, s7 4138; GFX10-NEXT: s_mov_b32 s7, s9 4139; GFX10-NEXT: s_mov_b32 s9, s11 4140; GFX10-NEXT: s_mov_b32 s11, s13 4141; GFX10-NEXT: s_mov_b32 s13, s15 4142; GFX10-NEXT: s_mov_b32 s15, s17 4143; GFX10-NEXT: s_mov_b32 s0, s2 4144; GFX10-NEXT: s_mov_b32 s2, s4 4145; GFX10-NEXT: s_mov_b32 s4, s6 4146; GFX10-NEXT: s_mov_b32 s6, s8 4147; GFX10-NEXT: s_mov_b32 s8, s10 4148; GFX10-NEXT: s_mov_b32 s10, s12 4149; GFX10-NEXT: s_mov_b32 s12, s14 4150; GFX10-NEXT: s_mov_b32 s14, s16 4151; GFX10-NEXT: v_mov_b32_e32 v16, s15 4152; GFX10-NEXT: v_mov_b32_e32 v1, s0 4153; GFX10-NEXT: s_mov_b32 m0, s18 4154; GFX10-NEXT: v_mov_b32_e32 v15, s14 4155; GFX10-NEXT: v_mov_b32_e32 v14, s13 4156; GFX10-NEXT: v_mov_b32_e32 v13, s12 4157; GFX10-NEXT: v_mov_b32_e32 v12, s11 4158; GFX10-NEXT: v_mov_b32_e32 v11, s10 4159; GFX10-NEXT: v_mov_b32_e32 v10, s9 4160; GFX10-NEXT: v_mov_b32_e32 v9, s8 4161; GFX10-NEXT: v_mov_b32_e32 v8, s7 4162; GFX10-NEXT: v_mov_b32_e32 v7, s6 4163; GFX10-NEXT: v_mov_b32_e32 v6, s5 4164; GFX10-NEXT: v_mov_b32_e32 v5, s4 4165; GFX10-NEXT: v_mov_b32_e32 v4, s3 4166; GFX10-NEXT: v_mov_b32_e32 v3, s2 4167; GFX10-NEXT: v_mov_b32_e32 v2, s1 4168; GFX10-NEXT: v_movreld_b32_e32 v1, v0 4169; GFX10-NEXT: v_readfirstlane_b32 s0, v1 4170; GFX10-NEXT: v_readfirstlane_b32 s1, v2 4171; GFX10-NEXT: v_readfirstlane_b32 s2, v3 4172; GFX10-NEXT: v_readfirstlane_b32 s3, v4 4173; GFX10-NEXT: v_readfirstlane_b32 s4, v5 4174; GFX10-NEXT: v_readfirstlane_b32 s5, v6 4175; GFX10-NEXT: v_readfirstlane_b32 s6, v7 4176; GFX10-NEXT: v_readfirstlane_b32 s7, v8 4177; GFX10-NEXT: v_readfirstlane_b32 s8, v9 4178; GFX10-NEXT: v_readfirstlane_b32 s9, v10 4179; GFX10-NEXT: v_readfirstlane_b32 s10, v11 4180; GFX10-NEXT: v_readfirstlane_b32 s11, v12 4181; GFX10-NEXT: v_readfirstlane_b32 s12, v13 4182; GFX10-NEXT: v_readfirstlane_b32 s13, v14 4183; GFX10-NEXT: v_readfirstlane_b32 s14, v15 4184; GFX10-NEXT: v_readfirstlane_b32 s15, v16 4185; GFX10-NEXT: ; return to shader part epilog 4186; 4187; GFX11-LABEL: dyn_insertelement_v16i32_s_v_s: 4188; GFX11: ; %bb.0: ; %entry 4189; GFX11-NEXT: s_mov_b32 s1, s3 4190; GFX11-NEXT: s_mov_b32 s3, s5 4191; GFX11-NEXT: s_mov_b32 s5, s7 4192; GFX11-NEXT: s_mov_b32 s7, s9 4193; GFX11-NEXT: s_mov_b32 s9, s11 4194; GFX11-NEXT: s_mov_b32 s11, s13 4195; GFX11-NEXT: s_mov_b32 s13, s15 4196; GFX11-NEXT: s_mov_b32 s15, s17 4197; GFX11-NEXT: s_mov_b32 s0, s2 4198; GFX11-NEXT: s_mov_b32 s2, s4 4199; GFX11-NEXT: s_mov_b32 s4, s6 4200; GFX11-NEXT: s_mov_b32 s6, s8 4201; GFX11-NEXT: s_mov_b32 s8, s10 4202; GFX11-NEXT: s_mov_b32 s10, s12 4203; GFX11-NEXT: s_mov_b32 s12, s14 4204; GFX11-NEXT: s_mov_b32 s14, s16 4205; GFX11-NEXT: v_dual_mov_b32 v16, s15 :: v_dual_mov_b32 v15, s14 4206; GFX11-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0 4207; GFX11-NEXT: s_mov_b32 m0, s18 4208; GFX11-NEXT: v_dual_mov_b32 v14, s13 :: v_dual_mov_b32 v13, s12 4209; GFX11-NEXT: v_dual_mov_b32 v12, s11 :: v_dual_mov_b32 v11, s10 4210; GFX11-NEXT: v_dual_mov_b32 v10, s9 :: v_dual_mov_b32 v9, s8 4211; GFX11-NEXT: v_dual_mov_b32 v8, s7 :: v_dual_mov_b32 v7, s6 4212; GFX11-NEXT: v_dual_mov_b32 v6, s5 :: v_dual_mov_b32 v5, s4 4213; GFX11-NEXT: v_dual_mov_b32 v4, s3 :: v_dual_mov_b32 v3, s2 4214; GFX11-NEXT: v_movreld_b32_e32 v1, v0 4215; GFX11-NEXT: v_readfirstlane_b32 s0, v1 4216; GFX11-NEXT: v_readfirstlane_b32 s1, v2 4217; GFX11-NEXT: v_readfirstlane_b32 s2, v3 4218; GFX11-NEXT: v_readfirstlane_b32 s3, v4 4219; GFX11-NEXT: v_readfirstlane_b32 s4, v5 4220; GFX11-NEXT: v_readfirstlane_b32 s5, v6 4221; GFX11-NEXT: v_readfirstlane_b32 s6, v7 4222; GFX11-NEXT: v_readfirstlane_b32 s7, v8 4223; GFX11-NEXT: v_readfirstlane_b32 s8, v9 4224; GFX11-NEXT: v_readfirstlane_b32 s9, v10 4225; GFX11-NEXT: v_readfirstlane_b32 s10, v11 4226; GFX11-NEXT: v_readfirstlane_b32 s11, v12 4227; GFX11-NEXT: v_readfirstlane_b32 s12, v13 4228; GFX11-NEXT: v_readfirstlane_b32 s13, v14 4229; GFX11-NEXT: v_readfirstlane_b32 s14, v15 4230; GFX11-NEXT: v_readfirstlane_b32 s15, v16 4231; GFX11-NEXT: ; return to shader part epilog 4232entry: 4233 %insert = insertelement <16 x i32> %vec, i32 %val, i32 %idx 4234 ret <16 x i32> %insert 4235} 4236 4237define amdgpu_ps <16 x float> @dyn_insertelement_v16f32_s_v_s(<16 x float> inreg %vec, float %val, i32 inreg %idx) { 4238; GPRIDX-LABEL: dyn_insertelement_v16f32_s_v_s: 4239; GPRIDX: ; %bb.0: ; %entry 4240; GPRIDX-NEXT: s_mov_b32 s0, s2 4241; GPRIDX-NEXT: s_mov_b32 s1, s3 4242; GPRIDX-NEXT: s_mov_b32 s2, s4 4243; GPRIDX-NEXT: s_mov_b32 s3, s5 4244; GPRIDX-NEXT: s_mov_b32 s4, s6 4245; GPRIDX-NEXT: s_mov_b32 s5, s7 4246; GPRIDX-NEXT: s_mov_b32 s6, s8 4247; GPRIDX-NEXT: s_mov_b32 s7, s9 4248; GPRIDX-NEXT: s_mov_b32 s8, s10 4249; GPRIDX-NEXT: s_mov_b32 s9, s11 4250; GPRIDX-NEXT: s_mov_b32 s10, s12 4251; GPRIDX-NEXT: s_mov_b32 s11, s13 4252; GPRIDX-NEXT: s_mov_b32 s12, s14 4253; GPRIDX-NEXT: s_mov_b32 s13, s15 4254; GPRIDX-NEXT: s_mov_b32 s14, s16 4255; GPRIDX-NEXT: s_mov_b32 s15, s17 4256; GPRIDX-NEXT: v_mov_b32_e32 v16, v0 4257; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 4258; GPRIDX-NEXT: v_mov_b32_e32 v1, s1 4259; GPRIDX-NEXT: v_mov_b32_e32 v2, s2 4260; GPRIDX-NEXT: v_mov_b32_e32 v3, s3 4261; GPRIDX-NEXT: v_mov_b32_e32 v4, s4 4262; GPRIDX-NEXT: v_mov_b32_e32 v5, s5 4263; GPRIDX-NEXT: v_mov_b32_e32 v6, s6 4264; GPRIDX-NEXT: v_mov_b32_e32 v7, s7 4265; GPRIDX-NEXT: v_mov_b32_e32 v8, s8 4266; GPRIDX-NEXT: v_mov_b32_e32 v9, s9 4267; GPRIDX-NEXT: v_mov_b32_e32 v10, s10 4268; GPRIDX-NEXT: v_mov_b32_e32 v11, s11 4269; GPRIDX-NEXT: v_mov_b32_e32 v12, s12 4270; GPRIDX-NEXT: v_mov_b32_e32 v13, s13 4271; GPRIDX-NEXT: v_mov_b32_e32 v14, s14 4272; GPRIDX-NEXT: v_mov_b32_e32 v15, s15 4273; GPRIDX-NEXT: s_set_gpr_idx_on s18, gpr_idx(DST) 4274; GPRIDX-NEXT: v_mov_b32_e32 v0, v16 4275; GPRIDX-NEXT: s_set_gpr_idx_off 4276; GPRIDX-NEXT: ; return to shader part epilog 4277; 4278; GFX10-LABEL: dyn_insertelement_v16f32_s_v_s: 4279; GFX10: ; %bb.0: ; %entry 4280; GFX10-NEXT: s_mov_b32 s0, s2 4281; GFX10-NEXT: s_mov_b32 s1, s3 4282; GFX10-NEXT: s_mov_b32 s2, s4 4283; GFX10-NEXT: s_mov_b32 s3, s5 4284; GFX10-NEXT: s_mov_b32 s4, s6 4285; GFX10-NEXT: s_mov_b32 s5, s7 4286; GFX10-NEXT: s_mov_b32 s6, s8 4287; GFX10-NEXT: s_mov_b32 s7, s9 4288; GFX10-NEXT: s_mov_b32 s8, s10 4289; GFX10-NEXT: s_mov_b32 s9, s11 4290; GFX10-NEXT: s_mov_b32 s10, s12 4291; GFX10-NEXT: s_mov_b32 s11, s13 4292; GFX10-NEXT: s_mov_b32 s12, s14 4293; GFX10-NEXT: s_mov_b32 s13, s15 4294; GFX10-NEXT: s_mov_b32 s14, s16 4295; GFX10-NEXT: s_mov_b32 s15, s17 4296; GFX10-NEXT: v_mov_b32_e32 v16, v0 4297; GFX10-NEXT: v_mov_b32_e32 v0, s0 4298; GFX10-NEXT: s_mov_b32 m0, s18 4299; GFX10-NEXT: v_mov_b32_e32 v1, s1 4300; GFX10-NEXT: v_mov_b32_e32 v2, s2 4301; GFX10-NEXT: v_mov_b32_e32 v3, s3 4302; GFX10-NEXT: v_mov_b32_e32 v4, s4 4303; GFX10-NEXT: v_mov_b32_e32 v5, s5 4304; GFX10-NEXT: v_mov_b32_e32 v6, s6 4305; GFX10-NEXT: v_mov_b32_e32 v7, s7 4306; GFX10-NEXT: v_mov_b32_e32 v8, s8 4307; GFX10-NEXT: v_mov_b32_e32 v9, s9 4308; GFX10-NEXT: v_mov_b32_e32 v10, s10 4309; GFX10-NEXT: v_mov_b32_e32 v11, s11 4310; GFX10-NEXT: v_mov_b32_e32 v12, s12 4311; GFX10-NEXT: v_mov_b32_e32 v13, s13 4312; GFX10-NEXT: v_mov_b32_e32 v14, s14 4313; GFX10-NEXT: v_mov_b32_e32 v15, s15 4314; GFX10-NEXT: v_movreld_b32_e32 v0, v16 4315; GFX10-NEXT: ; return to shader part epilog 4316; 4317; GFX11-LABEL: dyn_insertelement_v16f32_s_v_s: 4318; GFX11: ; %bb.0: ; %entry 4319; GFX11-NEXT: s_mov_b32 s0, s2 4320; GFX11-NEXT: s_mov_b32 s1, s3 4321; GFX11-NEXT: s_mov_b32 s2, s4 4322; GFX11-NEXT: s_mov_b32 s3, s5 4323; GFX11-NEXT: s_mov_b32 s4, s6 4324; GFX11-NEXT: s_mov_b32 s5, s7 4325; GFX11-NEXT: s_mov_b32 s6, s8 4326; GFX11-NEXT: s_mov_b32 s7, s9 4327; GFX11-NEXT: s_mov_b32 s8, s10 4328; GFX11-NEXT: s_mov_b32 s9, s11 4329; GFX11-NEXT: s_mov_b32 s10, s12 4330; GFX11-NEXT: s_mov_b32 s11, s13 4331; GFX11-NEXT: s_mov_b32 s12, s14 4332; GFX11-NEXT: s_mov_b32 s13, s15 4333; GFX11-NEXT: s_mov_b32 s14, s16 4334; GFX11-NEXT: s_mov_b32 s15, s17 4335; GFX11-NEXT: v_mov_b32_e32 v16, v0 4336; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3 4337; GFX11-NEXT: s_mov_b32 m0, s18 4338; GFX11-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2 4339; GFX11-NEXT: v_dual_mov_b32 v5, s5 :: v_dual_mov_b32 v4, s4 4340; GFX11-NEXT: v_dual_mov_b32 v7, s7 :: v_dual_mov_b32 v6, s6 4341; GFX11-NEXT: v_dual_mov_b32 v9, s9 :: v_dual_mov_b32 v8, s8 4342; GFX11-NEXT: v_dual_mov_b32 v11, s11 :: v_dual_mov_b32 v10, s10 4343; GFX11-NEXT: v_dual_mov_b32 v13, s13 :: v_dual_mov_b32 v12, s12 4344; GFX11-NEXT: v_dual_mov_b32 v15, s15 :: v_dual_mov_b32 v14, s14 4345; GFX11-NEXT: v_movreld_b32_e32 v0, v16 4346; GFX11-NEXT: ; return to shader part epilog 4347entry: 4348 %insert = insertelement <16 x float> %vec, float %val, i32 %idx 4349 ret <16 x float> %insert 4350} 4351 4352define amdgpu_ps <32 x float> @dyn_insertelement_v32f32_s_v_s(<32 x float> inreg %vec, float %val, i32 inreg %idx) { 4353; GPRIDX-LABEL: dyn_insertelement_v32f32_s_v_s: 4354; GPRIDX: ; %bb.0: ; %entry 4355; GPRIDX-NEXT: s_mov_b32 s0, s2 4356; GPRIDX-NEXT: s_mov_b32 s1, s3 4357; GPRIDX-NEXT: s_mov_b32 s2, s4 4358; GPRIDX-NEXT: s_mov_b32 s3, s5 4359; GPRIDX-NEXT: s_mov_b32 s4, s6 4360; GPRIDX-NEXT: s_mov_b32 s5, s7 4361; GPRIDX-NEXT: s_mov_b32 s6, s8 4362; GPRIDX-NEXT: s_mov_b32 s7, s9 4363; GPRIDX-NEXT: s_mov_b32 s8, s10 4364; GPRIDX-NEXT: s_mov_b32 s9, s11 4365; GPRIDX-NEXT: s_mov_b32 s10, s12 4366; GPRIDX-NEXT: s_mov_b32 s11, s13 4367; GPRIDX-NEXT: s_mov_b32 s12, s14 4368; GPRIDX-NEXT: s_mov_b32 s13, s15 4369; GPRIDX-NEXT: s_mov_b32 s14, s16 4370; GPRIDX-NEXT: s_mov_b32 s15, s17 4371; GPRIDX-NEXT: s_mov_b32 s16, s18 4372; GPRIDX-NEXT: s_mov_b32 s17, s19 4373; GPRIDX-NEXT: s_mov_b32 s18, s20 4374; GPRIDX-NEXT: s_mov_b32 s19, s21 4375; GPRIDX-NEXT: s_mov_b32 s20, s22 4376; GPRIDX-NEXT: s_mov_b32 s21, s23 4377; GPRIDX-NEXT: s_mov_b32 s22, s24 4378; GPRIDX-NEXT: s_mov_b32 s23, s25 4379; GPRIDX-NEXT: s_mov_b32 s24, s26 4380; GPRIDX-NEXT: s_mov_b32 s25, s27 4381; GPRIDX-NEXT: s_mov_b32 s26, s28 4382; GPRIDX-NEXT: s_mov_b32 s27, s29 4383; GPRIDX-NEXT: s_mov_b32 s28, s30 4384; GPRIDX-NEXT: s_mov_b32 s29, s31 4385; GPRIDX-NEXT: s_mov_b32 s31, s33 4386; GPRIDX-NEXT: s_mov_b32 s30, s32 4387; GPRIDX-NEXT: v_mov_b32_e32 v32, v0 4388; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 4389; GPRIDX-NEXT: v_mov_b32_e32 v1, s1 4390; GPRIDX-NEXT: v_mov_b32_e32 v2, s2 4391; GPRIDX-NEXT: v_mov_b32_e32 v3, s3 4392; GPRIDX-NEXT: v_mov_b32_e32 v4, s4 4393; GPRIDX-NEXT: v_mov_b32_e32 v5, s5 4394; GPRIDX-NEXT: v_mov_b32_e32 v6, s6 4395; GPRIDX-NEXT: v_mov_b32_e32 v7, s7 4396; GPRIDX-NEXT: v_mov_b32_e32 v8, s8 4397; GPRIDX-NEXT: v_mov_b32_e32 v9, s9 4398; GPRIDX-NEXT: v_mov_b32_e32 v10, s10 4399; GPRIDX-NEXT: v_mov_b32_e32 v11, s11 4400; GPRIDX-NEXT: v_mov_b32_e32 v12, s12 4401; GPRIDX-NEXT: v_mov_b32_e32 v13, s13 4402; GPRIDX-NEXT: v_mov_b32_e32 v14, s14 4403; GPRIDX-NEXT: v_mov_b32_e32 v15, s15 4404; GPRIDX-NEXT: v_mov_b32_e32 v16, s16 4405; GPRIDX-NEXT: v_mov_b32_e32 v17, s17 4406; GPRIDX-NEXT: v_mov_b32_e32 v18, s18 4407; GPRIDX-NEXT: v_mov_b32_e32 v19, s19 4408; GPRIDX-NEXT: v_mov_b32_e32 v20, s20 4409; GPRIDX-NEXT: v_mov_b32_e32 v21, s21 4410; GPRIDX-NEXT: v_mov_b32_e32 v22, s22 4411; GPRIDX-NEXT: v_mov_b32_e32 v23, s23 4412; GPRIDX-NEXT: v_mov_b32_e32 v24, s24 4413; GPRIDX-NEXT: v_mov_b32_e32 v25, s25 4414; GPRIDX-NEXT: v_mov_b32_e32 v26, s26 4415; GPRIDX-NEXT: v_mov_b32_e32 v27, s27 4416; GPRIDX-NEXT: v_mov_b32_e32 v28, s28 4417; GPRIDX-NEXT: v_mov_b32_e32 v29, s29 4418; GPRIDX-NEXT: v_mov_b32_e32 v30, s30 4419; GPRIDX-NEXT: v_mov_b32_e32 v31, s31 4420; GPRIDX-NEXT: s_set_gpr_idx_on s34, gpr_idx(DST) 4421; GPRIDX-NEXT: v_mov_b32_e32 v0, v32 4422; GPRIDX-NEXT: s_set_gpr_idx_off 4423; GPRIDX-NEXT: ; return to shader part epilog 4424; 4425; GFX10-LABEL: dyn_insertelement_v32f32_s_v_s: 4426; GFX10: ; %bb.0: ; %entry 4427; GFX10-NEXT: s_mov_b32 s0, s2 4428; GFX10-NEXT: s_mov_b32 s1, s3 4429; GFX10-NEXT: s_mov_b32 s2, s4 4430; GFX10-NEXT: s_mov_b32 s3, s5 4431; GFX10-NEXT: s_mov_b32 s4, s6 4432; GFX10-NEXT: s_mov_b32 s5, s7 4433; GFX10-NEXT: s_mov_b32 s6, s8 4434; GFX10-NEXT: s_mov_b32 s7, s9 4435; GFX10-NEXT: s_mov_b32 s8, s10 4436; GFX10-NEXT: s_mov_b32 s9, s11 4437; GFX10-NEXT: s_mov_b32 s10, s12 4438; GFX10-NEXT: s_mov_b32 s11, s13 4439; GFX10-NEXT: s_mov_b32 s12, s14 4440; GFX10-NEXT: s_mov_b32 s13, s15 4441; GFX10-NEXT: s_mov_b32 s14, s16 4442; GFX10-NEXT: s_mov_b32 s15, s17 4443; GFX10-NEXT: s_mov_b32 s16, s18 4444; GFX10-NEXT: s_mov_b32 s17, s19 4445; GFX10-NEXT: s_mov_b32 s18, s20 4446; GFX10-NEXT: s_mov_b32 s19, s21 4447; GFX10-NEXT: s_mov_b32 s20, s22 4448; GFX10-NEXT: s_mov_b32 s21, s23 4449; GFX10-NEXT: s_mov_b32 s22, s24 4450; GFX10-NEXT: s_mov_b32 s23, s25 4451; GFX10-NEXT: s_mov_b32 s24, s26 4452; GFX10-NEXT: s_mov_b32 s25, s27 4453; GFX10-NEXT: s_mov_b32 s26, s28 4454; GFX10-NEXT: s_mov_b32 s27, s29 4455; GFX10-NEXT: s_mov_b32 s28, s30 4456; GFX10-NEXT: s_mov_b32 s29, s31 4457; GFX10-NEXT: s_mov_b32 s31, s33 4458; GFX10-NEXT: s_mov_b32 s30, s32 4459; GFX10-NEXT: v_mov_b32_e32 v32, v0 4460; GFX10-NEXT: v_mov_b32_e32 v0, s0 4461; GFX10-NEXT: s_mov_b32 m0, s34 4462; GFX10-NEXT: v_mov_b32_e32 v1, s1 4463; GFX10-NEXT: v_mov_b32_e32 v2, s2 4464; GFX10-NEXT: v_mov_b32_e32 v3, s3 4465; GFX10-NEXT: v_mov_b32_e32 v4, s4 4466; GFX10-NEXT: v_mov_b32_e32 v5, s5 4467; GFX10-NEXT: v_mov_b32_e32 v6, s6 4468; GFX10-NEXT: v_mov_b32_e32 v7, s7 4469; GFX10-NEXT: v_mov_b32_e32 v8, s8 4470; GFX10-NEXT: v_mov_b32_e32 v9, s9 4471; GFX10-NEXT: v_mov_b32_e32 v10, s10 4472; GFX10-NEXT: v_mov_b32_e32 v11, s11 4473; GFX10-NEXT: v_mov_b32_e32 v12, s12 4474; GFX10-NEXT: v_mov_b32_e32 v13, s13 4475; GFX10-NEXT: v_mov_b32_e32 v14, s14 4476; GFX10-NEXT: v_mov_b32_e32 v15, s15 4477; GFX10-NEXT: v_mov_b32_e32 v16, s16 4478; GFX10-NEXT: v_mov_b32_e32 v17, s17 4479; GFX10-NEXT: v_mov_b32_e32 v18, s18 4480; GFX10-NEXT: v_mov_b32_e32 v19, s19 4481; GFX10-NEXT: v_mov_b32_e32 v20, s20 4482; GFX10-NEXT: v_mov_b32_e32 v21, s21 4483; GFX10-NEXT: v_mov_b32_e32 v22, s22 4484; GFX10-NEXT: v_mov_b32_e32 v23, s23 4485; GFX10-NEXT: v_mov_b32_e32 v24, s24 4486; GFX10-NEXT: v_mov_b32_e32 v25, s25 4487; GFX10-NEXT: v_mov_b32_e32 v26, s26 4488; GFX10-NEXT: v_mov_b32_e32 v27, s27 4489; GFX10-NEXT: v_mov_b32_e32 v28, s28 4490; GFX10-NEXT: v_mov_b32_e32 v29, s29 4491; GFX10-NEXT: v_mov_b32_e32 v30, s30 4492; GFX10-NEXT: v_mov_b32_e32 v31, s31 4493; GFX10-NEXT: v_movreld_b32_e32 v0, v32 4494; GFX10-NEXT: ; return to shader part epilog 4495; 4496; GFX11-LABEL: dyn_insertelement_v32f32_s_v_s: 4497; GFX11: ; %bb.0: ; %entry 4498; GFX11-NEXT: s_mov_b32 s0, s2 4499; GFX11-NEXT: s_mov_b32 s1, s3 4500; GFX11-NEXT: s_mov_b32 s2, s4 4501; GFX11-NEXT: s_mov_b32 s3, s5 4502; GFX11-NEXT: s_mov_b32 s4, s6 4503; GFX11-NEXT: s_mov_b32 s5, s7 4504; GFX11-NEXT: s_mov_b32 s6, s8 4505; GFX11-NEXT: s_mov_b32 s7, s9 4506; GFX11-NEXT: s_mov_b32 s8, s10 4507; GFX11-NEXT: s_mov_b32 s9, s11 4508; GFX11-NEXT: s_mov_b32 s10, s12 4509; GFX11-NEXT: s_mov_b32 s11, s13 4510; GFX11-NEXT: s_mov_b32 s12, s14 4511; GFX11-NEXT: s_mov_b32 s13, s15 4512; GFX11-NEXT: s_mov_b32 s14, s16 4513; GFX11-NEXT: s_mov_b32 s15, s17 4514; GFX11-NEXT: s_mov_b32 s16, s18 4515; GFX11-NEXT: s_mov_b32 s17, s19 4516; GFX11-NEXT: s_mov_b32 s18, s20 4517; GFX11-NEXT: s_mov_b32 s19, s21 4518; GFX11-NEXT: s_mov_b32 s20, s22 4519; GFX11-NEXT: s_mov_b32 s21, s23 4520; GFX11-NEXT: s_mov_b32 s22, s24 4521; GFX11-NEXT: s_mov_b32 s23, s25 4522; GFX11-NEXT: s_mov_b32 s24, s26 4523; GFX11-NEXT: s_mov_b32 s25, s27 4524; GFX11-NEXT: s_mov_b32 s26, s28 4525; GFX11-NEXT: s_mov_b32 s27, s29 4526; GFX11-NEXT: s_mov_b32 s28, s30 4527; GFX11-NEXT: s_mov_b32 s29, s31 4528; GFX11-NEXT: s_mov_b32 s31, s33 4529; GFX11-NEXT: s_mov_b32 s30, s32 4530; GFX11-NEXT: v_mov_b32_e32 v32, v0 4531; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3 4532; GFX11-NEXT: s_mov_b32 m0, s34 4533; GFX11-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2 4534; GFX11-NEXT: v_dual_mov_b32 v5, s5 :: v_dual_mov_b32 v4, s4 4535; GFX11-NEXT: v_dual_mov_b32 v7, s7 :: v_dual_mov_b32 v6, s6 4536; GFX11-NEXT: v_dual_mov_b32 v9, s9 :: v_dual_mov_b32 v8, s8 4537; GFX11-NEXT: v_dual_mov_b32 v11, s11 :: v_dual_mov_b32 v10, s10 4538; GFX11-NEXT: v_dual_mov_b32 v13, s13 :: v_dual_mov_b32 v12, s12 4539; GFX11-NEXT: v_dual_mov_b32 v15, s15 :: v_dual_mov_b32 v14, s14 4540; GFX11-NEXT: v_dual_mov_b32 v17, s17 :: v_dual_mov_b32 v16, s16 4541; GFX11-NEXT: v_dual_mov_b32 v19, s19 :: v_dual_mov_b32 v18, s18 4542; GFX11-NEXT: v_dual_mov_b32 v21, s21 :: v_dual_mov_b32 v20, s20 4543; GFX11-NEXT: v_dual_mov_b32 v23, s23 :: v_dual_mov_b32 v22, s22 4544; GFX11-NEXT: v_dual_mov_b32 v25, s25 :: v_dual_mov_b32 v24, s24 4545; GFX11-NEXT: v_dual_mov_b32 v27, s27 :: v_dual_mov_b32 v26, s26 4546; GFX11-NEXT: v_dual_mov_b32 v29, s29 :: v_dual_mov_b32 v28, s28 4547; GFX11-NEXT: v_dual_mov_b32 v31, s31 :: v_dual_mov_b32 v30, s30 4548; GFX11-NEXT: v_movreld_b32_e32 v0, v32 4549; GFX11-NEXT: ; return to shader part epilog 4550entry: 4551 %insert = insertelement <32 x float> %vec, float %val, i32 %idx 4552 ret <32 x float> %insert 4553} 4554 4555define amdgpu_ps <16 x i64> @dyn_insertelement_v16i64_s_v_s(<16 x i64> inreg %vec, i64 %val, i32 inreg %idx) { 4556; GPRIDX-LABEL: dyn_insertelement_v16i64_s_v_s: 4557; GPRIDX: ; %bb.0: ; %entry 4558; GPRIDX-NEXT: s_mov_b32 s1, s3 4559; GPRIDX-NEXT: s_mov_b32 s3, s5 4560; GPRIDX-NEXT: s_mov_b32 s5, s7 4561; GPRIDX-NEXT: s_mov_b32 s7, s9 4562; GPRIDX-NEXT: s_mov_b32 s9, s11 4563; GPRIDX-NEXT: s_mov_b32 s11, s13 4564; GPRIDX-NEXT: s_mov_b32 s13, s15 4565; GPRIDX-NEXT: s_mov_b32 s15, s17 4566; GPRIDX-NEXT: s_mov_b32 s17, s19 4567; GPRIDX-NEXT: s_mov_b32 s19, s21 4568; GPRIDX-NEXT: s_mov_b32 s21, s23 4569; GPRIDX-NEXT: s_mov_b32 s23, s25 4570; GPRIDX-NEXT: s_mov_b32 s25, s27 4571; GPRIDX-NEXT: s_mov_b32 s27, s29 4572; GPRIDX-NEXT: s_mov_b32 s29, s31 4573; GPRIDX-NEXT: s_mov_b32 s31, s33 4574; GPRIDX-NEXT: s_mov_b32 s0, s2 4575; GPRIDX-NEXT: s_mov_b32 s2, s4 4576; GPRIDX-NEXT: s_mov_b32 s4, s6 4577; GPRIDX-NEXT: s_mov_b32 s6, s8 4578; GPRIDX-NEXT: s_mov_b32 s8, s10 4579; GPRIDX-NEXT: s_mov_b32 s10, s12 4580; GPRIDX-NEXT: s_mov_b32 s12, s14 4581; GPRIDX-NEXT: s_mov_b32 s14, s16 4582; GPRIDX-NEXT: s_mov_b32 s16, s18 4583; GPRIDX-NEXT: s_mov_b32 s18, s20 4584; GPRIDX-NEXT: s_mov_b32 s20, s22 4585; GPRIDX-NEXT: s_mov_b32 s22, s24 4586; GPRIDX-NEXT: s_mov_b32 s24, s26 4587; GPRIDX-NEXT: s_mov_b32 s26, s28 4588; GPRIDX-NEXT: s_mov_b32 s28, s30 4589; GPRIDX-NEXT: s_mov_b32 s30, s32 4590; GPRIDX-NEXT: v_mov_b32_e32 v33, s31 4591; GPRIDX-NEXT: s_lshl_b32 s33, s34, 1 4592; GPRIDX-NEXT: v_mov_b32_e32 v32, s30 4593; GPRIDX-NEXT: v_mov_b32_e32 v31, s29 4594; GPRIDX-NEXT: v_mov_b32_e32 v30, s28 4595; GPRIDX-NEXT: v_mov_b32_e32 v29, s27 4596; GPRIDX-NEXT: v_mov_b32_e32 v28, s26 4597; GPRIDX-NEXT: v_mov_b32_e32 v27, s25 4598; GPRIDX-NEXT: v_mov_b32_e32 v26, s24 4599; GPRIDX-NEXT: v_mov_b32_e32 v25, s23 4600; GPRIDX-NEXT: v_mov_b32_e32 v24, s22 4601; GPRIDX-NEXT: v_mov_b32_e32 v23, s21 4602; GPRIDX-NEXT: v_mov_b32_e32 v22, s20 4603; GPRIDX-NEXT: v_mov_b32_e32 v21, s19 4604; GPRIDX-NEXT: v_mov_b32_e32 v20, s18 4605; GPRIDX-NEXT: v_mov_b32_e32 v19, s17 4606; GPRIDX-NEXT: v_mov_b32_e32 v18, s16 4607; GPRIDX-NEXT: v_mov_b32_e32 v17, s15 4608; GPRIDX-NEXT: v_mov_b32_e32 v16, s14 4609; GPRIDX-NEXT: v_mov_b32_e32 v15, s13 4610; GPRIDX-NEXT: v_mov_b32_e32 v14, s12 4611; GPRIDX-NEXT: v_mov_b32_e32 v13, s11 4612; GPRIDX-NEXT: v_mov_b32_e32 v12, s10 4613; GPRIDX-NEXT: v_mov_b32_e32 v11, s9 4614; GPRIDX-NEXT: v_mov_b32_e32 v10, s8 4615; GPRIDX-NEXT: v_mov_b32_e32 v9, s7 4616; GPRIDX-NEXT: v_mov_b32_e32 v8, s6 4617; GPRIDX-NEXT: v_mov_b32_e32 v7, s5 4618; GPRIDX-NEXT: v_mov_b32_e32 v6, s4 4619; GPRIDX-NEXT: v_mov_b32_e32 v5, s3 4620; GPRIDX-NEXT: v_mov_b32_e32 v4, s2 4621; GPRIDX-NEXT: v_mov_b32_e32 v3, s1 4622; GPRIDX-NEXT: v_mov_b32_e32 v2, s0 4623; GPRIDX-NEXT: s_set_gpr_idx_on s33, gpr_idx(DST) 4624; GPRIDX-NEXT: v_mov_b32_e32 v2, v0 4625; GPRIDX-NEXT: v_mov_b32_e32 v3, v1 4626; GPRIDX-NEXT: s_set_gpr_idx_off 4627; GPRIDX-NEXT: v_readfirstlane_b32 s0, v2 4628; GPRIDX-NEXT: v_readfirstlane_b32 s1, v3 4629; GPRIDX-NEXT: v_readfirstlane_b32 s2, v4 4630; GPRIDX-NEXT: v_readfirstlane_b32 s3, v5 4631; GPRIDX-NEXT: v_readfirstlane_b32 s4, v6 4632; GPRIDX-NEXT: v_readfirstlane_b32 s5, v7 4633; GPRIDX-NEXT: v_readfirstlane_b32 s6, v8 4634; GPRIDX-NEXT: v_readfirstlane_b32 s7, v9 4635; GPRIDX-NEXT: v_readfirstlane_b32 s8, v10 4636; GPRIDX-NEXT: v_readfirstlane_b32 s9, v11 4637; GPRIDX-NEXT: v_readfirstlane_b32 s10, v12 4638; GPRIDX-NEXT: v_readfirstlane_b32 s11, v13 4639; GPRIDX-NEXT: v_readfirstlane_b32 s12, v14 4640; GPRIDX-NEXT: v_readfirstlane_b32 s13, v15 4641; GPRIDX-NEXT: v_readfirstlane_b32 s14, v16 4642; GPRIDX-NEXT: v_readfirstlane_b32 s15, v17 4643; GPRIDX-NEXT: v_readfirstlane_b32 s16, v18 4644; GPRIDX-NEXT: v_readfirstlane_b32 s17, v19 4645; GPRIDX-NEXT: v_readfirstlane_b32 s18, v20 4646; GPRIDX-NEXT: v_readfirstlane_b32 s19, v21 4647; GPRIDX-NEXT: v_readfirstlane_b32 s20, v22 4648; GPRIDX-NEXT: v_readfirstlane_b32 s21, v23 4649; GPRIDX-NEXT: v_readfirstlane_b32 s22, v24 4650; GPRIDX-NEXT: v_readfirstlane_b32 s23, v25 4651; GPRIDX-NEXT: v_readfirstlane_b32 s24, v26 4652; GPRIDX-NEXT: v_readfirstlane_b32 s25, v27 4653; GPRIDX-NEXT: v_readfirstlane_b32 s26, v28 4654; GPRIDX-NEXT: v_readfirstlane_b32 s27, v29 4655; GPRIDX-NEXT: v_readfirstlane_b32 s28, v30 4656; GPRIDX-NEXT: v_readfirstlane_b32 s29, v31 4657; GPRIDX-NEXT: v_readfirstlane_b32 s30, v32 4658; GPRIDX-NEXT: v_readfirstlane_b32 s31, v33 4659; GPRIDX-NEXT: ; return to shader part epilog 4660; 4661; GFX10-LABEL: dyn_insertelement_v16i64_s_v_s: 4662; GFX10: ; %bb.0: ; %entry 4663; GFX10-NEXT: s_mov_b32 s1, s3 4664; GFX10-NEXT: s_mov_b32 s3, s5 4665; GFX10-NEXT: s_mov_b32 s5, s7 4666; GFX10-NEXT: s_mov_b32 s7, s9 4667; GFX10-NEXT: s_mov_b32 s9, s11 4668; GFX10-NEXT: s_mov_b32 s11, s13 4669; GFX10-NEXT: s_mov_b32 s13, s15 4670; GFX10-NEXT: s_mov_b32 s15, s17 4671; GFX10-NEXT: s_mov_b32 s17, s19 4672; GFX10-NEXT: s_mov_b32 s19, s21 4673; GFX10-NEXT: s_mov_b32 s21, s23 4674; GFX10-NEXT: s_mov_b32 s23, s25 4675; GFX10-NEXT: s_mov_b32 s25, s27 4676; GFX10-NEXT: s_mov_b32 s27, s29 4677; GFX10-NEXT: s_mov_b32 s29, s31 4678; GFX10-NEXT: s_mov_b32 s31, s33 4679; GFX10-NEXT: s_mov_b32 s0, s2 4680; GFX10-NEXT: s_mov_b32 s2, s4 4681; GFX10-NEXT: s_mov_b32 s4, s6 4682; GFX10-NEXT: s_mov_b32 s6, s8 4683; GFX10-NEXT: s_mov_b32 s8, s10 4684; GFX10-NEXT: s_mov_b32 s10, s12 4685; GFX10-NEXT: s_mov_b32 s12, s14 4686; GFX10-NEXT: s_mov_b32 s14, s16 4687; GFX10-NEXT: s_mov_b32 s16, s18 4688; GFX10-NEXT: s_mov_b32 s18, s20 4689; GFX10-NEXT: s_mov_b32 s20, s22 4690; GFX10-NEXT: s_mov_b32 s22, s24 4691; GFX10-NEXT: s_mov_b32 s24, s26 4692; GFX10-NEXT: s_mov_b32 s26, s28 4693; GFX10-NEXT: s_mov_b32 s28, s30 4694; GFX10-NEXT: s_mov_b32 s30, s32 4695; GFX10-NEXT: v_mov_b32_e32 v33, s31 4696; GFX10-NEXT: v_mov_b32_e32 v2, s0 4697; GFX10-NEXT: s_lshl_b32 m0, s34, 1 4698; GFX10-NEXT: v_mov_b32_e32 v32, s30 4699; GFX10-NEXT: v_mov_b32_e32 v31, s29 4700; GFX10-NEXT: v_mov_b32_e32 v30, s28 4701; GFX10-NEXT: v_mov_b32_e32 v29, s27 4702; GFX10-NEXT: v_mov_b32_e32 v28, s26 4703; GFX10-NEXT: v_mov_b32_e32 v27, s25 4704; GFX10-NEXT: v_mov_b32_e32 v26, s24 4705; GFX10-NEXT: v_mov_b32_e32 v25, s23 4706; GFX10-NEXT: v_mov_b32_e32 v24, s22 4707; GFX10-NEXT: v_mov_b32_e32 v23, s21 4708; GFX10-NEXT: v_mov_b32_e32 v22, s20 4709; GFX10-NEXT: v_mov_b32_e32 v21, s19 4710; GFX10-NEXT: v_mov_b32_e32 v20, s18 4711; GFX10-NEXT: v_mov_b32_e32 v19, s17 4712; GFX10-NEXT: v_mov_b32_e32 v18, s16 4713; GFX10-NEXT: v_mov_b32_e32 v17, s15 4714; GFX10-NEXT: v_mov_b32_e32 v16, s14 4715; GFX10-NEXT: v_mov_b32_e32 v15, s13 4716; GFX10-NEXT: v_mov_b32_e32 v14, s12 4717; GFX10-NEXT: v_mov_b32_e32 v13, s11 4718; GFX10-NEXT: v_mov_b32_e32 v12, s10 4719; GFX10-NEXT: v_mov_b32_e32 v11, s9 4720; GFX10-NEXT: v_mov_b32_e32 v10, s8 4721; GFX10-NEXT: v_mov_b32_e32 v9, s7 4722; GFX10-NEXT: v_mov_b32_e32 v8, s6 4723; GFX10-NEXT: v_mov_b32_e32 v7, s5 4724; GFX10-NEXT: v_mov_b32_e32 v6, s4 4725; GFX10-NEXT: v_mov_b32_e32 v5, s3 4726; GFX10-NEXT: v_mov_b32_e32 v4, s2 4727; GFX10-NEXT: v_mov_b32_e32 v3, s1 4728; GFX10-NEXT: v_movreld_b32_e32 v2, v0 4729; GFX10-NEXT: v_movreld_b32_e32 v3, v1 4730; GFX10-NEXT: v_readfirstlane_b32 s0, v2 4731; GFX10-NEXT: v_readfirstlane_b32 s1, v3 4732; GFX10-NEXT: v_readfirstlane_b32 s2, v4 4733; GFX10-NEXT: v_readfirstlane_b32 s3, v5 4734; GFX10-NEXT: v_readfirstlane_b32 s4, v6 4735; GFX10-NEXT: v_readfirstlane_b32 s5, v7 4736; GFX10-NEXT: v_readfirstlane_b32 s6, v8 4737; GFX10-NEXT: v_readfirstlane_b32 s7, v9 4738; GFX10-NEXT: v_readfirstlane_b32 s8, v10 4739; GFX10-NEXT: v_readfirstlane_b32 s9, v11 4740; GFX10-NEXT: v_readfirstlane_b32 s10, v12 4741; GFX10-NEXT: v_readfirstlane_b32 s11, v13 4742; GFX10-NEXT: v_readfirstlane_b32 s12, v14 4743; GFX10-NEXT: v_readfirstlane_b32 s13, v15 4744; GFX10-NEXT: v_readfirstlane_b32 s14, v16 4745; GFX10-NEXT: v_readfirstlane_b32 s15, v17 4746; GFX10-NEXT: v_readfirstlane_b32 s16, v18 4747; GFX10-NEXT: v_readfirstlane_b32 s17, v19 4748; GFX10-NEXT: v_readfirstlane_b32 s18, v20 4749; GFX10-NEXT: v_readfirstlane_b32 s19, v21 4750; GFX10-NEXT: v_readfirstlane_b32 s20, v22 4751; GFX10-NEXT: v_readfirstlane_b32 s21, v23 4752; GFX10-NEXT: v_readfirstlane_b32 s22, v24 4753; GFX10-NEXT: v_readfirstlane_b32 s23, v25 4754; GFX10-NEXT: v_readfirstlane_b32 s24, v26 4755; GFX10-NEXT: v_readfirstlane_b32 s25, v27 4756; GFX10-NEXT: v_readfirstlane_b32 s26, v28 4757; GFX10-NEXT: v_readfirstlane_b32 s27, v29 4758; GFX10-NEXT: v_readfirstlane_b32 s28, v30 4759; GFX10-NEXT: v_readfirstlane_b32 s29, v31 4760; GFX10-NEXT: v_readfirstlane_b32 s30, v32 4761; GFX10-NEXT: v_readfirstlane_b32 s31, v33 4762; GFX10-NEXT: ; return to shader part epilog 4763; 4764; GFX11-LABEL: dyn_insertelement_v16i64_s_v_s: 4765; GFX11: ; %bb.0: ; %entry 4766; GFX11-NEXT: s_mov_b32 s1, s3 4767; GFX11-NEXT: s_mov_b32 s3, s5 4768; GFX11-NEXT: s_mov_b32 s5, s7 4769; GFX11-NEXT: s_mov_b32 s7, s9 4770; GFX11-NEXT: s_mov_b32 s9, s11 4771; GFX11-NEXT: s_mov_b32 s11, s13 4772; GFX11-NEXT: s_mov_b32 s13, s15 4773; GFX11-NEXT: s_mov_b32 s15, s17 4774; GFX11-NEXT: s_mov_b32 s17, s19 4775; GFX11-NEXT: s_mov_b32 s19, s21 4776; GFX11-NEXT: s_mov_b32 s21, s23 4777; GFX11-NEXT: s_mov_b32 s23, s25 4778; GFX11-NEXT: s_mov_b32 s25, s27 4779; GFX11-NEXT: s_mov_b32 s27, s29 4780; GFX11-NEXT: s_mov_b32 s29, s31 4781; GFX11-NEXT: s_mov_b32 s31, s33 4782; GFX11-NEXT: s_mov_b32 s0, s2 4783; GFX11-NEXT: s_mov_b32 s2, s4 4784; GFX11-NEXT: s_mov_b32 s4, s6 4785; GFX11-NEXT: s_mov_b32 s6, s8 4786; GFX11-NEXT: s_mov_b32 s8, s10 4787; GFX11-NEXT: s_mov_b32 s10, s12 4788; GFX11-NEXT: s_mov_b32 s12, s14 4789; GFX11-NEXT: s_mov_b32 s14, s16 4790; GFX11-NEXT: s_mov_b32 s16, s18 4791; GFX11-NEXT: s_mov_b32 s18, s20 4792; GFX11-NEXT: s_mov_b32 s20, s22 4793; GFX11-NEXT: s_mov_b32 s22, s24 4794; GFX11-NEXT: s_mov_b32 s24, s26 4795; GFX11-NEXT: s_mov_b32 s26, s28 4796; GFX11-NEXT: s_mov_b32 s28, s30 4797; GFX11-NEXT: s_mov_b32 s30, s32 4798; GFX11-NEXT: v_dual_mov_b32 v33, s31 :: v_dual_mov_b32 v32, s30 4799; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 4800; GFX11-NEXT: s_lshl_b32 m0, s34, 1 4801; GFX11-NEXT: v_dual_mov_b32 v31, s29 :: v_dual_mov_b32 v30, s28 4802; GFX11-NEXT: v_dual_mov_b32 v29, s27 :: v_dual_mov_b32 v28, s26 4803; GFX11-NEXT: v_dual_mov_b32 v27, s25 :: v_dual_mov_b32 v26, s24 4804; GFX11-NEXT: v_dual_mov_b32 v25, s23 :: v_dual_mov_b32 v24, s22 4805; GFX11-NEXT: v_dual_mov_b32 v23, s21 :: v_dual_mov_b32 v22, s20 4806; GFX11-NEXT: v_dual_mov_b32 v21, s19 :: v_dual_mov_b32 v20, s18 4807; GFX11-NEXT: v_dual_mov_b32 v19, s17 :: v_dual_mov_b32 v18, s16 4808; GFX11-NEXT: v_dual_mov_b32 v17, s15 :: v_dual_mov_b32 v16, s14 4809; GFX11-NEXT: v_dual_mov_b32 v15, s13 :: v_dual_mov_b32 v14, s12 4810; GFX11-NEXT: v_dual_mov_b32 v13, s11 :: v_dual_mov_b32 v12, s10 4811; GFX11-NEXT: v_dual_mov_b32 v11, s9 :: v_dual_mov_b32 v10, s8 4812; GFX11-NEXT: v_dual_mov_b32 v9, s7 :: v_dual_mov_b32 v8, s6 4813; GFX11-NEXT: v_dual_mov_b32 v7, s5 :: v_dual_mov_b32 v6, s4 4814; GFX11-NEXT: v_dual_mov_b32 v5, s3 :: v_dual_mov_b32 v4, s2 4815; GFX11-NEXT: v_movreld_b32_e32 v2, v0 4816; GFX11-NEXT: v_movreld_b32_e32 v3, v1 4817; GFX11-NEXT: v_readfirstlane_b32 s0, v2 4818; GFX11-NEXT: v_readfirstlane_b32 s1, v3 4819; GFX11-NEXT: v_readfirstlane_b32 s2, v4 4820; GFX11-NEXT: v_readfirstlane_b32 s3, v5 4821; GFX11-NEXT: v_readfirstlane_b32 s4, v6 4822; GFX11-NEXT: v_readfirstlane_b32 s5, v7 4823; GFX11-NEXT: v_readfirstlane_b32 s6, v8 4824; GFX11-NEXT: v_readfirstlane_b32 s7, v9 4825; GFX11-NEXT: v_readfirstlane_b32 s8, v10 4826; GFX11-NEXT: v_readfirstlane_b32 s9, v11 4827; GFX11-NEXT: v_readfirstlane_b32 s10, v12 4828; GFX11-NEXT: v_readfirstlane_b32 s11, v13 4829; GFX11-NEXT: v_readfirstlane_b32 s12, v14 4830; GFX11-NEXT: v_readfirstlane_b32 s13, v15 4831; GFX11-NEXT: v_readfirstlane_b32 s14, v16 4832; GFX11-NEXT: v_readfirstlane_b32 s15, v17 4833; GFX11-NEXT: v_readfirstlane_b32 s16, v18 4834; GFX11-NEXT: v_readfirstlane_b32 s17, v19 4835; GFX11-NEXT: v_readfirstlane_b32 s18, v20 4836; GFX11-NEXT: v_readfirstlane_b32 s19, v21 4837; GFX11-NEXT: v_readfirstlane_b32 s20, v22 4838; GFX11-NEXT: v_readfirstlane_b32 s21, v23 4839; GFX11-NEXT: v_readfirstlane_b32 s22, v24 4840; GFX11-NEXT: v_readfirstlane_b32 s23, v25 4841; GFX11-NEXT: v_readfirstlane_b32 s24, v26 4842; GFX11-NEXT: v_readfirstlane_b32 s25, v27 4843; GFX11-NEXT: v_readfirstlane_b32 s26, v28 4844; GFX11-NEXT: v_readfirstlane_b32 s27, v29 4845; GFX11-NEXT: v_readfirstlane_b32 s28, v30 4846; GFX11-NEXT: v_readfirstlane_b32 s29, v31 4847; GFX11-NEXT: v_readfirstlane_b32 s30, v32 4848; GFX11-NEXT: v_readfirstlane_b32 s31, v33 4849; GFX11-NEXT: ; return to shader part epilog 4850entry: 4851 %insert = insertelement <16 x i64> %vec, i64 %val, i32 %idx 4852 ret <16 x i64> %insert 4853} 4854 4855define amdgpu_ps <16 x double> @dyn_insertelement_v16f64_s_v_s(<16 x double> inreg %vec, double %val, i32 inreg %idx) { 4856; GPRIDX-LABEL: dyn_insertelement_v16f64_s_v_s: 4857; GPRIDX: ; %bb.0: ; %entry 4858; GPRIDX-NEXT: s_mov_b32 s1, s3 4859; GPRIDX-NEXT: s_mov_b32 s3, s5 4860; GPRIDX-NEXT: s_mov_b32 s5, s7 4861; GPRIDX-NEXT: s_mov_b32 s7, s9 4862; GPRIDX-NEXT: s_mov_b32 s9, s11 4863; GPRIDX-NEXT: s_mov_b32 s11, s13 4864; GPRIDX-NEXT: s_mov_b32 s13, s15 4865; GPRIDX-NEXT: s_mov_b32 s15, s17 4866; GPRIDX-NEXT: s_mov_b32 s17, s19 4867; GPRIDX-NEXT: s_mov_b32 s19, s21 4868; GPRIDX-NEXT: s_mov_b32 s21, s23 4869; GPRIDX-NEXT: s_mov_b32 s23, s25 4870; GPRIDX-NEXT: s_mov_b32 s25, s27 4871; GPRIDX-NEXT: s_mov_b32 s27, s29 4872; GPRIDX-NEXT: s_mov_b32 s29, s31 4873; GPRIDX-NEXT: s_mov_b32 s31, s33 4874; GPRIDX-NEXT: s_mov_b32 s0, s2 4875; GPRIDX-NEXT: s_mov_b32 s2, s4 4876; GPRIDX-NEXT: s_mov_b32 s4, s6 4877; GPRIDX-NEXT: s_mov_b32 s6, s8 4878; GPRIDX-NEXT: s_mov_b32 s8, s10 4879; GPRIDX-NEXT: s_mov_b32 s10, s12 4880; GPRIDX-NEXT: s_mov_b32 s12, s14 4881; GPRIDX-NEXT: s_mov_b32 s14, s16 4882; GPRIDX-NEXT: s_mov_b32 s16, s18 4883; GPRIDX-NEXT: s_mov_b32 s18, s20 4884; GPRIDX-NEXT: s_mov_b32 s20, s22 4885; GPRIDX-NEXT: s_mov_b32 s22, s24 4886; GPRIDX-NEXT: s_mov_b32 s24, s26 4887; GPRIDX-NEXT: s_mov_b32 s26, s28 4888; GPRIDX-NEXT: s_mov_b32 s28, s30 4889; GPRIDX-NEXT: s_mov_b32 s30, s32 4890; GPRIDX-NEXT: v_mov_b32_e32 v33, s31 4891; GPRIDX-NEXT: s_lshl_b32 s33, s34, 1 4892; GPRIDX-NEXT: v_mov_b32_e32 v32, s30 4893; GPRIDX-NEXT: v_mov_b32_e32 v31, s29 4894; GPRIDX-NEXT: v_mov_b32_e32 v30, s28 4895; GPRIDX-NEXT: v_mov_b32_e32 v29, s27 4896; GPRIDX-NEXT: v_mov_b32_e32 v28, s26 4897; GPRIDX-NEXT: v_mov_b32_e32 v27, s25 4898; GPRIDX-NEXT: v_mov_b32_e32 v26, s24 4899; GPRIDX-NEXT: v_mov_b32_e32 v25, s23 4900; GPRIDX-NEXT: v_mov_b32_e32 v24, s22 4901; GPRIDX-NEXT: v_mov_b32_e32 v23, s21 4902; GPRIDX-NEXT: v_mov_b32_e32 v22, s20 4903; GPRIDX-NEXT: v_mov_b32_e32 v21, s19 4904; GPRIDX-NEXT: v_mov_b32_e32 v20, s18 4905; GPRIDX-NEXT: v_mov_b32_e32 v19, s17 4906; GPRIDX-NEXT: v_mov_b32_e32 v18, s16 4907; GPRIDX-NEXT: v_mov_b32_e32 v17, s15 4908; GPRIDX-NEXT: v_mov_b32_e32 v16, s14 4909; GPRIDX-NEXT: v_mov_b32_e32 v15, s13 4910; GPRIDX-NEXT: v_mov_b32_e32 v14, s12 4911; GPRIDX-NEXT: v_mov_b32_e32 v13, s11 4912; GPRIDX-NEXT: v_mov_b32_e32 v12, s10 4913; GPRIDX-NEXT: v_mov_b32_e32 v11, s9 4914; GPRIDX-NEXT: v_mov_b32_e32 v10, s8 4915; GPRIDX-NEXT: v_mov_b32_e32 v9, s7 4916; GPRIDX-NEXT: v_mov_b32_e32 v8, s6 4917; GPRIDX-NEXT: v_mov_b32_e32 v7, s5 4918; GPRIDX-NEXT: v_mov_b32_e32 v6, s4 4919; GPRIDX-NEXT: v_mov_b32_e32 v5, s3 4920; GPRIDX-NEXT: v_mov_b32_e32 v4, s2 4921; GPRIDX-NEXT: v_mov_b32_e32 v3, s1 4922; GPRIDX-NEXT: v_mov_b32_e32 v2, s0 4923; GPRIDX-NEXT: s_set_gpr_idx_on s33, gpr_idx(DST) 4924; GPRIDX-NEXT: v_mov_b32_e32 v2, v0 4925; GPRIDX-NEXT: v_mov_b32_e32 v3, v1 4926; GPRIDX-NEXT: s_set_gpr_idx_off 4927; GPRIDX-NEXT: v_readfirstlane_b32 s0, v2 4928; GPRIDX-NEXT: v_readfirstlane_b32 s1, v3 4929; GPRIDX-NEXT: v_readfirstlane_b32 s2, v4 4930; GPRIDX-NEXT: v_readfirstlane_b32 s3, v5 4931; GPRIDX-NEXT: v_readfirstlane_b32 s4, v6 4932; GPRIDX-NEXT: v_readfirstlane_b32 s5, v7 4933; GPRIDX-NEXT: v_readfirstlane_b32 s6, v8 4934; GPRIDX-NEXT: v_readfirstlane_b32 s7, v9 4935; GPRIDX-NEXT: v_readfirstlane_b32 s8, v10 4936; GPRIDX-NEXT: v_readfirstlane_b32 s9, v11 4937; GPRIDX-NEXT: v_readfirstlane_b32 s10, v12 4938; GPRIDX-NEXT: v_readfirstlane_b32 s11, v13 4939; GPRIDX-NEXT: v_readfirstlane_b32 s12, v14 4940; GPRIDX-NEXT: v_readfirstlane_b32 s13, v15 4941; GPRIDX-NEXT: v_readfirstlane_b32 s14, v16 4942; GPRIDX-NEXT: v_readfirstlane_b32 s15, v17 4943; GPRIDX-NEXT: v_readfirstlane_b32 s16, v18 4944; GPRIDX-NEXT: v_readfirstlane_b32 s17, v19 4945; GPRIDX-NEXT: v_readfirstlane_b32 s18, v20 4946; GPRIDX-NEXT: v_readfirstlane_b32 s19, v21 4947; GPRIDX-NEXT: v_readfirstlane_b32 s20, v22 4948; GPRIDX-NEXT: v_readfirstlane_b32 s21, v23 4949; GPRIDX-NEXT: v_readfirstlane_b32 s22, v24 4950; GPRIDX-NEXT: v_readfirstlane_b32 s23, v25 4951; GPRIDX-NEXT: v_readfirstlane_b32 s24, v26 4952; GPRIDX-NEXT: v_readfirstlane_b32 s25, v27 4953; GPRIDX-NEXT: v_readfirstlane_b32 s26, v28 4954; GPRIDX-NEXT: v_readfirstlane_b32 s27, v29 4955; GPRIDX-NEXT: v_readfirstlane_b32 s28, v30 4956; GPRIDX-NEXT: v_readfirstlane_b32 s29, v31 4957; GPRIDX-NEXT: v_readfirstlane_b32 s30, v32 4958; GPRIDX-NEXT: v_readfirstlane_b32 s31, v33 4959; GPRIDX-NEXT: ; return to shader part epilog 4960; 4961; GFX10-LABEL: dyn_insertelement_v16f64_s_v_s: 4962; GFX10: ; %bb.0: ; %entry 4963; GFX10-NEXT: s_mov_b32 s1, s3 4964; GFX10-NEXT: s_mov_b32 s3, s5 4965; GFX10-NEXT: s_mov_b32 s5, s7 4966; GFX10-NEXT: s_mov_b32 s7, s9 4967; GFX10-NEXT: s_mov_b32 s9, s11 4968; GFX10-NEXT: s_mov_b32 s11, s13 4969; GFX10-NEXT: s_mov_b32 s13, s15 4970; GFX10-NEXT: s_mov_b32 s15, s17 4971; GFX10-NEXT: s_mov_b32 s17, s19 4972; GFX10-NEXT: s_mov_b32 s19, s21 4973; GFX10-NEXT: s_mov_b32 s21, s23 4974; GFX10-NEXT: s_mov_b32 s23, s25 4975; GFX10-NEXT: s_mov_b32 s25, s27 4976; GFX10-NEXT: s_mov_b32 s27, s29 4977; GFX10-NEXT: s_mov_b32 s29, s31 4978; GFX10-NEXT: s_mov_b32 s31, s33 4979; GFX10-NEXT: s_mov_b32 s0, s2 4980; GFX10-NEXT: s_mov_b32 s2, s4 4981; GFX10-NEXT: s_mov_b32 s4, s6 4982; GFX10-NEXT: s_mov_b32 s6, s8 4983; GFX10-NEXT: s_mov_b32 s8, s10 4984; GFX10-NEXT: s_mov_b32 s10, s12 4985; GFX10-NEXT: s_mov_b32 s12, s14 4986; GFX10-NEXT: s_mov_b32 s14, s16 4987; GFX10-NEXT: s_mov_b32 s16, s18 4988; GFX10-NEXT: s_mov_b32 s18, s20 4989; GFX10-NEXT: s_mov_b32 s20, s22 4990; GFX10-NEXT: s_mov_b32 s22, s24 4991; GFX10-NEXT: s_mov_b32 s24, s26 4992; GFX10-NEXT: s_mov_b32 s26, s28 4993; GFX10-NEXT: s_mov_b32 s28, s30 4994; GFX10-NEXT: s_mov_b32 s30, s32 4995; GFX10-NEXT: v_mov_b32_e32 v33, s31 4996; GFX10-NEXT: v_mov_b32_e32 v2, s0 4997; GFX10-NEXT: s_lshl_b32 m0, s34, 1 4998; GFX10-NEXT: v_mov_b32_e32 v32, s30 4999; GFX10-NEXT: v_mov_b32_e32 v31, s29 5000; GFX10-NEXT: v_mov_b32_e32 v30, s28 5001; GFX10-NEXT: v_mov_b32_e32 v29, s27 5002; GFX10-NEXT: v_mov_b32_e32 v28, s26 5003; GFX10-NEXT: v_mov_b32_e32 v27, s25 5004; GFX10-NEXT: v_mov_b32_e32 v26, s24 5005; GFX10-NEXT: v_mov_b32_e32 v25, s23 5006; GFX10-NEXT: v_mov_b32_e32 v24, s22 5007; GFX10-NEXT: v_mov_b32_e32 v23, s21 5008; GFX10-NEXT: v_mov_b32_e32 v22, s20 5009; GFX10-NEXT: v_mov_b32_e32 v21, s19 5010; GFX10-NEXT: v_mov_b32_e32 v20, s18 5011; GFX10-NEXT: v_mov_b32_e32 v19, s17 5012; GFX10-NEXT: v_mov_b32_e32 v18, s16 5013; GFX10-NEXT: v_mov_b32_e32 v17, s15 5014; GFX10-NEXT: v_mov_b32_e32 v16, s14 5015; GFX10-NEXT: v_mov_b32_e32 v15, s13 5016; GFX10-NEXT: v_mov_b32_e32 v14, s12 5017; GFX10-NEXT: v_mov_b32_e32 v13, s11 5018; GFX10-NEXT: v_mov_b32_e32 v12, s10 5019; GFX10-NEXT: v_mov_b32_e32 v11, s9 5020; GFX10-NEXT: v_mov_b32_e32 v10, s8 5021; GFX10-NEXT: v_mov_b32_e32 v9, s7 5022; GFX10-NEXT: v_mov_b32_e32 v8, s6 5023; GFX10-NEXT: v_mov_b32_e32 v7, s5 5024; GFX10-NEXT: v_mov_b32_e32 v6, s4 5025; GFX10-NEXT: v_mov_b32_e32 v5, s3 5026; GFX10-NEXT: v_mov_b32_e32 v4, s2 5027; GFX10-NEXT: v_mov_b32_e32 v3, s1 5028; GFX10-NEXT: v_movreld_b32_e32 v2, v0 5029; GFX10-NEXT: v_movreld_b32_e32 v3, v1 5030; GFX10-NEXT: v_readfirstlane_b32 s0, v2 5031; GFX10-NEXT: v_readfirstlane_b32 s1, v3 5032; GFX10-NEXT: v_readfirstlane_b32 s2, v4 5033; GFX10-NEXT: v_readfirstlane_b32 s3, v5 5034; GFX10-NEXT: v_readfirstlane_b32 s4, v6 5035; GFX10-NEXT: v_readfirstlane_b32 s5, v7 5036; GFX10-NEXT: v_readfirstlane_b32 s6, v8 5037; GFX10-NEXT: v_readfirstlane_b32 s7, v9 5038; GFX10-NEXT: v_readfirstlane_b32 s8, v10 5039; GFX10-NEXT: v_readfirstlane_b32 s9, v11 5040; GFX10-NEXT: v_readfirstlane_b32 s10, v12 5041; GFX10-NEXT: v_readfirstlane_b32 s11, v13 5042; GFX10-NEXT: v_readfirstlane_b32 s12, v14 5043; GFX10-NEXT: v_readfirstlane_b32 s13, v15 5044; GFX10-NEXT: v_readfirstlane_b32 s14, v16 5045; GFX10-NEXT: v_readfirstlane_b32 s15, v17 5046; GFX10-NEXT: v_readfirstlane_b32 s16, v18 5047; GFX10-NEXT: v_readfirstlane_b32 s17, v19 5048; GFX10-NEXT: v_readfirstlane_b32 s18, v20 5049; GFX10-NEXT: v_readfirstlane_b32 s19, v21 5050; GFX10-NEXT: v_readfirstlane_b32 s20, v22 5051; GFX10-NEXT: v_readfirstlane_b32 s21, v23 5052; GFX10-NEXT: v_readfirstlane_b32 s22, v24 5053; GFX10-NEXT: v_readfirstlane_b32 s23, v25 5054; GFX10-NEXT: v_readfirstlane_b32 s24, v26 5055; GFX10-NEXT: v_readfirstlane_b32 s25, v27 5056; GFX10-NEXT: v_readfirstlane_b32 s26, v28 5057; GFX10-NEXT: v_readfirstlane_b32 s27, v29 5058; GFX10-NEXT: v_readfirstlane_b32 s28, v30 5059; GFX10-NEXT: v_readfirstlane_b32 s29, v31 5060; GFX10-NEXT: v_readfirstlane_b32 s30, v32 5061; GFX10-NEXT: v_readfirstlane_b32 s31, v33 5062; GFX10-NEXT: ; return to shader part epilog 5063; 5064; GFX11-LABEL: dyn_insertelement_v16f64_s_v_s: 5065; GFX11: ; %bb.0: ; %entry 5066; GFX11-NEXT: s_mov_b32 s1, s3 5067; GFX11-NEXT: s_mov_b32 s3, s5 5068; GFX11-NEXT: s_mov_b32 s5, s7 5069; GFX11-NEXT: s_mov_b32 s7, s9 5070; GFX11-NEXT: s_mov_b32 s9, s11 5071; GFX11-NEXT: s_mov_b32 s11, s13 5072; GFX11-NEXT: s_mov_b32 s13, s15 5073; GFX11-NEXT: s_mov_b32 s15, s17 5074; GFX11-NEXT: s_mov_b32 s17, s19 5075; GFX11-NEXT: s_mov_b32 s19, s21 5076; GFX11-NEXT: s_mov_b32 s21, s23 5077; GFX11-NEXT: s_mov_b32 s23, s25 5078; GFX11-NEXT: s_mov_b32 s25, s27 5079; GFX11-NEXT: s_mov_b32 s27, s29 5080; GFX11-NEXT: s_mov_b32 s29, s31 5081; GFX11-NEXT: s_mov_b32 s31, s33 5082; GFX11-NEXT: s_mov_b32 s0, s2 5083; GFX11-NEXT: s_mov_b32 s2, s4 5084; GFX11-NEXT: s_mov_b32 s4, s6 5085; GFX11-NEXT: s_mov_b32 s6, s8 5086; GFX11-NEXT: s_mov_b32 s8, s10 5087; GFX11-NEXT: s_mov_b32 s10, s12 5088; GFX11-NEXT: s_mov_b32 s12, s14 5089; GFX11-NEXT: s_mov_b32 s14, s16 5090; GFX11-NEXT: s_mov_b32 s16, s18 5091; GFX11-NEXT: s_mov_b32 s18, s20 5092; GFX11-NEXT: s_mov_b32 s20, s22 5093; GFX11-NEXT: s_mov_b32 s22, s24 5094; GFX11-NEXT: s_mov_b32 s24, s26 5095; GFX11-NEXT: s_mov_b32 s26, s28 5096; GFX11-NEXT: s_mov_b32 s28, s30 5097; GFX11-NEXT: s_mov_b32 s30, s32 5098; GFX11-NEXT: v_dual_mov_b32 v33, s31 :: v_dual_mov_b32 v32, s30 5099; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 5100; GFX11-NEXT: s_lshl_b32 m0, s34, 1 5101; GFX11-NEXT: v_dual_mov_b32 v31, s29 :: v_dual_mov_b32 v30, s28 5102; GFX11-NEXT: v_dual_mov_b32 v29, s27 :: v_dual_mov_b32 v28, s26 5103; GFX11-NEXT: v_dual_mov_b32 v27, s25 :: v_dual_mov_b32 v26, s24 5104; GFX11-NEXT: v_dual_mov_b32 v25, s23 :: v_dual_mov_b32 v24, s22 5105; GFX11-NEXT: v_dual_mov_b32 v23, s21 :: v_dual_mov_b32 v22, s20 5106; GFX11-NEXT: v_dual_mov_b32 v21, s19 :: v_dual_mov_b32 v20, s18 5107; GFX11-NEXT: v_dual_mov_b32 v19, s17 :: v_dual_mov_b32 v18, s16 5108; GFX11-NEXT: v_dual_mov_b32 v17, s15 :: v_dual_mov_b32 v16, s14 5109; GFX11-NEXT: v_dual_mov_b32 v15, s13 :: v_dual_mov_b32 v14, s12 5110; GFX11-NEXT: v_dual_mov_b32 v13, s11 :: v_dual_mov_b32 v12, s10 5111; GFX11-NEXT: v_dual_mov_b32 v11, s9 :: v_dual_mov_b32 v10, s8 5112; GFX11-NEXT: v_dual_mov_b32 v9, s7 :: v_dual_mov_b32 v8, s6 5113; GFX11-NEXT: v_dual_mov_b32 v7, s5 :: v_dual_mov_b32 v6, s4 5114; GFX11-NEXT: v_dual_mov_b32 v5, s3 :: v_dual_mov_b32 v4, s2 5115; GFX11-NEXT: v_movreld_b32_e32 v2, v0 5116; GFX11-NEXT: v_movreld_b32_e32 v3, v1 5117; GFX11-NEXT: v_readfirstlane_b32 s0, v2 5118; GFX11-NEXT: v_readfirstlane_b32 s1, v3 5119; GFX11-NEXT: v_readfirstlane_b32 s2, v4 5120; GFX11-NEXT: v_readfirstlane_b32 s3, v5 5121; GFX11-NEXT: v_readfirstlane_b32 s4, v6 5122; GFX11-NEXT: v_readfirstlane_b32 s5, v7 5123; GFX11-NEXT: v_readfirstlane_b32 s6, v8 5124; GFX11-NEXT: v_readfirstlane_b32 s7, v9 5125; GFX11-NEXT: v_readfirstlane_b32 s8, v10 5126; GFX11-NEXT: v_readfirstlane_b32 s9, v11 5127; GFX11-NEXT: v_readfirstlane_b32 s10, v12 5128; GFX11-NEXT: v_readfirstlane_b32 s11, v13 5129; GFX11-NEXT: v_readfirstlane_b32 s12, v14 5130; GFX11-NEXT: v_readfirstlane_b32 s13, v15 5131; GFX11-NEXT: v_readfirstlane_b32 s14, v16 5132; GFX11-NEXT: v_readfirstlane_b32 s15, v17 5133; GFX11-NEXT: v_readfirstlane_b32 s16, v18 5134; GFX11-NEXT: v_readfirstlane_b32 s17, v19 5135; GFX11-NEXT: v_readfirstlane_b32 s18, v20 5136; GFX11-NEXT: v_readfirstlane_b32 s19, v21 5137; GFX11-NEXT: v_readfirstlane_b32 s20, v22 5138; GFX11-NEXT: v_readfirstlane_b32 s21, v23 5139; GFX11-NEXT: v_readfirstlane_b32 s22, v24 5140; GFX11-NEXT: v_readfirstlane_b32 s23, v25 5141; GFX11-NEXT: v_readfirstlane_b32 s24, v26 5142; GFX11-NEXT: v_readfirstlane_b32 s25, v27 5143; GFX11-NEXT: v_readfirstlane_b32 s26, v28 5144; GFX11-NEXT: v_readfirstlane_b32 s27, v29 5145; GFX11-NEXT: v_readfirstlane_b32 s28, v30 5146; GFX11-NEXT: v_readfirstlane_b32 s29, v31 5147; GFX11-NEXT: v_readfirstlane_b32 s30, v32 5148; GFX11-NEXT: v_readfirstlane_b32 s31, v33 5149; GFX11-NEXT: ; return to shader part epilog 5150entry: 5151 %insert = insertelement <16 x double> %vec, double %val, i32 %idx 5152 ret <16 x double> %insert 5153} 5154 5155define amdgpu_ps <7 x i32> @dyn_insertelement_v7i32_s_s_s(<7 x i32> inreg %vec, i32 inreg %val, i32 inreg %idx) { 5156; GPRIDX-LABEL: dyn_insertelement_v7i32_s_s_s: 5157; GPRIDX: ; %bb.0: ; %entry 5158; GPRIDX-NEXT: s_cmp_eq_u32 s10, 0 5159; GPRIDX-NEXT: s_cselect_b32 s0, s9, s2 5160; GPRIDX-NEXT: s_cmp_eq_u32 s10, 1 5161; GPRIDX-NEXT: s_cselect_b32 s1, s9, s3 5162; GPRIDX-NEXT: s_cmp_eq_u32 s10, 2 5163; GPRIDX-NEXT: s_cselect_b32 s2, s9, s4 5164; GPRIDX-NEXT: s_cmp_eq_u32 s10, 3 5165; GPRIDX-NEXT: s_cselect_b32 s3, s9, s5 5166; GPRIDX-NEXT: s_cmp_eq_u32 s10, 4 5167; GPRIDX-NEXT: s_cselect_b32 s4, s9, s6 5168; GPRIDX-NEXT: s_cmp_eq_u32 s10, 5 5169; GPRIDX-NEXT: s_cselect_b32 s5, s9, s7 5170; GPRIDX-NEXT: s_cmp_eq_u32 s10, 6 5171; GPRIDX-NEXT: s_cselect_b32 s6, s9, s8 5172; GPRIDX-NEXT: ; return to shader part epilog 5173; 5174; GFX10PLUS-LABEL: dyn_insertelement_v7i32_s_s_s: 5175; GFX10PLUS: ; %bb.0: ; %entry 5176; GFX10PLUS-NEXT: s_cmp_eq_u32 s10, 0 5177; GFX10PLUS-NEXT: s_cselect_b32 s0, s9, s2 5178; GFX10PLUS-NEXT: s_cmp_eq_u32 s10, 1 5179; GFX10PLUS-NEXT: s_cselect_b32 s1, s9, s3 5180; GFX10PLUS-NEXT: s_cmp_eq_u32 s10, 2 5181; GFX10PLUS-NEXT: s_cselect_b32 s2, s9, s4 5182; GFX10PLUS-NEXT: s_cmp_eq_u32 s10, 3 5183; GFX10PLUS-NEXT: s_cselect_b32 s3, s9, s5 5184; GFX10PLUS-NEXT: s_cmp_eq_u32 s10, 4 5185; GFX10PLUS-NEXT: s_cselect_b32 s4, s9, s6 5186; GFX10PLUS-NEXT: s_cmp_eq_u32 s10, 5 5187; GFX10PLUS-NEXT: s_cselect_b32 s5, s9, s7 5188; GFX10PLUS-NEXT: s_cmp_eq_u32 s10, 6 5189; GFX10PLUS-NEXT: s_cselect_b32 s6, s9, s8 5190; GFX10PLUS-NEXT: ; return to shader part epilog 5191entry: 5192 %insert = insertelement <7 x i32> %vec, i32 %val, i32 %idx 5193 ret <7 x i32> %insert 5194} 5195 5196define amdgpu_ps <7 x ptr addrspace(3)> @dyn_insertelement_v7p3i8_s_s_s(<7 x ptr addrspace(3)> inreg %vec, ptr addrspace(3) inreg %val, i32 inreg %idx) { 5197; GPRIDX-LABEL: dyn_insertelement_v7p3i8_s_s_s: 5198; GPRIDX: ; %bb.0: ; %entry 5199; GPRIDX-NEXT: s_cmp_eq_u32 s10, 0 5200; GPRIDX-NEXT: s_cselect_b32 s0, s9, s2 5201; GPRIDX-NEXT: s_cmp_eq_u32 s10, 1 5202; GPRIDX-NEXT: s_cselect_b32 s1, s9, s3 5203; GPRIDX-NEXT: s_cmp_eq_u32 s10, 2 5204; GPRIDX-NEXT: s_cselect_b32 s2, s9, s4 5205; GPRIDX-NEXT: s_cmp_eq_u32 s10, 3 5206; GPRIDX-NEXT: s_cselect_b32 s3, s9, s5 5207; GPRIDX-NEXT: s_cmp_eq_u32 s10, 4 5208; GPRIDX-NEXT: s_cselect_b32 s4, s9, s6 5209; GPRIDX-NEXT: s_cmp_eq_u32 s10, 5 5210; GPRIDX-NEXT: s_cselect_b32 s5, s9, s7 5211; GPRIDX-NEXT: s_cmp_eq_u32 s10, 6 5212; GPRIDX-NEXT: s_cselect_b32 s6, s9, s8 5213; GPRIDX-NEXT: ; return to shader part epilog 5214; 5215; GFX10PLUS-LABEL: dyn_insertelement_v7p3i8_s_s_s: 5216; GFX10PLUS: ; %bb.0: ; %entry 5217; GFX10PLUS-NEXT: s_cmp_eq_u32 s10, 0 5218; GFX10PLUS-NEXT: s_cselect_b32 s0, s9, s2 5219; GFX10PLUS-NEXT: s_cmp_eq_u32 s10, 1 5220; GFX10PLUS-NEXT: s_cselect_b32 s1, s9, s3 5221; GFX10PLUS-NEXT: s_cmp_eq_u32 s10, 2 5222; GFX10PLUS-NEXT: s_cselect_b32 s2, s9, s4 5223; GFX10PLUS-NEXT: s_cmp_eq_u32 s10, 3 5224; GFX10PLUS-NEXT: s_cselect_b32 s3, s9, s5 5225; GFX10PLUS-NEXT: s_cmp_eq_u32 s10, 4 5226; GFX10PLUS-NEXT: s_cselect_b32 s4, s9, s6 5227; GFX10PLUS-NEXT: s_cmp_eq_u32 s10, 5 5228; GFX10PLUS-NEXT: s_cselect_b32 s5, s9, s7 5229; GFX10PLUS-NEXT: s_cmp_eq_u32 s10, 6 5230; GFX10PLUS-NEXT: s_cselect_b32 s6, s9, s8 5231; GFX10PLUS-NEXT: ; return to shader part epilog 5232entry: 5233 %insert = insertelement <7 x ptr addrspace(3)> %vec, ptr addrspace(3) %val, i32 %idx 5234 ret <7 x ptr addrspace(3)> %insert 5235} 5236 5237define amdgpu_ps <7 x float> @dyn_insertelement_v7f32_s_v_s(<7 x float> inreg %vec, float %val, i32 inreg %idx) { 5238; GPRIDX-LABEL: dyn_insertelement_v7f32_s_v_s: 5239; GPRIDX: ; %bb.0: ; %entry 5240; GPRIDX-NEXT: v_mov_b32_e32 v1, s2 5241; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s9, 0 5242; GPRIDX-NEXT: v_mov_b32_e32 v2, s3 5243; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v1, v0, vcc 5244; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s9, 1 5245; GPRIDX-NEXT: v_mov_b32_e32 v3, s4 5246; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v2, v0, vcc 5247; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s9, 2 5248; GPRIDX-NEXT: v_mov_b32_e32 v4, s5 5249; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v3, v0, vcc 5250; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s9, 3 5251; GPRIDX-NEXT: v_mov_b32_e32 v5, s6 5252; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v4, v0, vcc 5253; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s9, 4 5254; GPRIDX-NEXT: v_mov_b32_e32 v6, s7 5255; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v5, v0, vcc 5256; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s9, 5 5257; GPRIDX-NEXT: v_mov_b32_e32 v8, s8 5258; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v6, v0, vcc 5259; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s9, 6 5260; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v8, v0, vcc 5261; GPRIDX-NEXT: v_mov_b32_e32 v0, v7 5262; GPRIDX-NEXT: ; return to shader part epilog 5263; 5264; GFX10PLUS-LABEL: dyn_insertelement_v7f32_s_v_s: 5265; GFX10PLUS: ; %bb.0: ; %entry 5266; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s9, 0 5267; GFX10PLUS-NEXT: v_cndmask_b32_e32 v7, s2, v0, vcc_lo 5268; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s9, 1 5269; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s3, v0, vcc_lo 5270; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s9, 2 5271; GFX10PLUS-NEXT: v_cndmask_b32_e32 v2, s4, v0, vcc_lo 5272; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s9, 3 5273; GFX10PLUS-NEXT: v_cndmask_b32_e32 v3, s5, v0, vcc_lo 5274; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s9, 4 5275; GFX10PLUS-NEXT: v_cndmask_b32_e32 v4, s6, v0, vcc_lo 5276; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s9, 5 5277; GFX10PLUS-NEXT: v_cndmask_b32_e32 v5, s7, v0, vcc_lo 5278; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s9, 6 5279; GFX10PLUS-NEXT: v_cndmask_b32_e32 v6, s8, v0, vcc_lo 5280; GFX10PLUS-NEXT: v_mov_b32_e32 v0, v7 5281; GFX10PLUS-NEXT: ; return to shader part epilog 5282entry: 5283 %insert = insertelement <7 x float> %vec, float %val, i32 %idx 5284 ret <7 x float> %insert 5285} 5286 5287define amdgpu_ps <7 x float> @dyn_insertelement_v7f32_s_v_v(<7 x float> inreg %vec, float %val, i32 %idx) { 5288; GPRIDX-LABEL: dyn_insertelement_v7f32_s_v_v: 5289; GPRIDX: ; %bb.0: ; %entry 5290; GPRIDX-NEXT: v_mov_b32_e32 v2, s2 5291; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 5292; GPRIDX-NEXT: v_mov_b32_e32 v3, s3 5293; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v2, v0, vcc 5294; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 5295; GPRIDX-NEXT: v_mov_b32_e32 v4, s4 5296; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v3, v0, vcc 5297; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v1 5298; GPRIDX-NEXT: v_mov_b32_e32 v5, s5 5299; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v4, v0, vcc 5300; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v1 5301; GPRIDX-NEXT: v_mov_b32_e32 v6, s6 5302; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v5, v0, vcc 5303; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v1 5304; GPRIDX-NEXT: v_mov_b32_e32 v9, s7 5305; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v6, v0, vcc 5306; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v1 5307; GPRIDX-NEXT: v_mov_b32_e32 v10, s8 5308; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v9, v0, vcc 5309; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v1 5310; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v10, v0, vcc 5311; GPRIDX-NEXT: v_mov_b32_e32 v0, v8 5312; GPRIDX-NEXT: v_mov_b32_e32 v1, v7 5313; GPRIDX-NEXT: ; return to shader part epilog 5314; 5315; GFX10-LABEL: dyn_insertelement_v7f32_s_v_v: 5316; GFX10: ; %bb.0: ; %entry 5317; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 5318; GFX10-NEXT: v_cndmask_b32_e32 v8, s2, v0, vcc_lo 5319; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1 5320; GFX10-NEXT: v_cndmask_b32_e32 v7, s3, v0, vcc_lo 5321; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1 5322; GFX10-NEXT: v_cndmask_b32_e32 v2, s4, v0, vcc_lo 5323; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1 5324; GFX10-NEXT: v_cndmask_b32_e32 v3, s5, v0, vcc_lo 5325; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v1 5326; GFX10-NEXT: v_cndmask_b32_e32 v4, s6, v0, vcc_lo 5327; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v1 5328; GFX10-NEXT: v_cndmask_b32_e32 v5, s7, v0, vcc_lo 5329; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v1 5330; GFX10-NEXT: v_mov_b32_e32 v1, v7 5331; GFX10-NEXT: v_cndmask_b32_e32 v6, s8, v0, vcc_lo 5332; GFX10-NEXT: v_mov_b32_e32 v0, v8 5333; GFX10-NEXT: ; return to shader part epilog 5334; 5335; GFX11-LABEL: dyn_insertelement_v7f32_s_v_v: 5336; GFX11: ; %bb.0: ; %entry 5337; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 5338; GFX11-NEXT: v_cndmask_b32_e32 v8, s2, v0, vcc_lo 5339; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1 5340; GFX11-NEXT: v_cndmask_b32_e32 v7, s3, v0, vcc_lo 5341; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1 5342; GFX11-NEXT: v_cndmask_b32_e32 v2, s4, v0, vcc_lo 5343; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1 5344; GFX11-NEXT: v_cndmask_b32_e32 v3, s5, v0, vcc_lo 5345; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v1 5346; GFX11-NEXT: v_cndmask_b32_e32 v4, s6, v0, vcc_lo 5347; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v1 5348; GFX11-NEXT: v_cndmask_b32_e32 v5, s7, v0, vcc_lo 5349; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v1 5350; GFX11-NEXT: v_dual_mov_b32 v1, v7 :: v_dual_cndmask_b32 v6, s8, v0 5351; GFX11-NEXT: v_mov_b32_e32 v0, v8 5352; GFX11-NEXT: ; return to shader part epilog 5353entry: 5354 %insert = insertelement <7 x float> %vec, float %val, i32 %idx 5355 ret <7 x float> %insert 5356} 5357 5358define amdgpu_ps <7 x float> @dyn_insertelement_v7f32_v_v_s(<7 x float> %vec, float %val, i32 inreg %idx) { 5359; GPRIDX-LABEL: dyn_insertelement_v7f32_v_v_s: 5360; GPRIDX: ; %bb.0: ; %entry 5361; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 0 5362; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc 5363; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 1 5364; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 5365; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 2 5366; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc 5367; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 3 5368; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc 5369; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 4 5370; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc 5371; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 5 5372; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc 5373; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 6 5374; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc 5375; GPRIDX-NEXT: ; return to shader part epilog 5376; 5377; GFX10PLUS-LABEL: dyn_insertelement_v7f32_v_v_s: 5378; GFX10PLUS: ; %bb.0: ; %entry 5379; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 0 5380; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo 5381; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 1 5382; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc_lo 5383; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 2 5384; GFX10PLUS-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc_lo 5385; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 3 5386; GFX10PLUS-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc_lo 5387; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 4 5388; GFX10PLUS-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc_lo 5389; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 5 5390; GFX10PLUS-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo 5391; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 6 5392; GFX10PLUS-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc_lo 5393; GFX10PLUS-NEXT: ; return to shader part epilog 5394entry: 5395 %insert = insertelement <7 x float> %vec, float %val, i32 %idx 5396 ret <7 x float> %insert 5397} 5398 5399define amdgpu_ps <7 x float> @dyn_insertelement_v7f32_v_v_v(<7 x float> %vec, float %val, i32 %idx) { 5400; GPRIDX-LABEL: dyn_insertelement_v7f32_v_v_v: 5401; GPRIDX: ; %bb.0: ; %entry 5402; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v8 5403; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc 5404; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8 5405; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 5406; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v8 5407; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc 5408; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v8 5409; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc 5410; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v8 5411; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc 5412; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v8 5413; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc 5414; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v8 5415; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc 5416; GPRIDX-NEXT: ; return to shader part epilog 5417; 5418; GFX10PLUS-LABEL: dyn_insertelement_v7f32_v_v_v: 5419; GFX10PLUS: ; %bb.0: ; %entry 5420; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v8 5421; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo 5422; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v8 5423; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc_lo 5424; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v8 5425; GFX10PLUS-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc_lo 5426; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v8 5427; GFX10PLUS-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc_lo 5428; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v8 5429; GFX10PLUS-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc_lo 5430; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v8 5431; GFX10PLUS-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo 5432; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v8 5433; GFX10PLUS-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc_lo 5434; GFX10PLUS-NEXT: ; return to shader part epilog 5435entry: 5436 %insert = insertelement <7 x float> %vec, float %val, i32 %idx 5437 ret <7 x float> %insert 5438} 5439 5440define amdgpu_ps <7 x double> @dyn_insertelement_v7f64_s_s_s(<7 x double> inreg %vec, double inreg %val, i32 inreg %idx) { 5441; GPRIDX-LABEL: dyn_insertelement_v7f64_s_s_s: 5442; GPRIDX: ; %bb.0: ; %entry 5443; GPRIDX-NEXT: s_mov_b32 s0, s2 5444; GPRIDX-NEXT: s_mov_b32 s1, s3 5445; GPRIDX-NEXT: s_mov_b32 s2, s4 5446; GPRIDX-NEXT: s_mov_b32 s3, s5 5447; GPRIDX-NEXT: s_mov_b32 s4, s6 5448; GPRIDX-NEXT: s_mov_b32 s5, s7 5449; GPRIDX-NEXT: s_mov_b32 s6, s8 5450; GPRIDX-NEXT: s_mov_b32 s7, s9 5451; GPRIDX-NEXT: s_mov_b32 s8, s10 5452; GPRIDX-NEXT: s_mov_b32 s9, s11 5453; GPRIDX-NEXT: s_mov_b32 s10, s12 5454; GPRIDX-NEXT: s_mov_b32 s11, s13 5455; GPRIDX-NEXT: s_mov_b32 s12, s14 5456; GPRIDX-NEXT: s_mov_b32 s13, s15 5457; GPRIDX-NEXT: s_mov_b32 m0, s18 5458; GPRIDX-NEXT: s_nop 0 5459; GPRIDX-NEXT: s_movreld_b64 s[0:1], s[16:17] 5460; GPRIDX-NEXT: ; return to shader part epilog 5461; 5462; GFX10PLUS-LABEL: dyn_insertelement_v7f64_s_s_s: 5463; GFX10PLUS: ; %bb.0: ; %entry 5464; GFX10PLUS-NEXT: s_mov_b32 s0, s2 5465; GFX10PLUS-NEXT: s_mov_b32 s1, s3 5466; GFX10PLUS-NEXT: s_mov_b32 m0, s18 5467; GFX10PLUS-NEXT: s_mov_b32 s2, s4 5468; GFX10PLUS-NEXT: s_mov_b32 s3, s5 5469; GFX10PLUS-NEXT: s_mov_b32 s4, s6 5470; GFX10PLUS-NEXT: s_mov_b32 s5, s7 5471; GFX10PLUS-NEXT: s_mov_b32 s6, s8 5472; GFX10PLUS-NEXT: s_mov_b32 s7, s9 5473; GFX10PLUS-NEXT: s_mov_b32 s8, s10 5474; GFX10PLUS-NEXT: s_mov_b32 s9, s11 5475; GFX10PLUS-NEXT: s_mov_b32 s10, s12 5476; GFX10PLUS-NEXT: s_mov_b32 s11, s13 5477; GFX10PLUS-NEXT: s_mov_b32 s12, s14 5478; GFX10PLUS-NEXT: s_mov_b32 s13, s15 5479; GFX10PLUS-NEXT: s_movreld_b64 s[0:1], s[16:17] 5480; GFX10PLUS-NEXT: ; return to shader part epilog 5481entry: 5482 %insert = insertelement <7 x double> %vec, double %val, i32 %idx 5483 ret <7 x double> %insert 5484} 5485 5486define amdgpu_ps <7 x double> @dyn_insertelement_v7f64_s_v_s(<7 x double> inreg %vec, double %val, i32 inreg %idx) { 5487; GPRIDX-LABEL: dyn_insertelement_v7f64_s_v_s: 5488; GPRIDX: ; %bb.0: ; %entry 5489; GPRIDX-NEXT: s_mov_b32 s0, s2 5490; GPRIDX-NEXT: s_mov_b32 s1, s3 5491; GPRIDX-NEXT: s_mov_b32 s2, s4 5492; GPRIDX-NEXT: s_mov_b32 s3, s5 5493; GPRIDX-NEXT: s_mov_b32 s4, s6 5494; GPRIDX-NEXT: s_mov_b32 s5, s7 5495; GPRIDX-NEXT: s_mov_b32 s6, s8 5496; GPRIDX-NEXT: s_mov_b32 s7, s9 5497; GPRIDX-NEXT: s_mov_b32 s8, s10 5498; GPRIDX-NEXT: s_mov_b32 s9, s11 5499; GPRIDX-NEXT: s_mov_b32 s10, s12 5500; GPRIDX-NEXT: s_mov_b32 s11, s13 5501; GPRIDX-NEXT: s_mov_b32 s12, s14 5502; GPRIDX-NEXT: s_mov_b32 s13, s15 5503; GPRIDX-NEXT: v_mov_b32_e32 v17, s15 5504; GPRIDX-NEXT: v_mov_b32_e32 v16, s14 5505; GPRIDX-NEXT: v_mov_b32_e32 v15, s13 5506; GPRIDX-NEXT: v_mov_b32_e32 v14, s12 5507; GPRIDX-NEXT: v_mov_b32_e32 v13, s11 5508; GPRIDX-NEXT: v_mov_b32_e32 v12, s10 5509; GPRIDX-NEXT: v_mov_b32_e32 v11, s9 5510; GPRIDX-NEXT: v_mov_b32_e32 v10, s8 5511; GPRIDX-NEXT: v_mov_b32_e32 v9, s7 5512; GPRIDX-NEXT: v_mov_b32_e32 v8, s6 5513; GPRIDX-NEXT: v_mov_b32_e32 v7, s5 5514; GPRIDX-NEXT: v_mov_b32_e32 v6, s4 5515; GPRIDX-NEXT: v_mov_b32_e32 v5, s3 5516; GPRIDX-NEXT: v_mov_b32_e32 v4, s2 5517; GPRIDX-NEXT: v_mov_b32_e32 v3, s1 5518; GPRIDX-NEXT: v_mov_b32_e32 v2, s0 5519; GPRIDX-NEXT: s_lshl_b32 s0, s16, 1 5520; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST) 5521; GPRIDX-NEXT: v_mov_b32_e32 v2, v0 5522; GPRIDX-NEXT: v_mov_b32_e32 v3, v1 5523; GPRIDX-NEXT: s_set_gpr_idx_off 5524; GPRIDX-NEXT: v_readfirstlane_b32 s0, v2 5525; GPRIDX-NEXT: v_readfirstlane_b32 s1, v3 5526; GPRIDX-NEXT: v_readfirstlane_b32 s2, v4 5527; GPRIDX-NEXT: v_readfirstlane_b32 s3, v5 5528; GPRIDX-NEXT: v_readfirstlane_b32 s4, v6 5529; GPRIDX-NEXT: v_readfirstlane_b32 s5, v7 5530; GPRIDX-NEXT: v_readfirstlane_b32 s6, v8 5531; GPRIDX-NEXT: v_readfirstlane_b32 s7, v9 5532; GPRIDX-NEXT: v_readfirstlane_b32 s8, v10 5533; GPRIDX-NEXT: v_readfirstlane_b32 s9, v11 5534; GPRIDX-NEXT: v_readfirstlane_b32 s10, v12 5535; GPRIDX-NEXT: v_readfirstlane_b32 s11, v13 5536; GPRIDX-NEXT: v_readfirstlane_b32 s12, v14 5537; GPRIDX-NEXT: v_readfirstlane_b32 s13, v15 5538; GPRIDX-NEXT: ; return to shader part epilog 5539; 5540; GFX10-LABEL: dyn_insertelement_v7f64_s_v_s: 5541; GFX10: ; %bb.0: ; %entry 5542; GFX10-NEXT: s_mov_b32 s0, s2 5543; GFX10-NEXT: s_mov_b32 s1, s3 5544; GFX10-NEXT: s_mov_b32 s2, s4 5545; GFX10-NEXT: s_mov_b32 s3, s5 5546; GFX10-NEXT: s_mov_b32 s4, s6 5547; GFX10-NEXT: s_mov_b32 s5, s7 5548; GFX10-NEXT: s_mov_b32 s6, s8 5549; GFX10-NEXT: s_mov_b32 s7, s9 5550; GFX10-NEXT: s_mov_b32 s8, s10 5551; GFX10-NEXT: s_mov_b32 s9, s11 5552; GFX10-NEXT: s_mov_b32 s10, s12 5553; GFX10-NEXT: s_mov_b32 s11, s13 5554; GFX10-NEXT: s_mov_b32 s12, s14 5555; GFX10-NEXT: s_mov_b32 s13, s15 5556; GFX10-NEXT: v_mov_b32_e32 v17, s15 5557; GFX10-NEXT: v_mov_b32_e32 v2, s0 5558; GFX10-NEXT: s_lshl_b32 m0, s16, 1 5559; GFX10-NEXT: v_mov_b32_e32 v16, s14 5560; GFX10-NEXT: v_mov_b32_e32 v15, s13 5561; GFX10-NEXT: v_mov_b32_e32 v14, s12 5562; GFX10-NEXT: v_mov_b32_e32 v13, s11 5563; GFX10-NEXT: v_mov_b32_e32 v12, s10 5564; GFX10-NEXT: v_mov_b32_e32 v11, s9 5565; GFX10-NEXT: v_mov_b32_e32 v10, s8 5566; GFX10-NEXT: v_mov_b32_e32 v9, s7 5567; GFX10-NEXT: v_mov_b32_e32 v8, s6 5568; GFX10-NEXT: v_mov_b32_e32 v7, s5 5569; GFX10-NEXT: v_mov_b32_e32 v6, s4 5570; GFX10-NEXT: v_mov_b32_e32 v5, s3 5571; GFX10-NEXT: v_mov_b32_e32 v4, s2 5572; GFX10-NEXT: v_mov_b32_e32 v3, s1 5573; GFX10-NEXT: v_movreld_b32_e32 v2, v0 5574; GFX10-NEXT: v_movreld_b32_e32 v3, v1 5575; GFX10-NEXT: v_readfirstlane_b32 s0, v2 5576; GFX10-NEXT: v_readfirstlane_b32 s1, v3 5577; GFX10-NEXT: v_readfirstlane_b32 s2, v4 5578; GFX10-NEXT: v_readfirstlane_b32 s3, v5 5579; GFX10-NEXT: v_readfirstlane_b32 s4, v6 5580; GFX10-NEXT: v_readfirstlane_b32 s5, v7 5581; GFX10-NEXT: v_readfirstlane_b32 s6, v8 5582; GFX10-NEXT: v_readfirstlane_b32 s7, v9 5583; GFX10-NEXT: v_readfirstlane_b32 s8, v10 5584; GFX10-NEXT: v_readfirstlane_b32 s9, v11 5585; GFX10-NEXT: v_readfirstlane_b32 s10, v12 5586; GFX10-NEXT: v_readfirstlane_b32 s11, v13 5587; GFX10-NEXT: v_readfirstlane_b32 s12, v14 5588; GFX10-NEXT: v_readfirstlane_b32 s13, v15 5589; GFX10-NEXT: ; return to shader part epilog 5590; 5591; GFX11-LABEL: dyn_insertelement_v7f64_s_v_s: 5592; GFX11: ; %bb.0: ; %entry 5593; GFX11-NEXT: s_mov_b32 s0, s2 5594; GFX11-NEXT: s_mov_b32 s1, s3 5595; GFX11-NEXT: s_mov_b32 s2, s4 5596; GFX11-NEXT: s_mov_b32 s3, s5 5597; GFX11-NEXT: s_mov_b32 s4, s6 5598; GFX11-NEXT: s_mov_b32 s5, s7 5599; GFX11-NEXT: s_mov_b32 s6, s8 5600; GFX11-NEXT: s_mov_b32 s7, s9 5601; GFX11-NEXT: s_mov_b32 s8, s10 5602; GFX11-NEXT: s_mov_b32 s9, s11 5603; GFX11-NEXT: s_mov_b32 s10, s12 5604; GFX11-NEXT: s_mov_b32 s11, s13 5605; GFX11-NEXT: s_mov_b32 s12, s14 5606; GFX11-NEXT: s_mov_b32 s13, s15 5607; GFX11-NEXT: v_dual_mov_b32 v17, s15 :: v_dual_mov_b32 v16, s14 5608; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 5609; GFX11-NEXT: s_lshl_b32 m0, s16, 1 5610; GFX11-NEXT: v_dual_mov_b32 v15, s13 :: v_dual_mov_b32 v14, s12 5611; GFX11-NEXT: v_dual_mov_b32 v13, s11 :: v_dual_mov_b32 v12, s10 5612; GFX11-NEXT: v_dual_mov_b32 v11, s9 :: v_dual_mov_b32 v10, s8 5613; GFX11-NEXT: v_dual_mov_b32 v9, s7 :: v_dual_mov_b32 v8, s6 5614; GFX11-NEXT: v_dual_mov_b32 v7, s5 :: v_dual_mov_b32 v6, s4 5615; GFX11-NEXT: v_dual_mov_b32 v5, s3 :: v_dual_mov_b32 v4, s2 5616; GFX11-NEXT: v_movreld_b32_e32 v2, v0 5617; GFX11-NEXT: v_movreld_b32_e32 v3, v1 5618; GFX11-NEXT: v_readfirstlane_b32 s0, v2 5619; GFX11-NEXT: v_readfirstlane_b32 s1, v3 5620; GFX11-NEXT: v_readfirstlane_b32 s2, v4 5621; GFX11-NEXT: v_readfirstlane_b32 s3, v5 5622; GFX11-NEXT: v_readfirstlane_b32 s4, v6 5623; GFX11-NEXT: v_readfirstlane_b32 s5, v7 5624; GFX11-NEXT: v_readfirstlane_b32 s6, v8 5625; GFX11-NEXT: v_readfirstlane_b32 s7, v9 5626; GFX11-NEXT: v_readfirstlane_b32 s8, v10 5627; GFX11-NEXT: v_readfirstlane_b32 s9, v11 5628; GFX11-NEXT: v_readfirstlane_b32 s10, v12 5629; GFX11-NEXT: v_readfirstlane_b32 s11, v13 5630; GFX11-NEXT: v_readfirstlane_b32 s12, v14 5631; GFX11-NEXT: v_readfirstlane_b32 s13, v15 5632; GFX11-NEXT: ; return to shader part epilog 5633entry: 5634 %insert = insertelement <7 x double> %vec, double %val, i32 %idx 5635 ret <7 x double> %insert 5636} 5637 5638define amdgpu_ps <7 x double> @dyn_insertelement_v7f64_s_v_v(<7 x double> inreg %vec, double %val, i32 %idx) { 5639; GPRIDX-LABEL: dyn_insertelement_v7f64_s_v_v: 5640; GPRIDX: ; %bb.0: ; %entry 5641; GPRIDX-NEXT: s_mov_b32 s0, s2 5642; GPRIDX-NEXT: s_mov_b32 s1, s3 5643; GPRIDX-NEXT: s_mov_b32 s2, s4 5644; GPRIDX-NEXT: s_mov_b32 s3, s5 5645; GPRIDX-NEXT: s_mov_b32 s4, s6 5646; GPRIDX-NEXT: s_mov_b32 s5, s7 5647; GPRIDX-NEXT: s_mov_b32 s6, s8 5648; GPRIDX-NEXT: s_mov_b32 s7, s9 5649; GPRIDX-NEXT: s_mov_b32 s8, s10 5650; GPRIDX-NEXT: s_mov_b32 s9, s11 5651; GPRIDX-NEXT: s_mov_b32 s10, s12 5652; GPRIDX-NEXT: s_mov_b32 s11, s13 5653; GPRIDX-NEXT: s_mov_b32 s12, s14 5654; GPRIDX-NEXT: s_mov_b32 s13, s15 5655; GPRIDX-NEXT: v_mov_b32_e32 v18, s15 5656; GPRIDX-NEXT: v_mov_b32_e32 v17, s14 5657; GPRIDX-NEXT: v_mov_b32_e32 v16, s13 5658; GPRIDX-NEXT: v_mov_b32_e32 v15, s12 5659; GPRIDX-NEXT: v_mov_b32_e32 v14, s11 5660; GPRIDX-NEXT: v_mov_b32_e32 v13, s10 5661; GPRIDX-NEXT: v_mov_b32_e32 v12, s9 5662; GPRIDX-NEXT: v_mov_b32_e32 v11, s8 5663; GPRIDX-NEXT: v_mov_b32_e32 v10, s7 5664; GPRIDX-NEXT: v_mov_b32_e32 v9, s6 5665; GPRIDX-NEXT: v_mov_b32_e32 v8, s5 5666; GPRIDX-NEXT: v_mov_b32_e32 v7, s4 5667; GPRIDX-NEXT: v_mov_b32_e32 v6, s3 5668; GPRIDX-NEXT: v_mov_b32_e32 v5, s2 5669; GPRIDX-NEXT: v_mov_b32_e32 v4, s1 5670; GPRIDX-NEXT: v_mov_b32_e32 v3, s0 5671; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 5672; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[0:1], 2, v2 5673; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v2 5674; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[4:5], 4, v2 5675; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[6:7], 5, v2 5676; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[8:9], 6, v2 5677; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[10:11], 1, v2 5678; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v0, vcc 5679; GPRIDX-NEXT: v_cndmask_b32_e64 v2, v5, v0, s[10:11] 5680; GPRIDX-NEXT: v_cndmask_b32_e64 v5, v7, v0, s[0:1] 5681; GPRIDX-NEXT: v_cndmask_b32_e64 v7, v9, v0, s[2:3] 5682; GPRIDX-NEXT: v_cndmask_b32_e64 v9, v11, v0, s[4:5] 5683; GPRIDX-NEXT: v_cndmask_b32_e64 v11, v13, v0, s[6:7] 5684; GPRIDX-NEXT: v_cndmask_b32_e64 v0, v15, v0, s[8:9] 5685; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v1, vcc 5686; GPRIDX-NEXT: v_cndmask_b32_e64 v6, v6, v1, s[10:11] 5687; GPRIDX-NEXT: v_cndmask_b32_e64 v8, v8, v1, s[0:1] 5688; GPRIDX-NEXT: v_cndmask_b32_e64 v10, v10, v1, s[2:3] 5689; GPRIDX-NEXT: v_cndmask_b32_e64 v12, v12, v1, s[4:5] 5690; GPRIDX-NEXT: v_cndmask_b32_e64 v13, v14, v1, s[6:7] 5691; GPRIDX-NEXT: v_cndmask_b32_e64 v1, v16, v1, s[8:9] 5692; GPRIDX-NEXT: v_readfirstlane_b32 s0, v3 5693; GPRIDX-NEXT: v_readfirstlane_b32 s1, v4 5694; GPRIDX-NEXT: v_readfirstlane_b32 s2, v2 5695; GPRIDX-NEXT: v_readfirstlane_b32 s3, v6 5696; GPRIDX-NEXT: v_readfirstlane_b32 s4, v5 5697; GPRIDX-NEXT: v_readfirstlane_b32 s5, v8 5698; GPRIDX-NEXT: v_readfirstlane_b32 s6, v7 5699; GPRIDX-NEXT: v_readfirstlane_b32 s7, v10 5700; GPRIDX-NEXT: v_readfirstlane_b32 s8, v9 5701; GPRIDX-NEXT: v_readfirstlane_b32 s9, v12 5702; GPRIDX-NEXT: v_readfirstlane_b32 s10, v11 5703; GPRIDX-NEXT: v_readfirstlane_b32 s11, v13 5704; GPRIDX-NEXT: v_readfirstlane_b32 s12, v0 5705; GPRIDX-NEXT: v_readfirstlane_b32 s13, v1 5706; GPRIDX-NEXT: ; return to shader part epilog 5707; 5708; GFX10-LABEL: dyn_insertelement_v7f64_s_v_v: 5709; GFX10: ; %bb.0: ; %entry 5710; GFX10-NEXT: s_mov_b32 s0, s2 5711; GFX10-NEXT: s_mov_b32 s1, s3 5712; GFX10-NEXT: s_mov_b32 s2, s4 5713; GFX10-NEXT: s_mov_b32 s3, s5 5714; GFX10-NEXT: s_mov_b32 s4, s6 5715; GFX10-NEXT: s_mov_b32 s5, s7 5716; GFX10-NEXT: s_mov_b32 s6, s8 5717; GFX10-NEXT: s_mov_b32 s7, s9 5718; GFX10-NEXT: s_mov_b32 s8, s10 5719; GFX10-NEXT: s_mov_b32 s9, s11 5720; GFX10-NEXT: s_mov_b32 s10, s12 5721; GFX10-NEXT: s_mov_b32 s11, s13 5722; GFX10-NEXT: s_mov_b32 s12, s14 5723; GFX10-NEXT: s_mov_b32 s13, s15 5724; GFX10-NEXT: v_mov_b32_e32 v18, s15 5725; GFX10-NEXT: v_mov_b32_e32 v17, s14 5726; GFX10-NEXT: v_mov_b32_e32 v16, s13 5727; GFX10-NEXT: v_mov_b32_e32 v15, s12 5728; GFX10-NEXT: v_mov_b32_e32 v14, s11 5729; GFX10-NEXT: v_mov_b32_e32 v13, s10 5730; GFX10-NEXT: v_mov_b32_e32 v12, s9 5731; GFX10-NEXT: v_mov_b32_e32 v11, s8 5732; GFX10-NEXT: v_mov_b32_e32 v10, s7 5733; GFX10-NEXT: v_mov_b32_e32 v9, s6 5734; GFX10-NEXT: v_mov_b32_e32 v8, s5 5735; GFX10-NEXT: v_mov_b32_e32 v7, s4 5736; GFX10-NEXT: v_mov_b32_e32 v6, s3 5737; GFX10-NEXT: v_mov_b32_e32 v5, s2 5738; GFX10-NEXT: v_mov_b32_e32 v4, s1 5739; GFX10-NEXT: v_mov_b32_e32 v3, s0 5740; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v2 5741; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 1, v2 5742; GFX10-NEXT: v_cmp_eq_u32_e64 s1, 6, v2 5743; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v0, vcc_lo 5744; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v1, vcc_lo 5745; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v2 5746; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v0, s0 5747; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, v1, s0 5748; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 3, v2 5749; GFX10-NEXT: v_cndmask_b32_e32 v7, v7, v0, vcc_lo 5750; GFX10-NEXT: v_cndmask_b32_e32 v8, v8, v1, vcc_lo 5751; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v2 5752; GFX10-NEXT: v_cndmask_b32_e64 v9, v9, v0, s0 5753; GFX10-NEXT: v_cndmask_b32_e64 v10, v10, v1, s0 5754; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 5, v2 5755; GFX10-NEXT: v_readfirstlane_b32 s2, v5 5756; GFX10-NEXT: v_cndmask_b32_e32 v11, v11, v0, vcc_lo 5757; GFX10-NEXT: v_cndmask_b32_e32 v2, v12, v1, vcc_lo 5758; GFX10-NEXT: v_readfirstlane_b32 s3, v6 5759; GFX10-NEXT: v_cndmask_b32_e64 v12, v13, v0, s0 5760; GFX10-NEXT: v_cndmask_b32_e64 v13, v14, v1, s0 5761; GFX10-NEXT: v_cndmask_b32_e64 v0, v15, v0, s1 5762; GFX10-NEXT: v_cndmask_b32_e64 v1, v16, v1, s1 5763; GFX10-NEXT: v_readfirstlane_b32 s0, v3 5764; GFX10-NEXT: v_readfirstlane_b32 s1, v4 5765; GFX10-NEXT: v_readfirstlane_b32 s4, v7 5766; GFX10-NEXT: v_readfirstlane_b32 s5, v8 5767; GFX10-NEXT: v_readfirstlane_b32 s6, v9 5768; GFX10-NEXT: v_readfirstlane_b32 s7, v10 5769; GFX10-NEXT: v_readfirstlane_b32 s8, v11 5770; GFX10-NEXT: v_readfirstlane_b32 s9, v2 5771; GFX10-NEXT: v_readfirstlane_b32 s10, v12 5772; GFX10-NEXT: v_readfirstlane_b32 s11, v13 5773; GFX10-NEXT: v_readfirstlane_b32 s12, v0 5774; GFX10-NEXT: v_readfirstlane_b32 s13, v1 5775; GFX10-NEXT: ; return to shader part epilog 5776; 5777; GFX11-LABEL: dyn_insertelement_v7f64_s_v_v: 5778; GFX11: ; %bb.0: ; %entry 5779; GFX11-NEXT: s_mov_b32 s0, s2 5780; GFX11-NEXT: s_mov_b32 s1, s3 5781; GFX11-NEXT: s_mov_b32 s2, s4 5782; GFX11-NEXT: s_mov_b32 s3, s5 5783; GFX11-NEXT: s_mov_b32 s4, s6 5784; GFX11-NEXT: s_mov_b32 s5, s7 5785; GFX11-NEXT: s_mov_b32 s6, s8 5786; GFX11-NEXT: s_mov_b32 s7, s9 5787; GFX11-NEXT: s_mov_b32 s8, s10 5788; GFX11-NEXT: s_mov_b32 s9, s11 5789; GFX11-NEXT: s_mov_b32 s10, s12 5790; GFX11-NEXT: s_mov_b32 s11, s13 5791; GFX11-NEXT: s_mov_b32 s12, s14 5792; GFX11-NEXT: s_mov_b32 s13, s15 5793; GFX11-NEXT: v_dual_mov_b32 v18, s15 :: v_dual_mov_b32 v17, s14 5794; GFX11-NEXT: v_dual_mov_b32 v16, s13 :: v_dual_mov_b32 v15, s12 5795; GFX11-NEXT: v_dual_mov_b32 v14, s11 :: v_dual_mov_b32 v13, s10 5796; GFX11-NEXT: v_dual_mov_b32 v12, s9 :: v_dual_mov_b32 v11, s8 5797; GFX11-NEXT: v_dual_mov_b32 v10, s7 :: v_dual_mov_b32 v9, s6 5798; GFX11-NEXT: v_dual_mov_b32 v8, s5 :: v_dual_mov_b32 v7, s4 5799; GFX11-NEXT: v_dual_mov_b32 v6, s3 :: v_dual_mov_b32 v5, s2 5800; GFX11-NEXT: v_dual_mov_b32 v4, s1 :: v_dual_mov_b32 v3, s0 5801; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v2 5802; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 1, v2 5803; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 6, v2 5804; GFX11-NEXT: v_dual_cndmask_b32 v3, v3, v0 :: v_dual_cndmask_b32 v4, v4, v1 5805; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v2 5806; GFX11-NEXT: v_cndmask_b32_e64 v5, v5, v0, s0 5807; GFX11-NEXT: v_cndmask_b32_e64 v6, v6, v1, s0 5808; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 3, v2 5809; GFX11-NEXT: v_dual_cndmask_b32 v7, v7, v0 :: v_dual_cndmask_b32 v8, v8, v1 5810; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v2 5811; GFX11-NEXT: v_cndmask_b32_e64 v9, v9, v0, s0 5812; GFX11-NEXT: v_cndmask_b32_e64 v10, v10, v1, s0 5813; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 5, v2 5814; GFX11-NEXT: v_readfirstlane_b32 s2, v5 5815; GFX11-NEXT: v_dual_cndmask_b32 v11, v11, v0 :: v_dual_cndmask_b32 v2, v12, v1 5816; GFX11-NEXT: v_readfirstlane_b32 s3, v6 5817; GFX11-NEXT: v_cndmask_b32_e64 v12, v13, v0, s0 5818; GFX11-NEXT: v_cndmask_b32_e64 v13, v14, v1, s0 5819; GFX11-NEXT: v_cndmask_b32_e64 v0, v15, v0, s1 5820; GFX11-NEXT: v_cndmask_b32_e64 v1, v16, v1, s1 5821; GFX11-NEXT: v_readfirstlane_b32 s0, v3 5822; GFX11-NEXT: v_readfirstlane_b32 s1, v4 5823; GFX11-NEXT: v_readfirstlane_b32 s4, v7 5824; GFX11-NEXT: v_readfirstlane_b32 s5, v8 5825; GFX11-NEXT: v_readfirstlane_b32 s6, v9 5826; GFX11-NEXT: v_readfirstlane_b32 s7, v10 5827; GFX11-NEXT: v_readfirstlane_b32 s8, v11 5828; GFX11-NEXT: v_readfirstlane_b32 s9, v2 5829; GFX11-NEXT: v_readfirstlane_b32 s10, v12 5830; GFX11-NEXT: v_readfirstlane_b32 s11, v13 5831; GFX11-NEXT: v_readfirstlane_b32 s12, v0 5832; GFX11-NEXT: v_readfirstlane_b32 s13, v1 5833; GFX11-NEXT: ; return to shader part epilog 5834entry: 5835 %insert = insertelement <7 x double> %vec, double %val, i32 %idx 5836 ret <7 x double> %insert 5837} 5838 5839define amdgpu_ps <7 x double> @dyn_insertelement_v7f64_v_v_s(<7 x double> %vec, double %val, i32 inreg %idx) { 5840; GPRIDX-LABEL: dyn_insertelement_v7f64_v_v_s: 5841; GPRIDX: ; %bb.0: ; %entry 5842; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1 5843; GPRIDX-NEXT: v_mov_b32_e32 v16, v15 5844; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST) 5845; GPRIDX-NEXT: v_mov_b32_e32 v0, v14 5846; GPRIDX-NEXT: v_mov_b32_e32 v1, v16 5847; GPRIDX-NEXT: s_set_gpr_idx_off 5848; GPRIDX-NEXT: v_readfirstlane_b32 s0, v0 5849; GPRIDX-NEXT: v_readfirstlane_b32 s1, v1 5850; GPRIDX-NEXT: v_readfirstlane_b32 s2, v2 5851; GPRIDX-NEXT: v_readfirstlane_b32 s3, v3 5852; GPRIDX-NEXT: v_readfirstlane_b32 s4, v4 5853; GPRIDX-NEXT: v_readfirstlane_b32 s5, v5 5854; GPRIDX-NEXT: v_readfirstlane_b32 s6, v6 5855; GPRIDX-NEXT: v_readfirstlane_b32 s7, v7 5856; GPRIDX-NEXT: v_readfirstlane_b32 s8, v8 5857; GPRIDX-NEXT: v_readfirstlane_b32 s9, v9 5858; GPRIDX-NEXT: v_readfirstlane_b32 s10, v10 5859; GPRIDX-NEXT: v_readfirstlane_b32 s11, v11 5860; GPRIDX-NEXT: v_readfirstlane_b32 s12, v12 5861; GPRIDX-NEXT: v_readfirstlane_b32 s13, v13 5862; GPRIDX-NEXT: ; return to shader part epilog 5863; 5864; GFX10PLUS-LABEL: dyn_insertelement_v7f64_v_v_s: 5865; GFX10PLUS: ; %bb.0: ; %entry 5866; GFX10PLUS-NEXT: v_mov_b32_e32 v16, v15 5867; GFX10PLUS-NEXT: s_lshl_b32 m0, s2, 1 5868; GFX10PLUS-NEXT: v_movreld_b32_e32 v0, v14 5869; GFX10PLUS-NEXT: v_movreld_b32_e32 v1, v16 5870; GFX10PLUS-NEXT: v_readfirstlane_b32 s0, v0 5871; GFX10PLUS-NEXT: v_readfirstlane_b32 s1, v1 5872; GFX10PLUS-NEXT: v_readfirstlane_b32 s2, v2 5873; GFX10PLUS-NEXT: v_readfirstlane_b32 s3, v3 5874; GFX10PLUS-NEXT: v_readfirstlane_b32 s4, v4 5875; GFX10PLUS-NEXT: v_readfirstlane_b32 s5, v5 5876; GFX10PLUS-NEXT: v_readfirstlane_b32 s6, v6 5877; GFX10PLUS-NEXT: v_readfirstlane_b32 s7, v7 5878; GFX10PLUS-NEXT: v_readfirstlane_b32 s8, v8 5879; GFX10PLUS-NEXT: v_readfirstlane_b32 s9, v9 5880; GFX10PLUS-NEXT: v_readfirstlane_b32 s10, v10 5881; GFX10PLUS-NEXT: v_readfirstlane_b32 s11, v11 5882; GFX10PLUS-NEXT: v_readfirstlane_b32 s12, v12 5883; GFX10PLUS-NEXT: v_readfirstlane_b32 s13, v13 5884; GFX10PLUS-NEXT: ; return to shader part epilog 5885entry: 5886 %insert = insertelement <7 x double> %vec, double %val, i32 %idx 5887 ret <7 x double> %insert 5888} 5889 5890define amdgpu_ps <7 x double> @dyn_insertelement_v7f64_v_v_v(<7 x double> %vec, double %val, i32 %idx) { 5891; GPRIDX-LABEL: dyn_insertelement_v7f64_v_v_v: 5892; GPRIDX: ; %bb.0: ; %entry 5893; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v16 5894; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc 5895; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc 5896; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v16 5897; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v14, vcc 5898; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v15, vcc 5899; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v16 5900; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v14, vcc 5901; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v15, vcc 5902; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v16 5903; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v14, vcc 5904; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v7, v15, vcc 5905; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v16 5906; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v8, v14, vcc 5907; GPRIDX-NEXT: v_cndmask_b32_e32 v9, v9, v15, vcc 5908; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v16 5909; GPRIDX-NEXT: v_cndmask_b32_e32 v10, v10, v14, vcc 5910; GPRIDX-NEXT: v_cndmask_b32_e32 v11, v11, v15, vcc 5911; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v16 5912; GPRIDX-NEXT: v_cndmask_b32_e32 v12, v12, v14, vcc 5913; GPRIDX-NEXT: v_cndmask_b32_e32 v13, v13, v15, vcc 5914; GPRIDX-NEXT: v_readfirstlane_b32 s0, v0 5915; GPRIDX-NEXT: v_readfirstlane_b32 s1, v1 5916; GPRIDX-NEXT: v_readfirstlane_b32 s2, v2 5917; GPRIDX-NEXT: v_readfirstlane_b32 s3, v3 5918; GPRIDX-NEXT: v_readfirstlane_b32 s4, v4 5919; GPRIDX-NEXT: v_readfirstlane_b32 s5, v5 5920; GPRIDX-NEXT: v_readfirstlane_b32 s6, v6 5921; GPRIDX-NEXT: v_readfirstlane_b32 s7, v7 5922; GPRIDX-NEXT: v_readfirstlane_b32 s8, v8 5923; GPRIDX-NEXT: v_readfirstlane_b32 s9, v9 5924; GPRIDX-NEXT: v_readfirstlane_b32 s10, v10 5925; GPRIDX-NEXT: v_readfirstlane_b32 s11, v11 5926; GPRIDX-NEXT: v_readfirstlane_b32 s12, v12 5927; GPRIDX-NEXT: v_readfirstlane_b32 s13, v13 5928; GPRIDX-NEXT: ; return to shader part epilog 5929; 5930; GFX10-LABEL: dyn_insertelement_v7f64_v_v_v: 5931; GFX10: ; %bb.0: ; %entry 5932; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v16 5933; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 1, v16 5934; GFX10-NEXT: v_cmp_eq_u32_e64 s1, 6, v16 5935; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo 5936; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc_lo 5937; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v16 5938; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v14, s0 5939; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v15, s0 5940; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 3, v16 5941; GFX10-NEXT: v_cndmask_b32_e64 v12, v12, v14, s1 5942; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v14, vcc_lo 5943; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v15, vcc_lo 5944; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v16 5945; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, v14, s0 5946; GFX10-NEXT: v_cndmask_b32_e64 v7, v7, v15, s0 5947; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 5, v16 5948; GFX10-NEXT: v_cndmask_b32_e64 v13, v13, v15, s1 5949; GFX10-NEXT: v_cndmask_b32_e32 v8, v8, v14, vcc_lo 5950; GFX10-NEXT: v_cndmask_b32_e32 v9, v9, v15, vcc_lo 5951; GFX10-NEXT: v_readfirstlane_b32 s1, v1 5952; GFX10-NEXT: v_cndmask_b32_e64 v10, v10, v14, s0 5953; GFX10-NEXT: v_cndmask_b32_e64 v11, v11, v15, s0 5954; GFX10-NEXT: v_readfirstlane_b32 s0, v0 5955; GFX10-NEXT: v_readfirstlane_b32 s2, v2 5956; GFX10-NEXT: v_readfirstlane_b32 s3, v3 5957; GFX10-NEXT: v_readfirstlane_b32 s4, v4 5958; GFX10-NEXT: v_readfirstlane_b32 s5, v5 5959; GFX10-NEXT: v_readfirstlane_b32 s6, v6 5960; GFX10-NEXT: v_readfirstlane_b32 s7, v7 5961; GFX10-NEXT: v_readfirstlane_b32 s8, v8 5962; GFX10-NEXT: v_readfirstlane_b32 s9, v9 5963; GFX10-NEXT: v_readfirstlane_b32 s10, v10 5964; GFX10-NEXT: v_readfirstlane_b32 s11, v11 5965; GFX10-NEXT: v_readfirstlane_b32 s12, v12 5966; GFX10-NEXT: v_readfirstlane_b32 s13, v13 5967; GFX10-NEXT: ; return to shader part epilog 5968; 5969; GFX11-LABEL: dyn_insertelement_v7f64_v_v_v: 5970; GFX11: ; %bb.0: ; %entry 5971; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v16 5972; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 1, v16 5973; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 6, v16 5974; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v14 :: v_dual_cndmask_b32 v1, v1, v15 5975; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v16 5976; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v14, s0 5977; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v15, s0 5978; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 3, v16 5979; GFX11-NEXT: v_cndmask_b32_e64 v12, v12, v14, s1 5980; GFX11-NEXT: v_dual_cndmask_b32 v4, v4, v14 :: v_dual_cndmask_b32 v5, v5, v15 5981; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v16 5982; GFX11-NEXT: v_cndmask_b32_e64 v6, v6, v14, s0 5983; GFX11-NEXT: v_cndmask_b32_e64 v7, v7, v15, s0 5984; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 5, v16 5985; GFX11-NEXT: v_cndmask_b32_e64 v13, v13, v15, s1 5986; GFX11-NEXT: v_dual_cndmask_b32 v8, v8, v14 :: v_dual_cndmask_b32 v9, v9, v15 5987; GFX11-NEXT: v_readfirstlane_b32 s1, v1 5988; GFX11-NEXT: v_cndmask_b32_e64 v10, v10, v14, s0 5989; GFX11-NEXT: v_cndmask_b32_e64 v11, v11, v15, s0 5990; GFX11-NEXT: v_readfirstlane_b32 s0, v0 5991; GFX11-NEXT: v_readfirstlane_b32 s2, v2 5992; GFX11-NEXT: v_readfirstlane_b32 s3, v3 5993; GFX11-NEXT: v_readfirstlane_b32 s4, v4 5994; GFX11-NEXT: v_readfirstlane_b32 s5, v5 5995; GFX11-NEXT: v_readfirstlane_b32 s6, v6 5996; GFX11-NEXT: v_readfirstlane_b32 s7, v7 5997; GFX11-NEXT: v_readfirstlane_b32 s8, v8 5998; GFX11-NEXT: v_readfirstlane_b32 s9, v9 5999; GFX11-NEXT: v_readfirstlane_b32 s10, v10 6000; GFX11-NEXT: v_readfirstlane_b32 s11, v11 6001; GFX11-NEXT: v_readfirstlane_b32 s12, v12 6002; GFX11-NEXT: v_readfirstlane_b32 s13, v13 6003; GFX11-NEXT: ; return to shader part epilog 6004entry: 6005 %insert = insertelement <7 x double> %vec, double %val, i32 %idx 6006 ret <7 x double> %insert 6007} 6008 6009define amdgpu_ps <5 x double> @dyn_insertelement_v5f64_s_s_s(<5 x double> inreg %vec, double inreg %val, i32 inreg %idx) { 6010; GPRIDX-LABEL: dyn_insertelement_v5f64_s_s_s: 6011; GPRIDX: ; %bb.0: ; %entry 6012; GPRIDX-NEXT: s_cmp_eq_u32 s14, 0 6013; GPRIDX-NEXT: s_cselect_b64 s[0:1], s[12:13], s[2:3] 6014; GPRIDX-NEXT: s_cmp_eq_u32 s14, 1 6015; GPRIDX-NEXT: s_cselect_b64 s[2:3], s[12:13], s[4:5] 6016; GPRIDX-NEXT: s_cmp_eq_u32 s14, 2 6017; GPRIDX-NEXT: s_cselect_b64 s[4:5], s[12:13], s[6:7] 6018; GPRIDX-NEXT: s_cmp_eq_u32 s14, 3 6019; GPRIDX-NEXT: s_cselect_b64 s[6:7], s[12:13], s[8:9] 6020; GPRIDX-NEXT: s_cmp_eq_u32 s14, 4 6021; GPRIDX-NEXT: s_cselect_b64 s[8:9], s[12:13], s[10:11] 6022; GPRIDX-NEXT: ; return to shader part epilog 6023; 6024; GFX10PLUS-LABEL: dyn_insertelement_v5f64_s_s_s: 6025; GFX10PLUS: ; %bb.0: ; %entry 6026; GFX10PLUS-NEXT: s_cmp_eq_u32 s14, 0 6027; GFX10PLUS-NEXT: s_cselect_b64 s[0:1], s[12:13], s[2:3] 6028; GFX10PLUS-NEXT: s_cmp_eq_u32 s14, 1 6029; GFX10PLUS-NEXT: s_cselect_b64 s[2:3], s[12:13], s[4:5] 6030; GFX10PLUS-NEXT: s_cmp_eq_u32 s14, 2 6031; GFX10PLUS-NEXT: s_cselect_b64 s[4:5], s[12:13], s[6:7] 6032; GFX10PLUS-NEXT: s_cmp_eq_u32 s14, 3 6033; GFX10PLUS-NEXT: s_cselect_b64 s[6:7], s[12:13], s[8:9] 6034; GFX10PLUS-NEXT: s_cmp_eq_u32 s14, 4 6035; GFX10PLUS-NEXT: s_cselect_b64 s[8:9], s[12:13], s[10:11] 6036; GFX10PLUS-NEXT: ; return to shader part epilog 6037entry: 6038 %insert = insertelement <5 x double> %vec, double %val, i32 %idx 6039 ret <5 x double> %insert 6040} 6041 6042define amdgpu_ps <5 x double> @dyn_insertelement_v5f64_s_v_s(<5 x double> inreg %vec, double %val, i32 inreg %idx) { 6043; GPRIDX-LABEL: dyn_insertelement_v5f64_s_v_s: 6044; GPRIDX: ; %bb.0: ; %entry 6045; GPRIDX-NEXT: s_mov_b32 s1, s3 6046; GPRIDX-NEXT: s_mov_b32 s3, s5 6047; GPRIDX-NEXT: s_mov_b32 s5, s7 6048; GPRIDX-NEXT: s_mov_b32 s7, s9 6049; GPRIDX-NEXT: s_mov_b32 s9, s11 6050; GPRIDX-NEXT: s_mov_b32 s0, s2 6051; GPRIDX-NEXT: s_mov_b32 s2, s4 6052; GPRIDX-NEXT: s_mov_b32 s4, s6 6053; GPRIDX-NEXT: s_mov_b32 s6, s8 6054; GPRIDX-NEXT: s_mov_b32 s8, s10 6055; GPRIDX-NEXT: v_mov_b32_e32 v11, s9 6056; GPRIDX-NEXT: v_mov_b32_e32 v3, s1 6057; GPRIDX-NEXT: v_mov_b32_e32 v2, s0 6058; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s12, 0 6059; GPRIDX-NEXT: v_mov_b32_e32 v5, s3 6060; GPRIDX-NEXT: v_mov_b32_e32 v4, s2 6061; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc 6062; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc 6063; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s12, 1 6064; GPRIDX-NEXT: v_mov_b32_e32 v7, s5 6065; GPRIDX-NEXT: v_mov_b32_e32 v6, s4 6066; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc 6067; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v1, vcc 6068; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s12, 2 6069; GPRIDX-NEXT: v_mov_b32_e32 v9, s7 6070; GPRIDX-NEXT: v_mov_b32_e32 v8, s6 6071; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v0, vcc 6072; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v7, v1, vcc 6073; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s12, 3 6074; GPRIDX-NEXT: v_mov_b32_e32 v10, s8 6075; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v8, v0, vcc 6076; GPRIDX-NEXT: v_cndmask_b32_e32 v9, v9, v1, vcc 6077; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s12, 4 6078; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v10, v0, vcc 6079; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v11, v1, vcc 6080; GPRIDX-NEXT: v_readfirstlane_b32 s0, v2 6081; GPRIDX-NEXT: v_readfirstlane_b32 s1, v3 6082; GPRIDX-NEXT: v_readfirstlane_b32 s2, v4 6083; GPRIDX-NEXT: v_readfirstlane_b32 s3, v5 6084; GPRIDX-NEXT: v_readfirstlane_b32 s4, v6 6085; GPRIDX-NEXT: v_readfirstlane_b32 s5, v7 6086; GPRIDX-NEXT: v_readfirstlane_b32 s6, v8 6087; GPRIDX-NEXT: v_readfirstlane_b32 s7, v9 6088; GPRIDX-NEXT: v_readfirstlane_b32 s8, v0 6089; GPRIDX-NEXT: v_readfirstlane_b32 s9, v1 6090; GPRIDX-NEXT: ; return to shader part epilog 6091; 6092; GFX10-LABEL: dyn_insertelement_v5f64_s_v_s: 6093; GFX10: ; %bb.0: ; %entry 6094; GFX10-NEXT: s_mov_b32 s1, s3 6095; GFX10-NEXT: s_mov_b32 s3, s5 6096; GFX10-NEXT: s_mov_b32 s5, s7 6097; GFX10-NEXT: s_mov_b32 s7, s9 6098; GFX10-NEXT: s_mov_b32 s9, s11 6099; GFX10-NEXT: s_mov_b32 s0, s2 6100; GFX10-NEXT: s_mov_b32 s2, s4 6101; GFX10-NEXT: s_mov_b32 s4, s6 6102; GFX10-NEXT: s_mov_b32 s6, s8 6103; GFX10-NEXT: s_mov_b32 s8, s10 6104; GFX10-NEXT: v_mov_b32_e32 v11, s9 6105; GFX10-NEXT: v_mov_b32_e32 v10, s8 6106; GFX10-NEXT: v_mov_b32_e32 v9, s7 6107; GFX10-NEXT: v_mov_b32_e32 v8, s6 6108; GFX10-NEXT: v_mov_b32_e32 v7, s5 6109; GFX10-NEXT: v_mov_b32_e32 v6, s4 6110; GFX10-NEXT: v_mov_b32_e32 v5, s3 6111; GFX10-NEXT: v_mov_b32_e32 v4, s2 6112; GFX10-NEXT: v_mov_b32_e32 v3, s1 6113; GFX10-NEXT: v_mov_b32_e32 v2, s0 6114; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s12, 0 6115; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s12, 1 6116; GFX10-NEXT: v_cmp_eq_u32_e64 s1, s12, 4 6117; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc_lo 6118; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc_lo 6119; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, v0, s0 6120; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s12, 2 6121; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v1, s0 6122; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s12, 3 6123; GFX10-NEXT: v_readfirstlane_b32 s2, v4 6124; GFX10-NEXT: v_cndmask_b32_e32 v6, v6, v0, vcc_lo 6125; GFX10-NEXT: v_cndmask_b32_e32 v7, v7, v1, vcc_lo 6126; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, v0, s0 6127; GFX10-NEXT: v_cndmask_b32_e64 v9, v9, v1, s0 6128; GFX10-NEXT: v_cndmask_b32_e64 v0, v10, v0, s1 6129; GFX10-NEXT: v_cndmask_b32_e64 v1, v11, v1, s1 6130; GFX10-NEXT: v_readfirstlane_b32 s0, v2 6131; GFX10-NEXT: v_readfirstlane_b32 s1, v3 6132; GFX10-NEXT: v_readfirstlane_b32 s3, v5 6133; GFX10-NEXT: v_readfirstlane_b32 s4, v6 6134; GFX10-NEXT: v_readfirstlane_b32 s5, v7 6135; GFX10-NEXT: v_readfirstlane_b32 s6, v8 6136; GFX10-NEXT: v_readfirstlane_b32 s7, v9 6137; GFX10-NEXT: v_readfirstlane_b32 s8, v0 6138; GFX10-NEXT: v_readfirstlane_b32 s9, v1 6139; GFX10-NEXT: ; return to shader part epilog 6140; 6141; GFX11-LABEL: dyn_insertelement_v5f64_s_v_s: 6142; GFX11: ; %bb.0: ; %entry 6143; GFX11-NEXT: s_mov_b32 s1, s3 6144; GFX11-NEXT: s_mov_b32 s3, s5 6145; GFX11-NEXT: s_mov_b32 s5, s7 6146; GFX11-NEXT: s_mov_b32 s7, s9 6147; GFX11-NEXT: s_mov_b32 s9, s11 6148; GFX11-NEXT: s_mov_b32 s0, s2 6149; GFX11-NEXT: s_mov_b32 s2, s4 6150; GFX11-NEXT: s_mov_b32 s4, s6 6151; GFX11-NEXT: s_mov_b32 s6, s8 6152; GFX11-NEXT: s_mov_b32 s8, s10 6153; GFX11-NEXT: v_dual_mov_b32 v11, s9 :: v_dual_mov_b32 v10, s8 6154; GFX11-NEXT: v_dual_mov_b32 v9, s7 :: v_dual_mov_b32 v8, s6 6155; GFX11-NEXT: v_dual_mov_b32 v7, s5 :: v_dual_mov_b32 v6, s4 6156; GFX11-NEXT: v_dual_mov_b32 v5, s3 :: v_dual_mov_b32 v4, s2 6157; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 6158; GFX11-NEXT: v_cmp_eq_u32_e64 vcc_lo, s12, 0 6159; GFX11-NEXT: v_cmp_eq_u32_e64 s0, s12, 1 6160; GFX11-NEXT: v_cmp_eq_u32_e64 s1, s12, 4 6161; GFX11-NEXT: v_dual_cndmask_b32 v2, v2, v0 :: v_dual_cndmask_b32 v3, v3, v1 6162; GFX11-NEXT: v_cndmask_b32_e64 v4, v4, v0, s0 6163; GFX11-NEXT: v_cmp_eq_u32_e64 vcc_lo, s12, 2 6164; GFX11-NEXT: v_cndmask_b32_e64 v5, v5, v1, s0 6165; GFX11-NEXT: v_cmp_eq_u32_e64 s0, s12, 3 6166; GFX11-NEXT: v_readfirstlane_b32 s2, v4 6167; GFX11-NEXT: v_dual_cndmask_b32 v6, v6, v0 :: v_dual_cndmask_b32 v7, v7, v1 6168; GFX11-NEXT: v_cndmask_b32_e64 v8, v8, v0, s0 6169; GFX11-NEXT: v_cndmask_b32_e64 v9, v9, v1, s0 6170; GFX11-NEXT: v_cndmask_b32_e64 v0, v10, v0, s1 6171; GFX11-NEXT: v_cndmask_b32_e64 v1, v11, v1, s1 6172; GFX11-NEXT: v_readfirstlane_b32 s0, v2 6173; GFX11-NEXT: v_readfirstlane_b32 s1, v3 6174; GFX11-NEXT: v_readfirstlane_b32 s3, v5 6175; GFX11-NEXT: v_readfirstlane_b32 s4, v6 6176; GFX11-NEXT: v_readfirstlane_b32 s5, v7 6177; GFX11-NEXT: v_readfirstlane_b32 s6, v8 6178; GFX11-NEXT: v_readfirstlane_b32 s7, v9 6179; GFX11-NEXT: v_readfirstlane_b32 s8, v0 6180; GFX11-NEXT: v_readfirstlane_b32 s9, v1 6181; GFX11-NEXT: ; return to shader part epilog 6182entry: 6183 %insert = insertelement <5 x double> %vec, double %val, i32 %idx 6184 ret <5 x double> %insert 6185} 6186 6187define amdgpu_ps <5 x double> @dyn_insertelement_v5f64_s_v_v(<5 x double> inreg %vec, double %val, i32 %idx) { 6188; GPRIDX-LABEL: dyn_insertelement_v5f64_s_v_v: 6189; GPRIDX: ; %bb.0: ; %entry 6190; GPRIDX-NEXT: s_mov_b32 s1, s3 6191; GPRIDX-NEXT: s_mov_b32 s3, s5 6192; GPRIDX-NEXT: s_mov_b32 s5, s7 6193; GPRIDX-NEXT: s_mov_b32 s7, s9 6194; GPRIDX-NEXT: s_mov_b32 s9, s11 6195; GPRIDX-NEXT: s_mov_b32 s0, s2 6196; GPRIDX-NEXT: s_mov_b32 s2, s4 6197; GPRIDX-NEXT: s_mov_b32 s4, s6 6198; GPRIDX-NEXT: s_mov_b32 s6, s8 6199; GPRIDX-NEXT: s_mov_b32 s8, s10 6200; GPRIDX-NEXT: v_mov_b32_e32 v12, s9 6201; GPRIDX-NEXT: v_mov_b32_e32 v4, s1 6202; GPRIDX-NEXT: v_mov_b32_e32 v3, s0 6203; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 6204; GPRIDX-NEXT: v_mov_b32_e32 v6, s3 6205; GPRIDX-NEXT: v_mov_b32_e32 v5, s2 6206; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v0, vcc 6207; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v1, vcc 6208; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 6209; GPRIDX-NEXT: v_mov_b32_e32 v8, s5 6210; GPRIDX-NEXT: v_mov_b32_e32 v7, s4 6211; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v0, vcc 6212; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v1, vcc 6213; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v2 6214; GPRIDX-NEXT: v_mov_b32_e32 v11, s8 6215; GPRIDX-NEXT: v_mov_b32_e32 v10, s7 6216; GPRIDX-NEXT: v_mov_b32_e32 v9, s6 6217; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v7, v0, vcc 6218; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v8, v1, vcc 6219; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v2 6220; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[0:1], 4, v2 6221; GPRIDX-NEXT: v_cndmask_b32_e32 v9, v9, v0, vcc 6222; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v10, v1, vcc 6223; GPRIDX-NEXT: v_cndmask_b32_e64 v0, v11, v0, s[0:1] 6224; GPRIDX-NEXT: v_cndmask_b32_e64 v1, v12, v1, s[0:1] 6225; GPRIDX-NEXT: v_readfirstlane_b32 s0, v3 6226; GPRIDX-NEXT: v_readfirstlane_b32 s1, v4 6227; GPRIDX-NEXT: v_readfirstlane_b32 s2, v5 6228; GPRIDX-NEXT: v_readfirstlane_b32 s3, v6 6229; GPRIDX-NEXT: v_readfirstlane_b32 s4, v7 6230; GPRIDX-NEXT: v_readfirstlane_b32 s5, v8 6231; GPRIDX-NEXT: v_readfirstlane_b32 s6, v9 6232; GPRIDX-NEXT: v_readfirstlane_b32 s7, v2 6233; GPRIDX-NEXT: v_readfirstlane_b32 s8, v0 6234; GPRIDX-NEXT: v_readfirstlane_b32 s9, v1 6235; GPRIDX-NEXT: ; return to shader part epilog 6236; 6237; GFX10-LABEL: dyn_insertelement_v5f64_s_v_v: 6238; GFX10: ; %bb.0: ; %entry 6239; GFX10-NEXT: s_mov_b32 s1, s3 6240; GFX10-NEXT: s_mov_b32 s3, s5 6241; GFX10-NEXT: s_mov_b32 s5, s7 6242; GFX10-NEXT: s_mov_b32 s7, s9 6243; GFX10-NEXT: s_mov_b32 s9, s11 6244; GFX10-NEXT: s_mov_b32 s0, s2 6245; GFX10-NEXT: s_mov_b32 s2, s4 6246; GFX10-NEXT: s_mov_b32 s4, s6 6247; GFX10-NEXT: s_mov_b32 s6, s8 6248; GFX10-NEXT: s_mov_b32 s8, s10 6249; GFX10-NEXT: v_mov_b32_e32 v12, s9 6250; GFX10-NEXT: v_mov_b32_e32 v11, s8 6251; GFX10-NEXT: v_mov_b32_e32 v10, s7 6252; GFX10-NEXT: v_mov_b32_e32 v9, s6 6253; GFX10-NEXT: v_mov_b32_e32 v8, s5 6254; GFX10-NEXT: v_mov_b32_e32 v7, s4 6255; GFX10-NEXT: v_mov_b32_e32 v6, s3 6256; GFX10-NEXT: v_mov_b32_e32 v5, s2 6257; GFX10-NEXT: v_mov_b32_e32 v4, s1 6258; GFX10-NEXT: v_mov_b32_e32 v3, s0 6259; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v2 6260; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 1, v2 6261; GFX10-NEXT: v_cmp_eq_u32_e64 s1, 4, v2 6262; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v0, vcc_lo 6263; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v1, vcc_lo 6264; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v0, s0 6265; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v2 6266; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, v1, s0 6267; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 3, v2 6268; GFX10-NEXT: v_readfirstlane_b32 s2, v5 6269; GFX10-NEXT: v_cndmask_b32_e32 v7, v7, v0, vcc_lo 6270; GFX10-NEXT: v_cndmask_b32_e32 v2, v8, v1, vcc_lo 6271; GFX10-NEXT: v_cndmask_b32_e64 v8, v9, v0, s0 6272; GFX10-NEXT: v_cndmask_b32_e64 v9, v10, v1, s0 6273; GFX10-NEXT: v_cndmask_b32_e64 v0, v11, v0, s1 6274; GFX10-NEXT: v_cndmask_b32_e64 v1, v12, v1, s1 6275; GFX10-NEXT: v_readfirstlane_b32 s0, v3 6276; GFX10-NEXT: v_readfirstlane_b32 s1, v4 6277; GFX10-NEXT: v_readfirstlane_b32 s3, v6 6278; GFX10-NEXT: v_readfirstlane_b32 s4, v7 6279; GFX10-NEXT: v_readfirstlane_b32 s5, v2 6280; GFX10-NEXT: v_readfirstlane_b32 s6, v8 6281; GFX10-NEXT: v_readfirstlane_b32 s7, v9 6282; GFX10-NEXT: v_readfirstlane_b32 s8, v0 6283; GFX10-NEXT: v_readfirstlane_b32 s9, v1 6284; GFX10-NEXT: ; return to shader part epilog 6285; 6286; GFX11-LABEL: dyn_insertelement_v5f64_s_v_v: 6287; GFX11: ; %bb.0: ; %entry 6288; GFX11-NEXT: s_mov_b32 s1, s3 6289; GFX11-NEXT: s_mov_b32 s3, s5 6290; GFX11-NEXT: s_mov_b32 s5, s7 6291; GFX11-NEXT: s_mov_b32 s7, s9 6292; GFX11-NEXT: s_mov_b32 s9, s11 6293; GFX11-NEXT: s_mov_b32 s0, s2 6294; GFX11-NEXT: s_mov_b32 s2, s4 6295; GFX11-NEXT: s_mov_b32 s4, s6 6296; GFX11-NEXT: s_mov_b32 s6, s8 6297; GFX11-NEXT: s_mov_b32 s8, s10 6298; GFX11-NEXT: v_dual_mov_b32 v12, s9 :: v_dual_mov_b32 v11, s8 6299; GFX11-NEXT: v_dual_mov_b32 v10, s7 :: v_dual_mov_b32 v9, s6 6300; GFX11-NEXT: v_dual_mov_b32 v8, s5 :: v_dual_mov_b32 v7, s4 6301; GFX11-NEXT: v_dual_mov_b32 v6, s3 :: v_dual_mov_b32 v5, s2 6302; GFX11-NEXT: v_dual_mov_b32 v4, s1 :: v_dual_mov_b32 v3, s0 6303; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v2 6304; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 1, v2 6305; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 4, v2 6306; GFX11-NEXT: v_dual_cndmask_b32 v3, v3, v0 :: v_dual_cndmask_b32 v4, v4, v1 6307; GFX11-NEXT: v_cndmask_b32_e64 v5, v5, v0, s0 6308; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v2 6309; GFX11-NEXT: v_cndmask_b32_e64 v6, v6, v1, s0 6310; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 3, v2 6311; GFX11-NEXT: v_readfirstlane_b32 s2, v5 6312; GFX11-NEXT: v_dual_cndmask_b32 v7, v7, v0 :: v_dual_cndmask_b32 v2, v8, v1 6313; GFX11-NEXT: v_cndmask_b32_e64 v8, v9, v0, s0 6314; GFX11-NEXT: v_cndmask_b32_e64 v9, v10, v1, s0 6315; GFX11-NEXT: v_cndmask_b32_e64 v0, v11, v0, s1 6316; GFX11-NEXT: v_cndmask_b32_e64 v1, v12, v1, s1 6317; GFX11-NEXT: v_readfirstlane_b32 s0, v3 6318; GFX11-NEXT: v_readfirstlane_b32 s1, v4 6319; GFX11-NEXT: v_readfirstlane_b32 s3, v6 6320; GFX11-NEXT: v_readfirstlane_b32 s4, v7 6321; GFX11-NEXT: v_readfirstlane_b32 s5, v2 6322; GFX11-NEXT: v_readfirstlane_b32 s6, v8 6323; GFX11-NEXT: v_readfirstlane_b32 s7, v9 6324; GFX11-NEXT: v_readfirstlane_b32 s8, v0 6325; GFX11-NEXT: v_readfirstlane_b32 s9, v1 6326; GFX11-NEXT: ; return to shader part epilog 6327entry: 6328 %insert = insertelement <5 x double> %vec, double %val, i32 %idx 6329 ret <5 x double> %insert 6330} 6331 6332define amdgpu_ps <5 x double> @dyn_insertelement_v5f64_v_v_s(<5 x double> %vec, double %val, i32 inreg %idx) { 6333; GPRIDX-LABEL: dyn_insertelement_v5f64_v_v_s: 6334; GPRIDX: ; %bb.0: ; %entry 6335; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 0 6336; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc 6337; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc 6338; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 1 6339; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc 6340; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc 6341; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 2 6342; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc 6343; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v11, vcc 6344; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 3 6345; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc 6346; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v7, v11, vcc 6347; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 4 6348; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v8, v10, vcc 6349; GPRIDX-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc 6350; GPRIDX-NEXT: v_readfirstlane_b32 s0, v0 6351; GPRIDX-NEXT: v_readfirstlane_b32 s1, v1 6352; GPRIDX-NEXT: v_readfirstlane_b32 s2, v2 6353; GPRIDX-NEXT: v_readfirstlane_b32 s3, v3 6354; GPRIDX-NEXT: v_readfirstlane_b32 s4, v4 6355; GPRIDX-NEXT: v_readfirstlane_b32 s5, v5 6356; GPRIDX-NEXT: v_readfirstlane_b32 s6, v6 6357; GPRIDX-NEXT: v_readfirstlane_b32 s7, v7 6358; GPRIDX-NEXT: v_readfirstlane_b32 s8, v8 6359; GPRIDX-NEXT: v_readfirstlane_b32 s9, v9 6360; GPRIDX-NEXT: ; return to shader part epilog 6361; 6362; GFX10-LABEL: dyn_insertelement_v5f64_v_v_s: 6363; GFX10: ; %bb.0: ; %entry 6364; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 0 6365; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s2, 1 6366; GFX10-NEXT: v_cmp_eq_u32_e64 s1, s2, 4 6367; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo 6368; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc_lo 6369; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v10, s0 6370; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 2 6371; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v11, s0 6372; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s2, 3 6373; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, v10, s1 6374; GFX10-NEXT: v_cndmask_b32_e64 v9, v9, v11, s1 6375; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc_lo 6376; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v11, vcc_lo 6377; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, v10, s0 6378; GFX10-NEXT: v_cndmask_b32_e64 v7, v7, v11, s0 6379; GFX10-NEXT: v_readfirstlane_b32 s0, v0 6380; GFX10-NEXT: v_readfirstlane_b32 s1, v1 6381; GFX10-NEXT: v_readfirstlane_b32 s2, v2 6382; GFX10-NEXT: v_readfirstlane_b32 s3, v3 6383; GFX10-NEXT: v_readfirstlane_b32 s4, v4 6384; GFX10-NEXT: v_readfirstlane_b32 s5, v5 6385; GFX10-NEXT: v_readfirstlane_b32 s6, v6 6386; GFX10-NEXT: v_readfirstlane_b32 s7, v7 6387; GFX10-NEXT: v_readfirstlane_b32 s8, v8 6388; GFX10-NEXT: v_readfirstlane_b32 s9, v9 6389; GFX10-NEXT: ; return to shader part epilog 6390; 6391; GFX11-LABEL: dyn_insertelement_v5f64_v_v_s: 6392; GFX11: ; %bb.0: ; %entry 6393; GFX11-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 0 6394; GFX11-NEXT: v_cmp_eq_u32_e64 s0, s2, 1 6395; GFX11-NEXT: v_cmp_eq_u32_e64 s1, s2, 4 6396; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v10 :: v_dual_cndmask_b32 v1, v1, v11 6397; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v10, s0 6398; GFX11-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 2 6399; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v11, s0 6400; GFX11-NEXT: v_cmp_eq_u32_e64 s0, s2, 3 6401; GFX11-NEXT: v_cndmask_b32_e64 v8, v8, v10, s1 6402; GFX11-NEXT: v_cndmask_b32_e64 v9, v9, v11, s1 6403; GFX11-NEXT: v_dual_cndmask_b32 v4, v4, v10 :: v_dual_cndmask_b32 v5, v5, v11 6404; GFX11-NEXT: v_cndmask_b32_e64 v6, v6, v10, s0 6405; GFX11-NEXT: v_cndmask_b32_e64 v7, v7, v11, s0 6406; GFX11-NEXT: v_readfirstlane_b32 s0, v0 6407; GFX11-NEXT: v_readfirstlane_b32 s1, v1 6408; GFX11-NEXT: v_readfirstlane_b32 s2, v2 6409; GFX11-NEXT: v_readfirstlane_b32 s3, v3 6410; GFX11-NEXT: v_readfirstlane_b32 s4, v4 6411; GFX11-NEXT: v_readfirstlane_b32 s5, v5 6412; GFX11-NEXT: v_readfirstlane_b32 s6, v6 6413; GFX11-NEXT: v_readfirstlane_b32 s7, v7 6414; GFX11-NEXT: v_readfirstlane_b32 s8, v8 6415; GFX11-NEXT: v_readfirstlane_b32 s9, v9 6416; GFX11-NEXT: ; return to shader part epilog 6417entry: 6418 %insert = insertelement <5 x double> %vec, double %val, i32 %idx 6419 ret <5 x double> %insert 6420} 6421 6422define amdgpu_ps <5 x double> @dyn_insertelement_v5f64_v_v_v(<5 x double> %vec, double %val, i32 %idx) { 6423; GPRIDX-LABEL: dyn_insertelement_v5f64_v_v_v: 6424; GPRIDX: ; %bb.0: ; %entry 6425; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v12 6426; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc 6427; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc 6428; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v12 6429; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc 6430; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc 6431; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v12 6432; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc 6433; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v11, vcc 6434; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v12 6435; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc 6436; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v7, v11, vcc 6437; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v12 6438; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v8, v10, vcc 6439; GPRIDX-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc 6440; GPRIDX-NEXT: v_readfirstlane_b32 s0, v0 6441; GPRIDX-NEXT: v_readfirstlane_b32 s1, v1 6442; GPRIDX-NEXT: v_readfirstlane_b32 s2, v2 6443; GPRIDX-NEXT: v_readfirstlane_b32 s3, v3 6444; GPRIDX-NEXT: v_readfirstlane_b32 s4, v4 6445; GPRIDX-NEXT: v_readfirstlane_b32 s5, v5 6446; GPRIDX-NEXT: v_readfirstlane_b32 s6, v6 6447; GPRIDX-NEXT: v_readfirstlane_b32 s7, v7 6448; GPRIDX-NEXT: v_readfirstlane_b32 s8, v8 6449; GPRIDX-NEXT: v_readfirstlane_b32 s9, v9 6450; GPRIDX-NEXT: ; return to shader part epilog 6451; 6452; GFX10-LABEL: dyn_insertelement_v5f64_v_v_v: 6453; GFX10: ; %bb.0: ; %entry 6454; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v12 6455; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 1, v12 6456; GFX10-NEXT: v_cmp_eq_u32_e64 s1, 4, v12 6457; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo 6458; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc_lo 6459; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v10, s0 6460; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v12 6461; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v11, s0 6462; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 3, v12 6463; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, v10, s1 6464; GFX10-NEXT: v_cndmask_b32_e64 v9, v9, v11, s1 6465; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc_lo 6466; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v11, vcc_lo 6467; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, v10, s0 6468; GFX10-NEXT: v_cndmask_b32_e64 v7, v7, v11, s0 6469; GFX10-NEXT: v_readfirstlane_b32 s0, v0 6470; GFX10-NEXT: v_readfirstlane_b32 s1, v1 6471; GFX10-NEXT: v_readfirstlane_b32 s2, v2 6472; GFX10-NEXT: v_readfirstlane_b32 s3, v3 6473; GFX10-NEXT: v_readfirstlane_b32 s4, v4 6474; GFX10-NEXT: v_readfirstlane_b32 s5, v5 6475; GFX10-NEXT: v_readfirstlane_b32 s6, v6 6476; GFX10-NEXT: v_readfirstlane_b32 s7, v7 6477; GFX10-NEXT: v_readfirstlane_b32 s8, v8 6478; GFX10-NEXT: v_readfirstlane_b32 s9, v9 6479; GFX10-NEXT: ; return to shader part epilog 6480; 6481; GFX11-LABEL: dyn_insertelement_v5f64_v_v_v: 6482; GFX11: ; %bb.0: ; %entry 6483; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v12 6484; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 1, v12 6485; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 4, v12 6486; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v10 :: v_dual_cndmask_b32 v1, v1, v11 6487; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, v10, s0 6488; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v12 6489; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v11, s0 6490; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 3, v12 6491; GFX11-NEXT: v_cndmask_b32_e64 v8, v8, v10, s1 6492; GFX11-NEXT: v_cndmask_b32_e64 v9, v9, v11, s1 6493; GFX11-NEXT: v_dual_cndmask_b32 v4, v4, v10 :: v_dual_cndmask_b32 v5, v5, v11 6494; GFX11-NEXT: v_cndmask_b32_e64 v6, v6, v10, s0 6495; GFX11-NEXT: v_cndmask_b32_e64 v7, v7, v11, s0 6496; GFX11-NEXT: v_readfirstlane_b32 s0, v0 6497; GFX11-NEXT: v_readfirstlane_b32 s1, v1 6498; GFX11-NEXT: v_readfirstlane_b32 s2, v2 6499; GFX11-NEXT: v_readfirstlane_b32 s3, v3 6500; GFX11-NEXT: v_readfirstlane_b32 s4, v4 6501; GFX11-NEXT: v_readfirstlane_b32 s5, v5 6502; GFX11-NEXT: v_readfirstlane_b32 s6, v6 6503; GFX11-NEXT: v_readfirstlane_b32 s7, v7 6504; GFX11-NEXT: v_readfirstlane_b32 s8, v8 6505; GFX11-NEXT: v_readfirstlane_b32 s9, v9 6506; GFX11-NEXT: ; return to shader part epilog 6507entry: 6508 %insert = insertelement <5 x double> %vec, double %val, i32 %idx 6509 ret <5 x double> %insert 6510} 6511