; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GPRIDX %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MOVREL %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s

; Each function below extracts one element from an 8-element vector using an
; index that is only known at run time (extractelement with a variable index).
; The function-name suffix reads <vector>_<index>: "s" is used where the
; argument is marked inreg (or, for const_s, where the vector is a constant),
; and "v" where the argument is passed without inreg.
; The assertion comments are regenerated by the script named in the first
; line; do not edit them by hand.

; Constant <8 x float> vector, index passed as an ordinary argument.
define float @dyn_extract_v8f32_const_s_v(i32 %sel) {
; GCN-LABEL: dyn_extract_v8f32_const_s_v:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
; GCN-NEXT:    v_mov_b32_e32 v1, 0x40400000
; GCN-NEXT:    v_cndmask_b32_e64 v6, 1.0, 2.0, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
; GCN-NEXT:    v_cndmask_b32_e32 v1, v6, v1, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
; GCN-NEXT:    v_mov_b32_e32 v2, 0x40a00000
; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, 4.0, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
; GCN-NEXT:    v_mov_b32_e32 v3, 0x40c00000
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
; GCN-NEXT:    v_mov_b32_e32 v4, 0x40e00000
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v0
; GCN-NEXT:    v_mov_b32_e32 v5, 0x41000000
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v0
; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v5, vcc
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: dyn_extract_v8f32_const_s_v:
; GFX10PLUS:       ; %bb.0: ; %entry
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo
; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40400000, vcc_lo
; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, 4.0, vcc_lo
; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40a00000, vcc_lo
; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40c00000, vcc_lo
; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, 0x40e00000, vcc_lo
; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v0
; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v0, v1, 0x41000000, vcc_lo
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
entry:
  %ext = extractelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, i32 %sel
  ret float %ext
}

; Constant <8 x float> vector, index passed inreg.
define amdgpu_ps float @dyn_extract_v8f32_const_s_s(i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8f32_const_s_s:
; GPRIDX:       ; %bb.0: ; %entry
; GPRIDX-NEXT:    s_cmp_eq_u32 s2, 1
; GPRIDX-NEXT:    s_cselect_b32 s0, 2.0, 1.0
; GPRIDX-NEXT:    s_cmp_eq_u32 s2, 2
; GPRIDX-NEXT:    s_cselect_b32 s0, 0x40400000, s0
; GPRIDX-NEXT:    s_cmp_eq_u32 s2, 3
; GPRIDX-NEXT:    s_cselect_b32 s0, 4.0, s0
; GPRIDX-NEXT:    s_cmp_eq_u32 s2, 4
; GPRIDX-NEXT:    s_cselect_b32 s0, 0x40a00000, s0
; GPRIDX-NEXT:    s_cmp_eq_u32 s2, 5
; GPRIDX-NEXT:    s_cselect_b32 s0, 0x40c00000, s0
; GPRIDX-NEXT:    s_cmp_eq_u32 s2, 6
; GPRIDX-NEXT:    s_cselect_b32 s0, 0x40e00000, s0
; GPRIDX-NEXT:    s_cmp_eq_u32 s2, 7
; GPRIDX-NEXT:    s_cselect_b32 s0, 0x41000000, s0
; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
; GPRIDX-NEXT:    ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f32_const_s_s:
; MOVREL:       ; %bb.0: ; %entry
; MOVREL-NEXT:    s_mov_b32 s4, 1.0
; MOVREL-NEXT:    s_mov_b32 m0, s2
; MOVREL-NEXT:    s_mov_b32 s11, 0x41000000
; MOVREL-NEXT:    s_mov_b32 s10, 0x40e00000
; MOVREL-NEXT:    s_mov_b32 s9, 0x40c00000
; MOVREL-NEXT:    s_mov_b32 s8, 0x40a00000
; MOVREL-NEXT:    s_mov_b32 s7, 4.0
; MOVREL-NEXT:    s_mov_b32 s6, 0x40400000
; MOVREL-NEXT:    s_mov_b32 s5, 2.0
; MOVREL-NEXT:    s_movrels_b32 s0, s4
; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
; MOVREL-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: dyn_extract_v8f32_const_s_s:
; GFX10PLUS:       ; %bb.0: ; %entry
; GFX10PLUS-NEXT:    s_mov_b32 s4, 1.0
; GFX10PLUS-NEXT:    s_mov_b32 m0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s11, 0x41000000
; GFX10PLUS-NEXT:    s_mov_b32 s10, 0x40e00000
; GFX10PLUS-NEXT:    s_mov_b32 s9, 0x40c00000
; GFX10PLUS-NEXT:    s_mov_b32 s8, 0x40a00000
; GFX10PLUS-NEXT:    s_mov_b32 s7, 4.0
; GFX10PLUS-NEXT:    s_mov_b32 s6, 0x40400000
; GFX10PLUS-NEXT:    s_mov_b32 s5, 2.0
; GFX10PLUS-NEXT:    s_movrels_b32 s0, s4
; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, s0
; GFX10PLUS-NEXT:    ; return to shader part epilog
entry:
  %ext = extractelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, i32 %sel
  ret float %ext
}

; <8 x float> vector passed inreg, index passed as an ordinary argument.
define amdgpu_ps float @dyn_extract_v8f32_s_v(<8 x float> inreg %vec, i32 %sel) {
; GCN-LABEL: dyn_extract_v8f32_s_v:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    v_mov_b32_e32 v1, s2
; GCN-NEXT:    v_mov_b32_e32 v2, s3
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
; GCN-NEXT:    v_mov_b32_e32 v3, s4
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
; GCN-NEXT:    v_mov_b32_e32 v4, s5
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
; GCN-NEXT:    v_mov_b32_e32 v5, s6
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
; GCN-NEXT:    v_mov_b32_e32 v6, s7
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
; GCN-NEXT:    v_mov_b32_e32 v7, s8
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v0
; GCN-NEXT:    v_mov_b32_e32 v8, s9
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v0
; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v8, vcc
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: dyn_extract_v8f32_s_v:
; GFX10PLUS:       ; %bb.0: ; %entry
; GFX10PLUS-NEXT:    v_mov_b32_e32 v1, s3
; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v1, s2, v1, vcc_lo
; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s4, vcc_lo
; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s5, vcc_lo
; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s6, vcc_lo
; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s7, vcc_lo
; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v1, v1, s8, vcc_lo
; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v0
; GFX10PLUS-NEXT:    v_cndmask_b32_e64 v0, v1, s9, vcc_lo
; GFX10PLUS-NEXT:    ; return to shader part epilog
entry:
  %ext = extractelement <8 x float> %vec, i32 %sel
  ret float %ext
}

; <8 x float> vector and index both passed as ordinary arguments.
define float @dyn_extract_v8f32_v_v(<8 x float> %vec, i32 %sel) {
; GCN-LABEL: dyn_extract_v8f32_v_v:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v8
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v8
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v8
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v8
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v8
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v8
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v8
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: dyn_extract_v8f32_v_v:
; GFX10PLUS:       ; %bb.0: ; %entry
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v8
; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v8
; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v8
; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v8
; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v8
; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v8
; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
; GFX10PLUS-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v8
; GFX10PLUS-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc_lo
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
entry:
  %ext = extractelement <8 x float> %vec, i32 %sel
  ret float %ext
}

; <8 x float> vector passed as an ordinary argument, index passed inreg.
define amdgpu_ps float @dyn_extract_v8f32_v_s(<8 x float> %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8f32_v_s:
; GPRIDX:       ; %bb.0: ; %entry
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 1
; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 2
; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 3
; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 4
; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 5
; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 6
; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 7
; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
; GPRIDX-NEXT:    ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f32_v_s:
; MOVREL:       ; %bb.0: ; %entry
; MOVREL-NEXT:    s_mov_b32 m0, s2
; MOVREL-NEXT:    v_movrels_b32_e32 v0, v0
; MOVREL-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: dyn_extract_v8f32_v_s:
; GFX10PLUS:       ; %bb.0: ; %entry
; GFX10PLUS-NEXT:    s_mov_b32 m0, s2
; GFX10PLUS-NEXT:    v_movrels_b32_e32 v0, v0
; GFX10PLUS-NEXT:    ; return to shader part epilog
entry:
  %ext = extractelement <8 x float> %vec, i32 %sel
  ret float %ext
}

; <8 x float> vector and index both passed inreg.
define amdgpu_ps float @dyn_extract_v8f32_s_s(<8 x float> inreg %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8f32_s_s:
; GPRIDX:       ; %bb.0: ; %entry
; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 1
; GPRIDX-NEXT:    s_cselect_b32 s0, s3, s2
; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 2
; GPRIDX-NEXT:    s_cselect_b32 s0, s4, s0
; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 3
; GPRIDX-NEXT:    s_cselect_b32 s0, s5, s0
; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 4
; GPRIDX-NEXT:    s_cselect_b32 s0, s6, s0
; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 5
; GPRIDX-NEXT:    s_cselect_b32 s0, s7, s0
; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 6
; GPRIDX-NEXT:    s_cselect_b32 s0, s8, s0
; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 7
; GPRIDX-NEXT:    s_cselect_b32 s0, s9, s0
; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
; GPRIDX-NEXT:    ; return to shader part epilog
;
; MOVREL-LABEL: dyn_extract_v8f32_s_s:
; MOVREL:       ; %bb.0: ; %entry
; MOVREL-NEXT:    s_mov_b32 s0, s2
; MOVREL-NEXT:    s_mov_b32 m0, s10
; MOVREL-NEXT:    s_mov_b32 s1, s3
; MOVREL-NEXT:    s_mov_b32 s2, s4
; MOVREL-NEXT:    s_mov_b32 s3, s5
; MOVREL-NEXT:    s_mov_b32 s4, s6
; MOVREL-NEXT:    s_mov_b32 s5, s7
; MOVREL-NEXT:    s_mov_b32 s6, s8
; MOVREL-NEXT:    s_mov_b32 s7, s9
; MOVREL-NEXT:    s_movrels_b32 s0, s0
; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
; MOVREL-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: dyn_extract_v8f32_s_s:
; GFX10PLUS:       ; %bb.0: ; %entry
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 m0, s10
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    s_movrels_b32 s0, s0
; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, s0
; GFX10PLUS-NEXT:    ; return to shader part epilog
entry:
  %ext = extractelement <8 x float> %vec, i32 %sel
  ret float %ext
}

; Constant <8 x i64> vector, index passed as an ordinary argument.
define i64 @dyn_extract_v8i64_const_s_v(i32 %sel) {
; GCN-LABEL: dyn_extract_v8i64_const_s_v:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    s_mov_b64 s[16:17], 2
; GCN-NEXT:    s_mov_b64 s[18:19], 1
; GCN-NEXT:    s_mov_b64 s[14:15], 3
; GCN-NEXT:    v_mov_b32_e32 v1, s18
; GCN-NEXT:    v_mov_b32_e32 v2, s19
; GCN-NEXT:    v_mov_b32_e32 v3, s16
; GCN-NEXT:    v_mov_b32_e32 v4, s17
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
; GCN-NEXT:    s_mov_b64 s[12:13], 4
; GCN-NEXT:    v_mov_b32_e32 v5, s14
; GCN-NEXT:    v_mov_b32_e32 v6, s15
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
; GCN-NEXT:    s_mov_b64 s[10:11], 5
; GCN-NEXT:    v_mov_b32_e32 v7, s12
; GCN-NEXT:    v_mov_b32_e32 v8, s13
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
; GCN-NEXT:    s_mov_b64 s[8:9], 6
; GCN-NEXT:    v_mov_b32_e32 v9, s10
; GCN-NEXT:    v_mov_b32_e32 v10, s11
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
; GCN-NEXT:    s_mov_b64 s[6:7], 7
; GCN-NEXT:    v_mov_b32_e32 v11, s8
; GCN-NEXT:    v_mov_b32_e32 v12, s9
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
; GCN-NEXT:    s_mov_b64 s[4:5], 8
; GCN-NEXT:    v_mov_b32_e32 v13, s6
; GCN-NEXT:    v_mov_b32_e32 v14, s7
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v12, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v0
; GCN-NEXT:    v_mov_b32_e32 v15, s4
; GCN-NEXT:    v_mov_b32_e32 v16, s5
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v14, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v0
; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v15, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v16, vcc
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: dyn_extract_v8i64_const_s_v:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_mov_b64 s[4:5], 2
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX10-NEXT:    v_mov_b32_e32 v1, s4
; GFX10-NEXT:    v_mov_b32_e32 v2, s5
; GFX10-NEXT:    s_mov_b64 s[6:7], 1
; GFX10-NEXT:    s_mov_b64 s[4:5], 3
; GFX10-NEXT:    v_cndmask_b32_e32 v1, s6, v1, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e32 v2, s7, v2, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
; GFX10-NEXT:    s_mov_b64 s[6:7], 4
; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s4, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s5, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
; GFX10-NEXT:    s_mov_b64 s[4:5], 5
; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s6, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s7, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
; GFX10-NEXT:    s_mov_b64 s[6:7], 6
; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s4, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s5, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
; GFX10-NEXT:    s_mov_b64 s[4:5], 7
; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s6, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s7, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
; GFX10-NEXT:    s_mov_b64 s[6:7], 8
; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s4, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s5, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v0
; GFX10-NEXT:    v_cndmask_b32_e64 v0, v1, s6, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e64 v1, v2, s7, vcc_lo
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: dyn_extract_v8i64_const_s_v:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b64 s[0:1], 2
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX11-NEXT:    v_dual_mov_b32 v1, s0 :: v_dual_mov_b32 v2, s1
; GFX11-NEXT:    s_mov_b64 s[2:3], 1
; GFX11-NEXT:    s_mov_b64 s[0:1], 3
; GFX11-NEXT:    v_cndmask_b32_e32 v1, s2, v1, vcc_lo
; GFX11-NEXT:    v_cndmask_b32_e32 v2, s3, v2, vcc_lo
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
; GFX11-NEXT:    s_mov_b64 s[2:3], 4
; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s0, vcc_lo
; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s1, vcc_lo
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
; GFX11-NEXT:    s_mov_b64 s[0:1], 5
; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s2, vcc_lo
; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s3, vcc_lo
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
; GFX11-NEXT:    s_mov_b64 s[2:3], 6
; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s0, vcc_lo
; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s1, vcc_lo
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
; GFX11-NEXT:    s_mov_b64 s[0:1], 7
; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s2, vcc_lo
; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s3, vcc_lo
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
; GFX11-NEXT:    s_mov_b64 s[2:3], 8
; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s0, vcc_lo
; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s1, vcc_lo
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v0
; GFX11-NEXT:    v_cndmask_b32_e64 v0, v1, s2, vcc_lo
; GFX11-NEXT:    v_cndmask_b32_e64 v1, v2, s3, vcc_lo
; GFX11-NEXT:    s_setpc_b64 s[30:31]
entry:
  %ext = extractelement <8 x i64> <i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8>, i32 %sel
  ret i64 %ext
}

; Constant <8 x i64> vector, index passed inreg. The extracted value is stored
; through a placeholder pointer so that it is not dead; the address itself is
; irrelevant to the test.
define amdgpu_ps void @dyn_extract_v8i64_const_s_s(i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8i64_const_s_s:
; GPRIDX:       ; %bb.0: ; %entry
; GPRIDX-NEXT:    s_mov_b64 s[4:5], 1
; GPRIDX-NEXT:    s_mov_b32 m0, s2
; GPRIDX-NEXT:    s_mov_b64 s[18:19], 8
; GPRIDX-NEXT:    s_mov_b64 s[16:17], 7
; GPRIDX-NEXT:    s_mov_b64 s[14:15], 6
; GPRIDX-NEXT:    s_mov_b64 s[12:13], 5
; GPRIDX-NEXT:    s_mov_b64 s[10:11], 4
; GPRIDX-NEXT:    s_mov_b64 s[8:9], 3
; GPRIDX-NEXT:    s_mov_b64 s[6:7], 2
; GPRIDX-NEXT:    s_movrels_b64 s[0:1], s[4:5]
; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
; GPRIDX-NEXT:    v_mov_b32_e32 v1, s1
; GPRIDX-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
; GPRIDX-NEXT:    s_endpgm
;
; MOVREL-LABEL: dyn_extract_v8i64_const_s_s:
; MOVREL:       ; %bb.0: ; %entry
; MOVREL-NEXT:    s_mov_b64 s[4:5], 1
; MOVREL-NEXT:    s_mov_b32 m0, s2
; MOVREL-NEXT:    s_mov_b64 s[18:19], 8
; MOVREL-NEXT:    s_mov_b64 s[16:17], 7
; MOVREL-NEXT:    s_mov_b64 s[14:15], 6
; MOVREL-NEXT:    s_mov_b64 s[12:13], 5
; MOVREL-NEXT:    s_mov_b64 s[10:11], 4
; MOVREL-NEXT:    s_mov_b64 s[8:9], 3
; MOVREL-NEXT:    s_mov_b64 s[6:7], 2
; MOVREL-NEXT:    s_movrels_b64 s[0:1], s[4:5]
; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
; MOVREL-NEXT:    v_mov_b32_e32 v1, s1
; MOVREL-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
; MOVREL-NEXT:    s_endpgm
;
; GFX10-LABEL: dyn_extract_v8i64_const_s_s:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_mov_b64 s[4:5], 1
; GFX10-NEXT:    s_mov_b32 m0, s2
; GFX10-NEXT:    s_mov_b64 s[18:19], 8
; GFX10-NEXT:    s_mov_b64 s[16:17], 7
; GFX10-NEXT:    s_mov_b64 s[14:15], 6
; GFX10-NEXT:    s_mov_b64 s[12:13], 5
; GFX10-NEXT:    s_mov_b64 s[10:11], 4
; GFX10-NEXT:    s_mov_b64 s[8:9], 3
; GFX10-NEXT:    s_mov_b64 s[6:7], 2
; GFX10-NEXT:    s_movrels_b64 s[0:1], s[4:5]
; GFX10-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: dyn_extract_v8i64_const_s_s:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_mov_b64 s[4:5], 1
; GFX11-NEXT:    s_mov_b32 m0, s2
; GFX11-NEXT:    s_mov_b64 s[18:19], 8
; GFX11-NEXT:    s_mov_b64 s[16:17], 7
; GFX11-NEXT:    s_mov_b64 s[14:15], 6
; GFX11-NEXT:    s_mov_b64 s[12:13], 5
; GFX11-NEXT:    s_mov_b64 s[10:11], 4
; GFX11-NEXT:    s_mov_b64 s[8:9], 3
; GFX11-NEXT:    s_mov_b64 s[6:7], 2
; GFX11-NEXT:    s_movrels_b64 s[0:1], s[4:5]
; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT:    global_store_b64 v[0:1], v[0:1], off
; GFX11-NEXT:    s_endpgm
entry:
  %ext = extractelement <8 x i64> <i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8>, i32 %sel
  store i64 %ext, ptr addrspace(1) poison
  ret void
}

; <8 x i64> vector passed inreg, index passed as an ordinary argument. The
; result is stored through a placeholder pointer to keep it live.
define amdgpu_ps void @dyn_extract_v8i64_s_v(<8 x i64> inreg %vec, i32 %sel) {
; GPRIDX-LABEL: dyn_extract_v8i64_s_v:
; GPRIDX:       ; %bb.0: ; %entry
; GPRIDX-NEXT:    v_mov_b32_e32 v1, s2
; GPRIDX-NEXT:    v_mov_b32_e32 v2, s3
; GPRIDX-NEXT:    v_mov_b32_e32 v3, s4
; GPRIDX-NEXT:    v_mov_b32_e32 v4, s5
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
; GPRIDX-NEXT:    v_mov_b32_e32 v5, s6
; GPRIDX-NEXT:    v_mov_b32_e32 v6, s7
; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
; GPRIDX-NEXT:    v_mov_b32_e32 v7, s8
; GPRIDX-NEXT:    v_mov_b32_e32 v8, s9
; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
; GPRIDX-NEXT:    v_mov_b32_e32 v9, s10
; GPRIDX-NEXT:    v_mov_b32_e32 v10, s11
; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
; GPRIDX-NEXT:    v_mov_b32_e32 v11, s12
; GPRIDX-NEXT:    v_mov_b32_e32 v12, s13
; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
; GPRIDX-NEXT:    v_mov_b32_e32 v13, s14
; GPRIDX-NEXT:    v_mov_b32_e32 v14, s15
; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v12, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v0
; GPRIDX-NEXT:    v_mov_b32_e32 v15, s16
; GPRIDX-NEXT:    v_mov_b32_e32 v16, s17
; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v14, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v0
; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v1, v15, vcc
; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v2, v16, vcc
; GPRIDX-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
; GPRIDX-NEXT:    s_endpgm
;
; MOVREL-LABEL: dyn_extract_v8i64_s_v:
; MOVREL:       ; %bb.0: ; %entry
; MOVREL-NEXT:    v_mov_b32_e32 v1, s2
; MOVREL-NEXT:    v_mov_b32_e32 v2, s3
; MOVREL-NEXT:    v_mov_b32_e32 v3, s4
; MOVREL-NEXT:    v_mov_b32_e32 v4, s5
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
; MOVREL-NEXT:    v_mov_b32_e32 v5, s6
; MOVREL-NEXT:    v_mov_b32_e32 v6, s7
; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
; MOVREL-NEXT:    v_mov_b32_e32 v7, s8
; MOVREL-NEXT:    v_mov_b32_e32 v8, s9
; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
; MOVREL-NEXT:    v_mov_b32_e32 v9, s10
; MOVREL-NEXT:    v_mov_b32_e32 v10, s11
; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
; MOVREL-NEXT:    v_mov_b32_e32 v11, s12
; MOVREL-NEXT:    v_mov_b32_e32 v12, s13
; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v2, v10, vcc
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
; MOVREL-NEXT:    v_mov_b32_e32 v13, s14
; MOVREL-NEXT:    v_mov_b32_e32 v14, s15
; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v2, v12, vcc
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v0
; MOVREL-NEXT:    v_mov_b32_e32 v15, s16
; MOVREL-NEXT:    v_mov_b32_e32 v16, s17
; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v2, v14, vcc
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v0
; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v1, v15, vcc
; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v2, v16, vcc
; MOVREL-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
; MOVREL-NEXT:    s_endpgm
;
; GFX10-LABEL: dyn_extract_v8i64_s_v:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    v_mov_b32_e32 v1, s4
; GFX10-NEXT:    v_mov_b32_e32 v2, s5
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX10-NEXT:    v_cndmask_b32_e32 v1, s2, v1, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e32 v2, s3, v2, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s6, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s7, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s8, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s9, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s10, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s11, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s12, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s13, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s14, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, s15, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v0
; GFX10-NEXT:    v_cndmask_b32_e64 v0, v1, s16, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e64 v1, v2, s17, vcc_lo
; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: dyn_extract_v8i64_s_v:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    v_dual_mov_b32 v1, s4 :: v_dual_mov_b32 v2, s5
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX11-NEXT:    v_cndmask_b32_e32 v1, s2, v1, vcc_lo
; GFX11-NEXT:    v_cndmask_b32_e32 v2, s3, v2, vcc_lo
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s6, vcc_lo
; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s7, vcc_lo
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s8, vcc_lo
; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s9, vcc_lo
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s10, vcc_lo
; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s11, vcc_lo
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s12, vcc_lo
; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s13, vcc_lo
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s14, vcc_lo
; GFX11-NEXT:    v_cndmask_b32_e64 v2, v2, s15, vcc_lo
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v0
; GFX11-NEXT:    v_cndmask_b32_e64 v0, v1, s16, vcc_lo
; GFX11-NEXT:    v_cndmask_b32_e64 v1, v2, s17, vcc_lo
; GFX11-NEXT:    global_store_b64 v[0:1], v[0:1], off
; GFX11-NEXT:    s_endpgm
entry:
  %ext = extractelement <8 x i64> %vec, i32 %sel
  store i64 %ext, ptr addrspace(1) poison
  ret void
}

; <8 x i64> vector and index both passed as ordinary arguments.
define i64 @dyn_extract_v8i64_v_v(<8 x i64> %vec, i32 %sel) {
; GCN-LABEL: dyn_extract_v8i64_v_v:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v16
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v16
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v16
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v16
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v16
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v16
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v16
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: dyn_extract_v8i64_v_v:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v16
; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v16
; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v16
; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v16
; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v16
; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v16
; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v12, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v13, vcc_lo
; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v16
; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc_lo
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: dyn_extract_v8i64_v_v:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v16
; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v2 :: v_dual_cndmask_b32 v1, v1, v3
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v16
; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v4 :: v_dual_cndmask_b32 v1, v1, v5
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v16
; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v6 :: v_dual_cndmask_b32 v1, v1, v7
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v16
; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v8 :: v_dual_cndmask_b32 v1, v1, v9
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v16
; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v10 :: v_dual_cndmask_b32 v1, v1, v11
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v16
; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v12 :: v_dual_cndmask_b32 v1, v1, v13
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v16
; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v14 :: v_dual_cndmask_b32 v1, v1, v15
; GFX11-NEXT:    s_setpc_b64 s[30:31]
entry:
  %ext = extractelement <8 x i64> %vec, i32 %sel
  ret i64 %ext
}

; <8 x i64> vector passed as an ordinary argument, index passed inreg. The
; result is stored through a placeholder pointer to keep it live.
define amdgpu_ps void @dyn_extract_v8i64_v_s(<8 x i64> %vec, i32 inreg %sel) {
; GPRIDX-LABEL: dyn_extract_v8i64_v_s:
; GPRIDX:       ; %bb.0: ; %entry
; GPRIDX-NEXT:    s_lshl_b32 s0, s2, 1
; GPRIDX-NEXT:    s_set_gpr_idx_on s0, gpr_idx(SRC0)
; GPRIDX-NEXT:    v_mov_b32_e32 v16, v0
; GPRIDX-NEXT:    v_mov_b32_e32 v17, v1
; GPRIDX-NEXT:    s_set_gpr_idx_off
; GPRIDX-NEXT:    global_store_dwordx2 v[0:1], v[16:17], off
; GPRIDX-NEXT:    s_endpgm
;
; MOVREL-LABEL: dyn_extract_v8i64_v_s:
; MOVREL:       ; %bb.0: ; %entry
; MOVREL-NEXT:    s_lshl_b32 m0, s2, 1
; MOVREL-NEXT:    v_movrels_b32_e32 v16, v0
; MOVREL-NEXT:    v_movrels_b32_e32 v17, v1
; MOVREL-NEXT:    flat_store_dwordx2 v[0:1], v[16:17]
; MOVREL-NEXT:    s_endpgm
;
; GFX10-LABEL: dyn_extract_v8i64_v_s:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_lshl_b32 m0, s2, 1
; GFX10-NEXT:    v_movrels_b32_e32 v16, v0
; GFX10-NEXT:    v_movrels_b32_e32 v17, v1
; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[16:17], off
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: dyn_extract_v8i64_v_s:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_lshl_b32 m0, s2, 1
; GFX11-NEXT:    v_movrels_b32_e32 v16, v0
; GFX11-NEXT:    v_movrels_b32_e32 v17, v1
; GFX11-NEXT:    global_store_b64 v[0:1], v[16:17], off
; GFX11-NEXT:    s_endpgm
entry:
  %ext = extractelement <8 x i64> %vec, i32 %sel
  store i64 %ext, ptr addrspace(1) poison
  ret void
}

751define amdgpu_ps void @dyn_extract_v8i64_s_s(<8 x i64> inreg %vec, i32 inreg %sel) { 752; GPRIDX-LABEL: dyn_extract_v8i64_s_s: 753; GPRIDX: ; %bb.0: ; %entry 754; GPRIDX-NEXT: s_mov_b32 s0, s2 755; GPRIDX-NEXT: s_mov_b32 s1, s3 756; GPRIDX-NEXT: s_mov_b32 m0, s18 757; GPRIDX-NEXT: s_mov_b32 s2, s4 758; GPRIDX-NEXT: s_mov_b32 s3, s5 759; GPRIDX-NEXT: s_mov_b32 s4, s6 760; GPRIDX-NEXT: s_mov_b32 s5, s7 761; GPRIDX-NEXT: s_mov_b32 s6, s8 762; GPRIDX-NEXT: s_mov_b32 s7, s9 763; GPRIDX-NEXT: s_mov_b32 s8, s10 764; GPRIDX-NEXT: s_mov_b32 s9, s11 765; GPRIDX-NEXT: s_mov_b32 s10, s12 766; GPRIDX-NEXT: s_mov_b32 s11, s13 767; GPRIDX-NEXT: s_mov_b32 s12, s14 768; GPRIDX-NEXT: s_mov_b32 s13, s15 769; GPRIDX-NEXT: s_mov_b32 s14, s16 770; GPRIDX-NEXT: s_mov_b32 s15, s17 771; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1] 772; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 773; GPRIDX-NEXT: v_mov_b32_e32 v1, s1 774; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[0:1], off 775; GPRIDX-NEXT: s_endpgm 776; 777; MOVREL-LABEL: dyn_extract_v8i64_s_s: 778; MOVREL: ; %bb.0: ; %entry 779; MOVREL-NEXT: s_mov_b32 s0, s2 780; MOVREL-NEXT: s_mov_b32 s1, s3 781; MOVREL-NEXT: s_mov_b32 m0, s18 782; MOVREL-NEXT: s_mov_b32 s2, s4 783; MOVREL-NEXT: s_mov_b32 s3, s5 784; MOVREL-NEXT: s_mov_b32 s4, s6 785; MOVREL-NEXT: s_mov_b32 s5, s7 786; MOVREL-NEXT: s_mov_b32 s6, s8 787; MOVREL-NEXT: s_mov_b32 s7, s9 788; MOVREL-NEXT: s_mov_b32 s8, s10 789; MOVREL-NEXT: s_mov_b32 s9, s11 790; MOVREL-NEXT: s_mov_b32 s10, s12 791; MOVREL-NEXT: 
s_mov_b32 s11, s13 792; MOVREL-NEXT: s_mov_b32 s12, s14 793; MOVREL-NEXT: s_mov_b32 s13, s15 794; MOVREL-NEXT: s_mov_b32 s14, s16 795; MOVREL-NEXT: s_mov_b32 s15, s17 796; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1] 797; MOVREL-NEXT: v_mov_b32_e32 v0, s0 798; MOVREL-NEXT: v_mov_b32_e32 v1, s1 799; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[0:1] 800; MOVREL-NEXT: s_endpgm 801; 802; GFX10-LABEL: dyn_extract_v8i64_s_s: 803; GFX10: ; %bb.0: ; %entry 804; GFX10-NEXT: s_mov_b32 s0, s2 805; GFX10-NEXT: s_mov_b32 s1, s3 806; GFX10-NEXT: s_mov_b32 m0, s18 807; GFX10-NEXT: s_mov_b32 s2, s4 808; GFX10-NEXT: s_mov_b32 s3, s5 809; GFX10-NEXT: s_mov_b32 s4, s6 810; GFX10-NEXT: s_mov_b32 s5, s7 811; GFX10-NEXT: s_mov_b32 s6, s8 812; GFX10-NEXT: s_mov_b32 s7, s9 813; GFX10-NEXT: s_mov_b32 s8, s10 814; GFX10-NEXT: s_mov_b32 s9, s11 815; GFX10-NEXT: s_mov_b32 s10, s12 816; GFX10-NEXT: s_mov_b32 s11, s13 817; GFX10-NEXT: s_mov_b32 s12, s14 818; GFX10-NEXT: s_mov_b32 s13, s15 819; GFX10-NEXT: s_mov_b32 s14, s16 820; GFX10-NEXT: s_mov_b32 s15, s17 821; GFX10-NEXT: s_movrels_b64 s[0:1], s[0:1] 822; GFX10-NEXT: v_mov_b32_e32 v0, s0 823; GFX10-NEXT: v_mov_b32_e32 v1, s1 824; GFX10-NEXT: global_store_dwordx2 v[0:1], v[0:1], off 825; GFX10-NEXT: s_endpgm 826; 827; GFX11-LABEL: dyn_extract_v8i64_s_s: 828; GFX11: ; %bb.0: ; %entry 829; GFX11-NEXT: s_mov_b32 s0, s2 830; GFX11-NEXT: s_mov_b32 s1, s3 831; GFX11-NEXT: s_mov_b32 m0, s18 832; GFX11-NEXT: s_mov_b32 s2, s4 833; GFX11-NEXT: s_mov_b32 s3, s5 834; GFX11-NEXT: s_mov_b32 s4, s6 835; GFX11-NEXT: s_mov_b32 s5, s7 836; GFX11-NEXT: s_mov_b32 s6, s8 837; GFX11-NEXT: s_mov_b32 s7, s9 838; GFX11-NEXT: s_mov_b32 s8, s10 839; GFX11-NEXT: s_mov_b32 s9, s11 840; GFX11-NEXT: s_mov_b32 s10, s12 841; GFX11-NEXT: s_mov_b32 s11, s13 842; GFX11-NEXT: s_mov_b32 s12, s14 843; GFX11-NEXT: s_mov_b32 s13, s15 844; GFX11-NEXT: s_mov_b32 s14, s16 845; GFX11-NEXT: s_mov_b32 s15, s17 846; GFX11-NEXT: s_movrels_b64 s[0:1], s[0:1] 847; GFX11-NEXT: v_dual_mov_b32 v0, s0 
:: v_dual_mov_b32 v1, s1 848; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off 849; GFX11-NEXT: s_endpgm 850entry: 851 %ext = extractelement <8 x i64> %vec, i32 %sel 852 store i64 %ext, ptr addrspace(1) undef 853 ret void 854} 855; Index is %sel + 3 on an SGPR <8 x float> vector. The GPRIDX checks keep the s_add_i32 and use an s_cmp_eq_u32/s_cselect_b32 chain; the MOVREL and GFX10PLUS checks contain no add - m0 is loaded with the raw index and s_movrels_b32 reads starting at s3 instead of s0. 856define amdgpu_ps float @dyn_extract_v8f32_s_s_offset3(<8 x float> inreg %vec, i32 inreg %sel) { 857; GPRIDX-LABEL: dyn_extract_v8f32_s_s_offset3: 858; GPRIDX: ; %bb.0: ; %entry 859; GPRIDX-NEXT: s_add_i32 s10, s10, 3 860; GPRIDX-NEXT: s_cmp_eq_u32 s10, 1 861; GPRIDX-NEXT: s_cselect_b32 s0, s3, s2 862; GPRIDX-NEXT: s_cmp_eq_u32 s10, 2 863; GPRIDX-NEXT: s_cselect_b32 s0, s4, s0 864; GPRIDX-NEXT: s_cmp_eq_u32 s10, 3 865; GPRIDX-NEXT: s_cselect_b32 s0, s5, s0 866; GPRIDX-NEXT: s_cmp_eq_u32 s10, 4 867; GPRIDX-NEXT: s_cselect_b32 s0, s6, s0 868; GPRIDX-NEXT: s_cmp_eq_u32 s10, 5 869; GPRIDX-NEXT: s_cselect_b32 s0, s7, s0 870; GPRIDX-NEXT: s_cmp_eq_u32 s10, 6 871; GPRIDX-NEXT: s_cselect_b32 s0, s8, s0 872; GPRIDX-NEXT: s_cmp_eq_u32 s10, 7 873; GPRIDX-NEXT: s_cselect_b32 s0, s9, s0 874; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 875; GPRIDX-NEXT: ; return to shader part epilog 876; 877; MOVREL-LABEL: dyn_extract_v8f32_s_s_offset3: 878; MOVREL: ; %bb.0: ; %entry 879; MOVREL-NEXT: s_mov_b32 s0, s2 880; MOVREL-NEXT: s_mov_b32 s1, s3 881; MOVREL-NEXT: s_mov_b32 s3, s5 882; MOVREL-NEXT: s_mov_b32 m0, s10 883; MOVREL-NEXT: s_mov_b32 s2, s4 884; MOVREL-NEXT: s_mov_b32 s4, s6 885; MOVREL-NEXT: s_mov_b32 s5, s7 886; MOVREL-NEXT: s_mov_b32 s6, s8 887; MOVREL-NEXT: s_mov_b32 s7, s9 888; MOVREL-NEXT: s_movrels_b32 s0, s3 889; MOVREL-NEXT: v_mov_b32_e32 v0, s0 890; MOVREL-NEXT: ; return to shader part epilog 891; 892; GFX10PLUS-LABEL: dyn_extract_v8f32_s_s_offset3: 893; GFX10PLUS: ; %bb.0: ; %entry 894; GFX10PLUS-NEXT: s_mov_b32 s1, s3 895; GFX10PLUS-NEXT: s_mov_b32 s3, s5 896; GFX10PLUS-NEXT: s_mov_b32 m0, s10 897; GFX10PLUS-NEXT: s_mov_b32 s0, s2 898; GFX10PLUS-NEXT: s_mov_b32 s2, s4 899; GFX10PLUS-NEXT: s_mov_b32 s4, s6 900; GFX10PLUS-NEXT: s_mov_b32 s5, s7 901; 
GFX10PLUS-NEXT: s_mov_b32 s6, s8 902; GFX10PLUS-NEXT: s_mov_b32 s7, s9 903; GFX10PLUS-NEXT: s_movrels_b32 s0, s3 904; GFX10PLUS-NEXT: v_mov_b32_e32 v0, s0 905; GFX10PLUS-NEXT: ; return to shader part epilog 906entry: 907 %add = add i32 %sel, 3 908 %ext = extractelement <8 x float> %vec, i32 %add 909 ret float %ext 910} 911; VGPR <8 x float> vector with VGPR index %sel + 3: every prefix expects a vector add of 3 on the index register followed by a v_cmp_eq_u32/v_cndmask_b32 chain over element indices 1..7. 912define float @dyn_extract_v8f32_v_v_offset3(<8 x float> %vec, i32 %sel) { 913; GPRIDX-LABEL: dyn_extract_v8f32_v_v_offset3: 914; GPRIDX: ; %bb.0: ; %entry 915; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 916; GPRIDX-NEXT: v_add_u32_e32 v8, 3, v8 917; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8 918; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 919; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v8 920; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 921; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v8 922; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 923; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v8 924; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 925; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v8 926; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc 927; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v8 928; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 929; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v8 930; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc 931; GPRIDX-NEXT: s_setpc_b64 s[30:31] 932; 933; MOVREL-LABEL: dyn_extract_v8f32_v_v_offset3: 934; MOVREL: ; %bb.0: ; %entry 935; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 936; MOVREL-NEXT: v_add_u32_e32 v8, vcc, 3, v8 937; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8 938; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 939; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 2, v8 940; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 941; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 3, v8 942; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 943; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 4, v8 944; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 945; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 5, v8 946; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc 947; 
MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 6, v8 948; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 949; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 7, v8 950; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc 951; MOVREL-NEXT: s_setpc_b64 s[30:31] 952; 953; GFX10PLUS-LABEL: dyn_extract_v8f32_v_v_offset3: 954; GFX10PLUS: ; %bb.0: ; %entry 955; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 956; GFX10PLUS-NEXT: v_add_nc_u32_e32 v8, 3, v8 957; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v8 958; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 959; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v8 960; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 961; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v8 962; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo 963; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v8 964; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo 965; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v8 966; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo 967; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v8 968; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo 969; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v8 970; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo 971; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 972entry: 973 %add = add i32 %sel, 3 974 %ext = extractelement <8 x float> %vec, i32 %add 975 ret float %ext 976} 977; SGPR <8 x double> vector with SGPR index %sel + 1: the checks contain no add; m0 receives the raw index and s_movrels_b64 reads starting at s[2:3] (one 64-bit element past s[0:1]). 978define amdgpu_ps double @dyn_extract_v8f64_s_s_offset1(<8 x double> inreg %vec, i32 inreg %sel) { 979; GCN-LABEL: dyn_extract_v8f64_s_s_offset1: 980; GCN: ; %bb.0: ; %entry 981; GCN-NEXT: s_mov_b32 s0, s2 982; GCN-NEXT: s_mov_b32 s1, s3 983; GCN-NEXT: s_mov_b32 s2, s4 984; GCN-NEXT: s_mov_b32 s3, s5 985; GCN-NEXT: s_mov_b32 m0, s18 986; GCN-NEXT: s_mov_b32 s4, s6 987; GCN-NEXT: s_mov_b32 s5, s7 988; GCN-NEXT: s_mov_b32 s6, s8 989; GCN-NEXT: s_mov_b32 s7, s9 990; GCN-NEXT: s_mov_b32 s8, s10 991; GCN-NEXT: s_mov_b32 s9, s11 992; GCN-NEXT: s_mov_b32 s10, s12 993; GCN-NEXT: s_mov_b32 s11, s13 994; GCN-NEXT: s_mov_b32 s12, s14 995; 
GCN-NEXT: s_mov_b32 s13, s15 996; GCN-NEXT: s_mov_b32 s14, s16 997; GCN-NEXT: s_mov_b32 s15, s17 998; GCN-NEXT: s_movrels_b64 s[0:1], s[2:3] 999; GCN-NEXT: ; return to shader part epilog 1000; 1001; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offset1: 1002; GFX10PLUS: ; %bb.0: ; %entry 1003; GFX10PLUS-NEXT: s_mov_b32 s0, s2 1004; GFX10PLUS-NEXT: s_mov_b32 s1, s3 1005; GFX10PLUS-NEXT: s_mov_b32 s2, s4 1006; GFX10PLUS-NEXT: s_mov_b32 s3, s5 1007; GFX10PLUS-NEXT: s_mov_b32 m0, s18 1008; GFX10PLUS-NEXT: s_mov_b32 s4, s6 1009; GFX10PLUS-NEXT: s_mov_b32 s5, s7 1010; GFX10PLUS-NEXT: s_mov_b32 s6, s8 1011; GFX10PLUS-NEXT: s_mov_b32 s7, s9 1012; GFX10PLUS-NEXT: s_mov_b32 s8, s10 1013; GFX10PLUS-NEXT: s_mov_b32 s9, s11 1014; GFX10PLUS-NEXT: s_mov_b32 s10, s12 1015; GFX10PLUS-NEXT: s_mov_b32 s11, s13 1016; GFX10PLUS-NEXT: s_mov_b32 s12, s14 1017; GFX10PLUS-NEXT: s_mov_b32 s13, s15 1018; GFX10PLUS-NEXT: s_mov_b32 s14, s16 1019; GFX10PLUS-NEXT: s_mov_b32 s15, s17 1020; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[2:3] 1021; GFX10PLUS-NEXT: ; return to shader part epilog 1022entry: 1023 %add = add i32 %sel, 1 1024 %ext = extractelement <8 x double> %vec, i32 %add 1025 ret double %ext 1026} 1027; SGPR <8 x double> vector with SGPR index %sel + 2: the checks contain no add; m0 receives the raw index and s_movrels_b64 reads starting at s[4:5] (two 64-bit elements past s[0:1]). 1028define amdgpu_ps double @dyn_extract_v8f64_s_s_offset2(<8 x double> inreg %vec, i32 inreg %sel) { 1029; GCN-LABEL: dyn_extract_v8f64_s_s_offset2: 1030; GCN: ; %bb.0: ; %entry 1031; GCN-NEXT: s_mov_b32 s0, s2 1032; GCN-NEXT: s_mov_b32 s1, s3 1033; GCN-NEXT: s_mov_b32 s2, s4 1034; GCN-NEXT: s_mov_b32 s3, s5 1035; GCN-NEXT: s_mov_b32 s4, s6 1036; GCN-NEXT: s_mov_b32 s5, s7 1037; GCN-NEXT: s_mov_b32 m0, s18 1038; GCN-NEXT: s_mov_b32 s6, s8 1039; GCN-NEXT: s_mov_b32 s7, s9 1040; GCN-NEXT: s_mov_b32 s8, s10 1041; GCN-NEXT: s_mov_b32 s9, s11 1042; GCN-NEXT: s_mov_b32 s10, s12 1043; GCN-NEXT: s_mov_b32 s11, s13 1044; GCN-NEXT: s_mov_b32 s12, s14 1045; GCN-NEXT: s_mov_b32 s13, s15 1046; GCN-NEXT: s_mov_b32 s14, s16 1047; GCN-NEXT: s_mov_b32 s15, s17 1048; GCN-NEXT: s_movrels_b64 s[0:1], s[4:5] 1049; 
GCN-NEXT: ; return to shader part epilog 1050; 1051; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offset2: 1052; GFX10PLUS: ; %bb.0: ; %entry 1053; GFX10PLUS-NEXT: s_mov_b32 s0, s2 1054; GFX10PLUS-NEXT: s_mov_b32 s1, s3 1055; GFX10PLUS-NEXT: s_mov_b32 s2, s4 1056; GFX10PLUS-NEXT: s_mov_b32 s3, s5 1057; GFX10PLUS-NEXT: s_mov_b32 s4, s6 1058; GFX10PLUS-NEXT: s_mov_b32 s5, s7 1059; GFX10PLUS-NEXT: s_mov_b32 m0, s18 1060; GFX10PLUS-NEXT: s_mov_b32 s6, s8 1061; GFX10PLUS-NEXT: s_mov_b32 s7, s9 1062; GFX10PLUS-NEXT: s_mov_b32 s8, s10 1063; GFX10PLUS-NEXT: s_mov_b32 s9, s11 1064; GFX10PLUS-NEXT: s_mov_b32 s10, s12 1065; GFX10PLUS-NEXT: s_mov_b32 s11, s13 1066; GFX10PLUS-NEXT: s_mov_b32 s12, s14 1067; GFX10PLUS-NEXT: s_mov_b32 s13, s15 1068; GFX10PLUS-NEXT: s_mov_b32 s14, s16 1069; GFX10PLUS-NEXT: s_mov_b32 s15, s17 1070; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[4:5] 1071; GFX10PLUS-NEXT: ; return to shader part epilog 1072entry: 1073 %add = add i32 %sel, 2 1074 %ext = extractelement <8 x double> %vec, i32 %add 1075 ret double %ext 1076} 1077; SGPR <8 x double> vector with SGPR index %sel + 3: the checks contain no add; m0 receives the raw index and s_movrels_b64 reads starting at s[6:7] (three 64-bit elements past s[0:1]). 1078define amdgpu_ps double @dyn_extract_v8f64_s_s_offset3(<8 x double> inreg %vec, i32 inreg %sel) { 1079; GCN-LABEL: dyn_extract_v8f64_s_s_offset3: 1080; GCN: ; %bb.0: ; %entry 1081; GCN-NEXT: s_mov_b32 s0, s2 1082; GCN-NEXT: s_mov_b32 s1, s3 1083; GCN-NEXT: s_mov_b32 s2, s4 1084; GCN-NEXT: s_mov_b32 s3, s5 1085; GCN-NEXT: s_mov_b32 s4, s6 1086; GCN-NEXT: s_mov_b32 s5, s7 1087; GCN-NEXT: s_mov_b32 s6, s8 1088; GCN-NEXT: s_mov_b32 s7, s9 1089; GCN-NEXT: s_mov_b32 m0, s18 1090; GCN-NEXT: s_mov_b32 s8, s10 1091; GCN-NEXT: s_mov_b32 s9, s11 1092; GCN-NEXT: s_mov_b32 s10, s12 1093; GCN-NEXT: s_mov_b32 s11, s13 1094; GCN-NEXT: s_mov_b32 s12, s14 1095; GCN-NEXT: s_mov_b32 s13, s15 1096; GCN-NEXT: s_mov_b32 s14, s16 1097; GCN-NEXT: s_mov_b32 s15, s17 1098; GCN-NEXT: s_movrels_b64 s[0:1], s[6:7] 1099; GCN-NEXT: ; return to shader part epilog 1100; 1101; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offset3: 1102; GFX10PLUS: ; %bb.0: ; %entry 1103; 
GFX10PLUS-NEXT: s_mov_b32 s0, s2 1104; GFX10PLUS-NEXT: s_mov_b32 s1, s3 1105; GFX10PLUS-NEXT: s_mov_b32 s2, s4 1106; GFX10PLUS-NEXT: s_mov_b32 s3, s5 1107; GFX10PLUS-NEXT: s_mov_b32 s4, s6 1108; GFX10PLUS-NEXT: s_mov_b32 s5, s7 1109; GFX10PLUS-NEXT: s_mov_b32 s6, s8 1110; GFX10PLUS-NEXT: s_mov_b32 s7, s9 1111; GFX10PLUS-NEXT: s_mov_b32 m0, s18 1112; GFX10PLUS-NEXT: s_mov_b32 s8, s10 1113; GFX10PLUS-NEXT: s_mov_b32 s9, s11 1114; GFX10PLUS-NEXT: s_mov_b32 s10, s12 1115; GFX10PLUS-NEXT: s_mov_b32 s11, s13 1116; GFX10PLUS-NEXT: s_mov_b32 s12, s14 1117; GFX10PLUS-NEXT: s_mov_b32 s13, s15 1118; GFX10PLUS-NEXT: s_mov_b32 s14, s16 1119; GFX10PLUS-NEXT: s_mov_b32 s15, s17 1120; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[6:7] 1121; GFX10PLUS-NEXT: ; return to shader part epilog 1122entry: 1123 %add = add i32 %sel, 3 1124 %ext = extractelement <8 x double> %vec, i32 %add 1125 ret double %ext 1126} 1127; SGPR <8 x double> vector with SGPR index %sel + 4: the checks contain no add; m0 receives the raw index and s_movrels_b64 reads starting at s[8:9] (four 64-bit elements past s[0:1]). 1128define amdgpu_ps double @dyn_extract_v8f64_s_s_offset4(<8 x double> inreg %vec, i32 inreg %sel) { 1129; GCN-LABEL: dyn_extract_v8f64_s_s_offset4: 1130; GCN: ; %bb.0: ; %entry 1131; GCN-NEXT: s_mov_b32 s0, s2 1132; GCN-NEXT: s_mov_b32 s1, s3 1133; GCN-NEXT: s_mov_b32 s2, s4 1134; GCN-NEXT: s_mov_b32 s3, s5 1135; GCN-NEXT: s_mov_b32 s4, s6 1136; GCN-NEXT: s_mov_b32 s5, s7 1137; GCN-NEXT: s_mov_b32 s6, s8 1138; GCN-NEXT: s_mov_b32 s7, s9 1139; GCN-NEXT: s_mov_b32 s8, s10 1140; GCN-NEXT: s_mov_b32 s9, s11 1141; GCN-NEXT: s_mov_b32 m0, s18 1142; GCN-NEXT: s_mov_b32 s10, s12 1143; GCN-NEXT: s_mov_b32 s11, s13 1144; GCN-NEXT: s_mov_b32 s12, s14 1145; GCN-NEXT: s_mov_b32 s13, s15 1146; GCN-NEXT: s_mov_b32 s14, s16 1147; GCN-NEXT: s_mov_b32 s15, s17 1148; GCN-NEXT: s_movrels_b64 s[0:1], s[8:9] 1149; GCN-NEXT: ; return to shader part epilog 1150; 1151; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offset4: 1152; GFX10PLUS: ; %bb.0: ; %entry 1153; GFX10PLUS-NEXT: s_mov_b32 s0, s2 1154; GFX10PLUS-NEXT: s_mov_b32 s1, s3 1155; GFX10PLUS-NEXT: s_mov_b32 s2, s4 1156; GFX10PLUS-NEXT: s_mov_b32 s3, s5 
1157; GFX10PLUS-NEXT: s_mov_b32 s4, s6 1158; GFX10PLUS-NEXT: s_mov_b32 s5, s7 1159; GFX10PLUS-NEXT: s_mov_b32 s6, s8 1160; GFX10PLUS-NEXT: s_mov_b32 s7, s9 1161; GFX10PLUS-NEXT: s_mov_b32 s8, s10 1162; GFX10PLUS-NEXT: s_mov_b32 s9, s11 1163; GFX10PLUS-NEXT: s_mov_b32 m0, s18 1164; GFX10PLUS-NEXT: s_mov_b32 s10, s12 1165; GFX10PLUS-NEXT: s_mov_b32 s11, s13 1166; GFX10PLUS-NEXT: s_mov_b32 s12, s14 1167; GFX10PLUS-NEXT: s_mov_b32 s13, s15 1168; GFX10PLUS-NEXT: s_mov_b32 s14, s16 1169; GFX10PLUS-NEXT: s_mov_b32 s15, s17 1170; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[8:9] 1171; GFX10PLUS-NEXT: ; return to shader part epilog 1172entry: 1173 %add = add i32 %sel, 4 1174 %ext = extractelement <8 x double> %vec, i32 %add 1175 ret double %ext 1176} 1177; SGPR <8 x double> vector with SGPR index %sel + 5: the checks contain no add; m0 receives the raw index and s_movrels_b64 reads starting at s[10:11] (five 64-bit elements past s[0:1]). 1178define amdgpu_ps double @dyn_extract_v8f64_s_s_offset5(<8 x double> inreg %vec, i32 inreg %sel) { 1179; GCN-LABEL: dyn_extract_v8f64_s_s_offset5: 1180; GCN: ; %bb.0: ; %entry 1181; GCN-NEXT: s_mov_b32 s0, s2 1182; GCN-NEXT: s_mov_b32 s1, s3 1183; GCN-NEXT: s_mov_b32 s2, s4 1184; GCN-NEXT: s_mov_b32 s3, s5 1185; GCN-NEXT: s_mov_b32 s4, s6 1186; GCN-NEXT: s_mov_b32 s5, s7 1187; GCN-NEXT: s_mov_b32 s6, s8 1188; GCN-NEXT: s_mov_b32 s7, s9 1189; GCN-NEXT: s_mov_b32 s8, s10 1190; GCN-NEXT: s_mov_b32 s9, s11 1191; GCN-NEXT: s_mov_b32 s10, s12 1192; GCN-NEXT: s_mov_b32 s11, s13 1193; GCN-NEXT: s_mov_b32 m0, s18 1194; GCN-NEXT: s_mov_b32 s12, s14 1195; GCN-NEXT: s_mov_b32 s13, s15 1196; GCN-NEXT: s_mov_b32 s14, s16 1197; GCN-NEXT: s_mov_b32 s15, s17 1198; GCN-NEXT: s_movrels_b64 s[0:1], s[10:11] 1199; GCN-NEXT: ; return to shader part epilog 1200; 1201; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offset5: 1202; GFX10PLUS: ; %bb.0: ; %entry 1203; GFX10PLUS-NEXT: s_mov_b32 s0, s2 1204; GFX10PLUS-NEXT: s_mov_b32 s1, s3 1205; GFX10PLUS-NEXT: s_mov_b32 s2, s4 1206; GFX10PLUS-NEXT: s_mov_b32 s3, s5 1207; GFX10PLUS-NEXT: s_mov_b32 s4, s6 1208; GFX10PLUS-NEXT: s_mov_b32 s5, s7 1209; GFX10PLUS-NEXT: s_mov_b32 s6, s8 1210; GFX10PLUS-NEXT: 
s_mov_b32 s7, s9 1211; GFX10PLUS-NEXT: s_mov_b32 s8, s10 1212; GFX10PLUS-NEXT: s_mov_b32 s9, s11 1213; GFX10PLUS-NEXT: s_mov_b32 s10, s12 1214; GFX10PLUS-NEXT: s_mov_b32 s11, s13 1215; GFX10PLUS-NEXT: s_mov_b32 m0, s18 1216; GFX10PLUS-NEXT: s_mov_b32 s12, s14 1217; GFX10PLUS-NEXT: s_mov_b32 s13, s15 1218; GFX10PLUS-NEXT: s_mov_b32 s14, s16 1219; GFX10PLUS-NEXT: s_mov_b32 s15, s17 1220; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[10:11] 1221; GFX10PLUS-NEXT: ; return to shader part epilog 1222entry: 1223 %add = add i32 %sel, 5 1224 %ext = extractelement <8 x double> %vec, i32 %add 1225 ret double %ext 1226} 1227; SGPR <8 x double> vector with SGPR index %sel + 6: the checks contain no add; m0 receives the raw index and s_movrels_b64 reads starting at s[12:13] (six 64-bit elements past s[0:1]). 1228define amdgpu_ps double @dyn_extract_v8f64_s_s_offset6(<8 x double> inreg %vec, i32 inreg %sel) { 1229; GCN-LABEL: dyn_extract_v8f64_s_s_offset6: 1230; GCN: ; %bb.0: ; %entry 1231; GCN-NEXT: s_mov_b32 s0, s2 1232; GCN-NEXT: s_mov_b32 s1, s3 1233; GCN-NEXT: s_mov_b32 s2, s4 1234; GCN-NEXT: s_mov_b32 s3, s5 1235; GCN-NEXT: s_mov_b32 s4, s6 1236; GCN-NEXT: s_mov_b32 s5, s7 1237; GCN-NEXT: s_mov_b32 s6, s8 1238; GCN-NEXT: s_mov_b32 s7, s9 1239; GCN-NEXT: s_mov_b32 s8, s10 1240; GCN-NEXT: s_mov_b32 s9, s11 1241; GCN-NEXT: s_mov_b32 s10, s12 1242; GCN-NEXT: s_mov_b32 s11, s13 1243; GCN-NEXT: s_mov_b32 s12, s14 1244; GCN-NEXT: s_mov_b32 s13, s15 1245; GCN-NEXT: s_mov_b32 m0, s18 1246; GCN-NEXT: s_mov_b32 s14, s16 1247; GCN-NEXT: s_mov_b32 s15, s17 1248; GCN-NEXT: s_movrels_b64 s[0:1], s[12:13] 1249; GCN-NEXT: ; return to shader part epilog 1250; 1251; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offset6: 1252; GFX10PLUS: ; %bb.0: ; %entry 1253; GFX10PLUS-NEXT: s_mov_b32 s0, s2 1254; GFX10PLUS-NEXT: s_mov_b32 s1, s3 1255; GFX10PLUS-NEXT: s_mov_b32 s2, s4 1256; GFX10PLUS-NEXT: s_mov_b32 s3, s5 1257; GFX10PLUS-NEXT: s_mov_b32 s4, s6 1258; GFX10PLUS-NEXT: s_mov_b32 s5, s7 1259; GFX10PLUS-NEXT: s_mov_b32 s6, s8 1260; GFX10PLUS-NEXT: s_mov_b32 s7, s9 1261; GFX10PLUS-NEXT: s_mov_b32 s8, s10 1262; GFX10PLUS-NEXT: s_mov_b32 s9, s11 1263; GFX10PLUS-NEXT: s_mov_b32 s10, s12 1264; 
GFX10PLUS-NEXT: s_mov_b32 s11, s13 1265; GFX10PLUS-NEXT: s_mov_b32 s12, s14 1266; GFX10PLUS-NEXT: s_mov_b32 s13, s15 1267; GFX10PLUS-NEXT: s_mov_b32 m0, s18 1268; GFX10PLUS-NEXT: s_mov_b32 s14, s16 1269; GFX10PLUS-NEXT: s_mov_b32 s15, s17 1270; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[12:13] 1271; GFX10PLUS-NEXT: ; return to shader part epilog 1272entry: 1273 %add = add i32 %sel, 6 1274 %ext = extractelement <8 x double> %vec, i32 %add 1275 ret double %ext 1276} 1277; SGPR <8 x double> vector with SGPR index %sel + 7: m0 receives the raw index and s_movrels_b64 reads starting at s[14:15]. Here the m0 write lands directly before the s_movrels_b64, and only the GPRIDX checks expect an s_nop 0 between the two (NOTE(review): presumably a gfx900 hazard wait state - confirm). 1278define amdgpu_ps double @dyn_extract_v8f64_s_s_offset7(<8 x double> inreg %vec, i32 inreg %sel) { 1279; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset7: 1280; GPRIDX: ; %bb.0: ; %entry 1281; GPRIDX-NEXT: s_mov_b32 s0, s2 1282; GPRIDX-NEXT: s_mov_b32 s1, s3 1283; GPRIDX-NEXT: s_mov_b32 s2, s4 1284; GPRIDX-NEXT: s_mov_b32 s3, s5 1285; GPRIDX-NEXT: s_mov_b32 s4, s6 1286; GPRIDX-NEXT: s_mov_b32 s5, s7 1287; GPRIDX-NEXT: s_mov_b32 s6, s8 1288; GPRIDX-NEXT: s_mov_b32 s7, s9 1289; GPRIDX-NEXT: s_mov_b32 s8, s10 1290; GPRIDX-NEXT: s_mov_b32 s9, s11 1291; GPRIDX-NEXT: s_mov_b32 s10, s12 1292; GPRIDX-NEXT: s_mov_b32 s11, s13 1293; GPRIDX-NEXT: s_mov_b32 s12, s14 1294; GPRIDX-NEXT: s_mov_b32 s13, s15 1295; GPRIDX-NEXT: s_mov_b32 s14, s16 1296; GPRIDX-NEXT: s_mov_b32 s15, s17 1297; GPRIDX-NEXT: s_mov_b32 m0, s18 1298; GPRIDX-NEXT: s_nop 0 1299; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[14:15] 1300; GPRIDX-NEXT: ; return to shader part epilog 1301; 1302; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset7: 1303; MOVREL: ; %bb.0: ; %entry 1304; MOVREL-NEXT: s_mov_b32 s0, s2 1305; MOVREL-NEXT: s_mov_b32 s1, s3 1306; MOVREL-NEXT: s_mov_b32 s2, s4 1307; MOVREL-NEXT: s_mov_b32 s3, s5 1308; MOVREL-NEXT: s_mov_b32 s4, s6 1309; MOVREL-NEXT: s_mov_b32 s5, s7 1310; MOVREL-NEXT: s_mov_b32 s6, s8 1311; MOVREL-NEXT: s_mov_b32 s7, s9 1312; MOVREL-NEXT: s_mov_b32 s8, s10 1313; MOVREL-NEXT: s_mov_b32 s9, s11 1314; MOVREL-NEXT: s_mov_b32 s10, s12 1315; MOVREL-NEXT: s_mov_b32 s11, s13 1316; MOVREL-NEXT: s_mov_b32 s12, s14 1317; MOVREL-NEXT: 
s_mov_b32 s13, s15 1318; MOVREL-NEXT: s_mov_b32 s14, s16 1319; MOVREL-NEXT: s_mov_b32 s15, s17 1320; MOVREL-NEXT: s_mov_b32 m0, s18 1321; MOVREL-NEXT: s_movrels_b64 s[0:1], s[14:15] 1322; MOVREL-NEXT: ; return to shader part epilog 1323; 1324; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offset7: 1325; GFX10PLUS: ; %bb.0: ; %entry 1326; GFX10PLUS-NEXT: s_mov_b32 s0, s2 1327; GFX10PLUS-NEXT: s_mov_b32 s1, s3 1328; GFX10PLUS-NEXT: s_mov_b32 s2, s4 1329; GFX10PLUS-NEXT: s_mov_b32 s3, s5 1330; GFX10PLUS-NEXT: s_mov_b32 s4, s6 1331; GFX10PLUS-NEXT: s_mov_b32 s5, s7 1332; GFX10PLUS-NEXT: s_mov_b32 s6, s8 1333; GFX10PLUS-NEXT: s_mov_b32 s7, s9 1334; GFX10PLUS-NEXT: s_mov_b32 s8, s10 1335; GFX10PLUS-NEXT: s_mov_b32 s9, s11 1336; GFX10PLUS-NEXT: s_mov_b32 s10, s12 1337; GFX10PLUS-NEXT: s_mov_b32 s11, s13 1338; GFX10PLUS-NEXT: s_mov_b32 s12, s14 1339; GFX10PLUS-NEXT: s_mov_b32 s13, s15 1340; GFX10PLUS-NEXT: s_mov_b32 s14, s16 1341; GFX10PLUS-NEXT: s_mov_b32 s15, s17 1342; GFX10PLUS-NEXT: s_mov_b32 m0, s18 1343; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[14:15] 1344; GFX10PLUS-NEXT: ; return to shader part epilog 1345entry: 1346 %add = add i32 %sel, 7 1347 %ext = extractelement <8 x double> %vec, i32 %add 1348 ret double %ext 1349} 1350; SGPR <8 x double> vector with SGPR index %sel - 1: unlike the positive-offset tests the checks expect the add to stay, as s_add_i32 m0, s18, -1, with s_movrels_b64 reading from s[0:1]. 1351define amdgpu_ps double @dyn_extract_v8f64_s_s_offsetm1(<8 x double> inreg %vec, i32 inreg %sel) { 1352; GCN-LABEL: dyn_extract_v8f64_s_s_offsetm1: 1353; GCN: ; %bb.0: ; %entry 1354; GCN-NEXT: s_mov_b32 s0, s2 1355; GCN-NEXT: s_mov_b32 s1, s3 1356; GCN-NEXT: s_add_i32 m0, s18, -1 1357; GCN-NEXT: s_mov_b32 s2, s4 1358; GCN-NEXT: s_mov_b32 s3, s5 1359; GCN-NEXT: s_mov_b32 s4, s6 1360; GCN-NEXT: s_mov_b32 s5, s7 1361; GCN-NEXT: s_mov_b32 s6, s8 1362; GCN-NEXT: s_mov_b32 s7, s9 1363; GCN-NEXT: s_mov_b32 s8, s10 1364; GCN-NEXT: s_mov_b32 s9, s11 1365; GCN-NEXT: s_mov_b32 s10, s12 1366; GCN-NEXT: s_mov_b32 s11, s13 1367; GCN-NEXT: s_mov_b32 s12, s14 1368; GCN-NEXT: s_mov_b32 s13, s15 1369; GCN-NEXT: s_mov_b32 s14, s16 1370; GCN-NEXT: s_mov_b32 s15, s17 
1371; GCN-NEXT: s_movrels_b64 s[0:1], s[0:1] 1372; GCN-NEXT: ; return to shader part epilog 1373; 1374; GFX10PLUS-LABEL: dyn_extract_v8f64_s_s_offsetm1: 1375; GFX10PLUS: ; %bb.0: ; %entry 1376; GFX10PLUS-NEXT: s_mov_b32 s0, s2 1377; GFX10PLUS-NEXT: s_mov_b32 s1, s3 1378; GFX10PLUS-NEXT: s_add_i32 m0, s18, -1 1379; GFX10PLUS-NEXT: s_mov_b32 s2, s4 1380; GFX10PLUS-NEXT: s_mov_b32 s3, s5 1381; GFX10PLUS-NEXT: s_mov_b32 s4, s6 1382; GFX10PLUS-NEXT: s_mov_b32 s5, s7 1383; GFX10PLUS-NEXT: s_mov_b32 s6, s8 1384; GFX10PLUS-NEXT: s_mov_b32 s7, s9 1385; GFX10PLUS-NEXT: s_mov_b32 s8, s10 1386; GFX10PLUS-NEXT: s_mov_b32 s9, s11 1387; GFX10PLUS-NEXT: s_mov_b32 s10, s12 1388; GFX10PLUS-NEXT: s_mov_b32 s11, s13 1389; GFX10PLUS-NEXT: s_mov_b32 s12, s14 1390; GFX10PLUS-NEXT: s_mov_b32 s13, s15 1391; GFX10PLUS-NEXT: s_mov_b32 s14, s16 1392; GFX10PLUS-NEXT: s_mov_b32 s15, s17 1393; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[0:1] 1394; GFX10PLUS-NEXT: ; return to shader part epilog 1395entry: 1396 %add = add i32 %sel, -1 1397 %ext = extractelement <8 x double> %vec, i32 %add 1398 ret double %ext 1399} 1400; VGPR <8 x double> vector with VGPR index %sel + 3: expects a vector add of 3 on the index, then a compare/select chain that selects both 32-bit halves of each element; the GFX11 checks pair the two selects as v_dual_cndmask_b32. 1401define double @dyn_extract_v8f64_v_v_offset3(<8 x double> %vec, i32 %sel) { 1402; GPRIDX-LABEL: dyn_extract_v8f64_v_v_offset3: 1403; GPRIDX: ; %bb.0: ; %entry 1404; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1405; GPRIDX-NEXT: v_add_u32_e32 v16, 3, v16 1406; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v16 1407; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 1408; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 1409; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v16 1410; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 1411; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 1412; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v16 1413; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 1414; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 1415; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v16 1416; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc 1417; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc 1418; 
GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v16 1419; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc 1420; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc 1421; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v16 1422; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc 1423; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc 1424; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v16 1425; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc 1426; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc 1427; GPRIDX-NEXT: s_setpc_b64 s[30:31] 1428; 1429; MOVREL-LABEL: dyn_extract_v8f64_v_v_offset3: 1430; MOVREL: ; %bb.0: ; %entry 1431; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1432; MOVREL-NEXT: v_add_u32_e32 v16, vcc, 3, v16 1433; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 1, v16 1434; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 1435; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 1436; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 2, v16 1437; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 1438; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 1439; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 3, v16 1440; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 1441; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 1442; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 4, v16 1443; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc 1444; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc 1445; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 5, v16 1446; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc 1447; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc 1448; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 6, v16 1449; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc 1450; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc 1451; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 7, v16 1452; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc 1453; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc 1454; MOVREL-NEXT: s_setpc_b64 s[30:31] 1455; 1456; GFX10-LABEL: dyn_extract_v8f64_v_v_offset3: 1457; GFX10: ; %bb.0: ; %entry 1458; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1459; GFX10-NEXT: 
v_add_nc_u32_e32 v16, 3, v16 1460; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16 1461; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 1462; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo 1463; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v16 1464; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo 1465; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo 1466; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v16 1467; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo 1468; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc_lo 1469; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v16 1470; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo 1471; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc_lo 1472; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v16 1473; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo 1474; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc_lo 1475; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v16 1476; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo 1477; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc_lo 1478; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v16 1479; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo 1480; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc_lo 1481; GFX10-NEXT: s_setpc_b64 s[30:31] 1482; 1483; GFX11-LABEL: dyn_extract_v8f64_v_v_offset3: 1484; GFX11: ; %bb.0: ; %entry 1485; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1486; GFX11-NEXT: v_add_nc_u32_e32 v16, 3, v16 1487; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16 1488; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v3 :: v_dual_cndmask_b32 v0, v0, v2 1489; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v16 1490; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v5 :: v_dual_cndmask_b32 v0, v0, v4 1491; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v16 1492; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v7 :: v_dual_cndmask_b32 v0, v0, v6 1493; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v16 1494; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v9 :: v_dual_cndmask_b32 v0, v0, v8 1495; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v16 1496; GFX11-NEXT: v_dual_cndmask_b32 
v1, v1, v11 :: v_dual_cndmask_b32 v0, v0, v10 1497; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v16 1498; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v13 :: v_dual_cndmask_b32 v0, v0, v12 1499; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v16 1500; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v15 :: v_dual_cndmask_b32 v0, v0, v14 1501; GFX11-NEXT: s_setpc_b64 s[30:31] 1502entry: 1503 %add = add i32 %sel, 3 1504 %ext = extractelement <8 x double> %vec, i32 %add 1505 ret double %ext 1506} 1507; VGPR vector of eight addrspace(3) pointers with a VGPR index: expects a v_cmp_eq_u32/v_cndmask_b32 chain with one 32-bit select per element index 1..7. 1508define ptr addrspace(3) @dyn_extract_v8p3_v_v(<8 x ptr addrspace(3)> %vec, i32 %idx) { 1509; GCN-LABEL: dyn_extract_v8p3_v_v: 1510; GCN: ; %bb.0: ; %entry 1511; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1512; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8 1513; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1514; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v8 1515; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 1516; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v8 1517; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 1518; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v8 1519; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 1520; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v8 1521; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc 1522; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v8 1523; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 1524; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v8 1525; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc 1526; GCN-NEXT: s_setpc_b64 s[30:31] 1527; 1528; GFX10PLUS-LABEL: dyn_extract_v8p3_v_v: 1529; GFX10PLUS: ; %bb.0: ; %entry 1530; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1531; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v8 1532; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 1533; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v8 1534; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 1535; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v8 1536; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo 1537; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v8 1538; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo 1539; 
GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v8 1540; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo 1541; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v8 1542; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo 1543; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v8 1544; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo 1545; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 1546entry: 1547 %ext = extractelement <8 x ptr addrspace(3)> %vec, i32 %idx 1548 ret ptr addrspace(3) %ext 1549} 1550; SGPR vector of eight addrspace(3) pointers with an SGPR index; the result is stored through an undef addrspace(3) pointer. The GPRIDX checks use an s_cmp_eq_u32/s_cselect_b32 chain, the other prefixes use m0 plus s_movrels_b32; the MOVREL checks also set m0 to -1 before ds_write_b32, and the GFX11 checks spell the store ds_store_b32. 1551define amdgpu_ps void @dyn_extract_v8p3_s_s(<8 x ptr addrspace(3)> inreg %vec, i32 inreg %idx) { 1552; GPRIDX-LABEL: dyn_extract_v8p3_s_s: 1553; GPRIDX: ; %bb.0: ; %entry 1554; GPRIDX-NEXT: s_cmp_eq_u32 s10, 1 1555; GPRIDX-NEXT: s_cselect_b32 s0, s3, s2 1556; GPRIDX-NEXT: s_cmp_eq_u32 s10, 2 1557; GPRIDX-NEXT: s_cselect_b32 s0, s4, s0 1558; GPRIDX-NEXT: s_cmp_eq_u32 s10, 3 1559; GPRIDX-NEXT: s_cselect_b32 s0, s5, s0 1560; GPRIDX-NEXT: s_cmp_eq_u32 s10, 4 1561; GPRIDX-NEXT: s_cselect_b32 s0, s6, s0 1562; GPRIDX-NEXT: s_cmp_eq_u32 s10, 5 1563; GPRIDX-NEXT: s_cselect_b32 s0, s7, s0 1564; GPRIDX-NEXT: s_cmp_eq_u32 s10, 6 1565; GPRIDX-NEXT: s_cselect_b32 s0, s8, s0 1566; GPRIDX-NEXT: s_cmp_eq_u32 s10, 7 1567; GPRIDX-NEXT: s_cselect_b32 s0, s9, s0 1568; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 1569; GPRIDX-NEXT: ds_write_b32 v0, v0 1570; GPRIDX-NEXT: s_endpgm 1571; 1572; MOVREL-LABEL: dyn_extract_v8p3_s_s: 1573; MOVREL: ; %bb.0: ; %entry 1574; MOVREL-NEXT: s_mov_b32 s0, s2 1575; MOVREL-NEXT: s_mov_b32 m0, s10 1576; MOVREL-NEXT: s_mov_b32 s1, s3 1577; MOVREL-NEXT: s_mov_b32 s2, s4 1578; MOVREL-NEXT: s_mov_b32 s3, s5 1579; MOVREL-NEXT: s_mov_b32 s4, s6 1580; MOVREL-NEXT: s_mov_b32 s5, s7 1581; MOVREL-NEXT: s_mov_b32 s6, s8 1582; MOVREL-NEXT: s_mov_b32 s7, s9 1583; MOVREL-NEXT: s_movrels_b32 s0, s0 1584; MOVREL-NEXT: v_mov_b32_e32 v0, s0 1585; MOVREL-NEXT: s_mov_b32 m0, -1 1586; MOVREL-NEXT: ds_write_b32 v0, v0 1587; MOVREL-NEXT: s_endpgm 1588; 1589; GFX10-LABEL: dyn_extract_v8p3_s_s: 1590; 
GFX10: ; %bb.0: ; %entry 1591; GFX10-NEXT: s_mov_b32 s0, s2 1592; GFX10-NEXT: s_mov_b32 m0, s10 1593; GFX10-NEXT: s_mov_b32 s1, s3 1594; GFX10-NEXT: s_mov_b32 s2, s4 1595; GFX10-NEXT: s_mov_b32 s3, s5 1596; GFX10-NEXT: s_mov_b32 s4, s6 1597; GFX10-NEXT: s_mov_b32 s5, s7 1598; GFX10-NEXT: s_mov_b32 s6, s8 1599; GFX10-NEXT: s_mov_b32 s7, s9 1600; GFX10-NEXT: s_movrels_b32 s0, s0 1601; GFX10-NEXT: v_mov_b32_e32 v0, s0 1602; GFX10-NEXT: ds_write_b32 v0, v0 1603; GFX10-NEXT: s_endpgm 1604; 1605; GFX11-LABEL: dyn_extract_v8p3_s_s: 1606; GFX11: ; %bb.0: ; %entry 1607; GFX11-NEXT: s_mov_b32 s0, s2 1608; GFX11-NEXT: s_mov_b32 m0, s10 1609; GFX11-NEXT: s_mov_b32 s1, s3 1610; GFX11-NEXT: s_mov_b32 s2, s4 1611; GFX11-NEXT: s_mov_b32 s3, s5 1612; GFX11-NEXT: s_mov_b32 s4, s6 1613; GFX11-NEXT: s_mov_b32 s5, s7 1614; GFX11-NEXT: s_mov_b32 s6, s8 1615; GFX11-NEXT: s_mov_b32 s7, s9 1616; GFX11-NEXT: s_movrels_b32 s0, s0 1617; GFX11-NEXT: v_mov_b32_e32 v0, s0 1618; GFX11-NEXT: ds_store_b32 v0, v0 1619; GFX11-NEXT: s_endpgm 1620entry: 1621 %ext = extractelement <8 x ptr addrspace(3)> %vec, i32 %idx 1622 store ptr addrspace(3) %ext, ptr addrspace(3) undef 1623 ret void 1624} 1625 1626define ptr addrspace(1) @dyn_extract_v8p1_v_v(<8 x ptr addrspace(1)> %vec, i32 %idx) { 1627; GCN-LABEL: dyn_extract_v8p1_v_v: 1628; GCN: ; %bb.0: ; %entry 1629; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1630; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v16 1631; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 1632; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 1633; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v16 1634; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 1635; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 1636; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v16 1637; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 1638; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 1639; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v16 1640; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc 1641; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc 1642; GCN-NEXT: 
v_cmp_eq_u32_e32 vcc, 5, v16 1643; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc 1644; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc 1645; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v16 1646; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc 1647; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc 1648; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v16 1649; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc 1650; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc 1651; GCN-NEXT: s_setpc_b64 s[30:31] 1652; 1653; GFX10-LABEL: dyn_extract_v8p1_v_v: 1654; GFX10: ; %bb.0: ; %entry 1655; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1656; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16 1657; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 1658; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo 1659; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v16 1660; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo 1661; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo 1662; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v16 1663; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo 1664; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc_lo 1665; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v16 1666; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo 1667; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc_lo 1668; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v16 1669; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo 1670; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc_lo 1671; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v16 1672; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo 1673; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc_lo 1674; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v16 1675; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo 1676; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc_lo 1677; GFX10-NEXT: s_setpc_b64 s[30:31] 1678; 1679; GFX11-LABEL: dyn_extract_v8p1_v_v: 1680; GFX11: ; %bb.0: ; %entry 1681; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1682; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16 1683; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v2 :: 
v_dual_cndmask_b32 v1, v1, v3 1684; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v16 1685; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v4 :: v_dual_cndmask_b32 v1, v1, v5 1686; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v16 1687; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v6 :: v_dual_cndmask_b32 v1, v1, v7 1688; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v16 1689; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v8 :: v_dual_cndmask_b32 v1, v1, v9 1690; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v16 1691; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v10 :: v_dual_cndmask_b32 v1, v1, v11 1692; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v16 1693; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v12 :: v_dual_cndmask_b32 v1, v1, v13 1694; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v16 1695; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v14 :: v_dual_cndmask_b32 v1, v1, v15 1696; GFX11-NEXT: s_setpc_b64 s[30:31] 1697entry: 1698 %ext = extractelement <8 x ptr addrspace(1)> %vec, i32 %idx 1699 ret ptr addrspace(1) %ext 1700} 1701 1702define amdgpu_ps void @dyn_extract_v8p1_s_s(<8 x ptr addrspace(1)> inreg %vec, i32 inreg %idx) { 1703; GPRIDX-LABEL: dyn_extract_v8p1_s_s: 1704; GPRIDX: ; %bb.0: ; %entry 1705; GPRIDX-NEXT: s_mov_b32 s0, s2 1706; GPRIDX-NEXT: s_mov_b32 s1, s3 1707; GPRIDX-NEXT: s_mov_b32 m0, s18 1708; GPRIDX-NEXT: s_mov_b32 s2, s4 1709; GPRIDX-NEXT: s_mov_b32 s3, s5 1710; GPRIDX-NEXT: s_mov_b32 s4, s6 1711; GPRIDX-NEXT: s_mov_b32 s5, s7 1712; GPRIDX-NEXT: s_mov_b32 s6, s8 1713; GPRIDX-NEXT: s_mov_b32 s7, s9 1714; GPRIDX-NEXT: s_mov_b32 s8, s10 1715; GPRIDX-NEXT: s_mov_b32 s9, s11 1716; GPRIDX-NEXT: s_mov_b32 s10, s12 1717; GPRIDX-NEXT: s_mov_b32 s11, s13 1718; GPRIDX-NEXT: s_mov_b32 s12, s14 1719; GPRIDX-NEXT: s_mov_b32 s13, s15 1720; GPRIDX-NEXT: s_mov_b32 s14, s16 1721; GPRIDX-NEXT: s_mov_b32 s15, s17 1722; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1] 1723; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 1724; GPRIDX-NEXT: v_mov_b32_e32 v1, s1 1725; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[0:1], off 1726; GPRIDX-NEXT: s_endpgm 
1727; 1728; MOVREL-LABEL: dyn_extract_v8p1_s_s: 1729; MOVREL: ; %bb.0: ; %entry 1730; MOVREL-NEXT: s_mov_b32 s0, s2 1731; MOVREL-NEXT: s_mov_b32 s1, s3 1732; MOVREL-NEXT: s_mov_b32 m0, s18 1733; MOVREL-NEXT: s_mov_b32 s2, s4 1734; MOVREL-NEXT: s_mov_b32 s3, s5 1735; MOVREL-NEXT: s_mov_b32 s4, s6 1736; MOVREL-NEXT: s_mov_b32 s5, s7 1737; MOVREL-NEXT: s_mov_b32 s6, s8 1738; MOVREL-NEXT: s_mov_b32 s7, s9 1739; MOVREL-NEXT: s_mov_b32 s8, s10 1740; MOVREL-NEXT: s_mov_b32 s9, s11 1741; MOVREL-NEXT: s_mov_b32 s10, s12 1742; MOVREL-NEXT: s_mov_b32 s11, s13 1743; MOVREL-NEXT: s_mov_b32 s12, s14 1744; MOVREL-NEXT: s_mov_b32 s13, s15 1745; MOVREL-NEXT: s_mov_b32 s14, s16 1746; MOVREL-NEXT: s_mov_b32 s15, s17 1747; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1] 1748; MOVREL-NEXT: v_mov_b32_e32 v0, s0 1749; MOVREL-NEXT: v_mov_b32_e32 v1, s1 1750; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[0:1] 1751; MOVREL-NEXT: s_endpgm 1752; 1753; GFX10-LABEL: dyn_extract_v8p1_s_s: 1754; GFX10: ; %bb.0: ; %entry 1755; GFX10-NEXT: s_mov_b32 s0, s2 1756; GFX10-NEXT: s_mov_b32 s1, s3 1757; GFX10-NEXT: s_mov_b32 m0, s18 1758; GFX10-NEXT: s_mov_b32 s2, s4 1759; GFX10-NEXT: s_mov_b32 s3, s5 1760; GFX10-NEXT: s_mov_b32 s4, s6 1761; GFX10-NEXT: s_mov_b32 s5, s7 1762; GFX10-NEXT: s_mov_b32 s6, s8 1763; GFX10-NEXT: s_mov_b32 s7, s9 1764; GFX10-NEXT: s_mov_b32 s8, s10 1765; GFX10-NEXT: s_mov_b32 s9, s11 1766; GFX10-NEXT: s_mov_b32 s10, s12 1767; GFX10-NEXT: s_mov_b32 s11, s13 1768; GFX10-NEXT: s_mov_b32 s12, s14 1769; GFX10-NEXT: s_mov_b32 s13, s15 1770; GFX10-NEXT: s_mov_b32 s14, s16 1771; GFX10-NEXT: s_mov_b32 s15, s17 1772; GFX10-NEXT: s_movrels_b64 s[0:1], s[0:1] 1773; GFX10-NEXT: v_mov_b32_e32 v0, s0 1774; GFX10-NEXT: v_mov_b32_e32 v1, s1 1775; GFX10-NEXT: global_store_dwordx2 v[0:1], v[0:1], off 1776; GFX10-NEXT: s_endpgm 1777; 1778; GFX11-LABEL: dyn_extract_v8p1_s_s: 1779; GFX11: ; %bb.0: ; %entry 1780; GFX11-NEXT: s_mov_b32 s0, s2 1781; GFX11-NEXT: s_mov_b32 s1, s3 1782; GFX11-NEXT: s_mov_b32 m0, s18 
1783; GFX11-NEXT: s_mov_b32 s2, s4 1784; GFX11-NEXT: s_mov_b32 s3, s5 1785; GFX11-NEXT: s_mov_b32 s4, s6 1786; GFX11-NEXT: s_mov_b32 s5, s7 1787; GFX11-NEXT: s_mov_b32 s6, s8 1788; GFX11-NEXT: s_mov_b32 s7, s9 1789; GFX11-NEXT: s_mov_b32 s8, s10 1790; GFX11-NEXT: s_mov_b32 s9, s11 1791; GFX11-NEXT: s_mov_b32 s10, s12 1792; GFX11-NEXT: s_mov_b32 s11, s13 1793; GFX11-NEXT: s_mov_b32 s12, s14 1794; GFX11-NEXT: s_mov_b32 s13, s15 1795; GFX11-NEXT: s_mov_b32 s14, s16 1796; GFX11-NEXT: s_mov_b32 s15, s17 1797; GFX11-NEXT: s_movrels_b64 s[0:1], s[0:1] 1798; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 1799; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off 1800; GFX11-NEXT: s_endpgm 1801entry: 1802 %ext = extractelement <8 x ptr addrspace(1)> %vec, i32 %idx 1803 store ptr addrspace(1) %ext, ptr addrspace(1) undef 1804 ret void 1805} 1806 1807define amdgpu_ps float @dyn_extract_v16f32_v_s(<16 x float> %vec, i32 inreg %sel) { 1808; GPRIDX-LABEL: dyn_extract_v16f32_v_s: 1809; GPRIDX: ; %bb.0: ; %entry 1810; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0) 1811; GPRIDX-NEXT: v_mov_b32_e32 v0, v0 1812; GPRIDX-NEXT: s_set_gpr_idx_off 1813; GPRIDX-NEXT: ; return to shader part epilog 1814; 1815; MOVREL-LABEL: dyn_extract_v16f32_v_s: 1816; MOVREL: ; %bb.0: ; %entry 1817; MOVREL-NEXT: s_mov_b32 m0, s2 1818; MOVREL-NEXT: v_movrels_b32_e32 v0, v0 1819; MOVREL-NEXT: ; return to shader part epilog 1820; 1821; GFX10PLUS-LABEL: dyn_extract_v16f32_v_s: 1822; GFX10PLUS: ; %bb.0: ; %entry 1823; GFX10PLUS-NEXT: s_mov_b32 m0, s2 1824; GFX10PLUS-NEXT: v_movrels_b32_e32 v0, v0 1825; GFX10PLUS-NEXT: ; return to shader part epilog 1826entry: 1827 %ext = extractelement <16 x float> %vec, i32 %sel 1828 ret float %ext 1829} 1830 1831define amdgpu_ps float @dyn_extract_v32f32_v_s(<32 x float> %vec, i32 inreg %sel) { 1832; GPRIDX-LABEL: dyn_extract_v32f32_v_s: 1833; GPRIDX: ; %bb.0: ; %entry 1834; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0) 1835; GPRIDX-NEXT: v_mov_b32_e32 v0, v0 
1836; GPRIDX-NEXT: s_set_gpr_idx_off 1837; GPRIDX-NEXT: ; return to shader part epilog 1838; 1839; MOVREL-LABEL: dyn_extract_v32f32_v_s: 1840; MOVREL: ; %bb.0: ; %entry 1841; MOVREL-NEXT: s_mov_b32 m0, s2 1842; MOVREL-NEXT: v_movrels_b32_e32 v0, v0 1843; MOVREL-NEXT: ; return to shader part epilog 1844; 1845; GFX10PLUS-LABEL: dyn_extract_v32f32_v_s: 1846; GFX10PLUS: ; %bb.0: ; %entry 1847; GFX10PLUS-NEXT: s_mov_b32 m0, s2 1848; GFX10PLUS-NEXT: v_movrels_b32_e32 v0, v0 1849; GFX10PLUS-NEXT: ; return to shader part epilog 1850entry: 1851 %ext = extractelement <32 x float> %vec, i32 %sel 1852 ret float %ext 1853} 1854 1855define amdgpu_ps double @dyn_extract_v16f64_v_s(<16 x double> %vec, i32 inreg %sel) { 1856; GPRIDX-LABEL: dyn_extract_v16f64_v_s: 1857; GPRIDX: ; %bb.0: ; %entry 1858; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1 1859; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(SRC0) 1860; GPRIDX-NEXT: v_mov_b32_e32 v32, v0 1861; GPRIDX-NEXT: v_mov_b32_e32 v0, v1 1862; GPRIDX-NEXT: s_set_gpr_idx_off 1863; GPRIDX-NEXT: v_readfirstlane_b32 s0, v32 1864; GPRIDX-NEXT: v_readfirstlane_b32 s1, v0 1865; GPRIDX-NEXT: ; return to shader part epilog 1866; 1867; MOVREL-LABEL: dyn_extract_v16f64_v_s: 1868; MOVREL: ; %bb.0: ; %entry 1869; MOVREL-NEXT: s_lshl_b32 m0, s2, 1 1870; MOVREL-NEXT: v_movrels_b32_e32 v32, v0 1871; MOVREL-NEXT: v_movrels_b32_e32 v0, v1 1872; MOVREL-NEXT: v_readfirstlane_b32 s0, v32 1873; MOVREL-NEXT: v_readfirstlane_b32 s1, v0 1874; MOVREL-NEXT: ; return to shader part epilog 1875; 1876; GFX10PLUS-LABEL: dyn_extract_v16f64_v_s: 1877; GFX10PLUS: ; %bb.0: ; %entry 1878; GFX10PLUS-NEXT: s_lshl_b32 m0, s2, 1 1879; GFX10PLUS-NEXT: v_movrels_b32_e32 v32, v0 1880; GFX10PLUS-NEXT: v_movrels_b32_e32 v0, v1 1881; GFX10PLUS-NEXT: v_readfirstlane_b32 s0, v32 1882; GFX10PLUS-NEXT: v_readfirstlane_b32 s1, v0 1883; GFX10PLUS-NEXT: ; return to shader part epilog 1884entry: 1885 %ext = extractelement <16 x double> %vec, i32 %sel 1886 ret double %ext 1887} 1888 1889define amdgpu_ps 
float @dyn_extract_v16f32_s_s(i32 inreg %sel) { 1890; GCN-LABEL: dyn_extract_v16f32_s_s: 1891; GCN: ; %bb.0: ; %entry 1892; GCN-NEXT: s_mov_b32 s4, 1.0 1893; GCN-NEXT: s_mov_b32 m0, s2 1894; GCN-NEXT: s_mov_b32 s19, 0x41800000 1895; GCN-NEXT: s_mov_b32 s18, 0x41700000 1896; GCN-NEXT: s_mov_b32 s17, 0x41600000 1897; GCN-NEXT: s_mov_b32 s16, 0x41500000 1898; GCN-NEXT: s_mov_b32 s15, 0x41400000 1899; GCN-NEXT: s_mov_b32 s14, 0x41300000 1900; GCN-NEXT: s_mov_b32 s13, 0x41200000 1901; GCN-NEXT: s_mov_b32 s12, 0x41100000 1902; GCN-NEXT: s_mov_b32 s11, 0x41000000 1903; GCN-NEXT: s_mov_b32 s10, 0x40e00000 1904; GCN-NEXT: s_mov_b32 s9, 0x40c00000 1905; GCN-NEXT: s_mov_b32 s8, 0x40a00000 1906; GCN-NEXT: s_mov_b32 s7, 4.0 1907; GCN-NEXT: s_mov_b32 s6, 0x40400000 1908; GCN-NEXT: s_mov_b32 s5, 2.0 1909; GCN-NEXT: s_movrels_b32 s0, s4 1910; GCN-NEXT: v_mov_b32_e32 v0, s0 1911; GCN-NEXT: ; return to shader part epilog 1912; 1913; GFX10PLUS-LABEL: dyn_extract_v16f32_s_s: 1914; GFX10PLUS: ; %bb.0: ; %entry 1915; GFX10PLUS-NEXT: s_mov_b32 s4, 1.0 1916; GFX10PLUS-NEXT: s_mov_b32 m0, s2 1917; GFX10PLUS-NEXT: s_mov_b32 s19, 0x41800000 1918; GFX10PLUS-NEXT: s_mov_b32 s18, 0x41700000 1919; GFX10PLUS-NEXT: s_mov_b32 s17, 0x41600000 1920; GFX10PLUS-NEXT: s_mov_b32 s16, 0x41500000 1921; GFX10PLUS-NEXT: s_mov_b32 s15, 0x41400000 1922; GFX10PLUS-NEXT: s_mov_b32 s14, 0x41300000 1923; GFX10PLUS-NEXT: s_mov_b32 s13, 0x41200000 1924; GFX10PLUS-NEXT: s_mov_b32 s12, 0x41100000 1925; GFX10PLUS-NEXT: s_mov_b32 s11, 0x41000000 1926; GFX10PLUS-NEXT: s_mov_b32 s10, 0x40e00000 1927; GFX10PLUS-NEXT: s_mov_b32 s9, 0x40c00000 1928; GFX10PLUS-NEXT: s_mov_b32 s8, 0x40a00000 1929; GFX10PLUS-NEXT: s_mov_b32 s7, 4.0 1930; GFX10PLUS-NEXT: s_mov_b32 s6, 0x40400000 1931; GFX10PLUS-NEXT: s_mov_b32 s5, 2.0 1932; GFX10PLUS-NEXT: s_movrels_b32 s0, s4 1933; GFX10PLUS-NEXT: v_mov_b32_e32 v0, s0 1934; GFX10PLUS-NEXT: ; return to shader part epilog 1935entry: 1936 %ext = extractelement <16 x float> <float 1.0, float 2.0, 
float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>, i32 %sel 1937 ret float %ext 1938} 1939 1940define amdgpu_ps float @dyn_extract_v32f32_s_s(i32 inreg %sel) { 1941; GCN-LABEL: dyn_extract_v32f32_s_s: 1942; GCN: ; %bb.0: ; %entry 1943; GCN-NEXT: s_mov_b32 s36, 1.0 1944; GCN-NEXT: s_mov_b32 m0, s2 1945; GCN-NEXT: s_mov_b32 s67, 0x42000000 1946; GCN-NEXT: s_mov_b32 s66, 0x41f80000 1947; GCN-NEXT: s_mov_b32 s65, 0x41f00000 1948; GCN-NEXT: s_mov_b32 s64, 0x41e80000 1949; GCN-NEXT: s_mov_b32 s63, 0x41e00000 1950; GCN-NEXT: s_mov_b32 s62, 0x41d80000 1951; GCN-NEXT: s_mov_b32 s61, 0x41d00000 1952; GCN-NEXT: s_mov_b32 s60, 0x41c80000 1953; GCN-NEXT: s_mov_b32 s59, 0x41c00000 1954; GCN-NEXT: s_mov_b32 s58, 0x41b80000 1955; GCN-NEXT: s_mov_b32 s57, 0x41b00000 1956; GCN-NEXT: s_mov_b32 s56, 0x41a80000 1957; GCN-NEXT: s_mov_b32 s55, 0x41a00000 1958; GCN-NEXT: s_mov_b32 s54, 0x41980000 1959; GCN-NEXT: s_mov_b32 s53, 0x41900000 1960; GCN-NEXT: s_mov_b32 s52, 0x41880000 1961; GCN-NEXT: s_mov_b32 s51, 0x41800000 1962; GCN-NEXT: s_mov_b32 s50, 0x41700000 1963; GCN-NEXT: s_mov_b32 s49, 0x41600000 1964; GCN-NEXT: s_mov_b32 s48, 0x41500000 1965; GCN-NEXT: s_mov_b32 s47, 0x41400000 1966; GCN-NEXT: s_mov_b32 s46, 0x41300000 1967; GCN-NEXT: s_mov_b32 s45, 0x41200000 1968; GCN-NEXT: s_mov_b32 s44, 0x41100000 1969; GCN-NEXT: s_mov_b32 s43, 0x41000000 1970; GCN-NEXT: s_mov_b32 s42, 0x40e00000 1971; GCN-NEXT: s_mov_b32 s41, 0x40c00000 1972; GCN-NEXT: s_mov_b32 s40, 0x40a00000 1973; GCN-NEXT: s_mov_b32 s39, 4.0 1974; GCN-NEXT: s_mov_b32 s38, 0x40400000 1975; GCN-NEXT: s_mov_b32 s37, 2.0 1976; GCN-NEXT: s_movrels_b32 s0, s36 1977; GCN-NEXT: v_mov_b32_e32 v0, s0 1978; GCN-NEXT: ; return to shader part epilog 1979; 1980; GFX10PLUS-LABEL: dyn_extract_v32f32_s_s: 1981; GFX10PLUS: ; %bb.0: ; %entry 1982; GFX10PLUS-NEXT: s_mov_b32 s36, 1.0 1983; GFX10PLUS-NEXT: s_mov_b32 m0, s2 1984; 
GFX10PLUS-NEXT: s_mov_b32 s67, 0x42000000 1985; GFX10PLUS-NEXT: s_mov_b32 s66, 0x41f80000 1986; GFX10PLUS-NEXT: s_mov_b32 s65, 0x41f00000 1987; GFX10PLUS-NEXT: s_mov_b32 s64, 0x41e80000 1988; GFX10PLUS-NEXT: s_mov_b32 s63, 0x41e00000 1989; GFX10PLUS-NEXT: s_mov_b32 s62, 0x41d80000 1990; GFX10PLUS-NEXT: s_mov_b32 s61, 0x41d00000 1991; GFX10PLUS-NEXT: s_mov_b32 s60, 0x41c80000 1992; GFX10PLUS-NEXT: s_mov_b32 s59, 0x41c00000 1993; GFX10PLUS-NEXT: s_mov_b32 s58, 0x41b80000 1994; GFX10PLUS-NEXT: s_mov_b32 s57, 0x41b00000 1995; GFX10PLUS-NEXT: s_mov_b32 s56, 0x41a80000 1996; GFX10PLUS-NEXT: s_mov_b32 s55, 0x41a00000 1997; GFX10PLUS-NEXT: s_mov_b32 s54, 0x41980000 1998; GFX10PLUS-NEXT: s_mov_b32 s53, 0x41900000 1999; GFX10PLUS-NEXT: s_mov_b32 s52, 0x41880000 2000; GFX10PLUS-NEXT: s_mov_b32 s51, 0x41800000 2001; GFX10PLUS-NEXT: s_mov_b32 s50, 0x41700000 2002; GFX10PLUS-NEXT: s_mov_b32 s49, 0x41600000 2003; GFX10PLUS-NEXT: s_mov_b32 s48, 0x41500000 2004; GFX10PLUS-NEXT: s_mov_b32 s47, 0x41400000 2005; GFX10PLUS-NEXT: s_mov_b32 s46, 0x41300000 2006; GFX10PLUS-NEXT: s_mov_b32 s45, 0x41200000 2007; GFX10PLUS-NEXT: s_mov_b32 s44, 0x41100000 2008; GFX10PLUS-NEXT: s_mov_b32 s43, 0x41000000 2009; GFX10PLUS-NEXT: s_mov_b32 s42, 0x40e00000 2010; GFX10PLUS-NEXT: s_mov_b32 s41, 0x40c00000 2011; GFX10PLUS-NEXT: s_mov_b32 s40, 0x40a00000 2012; GFX10PLUS-NEXT: s_mov_b32 s39, 4.0 2013; GFX10PLUS-NEXT: s_mov_b32 s38, 0x40400000 2014; GFX10PLUS-NEXT: s_mov_b32 s37, 2.0 2015; GFX10PLUS-NEXT: s_movrels_b32 s0, s36 2016; GFX10PLUS-NEXT: v_mov_b32_e32 v0, s0 2017; GFX10PLUS-NEXT: ; return to shader part epilog 2018entry: 2019 %ext = extractelement <32 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0, float 17.0, float 18.0, float 19.0, float 20.0, float 21.0, float 22.0, float 23.0, float 24.0, float 25.0, float 26.0, float 27.0, float 28.0, float 29.0, 
float 30.0, float 31.0, float 32.0>, i32 %sel 2020 ret float %ext 2021} 2022 2023define amdgpu_ps double @dyn_extract_v16f64_s_s(i32 inreg %sel) { 2024; GCN-LABEL: dyn_extract_v16f64_s_s: 2025; GCN: ; %bb.0: ; %entry 2026; GCN-NEXT: s_mov_b32 s66, 0 2027; GCN-NEXT: s_mov_b32 s64, 0 2028; GCN-NEXT: s_mov_b32 s62, 0 2029; GCN-NEXT: s_mov_b32 s60, 0 2030; GCN-NEXT: s_mov_b32 s58, 0 2031; GCN-NEXT: s_mov_b32 s56, 0 2032; GCN-NEXT: s_mov_b32 s54, 0 2033; GCN-NEXT: s_mov_b32 s52, 0 2034; GCN-NEXT: s_mov_b32 s50, 0 2035; GCN-NEXT: s_mov_b32 s48, 0 2036; GCN-NEXT: s_mov_b32 s46, 0 2037; GCN-NEXT: s_mov_b32 s44, 0 2038; GCN-NEXT: s_mov_b32 s40, 0 2039; GCN-NEXT: s_mov_b64 s[36:37], 1.0 2040; GCN-NEXT: s_mov_b32 m0, s2 2041; GCN-NEXT: s_mov_b32 s67, 0x40300000 2042; GCN-NEXT: s_mov_b32 s65, 0x402e0000 2043; GCN-NEXT: s_mov_b32 s63, 0x402c0000 2044; GCN-NEXT: s_mov_b32 s61, 0x402a0000 2045; GCN-NEXT: s_mov_b32 s59, 0x40280000 2046; GCN-NEXT: s_mov_b32 s57, 0x40260000 2047; GCN-NEXT: s_mov_b32 s55, 0x40240000 2048; GCN-NEXT: s_mov_b32 s53, 0x40220000 2049; GCN-NEXT: s_mov_b32 s51, 0x40200000 2050; GCN-NEXT: s_mov_b32 s49, 0x401c0000 2051; GCN-NEXT: s_mov_b32 s47, 0x40180000 2052; GCN-NEXT: s_mov_b32 s45, 0x40140000 2053; GCN-NEXT: s_mov_b64 s[42:43], 4.0 2054; GCN-NEXT: s_mov_b32 s41, 0x40080000 2055; GCN-NEXT: s_mov_b64 s[38:39], 2.0 2056; GCN-NEXT: s_movrels_b64 s[0:1], s[36:37] 2057; GCN-NEXT: ; return to shader part epilog 2058; 2059; GFX10PLUS-LABEL: dyn_extract_v16f64_s_s: 2060; GFX10PLUS: ; %bb.0: ; %entry 2061; GFX10PLUS-NEXT: s_mov_b64 s[36:37], 1.0 2062; GFX10PLUS-NEXT: s_mov_b32 m0, s2 2063; GFX10PLUS-NEXT: s_mov_b32 s66, 0 2064; GFX10PLUS-NEXT: s_mov_b32 s64, 0 2065; GFX10PLUS-NEXT: s_mov_b32 s62, 0 2066; GFX10PLUS-NEXT: s_mov_b32 s60, 0 2067; GFX10PLUS-NEXT: s_mov_b32 s58, 0 2068; GFX10PLUS-NEXT: s_mov_b32 s56, 0 2069; GFX10PLUS-NEXT: s_mov_b32 s54, 0 2070; GFX10PLUS-NEXT: s_mov_b32 s52, 0 2071; GFX10PLUS-NEXT: s_mov_b32 s50, 0 2072; GFX10PLUS-NEXT: s_mov_b32 s48, 
0 2073; GFX10PLUS-NEXT: s_mov_b32 s46, 0 2074; GFX10PLUS-NEXT: s_mov_b32 s44, 0 2075; GFX10PLUS-NEXT: s_mov_b32 s40, 0 2076; GFX10PLUS-NEXT: s_mov_b32 s67, 0x40300000 2077; GFX10PLUS-NEXT: s_mov_b32 s65, 0x402e0000 2078; GFX10PLUS-NEXT: s_mov_b32 s63, 0x402c0000 2079; GFX10PLUS-NEXT: s_mov_b32 s61, 0x402a0000 2080; GFX10PLUS-NEXT: s_mov_b32 s59, 0x40280000 2081; GFX10PLUS-NEXT: s_mov_b32 s57, 0x40260000 2082; GFX10PLUS-NEXT: s_mov_b32 s55, 0x40240000 2083; GFX10PLUS-NEXT: s_mov_b32 s53, 0x40220000 2084; GFX10PLUS-NEXT: s_mov_b32 s51, 0x40200000 2085; GFX10PLUS-NEXT: s_mov_b32 s49, 0x401c0000 2086; GFX10PLUS-NEXT: s_mov_b32 s47, 0x40180000 2087; GFX10PLUS-NEXT: s_mov_b32 s45, 0x40140000 2088; GFX10PLUS-NEXT: s_mov_b64 s[42:43], 4.0 2089; GFX10PLUS-NEXT: s_mov_b32 s41, 0x40080000 2090; GFX10PLUS-NEXT: s_mov_b64 s[38:39], 2.0 2091; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[36:37] 2092; GFX10PLUS-NEXT: ; return to shader part epilog 2093entry: 2094 %ext = extractelement <16 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0, double 9.0, double 10.0, double 11.0, double 12.0, double 13.0, double 14.0, double 15.0, double 16.0>, i32 %sel 2095 ret double %ext 2096} 2097 2098define amdgpu_ps float @dyn_extract_v6f32_s_v(<6 x float> inreg %vec, i32 %sel) { 2099; GCN-LABEL: dyn_extract_v6f32_s_v: 2100; GCN: ; %bb.0: ; %entry 2101; GCN-NEXT: v_mov_b32_e32 v1, s2 2102; GCN-NEXT: v_mov_b32_e32 v2, s3 2103; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 2104; GCN-NEXT: v_mov_b32_e32 v3, s4 2105; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 2106; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 2107; GCN-NEXT: v_mov_b32_e32 v4, s5 2108; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 2109; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 2110; GCN-NEXT: v_mov_b32_e32 v5, s6 2111; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 2112; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 2113; GCN-NEXT: v_mov_b32_e32 v6, s7 2114; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 2115; 
GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 2116; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v6, vcc 2117; GCN-NEXT: ; return to shader part epilog 2118; 2119; GFX10PLUS-LABEL: dyn_extract_v6f32_s_v: 2120; GFX10PLUS: ; %bb.0: ; %entry 2121; GFX10PLUS-NEXT: v_mov_b32_e32 v1, s3 2122; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 2123; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo 2124; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 2125; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo 2126; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 2127; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s5, vcc_lo 2128; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 2129; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo 2130; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 2131; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v1, s7, vcc_lo 2132; GFX10PLUS-NEXT: ; return to shader part epilog 2133entry: 2134 %ext = extractelement <6 x float> %vec, i32 %sel 2135 ret float %ext 2136} 2137 2138define float @dyn_extract_v6f32_v_v(<6 x float> %vec, i32 %sel) { 2139; GCN-LABEL: dyn_extract_v6f32_v_v: 2140; GCN: ; %bb.0: ; %entry 2141; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2142; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v6 2143; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 2144; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v6 2145; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 2146; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v6 2147; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 2148; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v6 2149; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 2150; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v6 2151; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc 2152; GCN-NEXT: s_setpc_b64 s[30:31] 2153; 2154; GFX10PLUS-LABEL: dyn_extract_v6f32_v_v: 2155; GFX10PLUS: ; %bb.0: ; %entry 2156; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2157; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v6 2158; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 2159; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v6 2160; 
GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 2161; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v6 2162; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo 2163; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v6 2164; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo 2165; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v6 2166; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo 2167; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 2168entry: 2169 %ext = extractelement <6 x float> %vec, i32 %sel 2170 ret float %ext 2171} 2172 2173define amdgpu_ps float @dyn_extract_v6f32_v_s(<6 x float> %vec, i32 inreg %sel) { 2174; GCN-LABEL: dyn_extract_v6f32_v_s: 2175; GCN: ; %bb.0: ; %entry 2176; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 1 2177; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 2178; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 2 2179; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 2180; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 3 2181; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 2182; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 4 2183; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 2184; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 5 2185; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc 2186; GCN-NEXT: ; return to shader part epilog 2187; 2188; GFX10PLUS-LABEL: dyn_extract_v6f32_v_s: 2189; GFX10PLUS: ; %bb.0: ; %entry 2190; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 1 2191; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 2192; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 2 2193; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 2194; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 3 2195; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo 2196; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 4 2197; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo 2198; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 5 2199; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo 2200; GFX10PLUS-NEXT: ; return to shader part epilog 2201entry: 2202 %ext = extractelement <6 x float> %vec, i32 %sel 2203 ret float %ext 2204} 2205 
2206define amdgpu_ps float @dyn_extract_v6f32_s_s(<6 x float> inreg %vec, i32 inreg %sel) { 2207; GCN-LABEL: dyn_extract_v6f32_s_s: 2208; GCN: ; %bb.0: ; %entry 2209; GCN-NEXT: s_cmp_eq_u32 s8, 1 2210; GCN-NEXT: s_cselect_b32 s0, s3, s2 2211; GCN-NEXT: s_cmp_eq_u32 s8, 2 2212; GCN-NEXT: s_cselect_b32 s0, s4, s0 2213; GCN-NEXT: s_cmp_eq_u32 s8, 3 2214; GCN-NEXT: s_cselect_b32 s0, s5, s0 2215; GCN-NEXT: s_cmp_eq_u32 s8, 4 2216; GCN-NEXT: s_cselect_b32 s0, s6, s0 2217; GCN-NEXT: s_cmp_eq_u32 s8, 5 2218; GCN-NEXT: s_cselect_b32 s0, s7, s0 2219; GCN-NEXT: v_mov_b32_e32 v0, s0 2220; GCN-NEXT: ; return to shader part epilog 2221; 2222; GFX10PLUS-LABEL: dyn_extract_v6f32_s_s: 2223; GFX10PLUS: ; %bb.0: ; %entry 2224; GFX10PLUS-NEXT: s_cmp_eq_u32 s8, 1 2225; GFX10PLUS-NEXT: s_cselect_b32 s0, s3, s2 2226; GFX10PLUS-NEXT: s_cmp_eq_u32 s8, 2 2227; GFX10PLUS-NEXT: s_cselect_b32 s0, s4, s0 2228; GFX10PLUS-NEXT: s_cmp_eq_u32 s8, 3 2229; GFX10PLUS-NEXT: s_cselect_b32 s0, s5, s0 2230; GFX10PLUS-NEXT: s_cmp_eq_u32 s8, 4 2231; GFX10PLUS-NEXT: s_cselect_b32 s0, s6, s0 2232; GFX10PLUS-NEXT: s_cmp_eq_u32 s8, 5 2233; GFX10PLUS-NEXT: s_cselect_b32 s0, s7, s0 2234; GFX10PLUS-NEXT: v_mov_b32_e32 v0, s0 2235; GFX10PLUS-NEXT: ; return to shader part epilog 2236entry: 2237 %ext = extractelement <6 x float> %vec, i32 %sel 2238 ret float %ext 2239} 2240 2241define amdgpu_ps float @dyn_extract_v7f32_s_v(<7 x float> inreg %vec, i32 %sel) { 2242; GCN-LABEL: dyn_extract_v7f32_s_v: 2243; GCN: ; %bb.0: ; %entry 2244; GCN-NEXT: v_mov_b32_e32 v1, s2 2245; GCN-NEXT: v_mov_b32_e32 v2, s3 2246; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 2247; GCN-NEXT: v_mov_b32_e32 v3, s4 2248; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 2249; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 2250; GCN-NEXT: v_mov_b32_e32 v4, s5 2251; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 2252; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 2253; GCN-NEXT: v_mov_b32_e32 v5, s6 2254; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 2255; GCN-NEXT: v_cmp_eq_u32_e32 
vcc, 4, v0 2256; GCN-NEXT: v_mov_b32_e32 v6, s7 2257; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 2258; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 2259; GCN-NEXT: v_mov_b32_e32 v7, s8 2260; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc 2261; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0 2262; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v7, vcc 2263; GCN-NEXT: ; return to shader part epilog 2264; 2265; GFX10PLUS-LABEL: dyn_extract_v7f32_s_v: 2266; GFX10PLUS: ; %bb.0: ; %entry 2267; GFX10PLUS-NEXT: v_mov_b32_e32 v1, s3 2268; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 2269; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo 2270; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 2271; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo 2272; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 2273; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s5, vcc_lo 2274; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 2275; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo 2276; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 2277; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s7, vcc_lo 2278; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 2279; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v1, s8, vcc_lo 2280; GFX10PLUS-NEXT: ; return to shader part epilog 2281entry: 2282 %ext = extractelement <7 x float> %vec, i32 %sel 2283 ret float %ext 2284} 2285 2286define float @dyn_extract_v7f32_v_v(<7 x float> %vec, i32 %sel) { 2287; GCN-LABEL: dyn_extract_v7f32_v_v: 2288; GCN: ; %bb.0: ; %entry 2289; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2290; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v7 2291; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 2292; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v7 2293; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 2294; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v7 2295; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 2296; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v7 2297; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 2298; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v7 2299; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc 2300; 
GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v7 2301; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 2302; GCN-NEXT: s_setpc_b64 s[30:31] 2303; 2304; GFX10PLUS-LABEL: dyn_extract_v7f32_v_v: 2305; GFX10PLUS: ; %bb.0: ; %entry 2306; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2307; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v7 2308; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 2309; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v7 2310; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 2311; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v7 2312; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo 2313; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v7 2314; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo 2315; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v7 2316; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo 2317; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v7 2318; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo 2319; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 2320entry: 2321 %ext = extractelement <7 x float> %vec, i32 %sel 2322 ret float %ext 2323} 2324 2325define amdgpu_ps float @dyn_extract_v7f32_v_s(<7 x float> %vec, i32 inreg %sel) { 2326; GCN-LABEL: dyn_extract_v7f32_v_s: 2327; GCN: ; %bb.0: ; %entry 2328; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 1 2329; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 2330; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 2 2331; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 2332; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 3 2333; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 2334; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 4 2335; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 2336; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 5 2337; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc 2338; GCN-NEXT: v_cmp_eq_u32_e64 vcc, s2, 6 2339; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 2340; GCN-NEXT: ; return to shader part epilog 2341; 2342; GFX10PLUS-LABEL: dyn_extract_v7f32_v_s: 2343; GFX10PLUS: ; %bb.0: ; %entry 2344; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 1 2345; 
GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 2346; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 2 2347; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 2348; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 3 2349; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo 2350; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 4 2351; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo 2352; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 5 2353; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo 2354; GFX10PLUS-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 6 2355; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo 2356; GFX10PLUS-NEXT: ; return to shader part epilog 2357entry: 2358 %ext = extractelement <7 x float> %vec, i32 %sel 2359 ret float %ext 2360} 2361 2362define amdgpu_ps float @dyn_extract_v7f32_s_s(<7 x float> inreg %vec, i32 inreg %sel) { 2363; GCN-LABEL: dyn_extract_v7f32_s_s: 2364; GCN: ; %bb.0: ; %entry 2365; GCN-NEXT: s_cmp_eq_u32 s9, 1 2366; GCN-NEXT: s_cselect_b32 s0, s3, s2 2367; GCN-NEXT: s_cmp_eq_u32 s9, 2 2368; GCN-NEXT: s_cselect_b32 s0, s4, s0 2369; GCN-NEXT: s_cmp_eq_u32 s9, 3 2370; GCN-NEXT: s_cselect_b32 s0, s5, s0 2371; GCN-NEXT: s_cmp_eq_u32 s9, 4 2372; GCN-NEXT: s_cselect_b32 s0, s6, s0 2373; GCN-NEXT: s_cmp_eq_u32 s9, 5 2374; GCN-NEXT: s_cselect_b32 s0, s7, s0 2375; GCN-NEXT: s_cmp_eq_u32 s9, 6 2376; GCN-NEXT: s_cselect_b32 s0, s8, s0 2377; GCN-NEXT: v_mov_b32_e32 v0, s0 2378; GCN-NEXT: ; return to shader part epilog 2379; 2380; GFX10PLUS-LABEL: dyn_extract_v7f32_s_s: 2381; GFX10PLUS: ; %bb.0: ; %entry 2382; GFX10PLUS-NEXT: s_cmp_eq_u32 s9, 1 2383; GFX10PLUS-NEXT: s_cselect_b32 s0, s3, s2 2384; GFX10PLUS-NEXT: s_cmp_eq_u32 s9, 2 2385; GFX10PLUS-NEXT: s_cselect_b32 s0, s4, s0 2386; GFX10PLUS-NEXT: s_cmp_eq_u32 s9, 3 2387; GFX10PLUS-NEXT: s_cselect_b32 s0, s5, s0 2388; GFX10PLUS-NEXT: s_cmp_eq_u32 s9, 4 2389; GFX10PLUS-NEXT: s_cselect_b32 s0, s6, s0 2390; GFX10PLUS-NEXT: s_cmp_eq_u32 s9, 5 2391; GFX10PLUS-NEXT: s_cselect_b32 s0, s7, s0 2392; 
GFX10PLUS-NEXT: s_cmp_eq_u32 s9, 6 2393; GFX10PLUS-NEXT: s_cselect_b32 s0, s8, s0 2394; GFX10PLUS-NEXT: v_mov_b32_e32 v0, s0 2395; GFX10PLUS-NEXT: ; return to shader part epilog 2396entry: 2397 %ext = extractelement <7 x float> %vec, i32 %sel 2398 ret float %ext 2399} 2400 2401define amdgpu_ps double @dyn_extract_v6f64_s_v(<6 x double> inreg %vec, i32 %sel) { 2402; GCN-LABEL: dyn_extract_v6f64_s_v: 2403; GCN: ; %bb.0: ; %entry 2404; GCN-NEXT: v_mov_b32_e32 v1, s2 2405; GCN-NEXT: v_mov_b32_e32 v2, s3 2406; GCN-NEXT: v_mov_b32_e32 v3, s4 2407; GCN-NEXT: v_mov_b32_e32 v4, s5 2408; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 2409; GCN-NEXT: v_mov_b32_e32 v5, s6 2410; GCN-NEXT: v_mov_b32_e32 v6, s7 2411; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 2412; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 2413; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 2414; GCN-NEXT: v_mov_b32_e32 v7, s8 2415; GCN-NEXT: v_mov_b32_e32 v8, s9 2416; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 2417; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc 2418; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 2419; GCN-NEXT: v_mov_b32_e32 v9, s10 2420; GCN-NEXT: v_mov_b32_e32 v10, s11 2421; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 2422; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc 2423; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 2424; GCN-NEXT: v_mov_b32_e32 v11, s12 2425; GCN-NEXT: v_mov_b32_e32 v12, s13 2426; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc 2427; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc 2428; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 2429; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v11, vcc 2430; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v12, vcc 2431; GCN-NEXT: v_readfirstlane_b32 s0, v0 2432; GCN-NEXT: v_readfirstlane_b32 s1, v1 2433; GCN-NEXT: ; return to shader part epilog 2434; 2435; GFX10-LABEL: dyn_extract_v6f64_s_v: 2436; GFX10: ; %bb.0: ; %entry 2437; GFX10-NEXT: v_mov_b32_e32 v1, s4 2438; GFX10-NEXT: v_mov_b32_e32 v2, s5 2439; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 2440; GFX10-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo 
2441; GFX10-NEXT: v_cndmask_b32_e32 v2, s3, v2, vcc_lo 2442; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 2443; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo 2444; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo 2445; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 2446; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo 2447; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo 2448; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 2449; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo 2450; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo 2451; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 2452; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s12, vcc_lo 2453; GFX10-NEXT: v_cndmask_b32_e64 v1, v2, s13, vcc_lo 2454; GFX10-NEXT: v_readfirstlane_b32 s0, v0 2455; GFX10-NEXT: v_readfirstlane_b32 s1, v1 2456; GFX10-NEXT: ; return to shader part epilog 2457; 2458; GFX11-LABEL: dyn_extract_v6f64_s_v: 2459; GFX11: ; %bb.0: ; %entry 2460; GFX11-NEXT: v_dual_mov_b32 v1, s4 :: v_dual_mov_b32 v2, s5 2461; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 2462; GFX11-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo 2463; GFX11-NEXT: v_cndmask_b32_e32 v2, s3, v2, vcc_lo 2464; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 2465; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo 2466; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo 2467; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 2468; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo 2469; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo 2470; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 2471; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo 2472; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo 2473; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 2474; GFX11-NEXT: v_cndmask_b32_e64 v0, v1, s12, vcc_lo 2475; GFX11-NEXT: v_cndmask_b32_e64 v1, v2, s13, vcc_lo 2476; GFX11-NEXT: v_readfirstlane_b32 s0, v0 2477; GFX11-NEXT: v_readfirstlane_b32 s1, v1 2478; GFX11-NEXT: ; return to shader part epilog 2479entry: 2480 %ext = extractelement <6 x double> %vec, i32 %sel 2481 ret double 
%ext 2482} 2483 ; dyn_extract_v6f64_v_v: vector and index are both non-inreg arguments (v0-v11 and v12 in the checks). The checks expect a v_cmp_eq_u32/v_cndmask_b32 select chain over indices 1-5 and a plain s_setpc_b64 return, with GFX11 pairing the two selects as v_dual_cndmask_b32. 2484define double @dyn_extract_v6f64_v_v(<6 x double> %vec, i32 %sel) { 2485; GCN-LABEL: dyn_extract_v6f64_v_v: 2486; GCN: ; %bb.0: ; %entry 2487; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2488; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v12 2489; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 2490; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 2491; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v12 2492; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 2493; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 2494; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v12 2495; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 2496; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 2497; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v12 2498; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc 2499; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc 2500; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v12 2501; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc 2502; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc 2503; GCN-NEXT: s_setpc_b64 s[30:31] 2504; 2505; GFX10-LABEL: dyn_extract_v6f64_v_v: 2506; GFX10: ; %bb.0: ; %entry 2507; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2508; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v12 2509; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 2510; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo 2511; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v12 2512; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo 2513; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo 2514; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v12 2515; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo 2516; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc_lo 2517; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v12 2518; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo 2519; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc_lo 2520; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v12 2521; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo 2522; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc_lo 2523; GFX10-NEXT: s_setpc_b64 s[30:31] 2524; 2525; GFX11-LABEL: 
dyn_extract_v6f64_v_v: 2526; GFX11: ; %bb.0: ; %entry 2527; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2528; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v12 2529; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v2 :: v_dual_cndmask_b32 v1, v1, v3 2530; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v12 2531; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v4 :: v_dual_cndmask_b32 v1, v1, v5 2532; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v12 2533; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v6 :: v_dual_cndmask_b32 v1, v1, v7 2534; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v12 2535; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v8 :: v_dual_cndmask_b32 v1, v1, v9 2536; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v12 2537; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v10 :: v_dual_cndmask_b32 v1, v1, v11 2538; GFX11-NEXT: s_setpc_b64 s[30:31] 2539entry: 2540 %ext = extractelement <6 x double> %vec, i32 %sel 2541 ret double %ext 2542} 2543 ; dyn_extract_v6f64_v_s: the vector is in VGPRs and %sel is inreg (s2 in the checks). The checks shift the index left by one (s_lshl_b32 ..., 1) and use it for indexed VGPR reads - s_set_gpr_idx_on/off around v_mov_b32 on GPRIDX, m0 plus v_movrels_b32 on MOVREL and GFX10PLUS - then v_readfirstlane_b32 into s0/s1. 2544define amdgpu_ps double @dyn_extract_v6f64_v_s(<6 x double> %vec, i32 inreg %sel) { 2545; GPRIDX-LABEL: dyn_extract_v6f64_v_s: 2546; GPRIDX: ; %bb.0: ; %entry 2547; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1 2548; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(SRC0) 2549; GPRIDX-NEXT: v_mov_b32_e32 v12, v0 2550; GPRIDX-NEXT: v_mov_b32_e32 v0, v1 2551; GPRIDX-NEXT: s_set_gpr_idx_off 2552; GPRIDX-NEXT: v_readfirstlane_b32 s0, v12 2553; GPRIDX-NEXT: v_readfirstlane_b32 s1, v0 2554; GPRIDX-NEXT: ; return to shader part epilog 2555; 2556; MOVREL-LABEL: dyn_extract_v6f64_v_s: 2557; MOVREL: ; %bb.0: ; %entry 2558; MOVREL-NEXT: s_lshl_b32 m0, s2, 1 2559; MOVREL-NEXT: v_movrels_b32_e32 v12, v0 2560; MOVREL-NEXT: v_movrels_b32_e32 v0, v1 2561; MOVREL-NEXT: v_readfirstlane_b32 s0, v12 2562; MOVREL-NEXT: v_readfirstlane_b32 s1, v0 2563; MOVREL-NEXT: ; return to shader part epilog 2564; 2565; GFX10PLUS-LABEL: dyn_extract_v6f64_v_s: 2566; GFX10PLUS: ; %bb.0: ; %entry 2567; GFX10PLUS-NEXT: s_lshl_b32 m0, s2, 1 2568; GFX10PLUS-NEXT: v_movrels_b32_e32 v12, v0 2569; GFX10PLUS-NEXT: v_movrels_b32_e32 v0, 
v1 2570; GFX10PLUS-NEXT: v_readfirstlane_b32 s0, v12 2571; GFX10PLUS-NEXT: v_readfirstlane_b32 s1, v0 2572; GFX10PLUS-NEXT: ; return to shader part epilog 2573entry: 2574 %ext = extractelement <6 x double> %vec, i32 %sel 2575 ret double %ext 2576} 2577 ; dyn_extract_v6f64_s_s: vector and %sel are both inreg. The checks copy s2-s13 down to s0-s11, load m0 from s14 (the index) and extract with a single s_movrels_b64 s[0:1], s[0:1]. 2578define amdgpu_ps double @dyn_extract_v6f64_s_s(<6 x double> inreg %vec, i32 inreg %sel) { 2579; GCN-LABEL: dyn_extract_v6f64_s_s: 2580; GCN: ; %bb.0: ; %entry 2581; GCN-NEXT: s_mov_b32 s0, s2 2582; GCN-NEXT: s_mov_b32 s1, s3 2583; GCN-NEXT: s_mov_b32 m0, s14 2584; GCN-NEXT: s_mov_b32 s2, s4 2585; GCN-NEXT: s_mov_b32 s3, s5 2586; GCN-NEXT: s_mov_b32 s4, s6 2587; GCN-NEXT: s_mov_b32 s5, s7 2588; GCN-NEXT: s_mov_b32 s6, s8 2589; GCN-NEXT: s_mov_b32 s7, s9 2590; GCN-NEXT: s_mov_b32 s8, s10 2591; GCN-NEXT: s_mov_b32 s9, s11 2592; GCN-NEXT: s_mov_b32 s10, s12 2593; GCN-NEXT: s_mov_b32 s11, s13 2594; GCN-NEXT: s_movrels_b64 s[0:1], s[0:1] 2595; GCN-NEXT: ; return to shader part epilog 2596; 2597; GFX10PLUS-LABEL: dyn_extract_v6f64_s_s: 2598; GFX10PLUS: ; %bb.0: ; %entry 2599; GFX10PLUS-NEXT: s_mov_b32 s0, s2 2600; GFX10PLUS-NEXT: s_mov_b32 s1, s3 2601; GFX10PLUS-NEXT: s_mov_b32 m0, s14 2602; GFX10PLUS-NEXT: s_mov_b32 s2, s4 2603; GFX10PLUS-NEXT: s_mov_b32 s3, s5 2604; GFX10PLUS-NEXT: s_mov_b32 s4, s6 2605; GFX10PLUS-NEXT: s_mov_b32 s5, s7 2606; GFX10PLUS-NEXT: s_mov_b32 s6, s8 2607; GFX10PLUS-NEXT: s_mov_b32 s7, s9 2608; GFX10PLUS-NEXT: s_mov_b32 s8, s10 2609; GFX10PLUS-NEXT: s_mov_b32 s9, s11 2610; GFX10PLUS-NEXT: s_mov_b32 s10, s12 2611; GFX10PLUS-NEXT: s_mov_b32 s11, s13 2612; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[0:1] 2613; GFX10PLUS-NEXT: ; return to shader part epilog 2614entry: 2615 %ext = extractelement <6 x double> %vec, i32 %sel 2616 ret double %ext 2617} 2618 ; dyn_extract_v7f64_s_v_bitcast: the inreg <14 x float> %userData is bitcast to <7 x double> before a dynamic extract with %sel in v0. The checks expect a v_cmp_eq_u32/v_cndmask_b32 select chain ending in v_readfirstlane_b32 into s0/s1. NOTE(review): the chain also tests index 7 although the vector has only 7 elements - presumably from widening to 8 elements, confirm before relying on it. 2619define amdgpu_ps double @dyn_extract_v7f64_s_v_bitcast(<14 x float> inreg %userData, i32 %sel) { 2620; GCN-LABEL: dyn_extract_v7f64_s_v_bitcast: 2621; GCN: ; %bb.0: ; %entry 2622; GCN-NEXT: v_mov_b32_e32 v1, s2 2623; GCN-NEXT: v_mov_b32_e32 v2, s3 
2624; GCN-NEXT: v_mov_b32_e32 v3, s4 2625; GCN-NEXT: v_mov_b32_e32 v4, s5 2626; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 2627; GCN-NEXT: v_mov_b32_e32 v5, s6 2628; GCN-NEXT: v_mov_b32_e32 v6, s7 2629; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 2630; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 2631; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 2632; GCN-NEXT: v_mov_b32_e32 v7, s8 2633; GCN-NEXT: v_mov_b32_e32 v8, s9 2634; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 2635; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc 2636; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 2637; GCN-NEXT: v_mov_b32_e32 v9, s10 2638; GCN-NEXT: v_mov_b32_e32 v10, s11 2639; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 2640; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc 2641; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 2642; GCN-NEXT: v_mov_b32_e32 v11, s12 2643; GCN-NEXT: v_mov_b32_e32 v12, s13 2644; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc 2645; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc 2646; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 2647; GCN-NEXT: v_mov_b32_e32 v13, s14 2648; GCN-NEXT: v_mov_b32_e32 v14, s15 2649; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc 2650; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v12, vcc 2651; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0 2652; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc 2653; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v14, vcc 2654; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0 2655; GCN-NEXT: ; kill: def $vgpr15 killed $sgpr2 killed $exec 2656; GCN-NEXT: ; kill: def $vgpr16 killed $sgpr3 killed $exec 2657; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v15, vcc 2658; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v16, vcc 2659; GCN-NEXT: v_readfirstlane_b32 s0, v0 2660; GCN-NEXT: v_readfirstlane_b32 s1, v1 2661; GCN-NEXT: ; return to shader part epilog 2662; 2663; GFX10-LABEL: dyn_extract_v7f64_s_v_bitcast: 2664; GFX10: ; %bb.0: ; %entry 2665; GFX10-NEXT: v_mov_b32_e32 v1, s4 2666; GFX10-NEXT: v_mov_b32_e32 v2, s5 2667; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 2668; GFX10-NEXT: s_mov_b32 s0, s14 2669; GFX10-NEXT: 
v_cndmask_b32_e32 v1, s2, v1, vcc_lo 2670; GFX10-NEXT: v_cndmask_b32_e32 v2, s3, v2, vcc_lo 2671; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 2672; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo 2673; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo 2674; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 2675; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo 2676; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo 2677; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 2678; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo 2679; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo 2680; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 2681; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s12, vcc_lo 2682; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s13, vcc_lo 2683; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 2684; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s0, vcc_lo 2685; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s15, vcc_lo 2686; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0 2687; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s2, vcc_lo 2688; GFX10-NEXT: v_cndmask_b32_e64 v1, v2, s3, vcc_lo 2689; GFX10-NEXT: v_readfirstlane_b32 s0, v0 2690; GFX10-NEXT: v_readfirstlane_b32 s1, v1 2691; GFX10-NEXT: ; return to shader part epilog 2692; 2693; GFX11-LABEL: dyn_extract_v7f64_s_v_bitcast: 2694; GFX11: ; %bb.0: ; %entry 2695; GFX11-NEXT: v_dual_mov_b32 v1, s4 :: v_dual_mov_b32 v2, s5 2696; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 2697; GFX11-NEXT: s_mov_b32 s0, s14 2698; GFX11-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo 2699; GFX11-NEXT: v_cndmask_b32_e32 v2, s3, v2, vcc_lo 2700; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 2701; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo 2702; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo 2703; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 2704; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo 2705; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo 2706; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 2707; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo 2708; GFX11-NEXT: v_cndmask_b32_e64 v2, 
v2, s11, vcc_lo 2709; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 2710; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s12, vcc_lo 2711; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s13, vcc_lo 2712; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 2713; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s0, vcc_lo 2714; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s15, vcc_lo 2715; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0 2716; GFX11-NEXT: v_cndmask_b32_e64 v0, v1, s2, vcc_lo 2717; GFX11-NEXT: v_cndmask_b32_e64 v1, v2, s3, vcc_lo 2718; GFX11-NEXT: v_readfirstlane_b32 s0, v0 2719; GFX11-NEXT: v_readfirstlane_b32 s1, v1 2720; GFX11-NEXT: ; return to shader part epilog 2721entry: 2722 %bc = bitcast <14 x float> %userData to <7 x double> 2723 %ext = extractelement <7 x double> %bc, i32 %sel 2724 ret double %ext 2725} 2726 ; dyn_extract_v7i64_s_v_bitcast: the inreg <14 x i32> %userData is bitcast to <7 x i64> and element 4 is extracted. NOTE(review): %sel is unused - the index is the constant 4, so the checks are only s_mov_b32 s0, s10 and s_mov_b32 s1, s11. Confirm the constant index is intended given the dyn_ name. 2727define amdgpu_ps i64 @dyn_extract_v7i64_s_v_bitcast(<14 x i32> inreg %userData, i32 %sel) { 2728; GCN-LABEL: dyn_extract_v7i64_s_v_bitcast: 2729; GCN: ; %bb.0: ; %entry 2730; GCN-NEXT: s_mov_b32 s0, s10 2731; GCN-NEXT: s_mov_b32 s1, s11 2732; GCN-NEXT: ; return to shader part epilog 2733; 2734; GFX10PLUS-LABEL: dyn_extract_v7i64_s_v_bitcast: 2735; GFX10PLUS: ; %bb.0: ; %entry 2736; GFX10PLUS-NEXT: s_mov_b32 s0, s10 2737; GFX10PLUS-NEXT: s_mov_b32 s1, s11 2738; GFX10PLUS-NEXT: ; return to shader part epilog 2739entry: 2740 %.bc = bitcast <14 x i32> %userData to <7 x i64> 2741 %ext = extractelement <7 x i64> %.bc, i32 4 2742 ret i64 %ext 2743} 2744 ; dyn_extract_v7f64_s_v: the <7 x double> vector is inreg and %sel is in v0. The checks expect a v_cmp_eq_u32/v_cndmask_b32 select chain over indices 1-7, then v_readfirstlane_b32 of the result into s0/s1. 2745define amdgpu_ps double @dyn_extract_v7f64_s_v(<7 x double> inreg %vec, i32 %sel) { 2746; GCN-LABEL: dyn_extract_v7f64_s_v: 2747; GCN: ; %bb.0: ; %entry 2748; GCN-NEXT: v_mov_b32_e32 v1, s2 2749; GCN-NEXT: v_mov_b32_e32 v2, s3 2750; GCN-NEXT: v_mov_b32_e32 v3, s4 2751; GCN-NEXT: v_mov_b32_e32 v4, s5 2752; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 2753; GCN-NEXT: v_mov_b32_e32 v5, s6 2754; GCN-NEXT: v_mov_b32_e32 v6, s7 2755; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 2756; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 2757; GCN-NEXT: 
v_cmp_eq_u32_e32 vcc, 2, v0 2758; GCN-NEXT: v_mov_b32_e32 v7, s8 2759; GCN-NEXT: v_mov_b32_e32 v8, s9 2760; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 2761; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc 2762; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 2763; GCN-NEXT: v_mov_b32_e32 v9, s10 2764; GCN-NEXT: v_mov_b32_e32 v10, s11 2765; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 2766; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc 2767; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 2768; GCN-NEXT: v_mov_b32_e32 v11, s12 2769; GCN-NEXT: v_mov_b32_e32 v12, s13 2770; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc 2771; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc 2772; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 2773; GCN-NEXT: v_mov_b32_e32 v13, s14 2774; GCN-NEXT: v_mov_b32_e32 v14, s15 2775; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc 2776; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v12, vcc 2777; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0 2778; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc 2779; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v14, vcc 2780; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0 2781; GCN-NEXT: ; kill: def $vgpr15 killed $sgpr2 killed $exec 2782; GCN-NEXT: ; kill: def $vgpr16 killed $sgpr3 killed $exec 2783; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v15, vcc 2784; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v16, vcc 2785; GCN-NEXT: v_readfirstlane_b32 s0, v0 2786; GCN-NEXT: v_readfirstlane_b32 s1, v1 2787; GCN-NEXT: ; return to shader part epilog 2788; 2789; GFX10-LABEL: dyn_extract_v7f64_s_v: 2790; GFX10: ; %bb.0: ; %entry 2791; GFX10-NEXT: v_mov_b32_e32 v1, s4 2792; GFX10-NEXT: v_mov_b32_e32 v2, s5 2793; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 2794; GFX10-NEXT: s_mov_b32 s0, s14 2795; GFX10-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo 2796; GFX10-NEXT: v_cndmask_b32_e32 v2, s3, v2, vcc_lo 2797; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 2798; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo 2799; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo 2800; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 2801; GFX10-NEXT: 
v_cndmask_b32_e64 v1, v1, s8, vcc_lo 2802; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo 2803; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 2804; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo 2805; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo 2806; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 2807; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s12, vcc_lo 2808; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s13, vcc_lo 2809; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 2810; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s0, vcc_lo 2811; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s15, vcc_lo 2812; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0 2813; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s2, vcc_lo 2814; GFX10-NEXT: v_cndmask_b32_e64 v1, v2, s3, vcc_lo 2815; GFX10-NEXT: v_readfirstlane_b32 s0, v0 2816; GFX10-NEXT: v_readfirstlane_b32 s1, v1 2817; GFX10-NEXT: ; return to shader part epilog 2818; 2819; GFX11-LABEL: dyn_extract_v7f64_s_v: 2820; GFX11: ; %bb.0: ; %entry 2821; GFX11-NEXT: v_dual_mov_b32 v1, s4 :: v_dual_mov_b32 v2, s5 2822; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 2823; GFX11-NEXT: s_mov_b32 s0, s14 2824; GFX11-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo 2825; GFX11-NEXT: v_cndmask_b32_e32 v2, s3, v2, vcc_lo 2826; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 2827; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo 2828; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s7, vcc_lo 2829; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 2830; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo 2831; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s9, vcc_lo 2832; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 2833; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo 2834; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo 2835; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 2836; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s12, vcc_lo 2837; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s13, vcc_lo 2838; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 2839; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s0, vcc_lo 2840; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, 
s15, vcc_lo 2841; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0 2842; GFX11-NEXT: v_cndmask_b32_e64 v0, v1, s2, vcc_lo 2843; GFX11-NEXT: v_cndmask_b32_e64 v1, v2, s3, vcc_lo 2844; GFX11-NEXT: v_readfirstlane_b32 s0, v0 2845; GFX11-NEXT: v_readfirstlane_b32 s1, v1 2846; GFX11-NEXT: ; return to shader part epilog 2847entry: 2848 %ext = extractelement <7 x double> %vec, i32 %sel 2849 ret double %ext 2850} 2851 ; dyn_extract_v7f64_v_v: vector and index are both non-inreg arguments (v0-v13 and v14 in the checks). The checks expect a v_cmp_eq_u32/v_cndmask_b32 select chain over indices 1-7. NOTE(review): the index-7 step selects from v14/v15, and v14 is the index register itself - presumably a don't-care padding element, confirm. 2852define double @dyn_extract_v7f64_v_v(<7 x double> %vec, i32 %sel) { 2853; GCN-LABEL: dyn_extract_v7f64_v_v: 2854; GCN: ; %bb.0: ; %entry 2855; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2856; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v14 2857; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 2858; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 2859; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v14 2860; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 2861; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 2862; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v14 2863; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 2864; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 2865; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v14 2866; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc 2867; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc 2868; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v14 2869; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc 2870; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc 2871; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v14 2872; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc 2873; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc 2874; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v14 2875; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc 2876; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc 2877; GCN-NEXT: s_setpc_b64 s[30:31] 2878; 2879; GFX10-LABEL: dyn_extract_v7f64_v_v: 2880; GFX10: ; %bb.0: ; %entry 2881; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2882; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v14 2883; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 2884; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo 2885; GFX10-NEXT: v_cmp_eq_u32_e32 
vcc_lo, 2, v14 2886; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo 2887; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo 2888; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v14 2889; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo 2890; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc_lo 2891; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v14 2892; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo 2893; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc_lo 2894; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v14 2895; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo 2896; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc_lo 2897; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v14 2898; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo 2899; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc_lo 2900; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v14 2901; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo 2902; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc_lo 2903; GFX10-NEXT: s_setpc_b64 s[30:31] 2904; 2905; GFX11-LABEL: dyn_extract_v7f64_v_v: 2906; GFX11: ; %bb.0: ; %entry 2907; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2908; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v14 2909; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v2 :: v_dual_cndmask_b32 v1, v1, v3 2910; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v14 2911; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v4 :: v_dual_cndmask_b32 v1, v1, v5 2912; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v14 2913; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v6 :: v_dual_cndmask_b32 v1, v1, v7 2914; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v14 2915; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v8 :: v_dual_cndmask_b32 v1, v1, v9 2916; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v14 2917; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v10 :: v_dual_cndmask_b32 v1, v1, v11 2918; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v14 2919; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v12 :: v_dual_cndmask_b32 v1, v1, v13 2920; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v14 2921; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v14 :: 
v_dual_cndmask_b32 v1, v1, v15 2922; GFX11-NEXT: s_setpc_b64 s[30:31] 2923entry: 2924 %ext = extractelement <7 x double> %vec, i32 %sel 2925 ret double %ext 2926} 2927 ; dyn_extract_v7f64_v_s: the vector is in VGPRs and %sel is inreg (s2 in the checks). The checks shift the index left by one (s_lshl_b32 ..., 1) and use it for indexed VGPR reads - s_set_gpr_idx_on/off around v_mov_b32 on GPRIDX, m0 plus v_movrels_b32 on MOVREL and GFX10PLUS - then v_readfirstlane_b32 into s0/s1. 2928define amdgpu_ps double @dyn_extract_v7f64_v_s(<7 x double> %vec, i32 inreg %sel) { 2929; GPRIDX-LABEL: dyn_extract_v7f64_v_s: 2930; GPRIDX: ; %bb.0: ; %entry 2931; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1 2932; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(SRC0) 2933; GPRIDX-NEXT: v_mov_b32_e32 v14, v0 2934; GPRIDX-NEXT: v_mov_b32_e32 v0, v1 2935; GPRIDX-NEXT: s_set_gpr_idx_off 2936; GPRIDX-NEXT: v_readfirstlane_b32 s0, v14 2937; GPRIDX-NEXT: v_readfirstlane_b32 s1, v0 2938; GPRIDX-NEXT: ; return to shader part epilog 2939; 2940; MOVREL-LABEL: dyn_extract_v7f64_v_s: 2941; MOVREL: ; %bb.0: ; %entry 2942; MOVREL-NEXT: s_lshl_b32 m0, s2, 1 2943; MOVREL-NEXT: v_movrels_b32_e32 v14, v0 2944; MOVREL-NEXT: v_movrels_b32_e32 v0, v1 2945; MOVREL-NEXT: v_readfirstlane_b32 s0, v14 2946; MOVREL-NEXT: v_readfirstlane_b32 s1, v0 2947; MOVREL-NEXT: ; return to shader part epilog 2948; 2949; GFX10PLUS-LABEL: dyn_extract_v7f64_v_s: 2950; GFX10PLUS: ; %bb.0: ; %entry 2951; GFX10PLUS-NEXT: s_lshl_b32 m0, s2, 1 2952; GFX10PLUS-NEXT: v_movrels_b32_e32 v14, v0 2953; GFX10PLUS-NEXT: v_movrels_b32_e32 v0, v1 2954; GFX10PLUS-NEXT: v_readfirstlane_b32 s0, v14 2955; GFX10PLUS-NEXT: v_readfirstlane_b32 s1, v0 2956; GFX10PLUS-NEXT: ; return to shader part epilog 2957entry: 2958 %ext = extractelement <7 x double> %vec, i32 %sel 2959 ret double %ext 2960} 2961 ; dyn_extract_v7f64_s_s: vector and %sel are both inreg. The checks copy s2-s15 down to s0-s13, load m0 from s16 (the index) and extract with a single s_movrels_b64 s[0:1], s[0:1]. 2962define amdgpu_ps double @dyn_extract_v7f64_s_s(<7 x double> inreg %vec, i32 inreg %sel) { 2963; GCN-LABEL: dyn_extract_v7f64_s_s: 2964; GCN: ; %bb.0: ; %entry 2965; GCN-NEXT: s_mov_b32 s0, s2 2966; GCN-NEXT: s_mov_b32 s1, s3 2967; GCN-NEXT: s_mov_b32 m0, s16 2968; GCN-NEXT: s_mov_b32 s2, s4 2969; GCN-NEXT: s_mov_b32 s3, s5 2970; GCN-NEXT: s_mov_b32 s4, s6 2971; GCN-NEXT: s_mov_b32 s5, s7 2972; GCN-NEXT: s_mov_b32 s6, s8 2973; GCN-NEXT: s_mov_b32 s7, s9 2974; GCN-NEXT: 
s_mov_b32 s8, s10 2975; GCN-NEXT: s_mov_b32 s9, s11 2976; GCN-NEXT: s_mov_b32 s10, s12 2977; GCN-NEXT: s_mov_b32 s11, s13 2978; GCN-NEXT: s_mov_b32 s12, s14 2979; GCN-NEXT: s_mov_b32 s13, s15 2980; GCN-NEXT: s_movrels_b64 s[0:1], s[0:1] 2981; GCN-NEXT: ; return to shader part epilog 2982; 2983; GFX10PLUS-LABEL: dyn_extract_v7f64_s_s: 2984; GFX10PLUS: ; %bb.0: ; %entry 2985; GFX10PLUS-NEXT: s_mov_b32 s0, s2 2986; GFX10PLUS-NEXT: s_mov_b32 s1, s3 2987; GFX10PLUS-NEXT: s_mov_b32 m0, s16 2988; GFX10PLUS-NEXT: s_mov_b32 s2, s4 2989; GFX10PLUS-NEXT: s_mov_b32 s3, s5 2990; GFX10PLUS-NEXT: s_mov_b32 s4, s6 2991; GFX10PLUS-NEXT: s_mov_b32 s5, s7 2992; GFX10PLUS-NEXT: s_mov_b32 s6, s8 2993; GFX10PLUS-NEXT: s_mov_b32 s7, s9 2994; GFX10PLUS-NEXT: s_mov_b32 s8, s10 2995; GFX10PLUS-NEXT: s_mov_b32 s9, s11 2996; GFX10PLUS-NEXT: s_mov_b32 s10, s12 2997; GFX10PLUS-NEXT: s_mov_b32 s11, s13 2998; GFX10PLUS-NEXT: s_mov_b32 s12, s14 2999; GFX10PLUS-NEXT: s_mov_b32 s13, s15 3000; GFX10PLUS-NEXT: s_movrels_b64 s[0:1], s[0:1] 3001; GFX10PLUS-NEXT: ; return to shader part epilog 3002entry: 3003 %ext = extractelement <7 x double> %vec, i32 %sel 3004 ret double %ext 3005} 3006 3007define amdgpu_kernel void @dyn_extract_v5f64_s_s(ptr addrspace(1) %out, i32 %sel) { 3008; GPRIDX-LABEL: dyn_extract_v5f64_s_s: 3009; GPRIDX: .amd_kernel_code_t 3010; GPRIDX-NEXT: amd_code_version_major = 1 3011; GPRIDX-NEXT: amd_code_version_minor = 2 3012; GPRIDX-NEXT: amd_machine_kind = 1 3013; GPRIDX-NEXT: amd_machine_version_major = 9 3014; GPRIDX-NEXT: amd_machine_version_minor = 0 3015; GPRIDX-NEXT: amd_machine_version_stepping = 0 3016; GPRIDX-NEXT: kernel_code_entry_byte_offset = 256 3017; GPRIDX-NEXT: kernel_code_prefetch_byte_size = 0 3018; GPRIDX-NEXT: granulated_workitem_vgpr_count = 0 3019; GPRIDX-NEXT: granulated_wavefront_sgpr_count = 1 3020; GPRIDX-NEXT: priority = 0 3021; GPRIDX-NEXT: float_mode = 240 3022; GPRIDX-NEXT: priv = 0 3023; GPRIDX-NEXT: enable_dx10_clamp = 1 3024; GPRIDX-NEXT: debug_mode 
= 0 3025; GPRIDX-NEXT: enable_ieee_mode = 1 3026; GPRIDX-NEXT: enable_wgp_mode = 0 3027; GPRIDX-NEXT: enable_mem_ordered = 0 3028; GPRIDX-NEXT: enable_fwd_progress = 0 3029; GPRIDX-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 3030; GPRIDX-NEXT: user_sgpr_count = 12 3031; GPRIDX-NEXT: enable_trap_handler = 0 3032; GPRIDX-NEXT: enable_sgpr_workgroup_id_x = 1 3033; GPRIDX-NEXT: enable_sgpr_workgroup_id_y = 1 3034; GPRIDX-NEXT: enable_sgpr_workgroup_id_z = 1 3035; GPRIDX-NEXT: enable_sgpr_workgroup_info = 0 3036; GPRIDX-NEXT: enable_vgpr_workitem_id = 2 3037; GPRIDX-NEXT: enable_exception_msb = 0 3038; GPRIDX-NEXT: granulated_lds_size = 0 3039; GPRIDX-NEXT: enable_exception = 0 3040; GPRIDX-NEXT: enable_sgpr_private_segment_buffer = 1 3041; GPRIDX-NEXT: enable_sgpr_dispatch_ptr = 1 3042; GPRIDX-NEXT: enable_sgpr_queue_ptr = 1 3043; GPRIDX-NEXT: enable_sgpr_kernarg_segment_ptr = 1 3044; GPRIDX-NEXT: enable_sgpr_dispatch_id = 1 3045; GPRIDX-NEXT: enable_sgpr_flat_scratch_init = 0 3046; GPRIDX-NEXT: enable_sgpr_private_segment_size = 0 3047; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_x = 0 3048; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_y = 0 3049; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_z = 0 3050; GPRIDX-NEXT: enable_wavefront_size32 = 0 3051; GPRIDX-NEXT: enable_ordered_append_gds = 0 3052; GPRIDX-NEXT: private_element_size = 1 3053; GPRIDX-NEXT: is_ptr64 = 1 3054; GPRIDX-NEXT: is_dynamic_callstack = 0 3055; GPRIDX-NEXT: is_debug_enabled = 0 3056; GPRIDX-NEXT: is_xnack_enabled = 1 3057; GPRIDX-NEXT: workitem_private_segment_byte_size = 0 3058; GPRIDX-NEXT: workgroup_group_segment_byte_size = 0 3059; GPRIDX-NEXT: gds_segment_byte_size = 0 3060; GPRIDX-NEXT: kernarg_segment_byte_size = 28 3061; GPRIDX-NEXT: workgroup_fbarrier_count = 0 3062; GPRIDX-NEXT: wavefront_sgpr_count = 15 3063; GPRIDX-NEXT: workitem_vgpr_count = 3 3064; GPRIDX-NEXT: reserved_vgpr_first = 0 3065; GPRIDX-NEXT: reserved_vgpr_count = 0 3066; GPRIDX-NEXT: reserved_sgpr_first = 0 
3067; GPRIDX-NEXT: reserved_sgpr_count = 0 3068; GPRIDX-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 3069; GPRIDX-NEXT: debug_private_segment_buffer_sgpr = 0 3070; GPRIDX-NEXT: kernarg_segment_alignment = 4 3071; GPRIDX-NEXT: group_segment_alignment = 4 3072; GPRIDX-NEXT: private_segment_alignment = 4 3073; GPRIDX-NEXT: wavefront_size = 6 3074; GPRIDX-NEXT: call_convention = -1 3075; GPRIDX-NEXT: runtime_loader_kernel_symbol = 0 3076; GPRIDX-NEXT: .end_amd_kernel_code_t 3077; GPRIDX-NEXT: ; %bb.0: ; %entry 3078; GPRIDX-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 3079; GPRIDX-NEXT: s_load_dword s10, s[8:9], 0x8 3080; GPRIDX-NEXT: s_mov_b32 s4, 0 3081; GPRIDX-NEXT: s_mov_b32 s5, 0x40080000 3082; GPRIDX-NEXT: s_mov_b32 s2, 0 3083; GPRIDX-NEXT: s_mov_b32 s3, 0x40140000 3084; GPRIDX-NEXT: s_waitcnt lgkmcnt(0) 3085; GPRIDX-NEXT: s_cmp_eq_u32 s10, 1 3086; GPRIDX-NEXT: s_cselect_b64 s[6:7], 2.0, 1.0 3087; GPRIDX-NEXT: s_cmp_eq_u32 s10, 2 3088; GPRIDX-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] 3089; GPRIDX-NEXT: s_cmp_eq_u32 s10, 3 3090; GPRIDX-NEXT: s_cselect_b64 s[4:5], 4.0, s[4:5] 3091; GPRIDX-NEXT: s_cmp_eq_u32 s10, 4 3092; GPRIDX-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5] 3093; GPRIDX-NEXT: v_mov_b32_e32 v0, s2 3094; GPRIDX-NEXT: v_mov_b32_e32 v1, s3 3095; GPRIDX-NEXT: v_mov_b32_e32 v2, 0 3096; GPRIDX-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 3097; GPRIDX-NEXT: s_endpgm 3098; 3099; MOVREL-LABEL: dyn_extract_v5f64_s_s: 3100; MOVREL: .amd_kernel_code_t 3101; MOVREL-NEXT: amd_code_version_major = 1 3102; MOVREL-NEXT: amd_code_version_minor = 2 3103; MOVREL-NEXT: amd_machine_kind = 1 3104; MOVREL-NEXT: amd_machine_version_major = 8 3105; MOVREL-NEXT: amd_machine_version_minor = 0 3106; MOVREL-NEXT: amd_machine_version_stepping = 3 3107; MOVREL-NEXT: kernel_code_entry_byte_offset = 256 3108; MOVREL-NEXT: kernel_code_prefetch_byte_size = 0 3109; MOVREL-NEXT: granulated_workitem_vgpr_count = 0 3110; MOVREL-NEXT: granulated_wavefront_sgpr_count = 1 3111; MOVREL-NEXT: 
priority = 0 3112; MOVREL-NEXT: float_mode = 240 3113; MOVREL-NEXT: priv = 0 3114; MOVREL-NEXT: enable_dx10_clamp = 1 3115; MOVREL-NEXT: debug_mode = 0 3116; MOVREL-NEXT: enable_ieee_mode = 1 3117; MOVREL-NEXT: enable_wgp_mode = 0 3118; MOVREL-NEXT: enable_mem_ordered = 0 3119; MOVREL-NEXT: enable_fwd_progress = 0 3120; MOVREL-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 3121; MOVREL-NEXT: user_sgpr_count = 12 3122; MOVREL-NEXT: enable_trap_handler = 0 3123; MOVREL-NEXT: enable_sgpr_workgroup_id_x = 1 3124; MOVREL-NEXT: enable_sgpr_workgroup_id_y = 1 3125; MOVREL-NEXT: enable_sgpr_workgroup_id_z = 1 3126; MOVREL-NEXT: enable_sgpr_workgroup_info = 0 3127; MOVREL-NEXT: enable_vgpr_workitem_id = 2 3128; MOVREL-NEXT: enable_exception_msb = 0 3129; MOVREL-NEXT: granulated_lds_size = 0 3130; MOVREL-NEXT: enable_exception = 0 3131; MOVREL-NEXT: enable_sgpr_private_segment_buffer = 1 3132; MOVREL-NEXT: enable_sgpr_dispatch_ptr = 1 3133; MOVREL-NEXT: enable_sgpr_queue_ptr = 1 3134; MOVREL-NEXT: enable_sgpr_kernarg_segment_ptr = 1 3135; MOVREL-NEXT: enable_sgpr_dispatch_id = 1 3136; MOVREL-NEXT: enable_sgpr_flat_scratch_init = 0 3137; MOVREL-NEXT: enable_sgpr_private_segment_size = 0 3138; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_x = 0 3139; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_y = 0 3140; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_z = 0 3141; MOVREL-NEXT: enable_wavefront_size32 = 0 3142; MOVREL-NEXT: enable_ordered_append_gds = 0 3143; MOVREL-NEXT: private_element_size = 1 3144; MOVREL-NEXT: is_ptr64 = 1 3145; MOVREL-NEXT: is_dynamic_callstack = 0 3146; MOVREL-NEXT: is_debug_enabled = 0 3147; MOVREL-NEXT: is_xnack_enabled = 0 3148; MOVREL-NEXT: workitem_private_segment_byte_size = 0 3149; MOVREL-NEXT: workgroup_group_segment_byte_size = 0 3150; MOVREL-NEXT: gds_segment_byte_size = 0 3151; MOVREL-NEXT: kernarg_segment_byte_size = 28 3152; MOVREL-NEXT: workgroup_fbarrier_count = 0 3153; MOVREL-NEXT: wavefront_sgpr_count = 10 3154; MOVREL-NEXT: 
workitem_vgpr_count = 4 3155; MOVREL-NEXT: reserved_vgpr_first = 0 3156; MOVREL-NEXT: reserved_vgpr_count = 0 3157; MOVREL-NEXT: reserved_sgpr_first = 0 3158; MOVREL-NEXT: reserved_sgpr_count = 0 3159; MOVREL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 3160; MOVREL-NEXT: debug_private_segment_buffer_sgpr = 0 3161; MOVREL-NEXT: kernarg_segment_alignment = 4 3162; MOVREL-NEXT: group_segment_alignment = 4 3163; MOVREL-NEXT: private_segment_alignment = 4 3164; MOVREL-NEXT: wavefront_size = 6 3165; MOVREL-NEXT: call_convention = -1 3166; MOVREL-NEXT: runtime_loader_kernel_symbol = 0 3167; MOVREL-NEXT: .end_amd_kernel_code_t 3168; MOVREL-NEXT: ; %bb.0: ; %entry 3169; MOVREL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 3170; MOVREL-NEXT: s_load_dword s8, s[8:9], 0x8 3171; MOVREL-NEXT: s_mov_b32 s4, 0 3172; MOVREL-NEXT: s_mov_b32 s5, 0x40080000 3173; MOVREL-NEXT: s_mov_b32 s2, 0 3174; MOVREL-NEXT: s_mov_b32 s3, 0x40140000 3175; MOVREL-NEXT: s_waitcnt lgkmcnt(0) 3176; MOVREL-NEXT: s_cmp_eq_u32 s8, 1 3177; MOVREL-NEXT: s_cselect_b64 s[6:7], 2.0, 1.0 3178; MOVREL-NEXT: s_cmp_eq_u32 s8, 2 3179; MOVREL-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] 3180; MOVREL-NEXT: s_cmp_eq_u32 s8, 3 3181; MOVREL-NEXT: s_cselect_b64 s[4:5], 4.0, s[4:5] 3182; MOVREL-NEXT: s_cmp_eq_u32 s8, 4 3183; MOVREL-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5] 3184; MOVREL-NEXT: v_mov_b32_e32 v0, s2 3185; MOVREL-NEXT: v_mov_b32_e32 v3, s1 3186; MOVREL-NEXT: v_mov_b32_e32 v1, s3 3187; MOVREL-NEXT: v_mov_b32_e32 v2, s0 3188; MOVREL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 3189; MOVREL-NEXT: s_endpgm 3190; 3191; GFX10-LABEL: dyn_extract_v5f64_s_s: 3192; GFX10: .amd_kernel_code_t 3193; GFX10-NEXT: amd_code_version_major = 1 3194; GFX10-NEXT: amd_code_version_minor = 2 3195; GFX10-NEXT: amd_machine_kind = 1 3196; GFX10-NEXT: amd_machine_version_major = 10 3197; GFX10-NEXT: amd_machine_version_minor = 1 3198; GFX10-NEXT: amd_machine_version_stepping = 0 3199; GFX10-NEXT: kernel_code_entry_byte_offset = 256 3200; 
GFX10-NEXT: kernel_code_prefetch_byte_size = 0 3201; GFX10-NEXT: granulated_workitem_vgpr_count = 0 3202; GFX10-NEXT: granulated_wavefront_sgpr_count = 1 3203; GFX10-NEXT: priority = 0 3204; GFX10-NEXT: float_mode = 240 3205; GFX10-NEXT: priv = 0 3206; GFX10-NEXT: enable_dx10_clamp = 1 3207; GFX10-NEXT: debug_mode = 0 3208; GFX10-NEXT: enable_ieee_mode = 1 3209; GFX10-NEXT: enable_wgp_mode = 1 3210; GFX10-NEXT: enable_mem_ordered = 1 3211; GFX10-NEXT: enable_fwd_progress = 0 3212; GFX10-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 3213; GFX10-NEXT: user_sgpr_count = 12 3214; GFX10-NEXT: enable_trap_handler = 0 3215; GFX10-NEXT: enable_sgpr_workgroup_id_x = 1 3216; GFX10-NEXT: enable_sgpr_workgroup_id_y = 1 3217; GFX10-NEXT: enable_sgpr_workgroup_id_z = 1 3218; GFX10-NEXT: enable_sgpr_workgroup_info = 0 3219; GFX10-NEXT: enable_vgpr_workitem_id = 2 3220; GFX10-NEXT: enable_exception_msb = 0 3221; GFX10-NEXT: granulated_lds_size = 0 3222; GFX10-NEXT: enable_exception = 0 3223; GFX10-NEXT: enable_sgpr_private_segment_buffer = 1 3224; GFX10-NEXT: enable_sgpr_dispatch_ptr = 1 3225; GFX10-NEXT: enable_sgpr_queue_ptr = 1 3226; GFX10-NEXT: enable_sgpr_kernarg_segment_ptr = 1 3227; GFX10-NEXT: enable_sgpr_dispatch_id = 1 3228; GFX10-NEXT: enable_sgpr_flat_scratch_init = 0 3229; GFX10-NEXT: enable_sgpr_private_segment_size = 0 3230; GFX10-NEXT: enable_sgpr_grid_workgroup_count_x = 0 3231; GFX10-NEXT: enable_sgpr_grid_workgroup_count_y = 0 3232; GFX10-NEXT: enable_sgpr_grid_workgroup_count_z = 0 3233; GFX10-NEXT: enable_wavefront_size32 = 1 3234; GFX10-NEXT: enable_ordered_append_gds = 0 3235; GFX10-NEXT: private_element_size = 1 3236; GFX10-NEXT: is_ptr64 = 1 3237; GFX10-NEXT: is_dynamic_callstack = 0 3238; GFX10-NEXT: is_debug_enabled = 0 3239; GFX10-NEXT: is_xnack_enabled = 1 3240; GFX10-NEXT: workitem_private_segment_byte_size = 0 3241; GFX10-NEXT: workgroup_group_segment_byte_size = 0 3242; GFX10-NEXT: gds_segment_byte_size = 0 3243; GFX10-NEXT: 
kernarg_segment_byte_size = 28 3244; GFX10-NEXT: workgroup_fbarrier_count = 0 3245; GFX10-NEXT: wavefront_sgpr_count = 10 3246; GFX10-NEXT: workitem_vgpr_count = 3 3247; GFX10-NEXT: reserved_vgpr_first = 0 3248; GFX10-NEXT: reserved_vgpr_count = 0 3249; GFX10-NEXT: reserved_sgpr_first = 0 3250; GFX10-NEXT: reserved_sgpr_count = 0 3251; GFX10-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 3252; GFX10-NEXT: debug_private_segment_buffer_sgpr = 0 3253; GFX10-NEXT: kernarg_segment_alignment = 4 3254; GFX10-NEXT: group_segment_alignment = 4 3255; GFX10-NEXT: private_segment_alignment = 4 3256; GFX10-NEXT: wavefront_size = 5 3257; GFX10-NEXT: call_convention = -1 3258; GFX10-NEXT: runtime_loader_kernel_symbol = 0 3259; GFX10-NEXT: .end_amd_kernel_code_t 3260; GFX10-NEXT: ; %bb.0: ; %entry 3261; GFX10-NEXT: s_clause 0x1 3262; GFX10-NEXT: s_load_dword s6, s[8:9], 0x8 3263; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 3264; GFX10-NEXT: s_mov_b32 s2, 0 3265; GFX10-NEXT: s_mov_b32 s3, 0x40080000 3266; GFX10-NEXT: v_mov_b32_e32 v2, 0 3267; GFX10-NEXT: s_waitcnt lgkmcnt(0) 3268; GFX10-NEXT: s_cmp_eq_u32 s6, 1 3269; GFX10-NEXT: s_cselect_b64 s[4:5], 2.0, 1.0 3270; GFX10-NEXT: s_cmp_eq_u32 s6, 2 3271; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5] 3272; GFX10-NEXT: s_cmp_eq_u32 s6, 3 3273; GFX10-NEXT: s_mov_b32 s4, 0 3274; GFX10-NEXT: s_mov_b32 s5, 0x40140000 3275; GFX10-NEXT: s_cselect_b64 s[2:3], 4.0, s[2:3] 3276; GFX10-NEXT: s_cmp_eq_u32 s6, 4 3277; GFX10-NEXT: s_cselect_b64 s[2:3], s[4:5], s[2:3] 3278; GFX10-NEXT: v_mov_b32_e32 v0, s2 3279; GFX10-NEXT: v_mov_b32_e32 v1, s3 3280; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 3281; GFX10-NEXT: s_endpgm 3282; 3283; GFX11-LABEL: dyn_extract_v5f64_s_s: 3284; GFX11: .amd_kernel_code_t 3285; GFX11-NEXT: amd_code_version_major = 1 3286; GFX11-NEXT: amd_code_version_minor = 2 3287; GFX11-NEXT: amd_machine_kind = 1 3288; GFX11-NEXT: amd_machine_version_major = 11 3289; GFX11-NEXT: amd_machine_version_minor = 0 3290; 
GFX11-NEXT: amd_machine_version_stepping = 0 3291; GFX11-NEXT: kernel_code_entry_byte_offset = 256 3292; GFX11-NEXT: kernel_code_prefetch_byte_size = 0 3293; GFX11-NEXT: granulated_workitem_vgpr_count = 0 3294; GFX11-NEXT: granulated_wavefront_sgpr_count = 0 3295; GFX11-NEXT: priority = 0 3296; GFX11-NEXT: float_mode = 240 3297; GFX11-NEXT: priv = 0 3298; GFX11-NEXT: enable_dx10_clamp = 1 3299; GFX11-NEXT: debug_mode = 0 3300; GFX11-NEXT: enable_ieee_mode = 1 3301; GFX11-NEXT: enable_wgp_mode = 1 3302; GFX11-NEXT: enable_mem_ordered = 1 3303; GFX11-NEXT: enable_fwd_progress = 0 3304; GFX11-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 3305; GFX11-NEXT: user_sgpr_count = 13 3306; GFX11-NEXT: enable_trap_handler = 0 3307; GFX11-NEXT: enable_sgpr_workgroup_id_x = 1 3308; GFX11-NEXT: enable_sgpr_workgroup_id_y = 1 3309; GFX11-NEXT: enable_sgpr_workgroup_id_z = 1 3310; GFX11-NEXT: enable_sgpr_workgroup_info = 0 3311; GFX11-NEXT: enable_vgpr_workitem_id = 2 3312; GFX11-NEXT: enable_exception_msb = 0 3313; GFX11-NEXT: granulated_lds_size = 0 3314; GFX11-NEXT: enable_exception = 0 3315; GFX11-NEXT: enable_sgpr_private_segment_buffer = 0 3316; GFX11-NEXT: enable_sgpr_dispatch_ptr = 1 3317; GFX11-NEXT: enable_sgpr_queue_ptr = 1 3318; GFX11-NEXT: enable_sgpr_kernarg_segment_ptr = 1 3319; GFX11-NEXT: enable_sgpr_dispatch_id = 1 3320; GFX11-NEXT: enable_sgpr_flat_scratch_init = 0 3321; GFX11-NEXT: enable_sgpr_private_segment_size = 0 3322; GFX11-NEXT: enable_sgpr_grid_workgroup_count_x = 0 3323; GFX11-NEXT: enable_sgpr_grid_workgroup_count_y = 0 3324; GFX11-NEXT: enable_sgpr_grid_workgroup_count_z = 0 3325; GFX11-NEXT: enable_wavefront_size32 = 1 3326; GFX11-NEXT: enable_ordered_append_gds = 0 3327; GFX11-NEXT: private_element_size = 1 3328; GFX11-NEXT: is_ptr64 = 1 3329; GFX11-NEXT: is_dynamic_callstack = 0 3330; GFX11-NEXT: is_debug_enabled = 0 3331; GFX11-NEXT: is_xnack_enabled = 0 3332; GFX11-NEXT: workitem_private_segment_byte_size = 0 3333; GFX11-NEXT: 
workgroup_group_segment_byte_size = 0 3334; GFX11-NEXT: gds_segment_byte_size = 0 3335; GFX11-NEXT: kernarg_segment_byte_size = 28 3336; GFX11-NEXT: workgroup_fbarrier_count = 0 3337; GFX11-NEXT: wavefront_sgpr_count = 7 3338; GFX11-NEXT: workitem_vgpr_count = 3 3339; GFX11-NEXT: reserved_vgpr_first = 0 3340; GFX11-NEXT: reserved_vgpr_count = 0 3341; GFX11-NEXT: reserved_sgpr_first = 0 3342; GFX11-NEXT: reserved_sgpr_count = 0 3343; GFX11-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 3344; GFX11-NEXT: debug_private_segment_buffer_sgpr = 0 3345; GFX11-NEXT: kernarg_segment_alignment = 4 3346; GFX11-NEXT: group_segment_alignment = 4 3347; GFX11-NEXT: private_segment_alignment = 4 3348; GFX11-NEXT: wavefront_size = 5 3349; GFX11-NEXT: call_convention = -1 3350; GFX11-NEXT: runtime_loader_kernel_symbol = 0 3351; GFX11-NEXT: .end_amd_kernel_code_t 3352; GFX11-NEXT: ; %bb.0: ; %entry 3353; GFX11-NEXT: s_clause 0x1 3354; GFX11-NEXT: s_load_b32 s6, s[4:5], 0x8 3355; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 3356; GFX11-NEXT: s_mov_b32 s2, 0 3357; GFX11-NEXT: s_mov_b32 s3, 0x40080000 3358; GFX11-NEXT: v_mov_b32_e32 v2, 0 3359; GFX11-NEXT: s_waitcnt lgkmcnt(0) 3360; GFX11-NEXT: s_cmp_eq_u32 s6, 1 3361; GFX11-NEXT: s_cselect_b64 s[4:5], 2.0, 1.0 3362; GFX11-NEXT: s_cmp_eq_u32 s6, 2 3363; GFX11-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5] 3364; GFX11-NEXT: s_cmp_eq_u32 s6, 3 3365; GFX11-NEXT: s_mov_b32 s4, 0 3366; GFX11-NEXT: s_mov_b32 s5, 0x40140000 3367; GFX11-NEXT: s_cselect_b64 s[2:3], 4.0, s[2:3] 3368; GFX11-NEXT: s_cmp_eq_u32 s6, 4 3369; GFX11-NEXT: s_cselect_b64 s[2:3], s[4:5], s[2:3] 3370; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 3371; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] 3372; GFX11-NEXT: s_endpgm 3373entry: 3374 %ext = extractelement <5 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0>, i32 %sel 3375 store double %ext, ptr addrspace(1) %out 3376 ret void 3377} 3378 3379define float 
@dyn_extract_v15f32_const_s_v(i32 %sel) { 3380; GCN-LABEL: dyn_extract_v15f32_const_s_v: 3381; GCN: ; %bb.0: ; %entry 3382; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3383; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 3384; GCN-NEXT: v_mov_b32_e32 v1, 0x40400000 3385; GCN-NEXT: v_cndmask_b32_e64 v13, 1.0, 2.0, vcc 3386; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 3387; GCN-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc 3388; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 3389; GCN-NEXT: v_mov_b32_e32 v2, 0x40a00000 3390; GCN-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc 3391; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 3392; GCN-NEXT: v_mov_b32_e32 v3, 0x40c00000 3393; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 3394; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 3395; GCN-NEXT: v_mov_b32_e32 v4, 0x40e00000 3396; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 3397; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0 3398; GCN-NEXT: v_mov_b32_e32 v5, 0x41000000 3399; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 3400; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0 3401; GCN-NEXT: v_mov_b32_e32 v6, 0x41100000 3402; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 3403; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 8, v0 3404; GCN-NEXT: v_mov_b32_e32 v7, 0x41200000 3405; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc 3406; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 9, v0 3407; GCN-NEXT: v_mov_b32_e32 v8, 0x41300000 3408; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 3409; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 10, v0 3410; GCN-NEXT: v_mov_b32_e32 v9, 0x41400000 3411; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc 3412; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 11, v0 3413; GCN-NEXT: v_mov_b32_e32 v10, 0x41500000 3414; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc 3415; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 12, v0 3416; GCN-NEXT: v_mov_b32_e32 v11, 0x41600000 3417; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v10, vcc 3418; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 13, v0 3419; GCN-NEXT: v_mov_b32_e32 v12, 0x41700000 3420; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc 3421; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 14, v0 
3422; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v12, vcc 3423; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 15, v0 3424; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 3425; GCN-NEXT: s_setpc_b64 s[30:31] 3426; 3427; GFX10-LABEL: dyn_extract_v15f32_const_s_v: 3428; GFX10: ; %bb.0: ; %entry 3429; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3430; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 3431; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo 3432; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 3433; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x40400000, vcc_lo 3434; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 3435; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc_lo 3436; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 3437; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x40a00000, vcc_lo 3438; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 3439; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x40c00000, vcc_lo 3440; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 3441; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x40e00000, vcc_lo 3442; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0 3443; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41000000, vcc_lo 3444; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v0 3445; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41100000, vcc_lo 3446; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v0 3447; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41200000, vcc_lo 3448; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v0 3449; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41300000, vcc_lo 3450; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v0 3451; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41400000, vcc_lo 3452; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v0 3453; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41500000, vcc_lo 3454; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v0 3455; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41600000, vcc_lo 3456; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v0 3457; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41700000, vcc_lo 3458; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 15, v0 3459; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s4, vcc_lo 3460; 
GFX10-NEXT: s_setpc_b64 s[30:31] 3461; 3462; GFX11-LABEL: dyn_extract_v15f32_const_s_v: 3463; GFX11: ; %bb.0: ; %entry 3464; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3465; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 3466; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo 3467; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 3468; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x40400000, vcc_lo 3469; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 3470; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc_lo 3471; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 3472; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x40a00000, vcc_lo 3473; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 3474; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x40c00000, vcc_lo 3475; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 3476; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x40e00000, vcc_lo 3477; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0 3478; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41000000, vcc_lo 3479; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v0 3480; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41100000, vcc_lo 3481; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v0 3482; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41200000, vcc_lo 3483; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v0 3484; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41300000, vcc_lo 3485; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v0 3486; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41400000, vcc_lo 3487; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v0 3488; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41500000, vcc_lo 3489; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v0 3490; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41600000, vcc_lo 3491; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v0 3492; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41700000, vcc_lo 3493; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 15, v0 3494; GFX11-NEXT: v_cndmask_b32_e64 v0, v1, s0, vcc_lo 3495; GFX11-NEXT: s_setpc_b64 s[30:31] 3496entry: 3497 %ext = extractelement <15 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, 
float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0>, i32 %sel 3498 ret float %ext 3499} 3500 3501define amdgpu_ps float @dyn_extract_v15f32_const_s_s(i32 inreg %sel) { 3502; GCN-LABEL: dyn_extract_v15f32_const_s_s: 3503; GCN: ; %bb.0: ; %entry 3504; GCN-NEXT: s_mov_b32 s4, 1.0 3505; GCN-NEXT: s_mov_b32 m0, s2 3506; GCN-NEXT: s_mov_b32 s18, 0x41700000 3507; GCN-NEXT: s_mov_b32 s17, 0x41600000 3508; GCN-NEXT: s_mov_b32 s16, 0x41500000 3509; GCN-NEXT: s_mov_b32 s15, 0x41400000 3510; GCN-NEXT: s_mov_b32 s14, 0x41300000 3511; GCN-NEXT: s_mov_b32 s13, 0x41200000 3512; GCN-NEXT: s_mov_b32 s12, 0x41100000 3513; GCN-NEXT: s_mov_b32 s11, 0x41000000 3514; GCN-NEXT: s_mov_b32 s10, 0x40e00000 3515; GCN-NEXT: s_mov_b32 s9, 0x40c00000 3516; GCN-NEXT: s_mov_b32 s8, 0x40a00000 3517; GCN-NEXT: s_mov_b32 s7, 4.0 3518; GCN-NEXT: s_mov_b32 s6, 0x40400000 3519; GCN-NEXT: s_mov_b32 s5, 2.0 3520; GCN-NEXT: s_movrels_b32 s0, s4 3521; GCN-NEXT: v_mov_b32_e32 v0, s0 3522; GCN-NEXT: ; return to shader part epilog 3523; 3524; GFX10PLUS-LABEL: dyn_extract_v15f32_const_s_s: 3525; GFX10PLUS: ; %bb.0: ; %entry 3526; GFX10PLUS-NEXT: s_mov_b32 s4, 1.0 3527; GFX10PLUS-NEXT: s_mov_b32 m0, s2 3528; GFX10PLUS-NEXT: s_mov_b32 s18, 0x41700000 3529; GFX10PLUS-NEXT: s_mov_b32 s17, 0x41600000 3530; GFX10PLUS-NEXT: s_mov_b32 s16, 0x41500000 3531; GFX10PLUS-NEXT: s_mov_b32 s15, 0x41400000 3532; GFX10PLUS-NEXT: s_mov_b32 s14, 0x41300000 3533; GFX10PLUS-NEXT: s_mov_b32 s13, 0x41200000 3534; GFX10PLUS-NEXT: s_mov_b32 s12, 0x41100000 3535; GFX10PLUS-NEXT: s_mov_b32 s11, 0x41000000 3536; GFX10PLUS-NEXT: s_mov_b32 s10, 0x40e00000 3537; GFX10PLUS-NEXT: s_mov_b32 s9, 0x40c00000 3538; GFX10PLUS-NEXT: s_mov_b32 s8, 0x40a00000 3539; GFX10PLUS-NEXT: s_mov_b32 s7, 4.0 3540; GFX10PLUS-NEXT: s_mov_b32 s6, 0x40400000 3541; GFX10PLUS-NEXT: s_mov_b32 s5, 2.0 3542; GFX10PLUS-NEXT: s_movrels_b32 s0, s4 3543; GFX10PLUS-NEXT: v_mov_b32_e32 v0, s0 3544; GFX10PLUS-NEXT: ; return to 
shader part epilog 3545entry: 3546 %ext = extractelement <15 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0>, i32 %sel 3547 ret float %ext 3548} 3549 3550define amdgpu_ps float @dyn_extract_v15f32_s_v(<15 x float> inreg %vec, i32 %sel) { 3551; GCN-LABEL: dyn_extract_v15f32_s_v: 3552; GCN: ; %bb.0: ; %entry 3553; GCN-NEXT: v_mov_b32_e32 v1, s2 3554; GCN-NEXT: v_mov_b32_e32 v2, s3 3555; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 3556; GCN-NEXT: v_mov_b32_e32 v3, s4 3557; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 3558; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 3559; GCN-NEXT: v_mov_b32_e32 v4, s5 3560; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 3561; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 3562; GCN-NEXT: v_mov_b32_e32 v5, s6 3563; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 3564; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 3565; GCN-NEXT: v_mov_b32_e32 v6, s7 3566; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 3567; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 3568; GCN-NEXT: v_mov_b32_e32 v7, s8 3569; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc 3570; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0 3571; GCN-NEXT: v_mov_b32_e32 v8, s9 3572; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 3573; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0 3574; GCN-NEXT: v_mov_b32_e32 v9, s10 3575; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc 3576; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 8, v0 3577; GCN-NEXT: v_mov_b32_e32 v10, s11 3578; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc 3579; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 9, v0 3580; GCN-NEXT: v_mov_b32_e32 v11, s12 3581; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v10, vcc 3582; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 10, v0 3583; GCN-NEXT: v_mov_b32_e32 v12, s13 3584; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc 3585; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 11, v0 3586; GCN-NEXT: v_mov_b32_e32 v13, s14 3587; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v12, vcc 3588; GCN-NEXT: 
v_cmp_eq_u32_e32 vcc, 12, v0 3589; GCN-NEXT: v_mov_b32_e32 v14, s15 3590; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc 3591; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 13, v0 3592; GCN-NEXT: v_mov_b32_e32 v15, s16 3593; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v14, vcc 3594; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 14, v0 3595; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc 3596; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 15, v0 3597; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 3598; GCN-NEXT: ; return to shader part epilog 3599; 3600; GFX10PLUS-LABEL: dyn_extract_v15f32_s_v: 3601; GFX10PLUS: ; %bb.0: ; %entry 3602; GFX10PLUS-NEXT: v_mov_b32_e32 v1, s3 3603; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 3604; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, s2, v1, vcc_lo 3605; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 3606; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s4, vcc_lo 3607; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 3608; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s5, vcc_lo 3609; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 3610; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s6, vcc_lo 3611; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 3612; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s7, vcc_lo 3613; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 3614; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s8, vcc_lo 3615; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0 3616; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s9, vcc_lo 3617; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v0 3618; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s10, vcc_lo 3619; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v0 3620; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s11, vcc_lo 3621; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v0 3622; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s12, vcc_lo 3623; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v0 3624; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s13, vcc_lo 3625; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v0 3626; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s14, vcc_lo 3627; GFX10PLUS-NEXT: 
v_cmp_eq_u32_e32 vcc_lo, 13, v0 3628; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s15, vcc_lo 3629; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v0 3630; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, s16, vcc_lo 3631; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 15, v0 3632; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v1, s0, vcc_lo 3633; GFX10PLUS-NEXT: ; return to shader part epilog 3634entry: 3635 %ext = extractelement <15 x float> %vec, i32 %sel 3636 ret float %ext 3637} 3638 3639define float @dyn_extract_v15f32_v_v(<15 x float> %vec, i32 %sel) { 3640; GCN-LABEL: dyn_extract_v15f32_v_v: 3641; GCN: ; %bb.0: ; %entry 3642; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3643; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v15 3644; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3645; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v15 3646; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 3647; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v15 3648; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 3649; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v15 3650; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 3651; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v15 3652; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc 3653; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v15 3654; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 3655; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v15 3656; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc 3657; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 8, v15 3658; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc 3659; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 9, v15 3660; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc 3661; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 10, v15 3662; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc 3663; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 11, v15 3664; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc 3665; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 12, v15 3666; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc 3667; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 13, v15 3668; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc 3669; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 14, v15 3670; GCN-NEXT: v_cndmask_b32_e32 v0, v0, 
v14, vcc 3671; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 15, v15 3672; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v0, vcc 3673; GCN-NEXT: s_setpc_b64 s[30:31] 3674; 3675; GFX10-LABEL: dyn_extract_v15f32_v_v: 3676; GFX10: ; %bb.0: ; %entry 3677; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3678; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v15 3679; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 3680; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v15 3681; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 3682; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v15 3683; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo 3684; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v15 3685; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo 3686; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v15 3687; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo 3688; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v15 3689; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo 3690; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v15 3691; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo 3692; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v15 3693; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo 3694; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v15 3695; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc_lo 3696; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v15 3697; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo 3698; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v15 3699; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc_lo 3700; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v15 3701; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo 3702; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v15 3703; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc_lo 3704; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v15 3705; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo 3706; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 15, v15 3707; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s4, vcc_lo 3708; GFX10-NEXT: s_setpc_b64 s[30:31] 3709; 3710; GFX11-LABEL: dyn_extract_v15f32_v_v: 3711; GFX11: ; %bb.0: ; %entry 3712; 
GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3713; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v15 3714; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 3715; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v15 3716; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 3717; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v15 3718; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo 3719; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v15 3720; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo 3721; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v15 3722; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo 3723; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v15 3724; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo 3725; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v15 3726; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo 3727; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v15 3728; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo 3729; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v15 3730; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc_lo 3731; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v15 3732; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo 3733; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v15 3734; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc_lo 3735; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v15 3736; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo 3737; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v15 3738; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc_lo 3739; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v15 3740; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo 3741; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 15, v15 3742; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s0, vcc_lo 3743; GFX11-NEXT: s_setpc_b64 s[30:31] 3744entry: 3745 %ext = extractelement <15 x float> %vec, i32 %sel 3746 ret float %ext 3747} 3748 3749define amdgpu_ps float @dyn_extract_v15f32_v_s(<15 x float> %vec, i32 inreg %sel) { 3750; GPRIDX-LABEL: dyn_extract_v15f32_v_s: 3751; GPRIDX: ; %bb.0: ; %entry 3752; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0) 3753; 
GPRIDX-NEXT: v_mov_b32_e32 v0, v0 3754; GPRIDX-NEXT: s_set_gpr_idx_off 3755; GPRIDX-NEXT: ; return to shader part epilog 3756; 3757; MOVREL-LABEL: dyn_extract_v15f32_v_s: 3758; MOVREL: ; %bb.0: ; %entry 3759; MOVREL-NEXT: s_mov_b32 m0, s2 3760; MOVREL-NEXT: v_movrels_b32_e32 v0, v0 3761; MOVREL-NEXT: ; return to shader part epilog 3762; 3763; GFX10PLUS-LABEL: dyn_extract_v15f32_v_s: 3764; GFX10PLUS: ; %bb.0: ; %entry 3765; GFX10PLUS-NEXT: s_mov_b32 m0, s2 3766; GFX10PLUS-NEXT: v_movrels_b32_e32 v0, v0 3767; GFX10PLUS-NEXT: ; return to shader part epilog 3768entry: 3769 %ext = extractelement <15 x float> %vec, i32 %sel 3770 ret float %ext 3771} 3772 3773define amdgpu_ps float @dyn_extract_v15f32_s_s(<15 x float> inreg %vec, i32 inreg %sel) { 3774; GCN-LABEL: dyn_extract_v15f32_s_s: 3775; GCN: ; %bb.0: ; %entry 3776; GCN-NEXT: s_mov_b32 s0, s2 3777; GCN-NEXT: s_mov_b32 m0, s17 3778; GCN-NEXT: s_mov_b32 s1, s3 3779; GCN-NEXT: s_mov_b32 s2, s4 3780; GCN-NEXT: s_mov_b32 s3, s5 3781; GCN-NEXT: s_mov_b32 s4, s6 3782; GCN-NEXT: s_mov_b32 s5, s7 3783; GCN-NEXT: s_mov_b32 s6, s8 3784; GCN-NEXT: s_mov_b32 s7, s9 3785; GCN-NEXT: s_mov_b32 s8, s10 3786; GCN-NEXT: s_mov_b32 s9, s11 3787; GCN-NEXT: s_mov_b32 s10, s12 3788; GCN-NEXT: s_mov_b32 s11, s13 3789; GCN-NEXT: s_mov_b32 s12, s14 3790; GCN-NEXT: s_mov_b32 s13, s15 3791; GCN-NEXT: s_mov_b32 s14, s16 3792; GCN-NEXT: s_movrels_b32 s0, s0 3793; GCN-NEXT: v_mov_b32_e32 v0, s0 3794; GCN-NEXT: ; return to shader part epilog 3795; 3796; GFX10PLUS-LABEL: dyn_extract_v15f32_s_s: 3797; GFX10PLUS: ; %bb.0: ; %entry 3798; GFX10PLUS-NEXT: s_mov_b32 s0, s2 3799; GFX10PLUS-NEXT: s_mov_b32 m0, s17 3800; GFX10PLUS-NEXT: s_mov_b32 s1, s3 3801; GFX10PLUS-NEXT: s_mov_b32 s2, s4 3802; GFX10PLUS-NEXT: s_mov_b32 s3, s5 3803; GFX10PLUS-NEXT: s_mov_b32 s4, s6 3804; GFX10PLUS-NEXT: s_mov_b32 s5, s7 3805; GFX10PLUS-NEXT: s_mov_b32 s6, s8 3806; GFX10PLUS-NEXT: s_mov_b32 s7, s9 3807; GFX10PLUS-NEXT: s_mov_b32 s8, s10 3808; GFX10PLUS-NEXT: s_mov_b32 s9, 
s11 3809; GFX10PLUS-NEXT: s_mov_b32 s10, s12 3810; GFX10PLUS-NEXT: s_mov_b32 s11, s13 3811; GFX10PLUS-NEXT: s_mov_b32 s12, s14 3812; GFX10PLUS-NEXT: s_mov_b32 s13, s15 3813; GFX10PLUS-NEXT: s_mov_b32 s14, s16 3814; GFX10PLUS-NEXT: s_movrels_b32 s0, s0 3815; GFX10PLUS-NEXT: v_mov_b32_e32 v0, s0 3816; GFX10PLUS-NEXT: ; return to shader part epilog 3817entry: 3818 %ext = extractelement <15 x float> %vec, i32 %sel 3819 ret float %ext 3820} 3821 3822define amdgpu_ps float @dyn_extract_v15f32_s_s_offset3(<15 x float> inreg %vec, i32 inreg %sel) { 3823; GCN-LABEL: dyn_extract_v15f32_s_s_offset3: 3824; GCN: ; %bb.0: ; %entry 3825; GCN-NEXT: s_mov_b32 s0, s2 3826; GCN-NEXT: s_mov_b32 s1, s3 3827; GCN-NEXT: s_mov_b32 s3, s5 3828; GCN-NEXT: s_mov_b32 m0, s17 3829; GCN-NEXT: s_mov_b32 s2, s4 3830; GCN-NEXT: s_mov_b32 s4, s6 3831; GCN-NEXT: s_mov_b32 s5, s7 3832; GCN-NEXT: s_mov_b32 s6, s8 3833; GCN-NEXT: s_mov_b32 s7, s9 3834; GCN-NEXT: s_mov_b32 s8, s10 3835; GCN-NEXT: s_mov_b32 s9, s11 3836; GCN-NEXT: s_mov_b32 s10, s12 3837; GCN-NEXT: s_mov_b32 s11, s13 3838; GCN-NEXT: s_mov_b32 s12, s14 3839; GCN-NEXT: s_mov_b32 s13, s15 3840; GCN-NEXT: s_mov_b32 s14, s16 3841; GCN-NEXT: s_movrels_b32 s0, s3 3842; GCN-NEXT: v_mov_b32_e32 v0, s0 3843; GCN-NEXT: ; return to shader part epilog 3844; 3845; GFX10PLUS-LABEL: dyn_extract_v15f32_s_s_offset3: 3846; GFX10PLUS: ; %bb.0: ; %entry 3847; GFX10PLUS-NEXT: s_mov_b32 s1, s3 3848; GFX10PLUS-NEXT: s_mov_b32 s3, s5 3849; GFX10PLUS-NEXT: s_mov_b32 m0, s17 3850; GFX10PLUS-NEXT: s_mov_b32 s0, s2 3851; GFX10PLUS-NEXT: s_mov_b32 s2, s4 3852; GFX10PLUS-NEXT: s_mov_b32 s4, s6 3853; GFX10PLUS-NEXT: s_mov_b32 s5, s7 3854; GFX10PLUS-NEXT: s_mov_b32 s6, s8 3855; GFX10PLUS-NEXT: s_mov_b32 s7, s9 3856; GFX10PLUS-NEXT: s_mov_b32 s8, s10 3857; GFX10PLUS-NEXT: s_mov_b32 s9, s11 3858; GFX10PLUS-NEXT: s_mov_b32 s10, s12 3859; GFX10PLUS-NEXT: s_mov_b32 s11, s13 3860; GFX10PLUS-NEXT: s_mov_b32 s12, s14 3861; GFX10PLUS-NEXT: s_mov_b32 s13, s15 3862; 
GFX10PLUS-NEXT: s_mov_b32 s14, s16 3863; GFX10PLUS-NEXT: s_movrels_b32 s0, s3 3864; GFX10PLUS-NEXT: v_mov_b32_e32 v0, s0 3865; GFX10PLUS-NEXT: ; return to shader part epilog 3866entry: 3867 %add = add i32 %sel, 3 3868 %ext = extractelement <15 x float> %vec, i32 %add 3869 ret float %ext 3870} 3871 3872define float @dyn_extract_v15f32_v_v_offset3(<15 x float> %vec, i32 %sel) { 3873; GPRIDX-LABEL: dyn_extract_v15f32_v_v_offset3: 3874; GPRIDX: ; %bb.0: ; %entry 3875; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3876; GPRIDX-NEXT: v_add_u32_e32 v15, 3, v15 3877; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v15 3878; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3879; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v15 3880; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 3881; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v15 3882; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 3883; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v15 3884; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 3885; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v15 3886; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc 3887; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v15 3888; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 3889; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v15 3890; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc 3891; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 8, v15 3892; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc 3893; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 9, v15 3894; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc 3895; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 10, v15 3896; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc 3897; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 11, v15 3898; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc 3899; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 12, v15 3900; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc 3901; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 13, v15 3902; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc 3903; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 14, v15 3904; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc 3905; 
GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 15, v15 3906; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v0, vcc 3907; GPRIDX-NEXT: s_setpc_b64 s[30:31] 3908; 3909; MOVREL-LABEL: dyn_extract_v15f32_v_v_offset3: 3910; MOVREL: ; %bb.0: ; %entry 3911; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3912; MOVREL-NEXT: v_add_u32_e32 v15, vcc, 3, v15 3913; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 1, v15 3914; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3915; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 2, v15 3916; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 3917; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 3, v15 3918; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 3919; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 4, v15 3920; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 3921; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 5, v15 3922; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc 3923; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 6, v15 3924; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 3925; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 7, v15 3926; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc 3927; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 8, v15 3928; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc 3929; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 9, v15 3930; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc 3931; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 10, v15 3932; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc 3933; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 11, v15 3934; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc 3935; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 12, v15 3936; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc 3937; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 13, v15 3938; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc 3939; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 14, v15 3940; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc 3941; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, 15, v15 3942; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v0, vcc 3943; MOVREL-NEXT: s_setpc_b64 s[30:31] 3944; 3945; GFX10-LABEL: dyn_extract_v15f32_v_v_offset3: 3946; GFX10: ; %bb.0: ; %entry 3947; 
GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3948; GFX10-NEXT: v_add_nc_u32_e32 v15, 3, v15 3949; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v15 3950; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 3951; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v15 3952; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 3953; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v15 3954; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo 3955; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v15 3956; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo 3957; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v15 3958; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo 3959; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v15 3960; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo 3961; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v15 3962; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo 3963; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v15 3964; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo 3965; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v15 3966; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc_lo 3967; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v15 3968; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo 3969; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v15 3970; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc_lo 3971; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v15 3972; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo 3973; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v15 3974; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc_lo 3975; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v15 3976; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo 3977; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 15, v15 3978; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s4, vcc_lo 3979; GFX10-NEXT: s_setpc_b64 s[30:31] 3980; 3981; GFX11-LABEL: dyn_extract_v15f32_v_v_offset3: 3982; GFX11: ; %bb.0: ; %entry 3983; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3984; GFX11-NEXT: v_add_nc_u32_e32 v15, 3, v15 3985; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v15 3986; GFX11-NEXT: 
v_cndmask_b32_e32 v0, v0, v1, vcc_lo 3987; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v15 3988; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 3989; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v15 3990; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo 3991; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v15 3992; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo 3993; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v15 3994; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo 3995; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v15 3996; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo 3997; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v15 3998; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo 3999; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v15 4000; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo 4001; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v15 4002; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc_lo 4003; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v15 4004; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo 4005; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v15 4006; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc_lo 4007; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v15 4008; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc_lo 4009; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v15 4010; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v13, vcc_lo 4011; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v15 4012; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo 4013; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 15, v15 4014; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s0, vcc_lo 4015; GFX11-NEXT: s_setpc_b64 s[30:31] 4016entry: 4017 %add = add i32 %sel, 3 4018 %ext = extractelement <15 x float> %vec, i32 %add 4019 ret float %ext 4020} 4021 4022define amdgpu_kernel void @dyn_extract_v4f32_s_s_s(ptr addrspace(1) %out, i32 %sel) { 4023; GPRIDX-LABEL: dyn_extract_v4f32_s_s_s: 4024; GPRIDX: .amd_kernel_code_t 4025; GPRIDX-NEXT: amd_code_version_major = 1 4026; GPRIDX-NEXT: amd_code_version_minor = 2 4027; GPRIDX-NEXT: amd_machine_kind = 1 4028; 
GPRIDX-NEXT: amd_machine_version_major = 9 4029; GPRIDX-NEXT: amd_machine_version_minor = 0 4030; GPRIDX-NEXT: amd_machine_version_stepping = 0 4031; GPRIDX-NEXT: kernel_code_entry_byte_offset = 256 4032; GPRIDX-NEXT: kernel_code_prefetch_byte_size = 0 4033; GPRIDX-NEXT: granulated_workitem_vgpr_count = 0 4034; GPRIDX-NEXT: granulated_wavefront_sgpr_count = 1 4035; GPRIDX-NEXT: priority = 0 4036; GPRIDX-NEXT: float_mode = 240 4037; GPRIDX-NEXT: priv = 0 4038; GPRIDX-NEXT: enable_dx10_clamp = 1 4039; GPRIDX-NEXT: debug_mode = 0 4040; GPRIDX-NEXT: enable_ieee_mode = 1 4041; GPRIDX-NEXT: enable_wgp_mode = 0 4042; GPRIDX-NEXT: enable_mem_ordered = 0 4043; GPRIDX-NEXT: enable_fwd_progress = 0 4044; GPRIDX-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 4045; GPRIDX-NEXT: user_sgpr_count = 12 4046; GPRIDX-NEXT: enable_trap_handler = 0 4047; GPRIDX-NEXT: enable_sgpr_workgroup_id_x = 1 4048; GPRIDX-NEXT: enable_sgpr_workgroup_id_y = 1 4049; GPRIDX-NEXT: enable_sgpr_workgroup_id_z = 1 4050; GPRIDX-NEXT: enable_sgpr_workgroup_info = 0 4051; GPRIDX-NEXT: enable_vgpr_workitem_id = 2 4052; GPRIDX-NEXT: enable_exception_msb = 0 4053; GPRIDX-NEXT: granulated_lds_size = 0 4054; GPRIDX-NEXT: enable_exception = 0 4055; GPRIDX-NEXT: enable_sgpr_private_segment_buffer = 1 4056; GPRIDX-NEXT: enable_sgpr_dispatch_ptr = 1 4057; GPRIDX-NEXT: enable_sgpr_queue_ptr = 1 4058; GPRIDX-NEXT: enable_sgpr_kernarg_segment_ptr = 1 4059; GPRIDX-NEXT: enable_sgpr_dispatch_id = 1 4060; GPRIDX-NEXT: enable_sgpr_flat_scratch_init = 0 4061; GPRIDX-NEXT: enable_sgpr_private_segment_size = 0 4062; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_x = 0 4063; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_y = 0 4064; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_z = 0 4065; GPRIDX-NEXT: enable_wavefront_size32 = 0 4066; GPRIDX-NEXT: enable_ordered_append_gds = 0 4067; GPRIDX-NEXT: private_element_size = 1 4068; GPRIDX-NEXT: is_ptr64 = 1 4069; GPRIDX-NEXT: is_dynamic_callstack = 0 4070; GPRIDX-NEXT: 
is_debug_enabled = 0 4071; GPRIDX-NEXT: is_xnack_enabled = 1 4072; GPRIDX-NEXT: workitem_private_segment_byte_size = 0 4073; GPRIDX-NEXT: workgroup_group_segment_byte_size = 0 4074; GPRIDX-NEXT: gds_segment_byte_size = 0 4075; GPRIDX-NEXT: kernarg_segment_byte_size = 28 4076; GPRIDX-NEXT: workgroup_fbarrier_count = 0 4077; GPRIDX-NEXT: wavefront_sgpr_count = 14 4078; GPRIDX-NEXT: workitem_vgpr_count = 2 4079; GPRIDX-NEXT: reserved_vgpr_first = 0 4080; GPRIDX-NEXT: reserved_vgpr_count = 0 4081; GPRIDX-NEXT: reserved_sgpr_first = 0 4082; GPRIDX-NEXT: reserved_sgpr_count = 0 4083; GPRIDX-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 4084; GPRIDX-NEXT: debug_private_segment_buffer_sgpr = 0 4085; GPRIDX-NEXT: kernarg_segment_alignment = 4 4086; GPRIDX-NEXT: group_segment_alignment = 4 4087; GPRIDX-NEXT: private_segment_alignment = 4 4088; GPRIDX-NEXT: wavefront_size = 6 4089; GPRIDX-NEXT: call_convention = -1 4090; GPRIDX-NEXT: runtime_loader_kernel_symbol = 0 4091; GPRIDX-NEXT: .end_amd_kernel_code_t 4092; GPRIDX-NEXT: ; %bb.0: ; %entry 4093; GPRIDX-NEXT: s_load_dword s2, s[8:9], 0x8 4094; GPRIDX-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 4095; GPRIDX-NEXT: v_mov_b32_e32 v1, 0 4096; GPRIDX-NEXT: s_waitcnt lgkmcnt(0) 4097; GPRIDX-NEXT: s_cmp_eq_u32 s2, 1 4098; GPRIDX-NEXT: s_cselect_b32 s3, 2.0, 1.0 4099; GPRIDX-NEXT: s_cmp_eq_u32 s2, 2 4100; GPRIDX-NEXT: s_cselect_b32 s3, 0x40400000, s3 4101; GPRIDX-NEXT: s_cmp_eq_u32 s2, 3 4102; GPRIDX-NEXT: s_cselect_b32 s2, 4.0, s3 4103; GPRIDX-NEXT: v_mov_b32_e32 v0, s2 4104; GPRIDX-NEXT: global_store_dword v1, v0, s[0:1] 4105; GPRIDX-NEXT: s_endpgm 4106; 4107; MOVREL-LABEL: dyn_extract_v4f32_s_s_s: 4108; MOVREL: .amd_kernel_code_t 4109; MOVREL-NEXT: amd_code_version_major = 1 4110; MOVREL-NEXT: amd_code_version_minor = 2 4111; MOVREL-NEXT: amd_machine_kind = 1 4112; MOVREL-NEXT: amd_machine_version_major = 8 4113; MOVREL-NEXT: amd_machine_version_minor = 0 4114; MOVREL-NEXT: amd_machine_version_stepping = 3 4115; 
MOVREL-NEXT: kernel_code_entry_byte_offset = 256 4116; MOVREL-NEXT: kernel_code_prefetch_byte_size = 0 4117; MOVREL-NEXT: granulated_workitem_vgpr_count = 0 4118; MOVREL-NEXT: granulated_wavefront_sgpr_count = 1 4119; MOVREL-NEXT: priority = 0 4120; MOVREL-NEXT: float_mode = 240 4121; MOVREL-NEXT: priv = 0 4122; MOVREL-NEXT: enable_dx10_clamp = 1 4123; MOVREL-NEXT: debug_mode = 0 4124; MOVREL-NEXT: enable_ieee_mode = 1 4125; MOVREL-NEXT: enable_wgp_mode = 0 4126; MOVREL-NEXT: enable_mem_ordered = 0 4127; MOVREL-NEXT: enable_fwd_progress = 0 4128; MOVREL-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 4129; MOVREL-NEXT: user_sgpr_count = 12 4130; MOVREL-NEXT: enable_trap_handler = 0 4131; MOVREL-NEXT: enable_sgpr_workgroup_id_x = 1 4132; MOVREL-NEXT: enable_sgpr_workgroup_id_y = 1 4133; MOVREL-NEXT: enable_sgpr_workgroup_id_z = 1 4134; MOVREL-NEXT: enable_sgpr_workgroup_info = 0 4135; MOVREL-NEXT: enable_vgpr_workitem_id = 2 4136; MOVREL-NEXT: enable_exception_msb = 0 4137; MOVREL-NEXT: granulated_lds_size = 0 4138; MOVREL-NEXT: enable_exception = 0 4139; MOVREL-NEXT: enable_sgpr_private_segment_buffer = 1 4140; MOVREL-NEXT: enable_sgpr_dispatch_ptr = 1 4141; MOVREL-NEXT: enable_sgpr_queue_ptr = 1 4142; MOVREL-NEXT: enable_sgpr_kernarg_segment_ptr = 1 4143; MOVREL-NEXT: enable_sgpr_dispatch_id = 1 4144; MOVREL-NEXT: enable_sgpr_flat_scratch_init = 0 4145; MOVREL-NEXT: enable_sgpr_private_segment_size = 0 4146; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_x = 0 4147; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_y = 0 4148; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_z = 0 4149; MOVREL-NEXT: enable_wavefront_size32 = 0 4150; MOVREL-NEXT: enable_ordered_append_gds = 0 4151; MOVREL-NEXT: private_element_size = 1 4152; MOVREL-NEXT: is_ptr64 = 1 4153; MOVREL-NEXT: is_dynamic_callstack = 0 4154; MOVREL-NEXT: is_debug_enabled = 0 4155; MOVREL-NEXT: is_xnack_enabled = 0 4156; MOVREL-NEXT: workitem_private_segment_byte_size = 0 4157; MOVREL-NEXT: 
workgroup_group_segment_byte_size = 0 4158; MOVREL-NEXT: gds_segment_byte_size = 0 4159; MOVREL-NEXT: kernarg_segment_byte_size = 28 4160; MOVREL-NEXT: workgroup_fbarrier_count = 0 4161; MOVREL-NEXT: wavefront_sgpr_count = 10 4162; MOVREL-NEXT: workitem_vgpr_count = 3 4163; MOVREL-NEXT: reserved_vgpr_first = 0 4164; MOVREL-NEXT: reserved_vgpr_count = 0 4165; MOVREL-NEXT: reserved_sgpr_first = 0 4166; MOVREL-NEXT: reserved_sgpr_count = 0 4167; MOVREL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 4168; MOVREL-NEXT: debug_private_segment_buffer_sgpr = 0 4169; MOVREL-NEXT: kernarg_segment_alignment = 4 4170; MOVREL-NEXT: group_segment_alignment = 4 4171; MOVREL-NEXT: private_segment_alignment = 4 4172; MOVREL-NEXT: wavefront_size = 6 4173; MOVREL-NEXT: call_convention = -1 4174; MOVREL-NEXT: runtime_loader_kernel_symbol = 0 4175; MOVREL-NEXT: .end_amd_kernel_code_t 4176; MOVREL-NEXT: ; %bb.0: ; %entry 4177; MOVREL-NEXT: s_load_dword s2, s[8:9], 0x8 4178; MOVREL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 4179; MOVREL-NEXT: s_waitcnt lgkmcnt(0) 4180; MOVREL-NEXT: s_cmp_eq_u32 s2, 1 4181; MOVREL-NEXT: s_cselect_b32 s3, 2.0, 1.0 4182; MOVREL-NEXT: s_cmp_eq_u32 s2, 2 4183; MOVREL-NEXT: s_cselect_b32 s3, 0x40400000, s3 4184; MOVREL-NEXT: s_cmp_eq_u32 s2, 3 4185; MOVREL-NEXT: s_cselect_b32 s2, 4.0, s3 4186; MOVREL-NEXT: v_mov_b32_e32 v0, s0 4187; MOVREL-NEXT: v_mov_b32_e32 v2, s2 4188; MOVREL-NEXT: v_mov_b32_e32 v1, s1 4189; MOVREL-NEXT: flat_store_dword v[0:1], v2 4190; MOVREL-NEXT: s_endpgm 4191; 4192; GFX10-LABEL: dyn_extract_v4f32_s_s_s: 4193; GFX10: .amd_kernel_code_t 4194; GFX10-NEXT: amd_code_version_major = 1 4195; GFX10-NEXT: amd_code_version_minor = 2 4196; GFX10-NEXT: amd_machine_kind = 1 4197; GFX10-NEXT: amd_machine_version_major = 10 4198; GFX10-NEXT: amd_machine_version_minor = 1 4199; GFX10-NEXT: amd_machine_version_stepping = 0 4200; GFX10-NEXT: kernel_code_entry_byte_offset = 256 4201; GFX10-NEXT: kernel_code_prefetch_byte_size = 0 4202; GFX10-NEXT: 
granulated_workitem_vgpr_count = 0 4203; GFX10-NEXT: granulated_wavefront_sgpr_count = 1 4204; GFX10-NEXT: priority = 0 4205; GFX10-NEXT: float_mode = 240 4206; GFX10-NEXT: priv = 0 4207; GFX10-NEXT: enable_dx10_clamp = 1 4208; GFX10-NEXT: debug_mode = 0 4209; GFX10-NEXT: enable_ieee_mode = 1 4210; GFX10-NEXT: enable_wgp_mode = 1 4211; GFX10-NEXT: enable_mem_ordered = 1 4212; GFX10-NEXT: enable_fwd_progress = 0 4213; GFX10-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 4214; GFX10-NEXT: user_sgpr_count = 12 4215; GFX10-NEXT: enable_trap_handler = 0 4216; GFX10-NEXT: enable_sgpr_workgroup_id_x = 1 4217; GFX10-NEXT: enable_sgpr_workgroup_id_y = 1 4218; GFX10-NEXT: enable_sgpr_workgroup_id_z = 1 4219; GFX10-NEXT: enable_sgpr_workgroup_info = 0 4220; GFX10-NEXT: enable_vgpr_workitem_id = 2 4221; GFX10-NEXT: enable_exception_msb = 0 4222; GFX10-NEXT: granulated_lds_size = 0 4223; GFX10-NEXT: enable_exception = 0 4224; GFX10-NEXT: enable_sgpr_private_segment_buffer = 1 4225; GFX10-NEXT: enable_sgpr_dispatch_ptr = 1 4226; GFX10-NEXT: enable_sgpr_queue_ptr = 1 4227; GFX10-NEXT: enable_sgpr_kernarg_segment_ptr = 1 4228; GFX10-NEXT: enable_sgpr_dispatch_id = 1 4229; GFX10-NEXT: enable_sgpr_flat_scratch_init = 0 4230; GFX10-NEXT: enable_sgpr_private_segment_size = 0 4231; GFX10-NEXT: enable_sgpr_grid_workgroup_count_x = 0 4232; GFX10-NEXT: enable_sgpr_grid_workgroup_count_y = 0 4233; GFX10-NEXT: enable_sgpr_grid_workgroup_count_z = 0 4234; GFX10-NEXT: enable_wavefront_size32 = 1 4235; GFX10-NEXT: enable_ordered_append_gds = 0 4236; GFX10-NEXT: private_element_size = 1 4237; GFX10-NEXT: is_ptr64 = 1 4238; GFX10-NEXT: is_dynamic_callstack = 0 4239; GFX10-NEXT: is_debug_enabled = 0 4240; GFX10-NEXT: is_xnack_enabled = 1 4241; GFX10-NEXT: workitem_private_segment_byte_size = 0 4242; GFX10-NEXT: workgroup_group_segment_byte_size = 0 4243; GFX10-NEXT: gds_segment_byte_size = 0 4244; GFX10-NEXT: kernarg_segment_byte_size = 28 4245; GFX10-NEXT: workgroup_fbarrier_count = 0 
4246; GFX10-NEXT: wavefront_sgpr_count = 10 4247; GFX10-NEXT: workitem_vgpr_count = 2 4248; GFX10-NEXT: reserved_vgpr_first = 0 4249; GFX10-NEXT: reserved_vgpr_count = 0 4250; GFX10-NEXT: reserved_sgpr_first = 0 4251; GFX10-NEXT: reserved_sgpr_count = 0 4252; GFX10-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 4253; GFX10-NEXT: debug_private_segment_buffer_sgpr = 0 4254; GFX10-NEXT: kernarg_segment_alignment = 4 4255; GFX10-NEXT: group_segment_alignment = 4 4256; GFX10-NEXT: private_segment_alignment = 4 4257; GFX10-NEXT: wavefront_size = 5 4258; GFX10-NEXT: call_convention = -1 4259; GFX10-NEXT: runtime_loader_kernel_symbol = 0 4260; GFX10-NEXT: .end_amd_kernel_code_t 4261; GFX10-NEXT: ; %bb.0: ; %entry 4262; GFX10-NEXT: s_clause 0x1 4263; GFX10-NEXT: s_load_dword s2, s[8:9], 0x8 4264; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 4265; GFX10-NEXT: v_mov_b32_e32 v1, 0 4266; GFX10-NEXT: s_waitcnt lgkmcnt(0) 4267; GFX10-NEXT: s_cmp_eq_u32 s2, 1 4268; GFX10-NEXT: s_cselect_b32 s3, 2.0, 1.0 4269; GFX10-NEXT: s_cmp_eq_u32 s2, 2 4270; GFX10-NEXT: s_cselect_b32 s3, 0x40400000, s3 4271; GFX10-NEXT: s_cmp_eq_u32 s2, 3 4272; GFX10-NEXT: s_cselect_b32 s2, 4.0, s3 4273; GFX10-NEXT: v_mov_b32_e32 v0, s2 4274; GFX10-NEXT: global_store_dword v1, v0, s[0:1] 4275; GFX10-NEXT: s_endpgm 4276; 4277; GFX11-LABEL: dyn_extract_v4f32_s_s_s: 4278; GFX11: .amd_kernel_code_t 4279; GFX11-NEXT: amd_code_version_major = 1 4280; GFX11-NEXT: amd_code_version_minor = 2 4281; GFX11-NEXT: amd_machine_kind = 1 4282; GFX11-NEXT: amd_machine_version_major = 11 4283; GFX11-NEXT: amd_machine_version_minor = 0 4284; GFX11-NEXT: amd_machine_version_stepping = 0 4285; GFX11-NEXT: kernel_code_entry_byte_offset = 256 4286; GFX11-NEXT: kernel_code_prefetch_byte_size = 0 4287; GFX11-NEXT: granulated_workitem_vgpr_count = 0 4288; GFX11-NEXT: granulated_wavefront_sgpr_count = 0 4289; GFX11-NEXT: priority = 0 4290; GFX11-NEXT: float_mode = 240 4291; GFX11-NEXT: priv = 0 4292; GFX11-NEXT: 
enable_dx10_clamp = 1 4293; GFX11-NEXT: debug_mode = 0 4294; GFX11-NEXT: enable_ieee_mode = 1 4295; GFX11-NEXT: enable_wgp_mode = 1 4296; GFX11-NEXT: enable_mem_ordered = 1 4297; GFX11-NEXT: enable_fwd_progress = 0 4298; GFX11-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 4299; GFX11-NEXT: user_sgpr_count = 13 4300; GFX11-NEXT: enable_trap_handler = 0 4301; GFX11-NEXT: enable_sgpr_workgroup_id_x = 1 4302; GFX11-NEXT: enable_sgpr_workgroup_id_y = 1 4303; GFX11-NEXT: enable_sgpr_workgroup_id_z = 1 4304; GFX11-NEXT: enable_sgpr_workgroup_info = 0 4305; GFX11-NEXT: enable_vgpr_workitem_id = 2 4306; GFX11-NEXT: enable_exception_msb = 0 4307; GFX11-NEXT: granulated_lds_size = 0 4308; GFX11-NEXT: enable_exception = 0 4309; GFX11-NEXT: enable_sgpr_private_segment_buffer = 0 4310; GFX11-NEXT: enable_sgpr_dispatch_ptr = 1 4311; GFX11-NEXT: enable_sgpr_queue_ptr = 1 4312; GFX11-NEXT: enable_sgpr_kernarg_segment_ptr = 1 4313; GFX11-NEXT: enable_sgpr_dispatch_id = 1 4314; GFX11-NEXT: enable_sgpr_flat_scratch_init = 0 4315; GFX11-NEXT: enable_sgpr_private_segment_size = 0 4316; GFX11-NEXT: enable_sgpr_grid_workgroup_count_x = 0 4317; GFX11-NEXT: enable_sgpr_grid_workgroup_count_y = 0 4318; GFX11-NEXT: enable_sgpr_grid_workgroup_count_z = 0 4319; GFX11-NEXT: enable_wavefront_size32 = 1 4320; GFX11-NEXT: enable_ordered_append_gds = 0 4321; GFX11-NEXT: private_element_size = 1 4322; GFX11-NEXT: is_ptr64 = 1 4323; GFX11-NEXT: is_dynamic_callstack = 0 4324; GFX11-NEXT: is_debug_enabled = 0 4325; GFX11-NEXT: is_xnack_enabled = 0 4326; GFX11-NEXT: workitem_private_segment_byte_size = 0 4327; GFX11-NEXT: workgroup_group_segment_byte_size = 0 4328; GFX11-NEXT: gds_segment_byte_size = 0 4329; GFX11-NEXT: kernarg_segment_byte_size = 28 4330; GFX11-NEXT: workgroup_fbarrier_count = 0 4331; GFX11-NEXT: wavefront_sgpr_count = 6 4332; GFX11-NEXT: workitem_vgpr_count = 2 4333; GFX11-NEXT: reserved_vgpr_first = 0 4334; GFX11-NEXT: reserved_vgpr_count = 0 4335; GFX11-NEXT: 
reserved_sgpr_first = 0 4336; GFX11-NEXT: reserved_sgpr_count = 0 4337; GFX11-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 4338; GFX11-NEXT: debug_private_segment_buffer_sgpr = 0 4339; GFX11-NEXT: kernarg_segment_alignment = 4 4340; GFX11-NEXT: group_segment_alignment = 4 4341; GFX11-NEXT: private_segment_alignment = 4 4342; GFX11-NEXT: wavefront_size = 5 4343; GFX11-NEXT: call_convention = -1 4344; GFX11-NEXT: runtime_loader_kernel_symbol = 0 4345; GFX11-NEXT: .end_amd_kernel_code_t 4346; GFX11-NEXT: ; %bb.0: ; %entry 4347; GFX11-NEXT: s_clause 0x1 4348; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x8 4349; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4350; GFX11-NEXT: v_mov_b32_e32 v1, 0 4351; GFX11-NEXT: s_waitcnt lgkmcnt(0) 4352; GFX11-NEXT: s_cmp_eq_u32 s2, 1 4353; GFX11-NEXT: s_cselect_b32 s3, 2.0, 1.0 4354; GFX11-NEXT: s_cmp_eq_u32 s2, 2 4355; GFX11-NEXT: s_cselect_b32 s3, 0x40400000, s3 4356; GFX11-NEXT: s_cmp_eq_u32 s2, 3 4357; GFX11-NEXT: s_cselect_b32 s2, 4.0, s3 4358; GFX11-NEXT: v_mov_b32_e32 v0, s2 4359; GFX11-NEXT: global_store_b32 v1, v0, s[0:1] 4360; GFX11-NEXT: s_endpgm 4361entry: 4362 %ext = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %sel 4363 store float %ext, ptr addrspace(1) %out 4364 ret void 4365} 4366 4367define amdgpu_kernel void @dyn_extract_v4f64_s_s_s(ptr addrspace(1) %out, i32 %sel) { 4368; GPRIDX-LABEL: dyn_extract_v4f64_s_s_s: 4369; GPRIDX: .amd_kernel_code_t 4370; GPRIDX-NEXT: amd_code_version_major = 1 4371; GPRIDX-NEXT: amd_code_version_minor = 2 4372; GPRIDX-NEXT: amd_machine_kind = 1 4373; GPRIDX-NEXT: amd_machine_version_major = 9 4374; GPRIDX-NEXT: amd_machine_version_minor = 0 4375; GPRIDX-NEXT: amd_machine_version_stepping = 0 4376; GPRIDX-NEXT: kernel_code_entry_byte_offset = 256 4377; GPRIDX-NEXT: kernel_code_prefetch_byte_size = 0 4378; GPRIDX-NEXT: granulated_workitem_vgpr_count = 0 4379; GPRIDX-NEXT: granulated_wavefront_sgpr_count = 1 4380; GPRIDX-NEXT: priority = 0 4381; GPRIDX-NEXT: 
float_mode = 240 4382; GPRIDX-NEXT: priv = 0 4383; GPRIDX-NEXT: enable_dx10_clamp = 1 4384; GPRIDX-NEXT: debug_mode = 0 4385; GPRIDX-NEXT: enable_ieee_mode = 1 4386; GPRIDX-NEXT: enable_wgp_mode = 0 4387; GPRIDX-NEXT: enable_mem_ordered = 0 4388; GPRIDX-NEXT: enable_fwd_progress = 0 4389; GPRIDX-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 4390; GPRIDX-NEXT: user_sgpr_count = 12 4391; GPRIDX-NEXT: enable_trap_handler = 0 4392; GPRIDX-NEXT: enable_sgpr_workgroup_id_x = 1 4393; GPRIDX-NEXT: enable_sgpr_workgroup_id_y = 1 4394; GPRIDX-NEXT: enable_sgpr_workgroup_id_z = 1 4395; GPRIDX-NEXT: enable_sgpr_workgroup_info = 0 4396; GPRIDX-NEXT: enable_vgpr_workitem_id = 2 4397; GPRIDX-NEXT: enable_exception_msb = 0 4398; GPRIDX-NEXT: granulated_lds_size = 0 4399; GPRIDX-NEXT: enable_exception = 0 4400; GPRIDX-NEXT: enable_sgpr_private_segment_buffer = 1 4401; GPRIDX-NEXT: enable_sgpr_dispatch_ptr = 1 4402; GPRIDX-NEXT: enable_sgpr_queue_ptr = 1 4403; GPRIDX-NEXT: enable_sgpr_kernarg_segment_ptr = 1 4404; GPRIDX-NEXT: enable_sgpr_dispatch_id = 1 4405; GPRIDX-NEXT: enable_sgpr_flat_scratch_init = 0 4406; GPRIDX-NEXT: enable_sgpr_private_segment_size = 0 4407; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_x = 0 4408; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_y = 0 4409; GPRIDX-NEXT: enable_sgpr_grid_workgroup_count_z = 0 4410; GPRIDX-NEXT: enable_wavefront_size32 = 0 4411; GPRIDX-NEXT: enable_ordered_append_gds = 0 4412; GPRIDX-NEXT: private_element_size = 1 4413; GPRIDX-NEXT: is_ptr64 = 1 4414; GPRIDX-NEXT: is_dynamic_callstack = 0 4415; GPRIDX-NEXT: is_debug_enabled = 0 4416; GPRIDX-NEXT: is_xnack_enabled = 1 4417; GPRIDX-NEXT: workitem_private_segment_byte_size = 0 4418; GPRIDX-NEXT: workgroup_group_segment_byte_size = 0 4419; GPRIDX-NEXT: gds_segment_byte_size = 0 4420; GPRIDX-NEXT: kernarg_segment_byte_size = 28 4421; GPRIDX-NEXT: workgroup_fbarrier_count = 0 4422; GPRIDX-NEXT: wavefront_sgpr_count = 14 4423; GPRIDX-NEXT: workitem_vgpr_count = 3 4424; 
GPRIDX-NEXT: reserved_vgpr_first = 0 4425; GPRIDX-NEXT: reserved_vgpr_count = 0 4426; GPRIDX-NEXT: reserved_sgpr_first = 0 4427; GPRIDX-NEXT: reserved_sgpr_count = 0 4428; GPRIDX-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 4429; GPRIDX-NEXT: debug_private_segment_buffer_sgpr = 0 4430; GPRIDX-NEXT: kernarg_segment_alignment = 4 4431; GPRIDX-NEXT: group_segment_alignment = 4 4432; GPRIDX-NEXT: private_segment_alignment = 4 4433; GPRIDX-NEXT: wavefront_size = 6 4434; GPRIDX-NEXT: call_convention = -1 4435; GPRIDX-NEXT: runtime_loader_kernel_symbol = 0 4436; GPRIDX-NEXT: .end_amd_kernel_code_t 4437; GPRIDX-NEXT: ; %bb.0: ; %entry 4438; GPRIDX-NEXT: s_load_dword s6, s[8:9], 0x8 4439; GPRIDX-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 4440; GPRIDX-NEXT: s_mov_b32 s2, 0 4441; GPRIDX-NEXT: s_mov_b32 s3, 0x40080000 4442; GPRIDX-NEXT: v_mov_b32_e32 v2, 0 4443; GPRIDX-NEXT: s_waitcnt lgkmcnt(0) 4444; GPRIDX-NEXT: s_cmp_eq_u32 s6, 1 4445; GPRIDX-NEXT: s_cselect_b64 s[4:5], 2.0, 1.0 4446; GPRIDX-NEXT: s_cmp_eq_u32 s6, 2 4447; GPRIDX-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5] 4448; GPRIDX-NEXT: s_cmp_eq_u32 s6, 3 4449; GPRIDX-NEXT: s_cselect_b64 s[2:3], 4.0, s[2:3] 4450; GPRIDX-NEXT: v_mov_b32_e32 v0, s2 4451; GPRIDX-NEXT: v_mov_b32_e32 v1, s3 4452; GPRIDX-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 4453; GPRIDX-NEXT: s_endpgm 4454; 4455; MOVREL-LABEL: dyn_extract_v4f64_s_s_s: 4456; MOVREL: .amd_kernel_code_t 4457; MOVREL-NEXT: amd_code_version_major = 1 4458; MOVREL-NEXT: amd_code_version_minor = 2 4459; MOVREL-NEXT: amd_machine_kind = 1 4460; MOVREL-NEXT: amd_machine_version_major = 8 4461; MOVREL-NEXT: amd_machine_version_minor = 0 4462; MOVREL-NEXT: amd_machine_version_stepping = 3 4463; MOVREL-NEXT: kernel_code_entry_byte_offset = 256 4464; MOVREL-NEXT: kernel_code_prefetch_byte_size = 0 4465; MOVREL-NEXT: granulated_workitem_vgpr_count = 0 4466; MOVREL-NEXT: granulated_wavefront_sgpr_count = 1 4467; MOVREL-NEXT: priority = 0 4468; MOVREL-NEXT: float_mode = 240 
4469; MOVREL-NEXT: priv = 0 4470; MOVREL-NEXT: enable_dx10_clamp = 1 4471; MOVREL-NEXT: debug_mode = 0 4472; MOVREL-NEXT: enable_ieee_mode = 1 4473; MOVREL-NEXT: enable_wgp_mode = 0 4474; MOVREL-NEXT: enable_mem_ordered = 0 4475; MOVREL-NEXT: enable_fwd_progress = 0 4476; MOVREL-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 4477; MOVREL-NEXT: user_sgpr_count = 12 4478; MOVREL-NEXT: enable_trap_handler = 0 4479; MOVREL-NEXT: enable_sgpr_workgroup_id_x = 1 4480; MOVREL-NEXT: enable_sgpr_workgroup_id_y = 1 4481; MOVREL-NEXT: enable_sgpr_workgroup_id_z = 1 4482; MOVREL-NEXT: enable_sgpr_workgroup_info = 0 4483; MOVREL-NEXT: enable_vgpr_workitem_id = 2 4484; MOVREL-NEXT: enable_exception_msb = 0 4485; MOVREL-NEXT: granulated_lds_size = 0 4486; MOVREL-NEXT: enable_exception = 0 4487; MOVREL-NEXT: enable_sgpr_private_segment_buffer = 1 4488; MOVREL-NEXT: enable_sgpr_dispatch_ptr = 1 4489; MOVREL-NEXT: enable_sgpr_queue_ptr = 1 4490; MOVREL-NEXT: enable_sgpr_kernarg_segment_ptr = 1 4491; MOVREL-NEXT: enable_sgpr_dispatch_id = 1 4492; MOVREL-NEXT: enable_sgpr_flat_scratch_init = 0 4493; MOVREL-NEXT: enable_sgpr_private_segment_size = 0 4494; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_x = 0 4495; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_y = 0 4496; MOVREL-NEXT: enable_sgpr_grid_workgroup_count_z = 0 4497; MOVREL-NEXT: enable_wavefront_size32 = 0 4498; MOVREL-NEXT: enable_ordered_append_gds = 0 4499; MOVREL-NEXT: private_element_size = 1 4500; MOVREL-NEXT: is_ptr64 = 1 4501; MOVREL-NEXT: is_dynamic_callstack = 0 4502; MOVREL-NEXT: is_debug_enabled = 0 4503; MOVREL-NEXT: is_xnack_enabled = 0 4504; MOVREL-NEXT: workitem_private_segment_byte_size = 0 4505; MOVREL-NEXT: workgroup_group_segment_byte_size = 0 4506; MOVREL-NEXT: gds_segment_byte_size = 0 4507; MOVREL-NEXT: kernarg_segment_byte_size = 28 4508; MOVREL-NEXT: workgroup_fbarrier_count = 0 4509; MOVREL-NEXT: wavefront_sgpr_count = 10 4510; MOVREL-NEXT: workitem_vgpr_count = 4 4511; MOVREL-NEXT: 
reserved_vgpr_first = 0 4512; MOVREL-NEXT: reserved_vgpr_count = 0 4513; MOVREL-NEXT: reserved_sgpr_first = 0 4514; MOVREL-NEXT: reserved_sgpr_count = 0 4515; MOVREL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 4516; MOVREL-NEXT: debug_private_segment_buffer_sgpr = 0 4517; MOVREL-NEXT: kernarg_segment_alignment = 4 4518; MOVREL-NEXT: group_segment_alignment = 4 4519; MOVREL-NEXT: private_segment_alignment = 4 4520; MOVREL-NEXT: wavefront_size = 6 4521; MOVREL-NEXT: call_convention = -1 4522; MOVREL-NEXT: runtime_loader_kernel_symbol = 0 4523; MOVREL-NEXT: .end_amd_kernel_code_t 4524; MOVREL-NEXT: ; %bb.0: ; %entry 4525; MOVREL-NEXT: s_load_dword s6, s[8:9], 0x8 4526; MOVREL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 4527; MOVREL-NEXT: s_mov_b32 s2, 0 4528; MOVREL-NEXT: s_mov_b32 s3, 0x40080000 4529; MOVREL-NEXT: s_waitcnt lgkmcnt(0) 4530; MOVREL-NEXT: s_cmp_eq_u32 s6, 1 4531; MOVREL-NEXT: s_cselect_b64 s[4:5], 2.0, 1.0 4532; MOVREL-NEXT: s_cmp_eq_u32 s6, 2 4533; MOVREL-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5] 4534; MOVREL-NEXT: s_cmp_eq_u32 s6, 3 4535; MOVREL-NEXT: s_cselect_b64 s[2:3], 4.0, s[2:3] 4536; MOVREL-NEXT: v_mov_b32_e32 v0, s2 4537; MOVREL-NEXT: v_mov_b32_e32 v3, s1 4538; MOVREL-NEXT: v_mov_b32_e32 v1, s3 4539; MOVREL-NEXT: v_mov_b32_e32 v2, s0 4540; MOVREL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 4541; MOVREL-NEXT: s_endpgm 4542; 4543; GFX10-LABEL: dyn_extract_v4f64_s_s_s: 4544; GFX10: .amd_kernel_code_t 4545; GFX10-NEXT: amd_code_version_major = 1 4546; GFX10-NEXT: amd_code_version_minor = 2 4547; GFX10-NEXT: amd_machine_kind = 1 4548; GFX10-NEXT: amd_machine_version_major = 10 4549; GFX10-NEXT: amd_machine_version_minor = 1 4550; GFX10-NEXT: amd_machine_version_stepping = 0 4551; GFX10-NEXT: kernel_code_entry_byte_offset = 256 4552; GFX10-NEXT: kernel_code_prefetch_byte_size = 0 4553; GFX10-NEXT: granulated_workitem_vgpr_count = 0 4554; GFX10-NEXT: granulated_wavefront_sgpr_count = 1 4555; GFX10-NEXT: priority = 0 4556; GFX10-NEXT: float_mode 
= 240 4557; GFX10-NEXT: priv = 0 4558; GFX10-NEXT: enable_dx10_clamp = 1 4559; GFX10-NEXT: debug_mode = 0 4560; GFX10-NEXT: enable_ieee_mode = 1 4561; GFX10-NEXT: enable_wgp_mode = 1 4562; GFX10-NEXT: enable_mem_ordered = 1 4563; GFX10-NEXT: enable_fwd_progress = 0 4564; GFX10-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 4565; GFX10-NEXT: user_sgpr_count = 12 4566; GFX10-NEXT: enable_trap_handler = 0 4567; GFX10-NEXT: enable_sgpr_workgroup_id_x = 1 4568; GFX10-NEXT: enable_sgpr_workgroup_id_y = 1 4569; GFX10-NEXT: enable_sgpr_workgroup_id_z = 1 4570; GFX10-NEXT: enable_sgpr_workgroup_info = 0 4571; GFX10-NEXT: enable_vgpr_workitem_id = 2 4572; GFX10-NEXT: enable_exception_msb = 0 4573; GFX10-NEXT: granulated_lds_size = 0 4574; GFX10-NEXT: enable_exception = 0 4575; GFX10-NEXT: enable_sgpr_private_segment_buffer = 1 4576; GFX10-NEXT: enable_sgpr_dispatch_ptr = 1 4577; GFX10-NEXT: enable_sgpr_queue_ptr = 1 4578; GFX10-NEXT: enable_sgpr_kernarg_segment_ptr = 1 4579; GFX10-NEXT: enable_sgpr_dispatch_id = 1 4580; GFX10-NEXT: enable_sgpr_flat_scratch_init = 0 4581; GFX10-NEXT: enable_sgpr_private_segment_size = 0 4582; GFX10-NEXT: enable_sgpr_grid_workgroup_count_x = 0 4583; GFX10-NEXT: enable_sgpr_grid_workgroup_count_y = 0 4584; GFX10-NEXT: enable_sgpr_grid_workgroup_count_z = 0 4585; GFX10-NEXT: enable_wavefront_size32 = 1 4586; GFX10-NEXT: enable_ordered_append_gds = 0 4587; GFX10-NEXT: private_element_size = 1 4588; GFX10-NEXT: is_ptr64 = 1 4589; GFX10-NEXT: is_dynamic_callstack = 0 4590; GFX10-NEXT: is_debug_enabled = 0 4591; GFX10-NEXT: is_xnack_enabled = 1 4592; GFX10-NEXT: workitem_private_segment_byte_size = 0 4593; GFX10-NEXT: workgroup_group_segment_byte_size = 0 4594; GFX10-NEXT: gds_segment_byte_size = 0 4595; GFX10-NEXT: kernarg_segment_byte_size = 28 4596; GFX10-NEXT: workgroup_fbarrier_count = 0 4597; GFX10-NEXT: wavefront_sgpr_count = 10 4598; GFX10-NEXT: workitem_vgpr_count = 3 4599; GFX10-NEXT: reserved_vgpr_first = 0 4600; GFX10-NEXT: 
reserved_vgpr_count = 0 4601; GFX10-NEXT: reserved_sgpr_first = 0 4602; GFX10-NEXT: reserved_sgpr_count = 0 4603; GFX10-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 4604; GFX10-NEXT: debug_private_segment_buffer_sgpr = 0 4605; GFX10-NEXT: kernarg_segment_alignment = 4 4606; GFX10-NEXT: group_segment_alignment = 4 4607; GFX10-NEXT: private_segment_alignment = 4 4608; GFX10-NEXT: wavefront_size = 5 4609; GFX10-NEXT: call_convention = -1 4610; GFX10-NEXT: runtime_loader_kernel_symbol = 0 4611; GFX10-NEXT: .end_amd_kernel_code_t 4612; GFX10-NEXT: ; %bb.0: ; %entry 4613; GFX10-NEXT: s_clause 0x1 4614; GFX10-NEXT: s_load_dword s6, s[8:9], 0x8 4615; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 4616; GFX10-NEXT: s_mov_b32 s2, 0 4617; GFX10-NEXT: s_mov_b32 s3, 0x40080000 4618; GFX10-NEXT: v_mov_b32_e32 v2, 0 4619; GFX10-NEXT: s_waitcnt lgkmcnt(0) 4620; GFX10-NEXT: s_cmp_eq_u32 s6, 1 4621; GFX10-NEXT: s_cselect_b64 s[4:5], 2.0, 1.0 4622; GFX10-NEXT: s_cmp_eq_u32 s6, 2 4623; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5] 4624; GFX10-NEXT: s_cmp_eq_u32 s6, 3 4625; GFX10-NEXT: s_cselect_b64 s[2:3], 4.0, s[2:3] 4626; GFX10-NEXT: v_mov_b32_e32 v0, s2 4627; GFX10-NEXT: v_mov_b32_e32 v1, s3 4628; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 4629; GFX10-NEXT: s_endpgm 4630; 4631; GFX11-LABEL: dyn_extract_v4f64_s_s_s: 4632; GFX11: .amd_kernel_code_t 4633; GFX11-NEXT: amd_code_version_major = 1 4634; GFX11-NEXT: amd_code_version_minor = 2 4635; GFX11-NEXT: amd_machine_kind = 1 4636; GFX11-NEXT: amd_machine_version_major = 11 4637; GFX11-NEXT: amd_machine_version_minor = 0 4638; GFX11-NEXT: amd_machine_version_stepping = 0 4639; GFX11-NEXT: kernel_code_entry_byte_offset = 256 4640; GFX11-NEXT: kernel_code_prefetch_byte_size = 0 4641; GFX11-NEXT: granulated_workitem_vgpr_count = 0 4642; GFX11-NEXT: granulated_wavefront_sgpr_count = 0 4643; GFX11-NEXT: priority = 0 4644; GFX11-NEXT: float_mode = 240 4645; GFX11-NEXT: priv = 0 4646; GFX11-NEXT: enable_dx10_clamp = 1 
4647; GFX11-NEXT: debug_mode = 0 4648; GFX11-NEXT: enable_ieee_mode = 1 4649; GFX11-NEXT: enable_wgp_mode = 1 4650; GFX11-NEXT: enable_mem_ordered = 1 4651; GFX11-NEXT: enable_fwd_progress = 0 4652; GFX11-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 4653; GFX11-NEXT: user_sgpr_count = 13 4654; GFX11-NEXT: enable_trap_handler = 0 4655; GFX11-NEXT: enable_sgpr_workgroup_id_x = 1 4656; GFX11-NEXT: enable_sgpr_workgroup_id_y = 1 4657; GFX11-NEXT: enable_sgpr_workgroup_id_z = 1 4658; GFX11-NEXT: enable_sgpr_workgroup_info = 0 4659; GFX11-NEXT: enable_vgpr_workitem_id = 2 4660; GFX11-NEXT: enable_exception_msb = 0 4661; GFX11-NEXT: granulated_lds_size = 0 4662; GFX11-NEXT: enable_exception = 0 4663; GFX11-NEXT: enable_sgpr_private_segment_buffer = 0 4664; GFX11-NEXT: enable_sgpr_dispatch_ptr = 1 4665; GFX11-NEXT: enable_sgpr_queue_ptr = 1 4666; GFX11-NEXT: enable_sgpr_kernarg_segment_ptr = 1 4667; GFX11-NEXT: enable_sgpr_dispatch_id = 1 4668; GFX11-NEXT: enable_sgpr_flat_scratch_init = 0 4669; GFX11-NEXT: enable_sgpr_private_segment_size = 0 4670; GFX11-NEXT: enable_sgpr_grid_workgroup_count_x = 0 4671; GFX11-NEXT: enable_sgpr_grid_workgroup_count_y = 0 4672; GFX11-NEXT: enable_sgpr_grid_workgroup_count_z = 0 4673; GFX11-NEXT: enable_wavefront_size32 = 1 4674; GFX11-NEXT: enable_ordered_append_gds = 0 4675; GFX11-NEXT: private_element_size = 1 4676; GFX11-NEXT: is_ptr64 = 1 4677; GFX11-NEXT: is_dynamic_callstack = 0 4678; GFX11-NEXT: is_debug_enabled = 0 4679; GFX11-NEXT: is_xnack_enabled = 0 4680; GFX11-NEXT: workitem_private_segment_byte_size = 0 4681; GFX11-NEXT: workgroup_group_segment_byte_size = 0 4682; GFX11-NEXT: gds_segment_byte_size = 0 4683; GFX11-NEXT: kernarg_segment_byte_size = 28 4684; GFX11-NEXT: workgroup_fbarrier_count = 0 4685; GFX11-NEXT: wavefront_sgpr_count = 7 4686; GFX11-NEXT: workitem_vgpr_count = 3 4687; GFX11-NEXT: reserved_vgpr_first = 0 4688; GFX11-NEXT: reserved_vgpr_count = 0 4689; GFX11-NEXT: reserved_sgpr_first = 0 4690; 
GFX11-NEXT: reserved_sgpr_count = 0 4691; GFX11-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 4692; GFX11-NEXT: debug_private_segment_buffer_sgpr = 0 4693; GFX11-NEXT: kernarg_segment_alignment = 4 4694; GFX11-NEXT: group_segment_alignment = 4 4695; GFX11-NEXT: private_segment_alignment = 4 4696; GFX11-NEXT: wavefront_size = 5 4697; GFX11-NEXT: call_convention = -1 4698; GFX11-NEXT: runtime_loader_kernel_symbol = 0 4699; GFX11-NEXT: .end_amd_kernel_code_t 4700; GFX11-NEXT: ; %bb.0: ; %entry 4701; GFX11-NEXT: s_clause 0x1 4702; GFX11-NEXT: s_load_b32 s6, s[4:5], 0x8 4703; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 4704; GFX11-NEXT: s_mov_b32 s2, 0 4705; GFX11-NEXT: s_mov_b32 s3, 0x40080000 4706; GFX11-NEXT: v_mov_b32_e32 v2, 0 4707; GFX11-NEXT: s_waitcnt lgkmcnt(0) 4708; GFX11-NEXT: s_cmp_eq_u32 s6, 1 4709; GFX11-NEXT: s_cselect_b64 s[4:5], 2.0, 1.0 4710; GFX11-NEXT: s_cmp_eq_u32 s6, 2 4711; GFX11-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5] 4712; GFX11-NEXT: s_cmp_eq_u32 s6, 3 4713; GFX11-NEXT: s_cselect_b64 s[2:3], 4.0, s[2:3] 4714; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 4715; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] 4716; GFX11-NEXT: s_endpgm 4717entry: 4718 %ext = extractelement <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, i32 %sel 4719 store double %ext, ptr addrspace(1) %out 4720 ret void 4721}

; Constant-index extract of element 7 from a <64 x i32> that is loaded through
; a global (addrspace(1)) pointer. The expected code is a single dword load at
; byte offset 28 (7 * 4 bytes). The MOVREL checks instead add 28 to the 64-bit
; pointer (v_add_u32 + v_addc_u32) and issue a flat_load_dword.
define i32 @v_extract_v64i32_7(ptr addrspace(1) %ptr) {
; GPRIDX-LABEL: v_extract_v64i32_7:
; GPRIDX:       ; %bb.0:
; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT:    global_load_dword v0, v[0:1], off offset:28
; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
;
; MOVREL-LABEL: v_extract_v64i32_7:
; MOVREL:       ; %bb.0:
; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT:    v_add_u32_e32 v0, vcc, 28, v0
; MOVREL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; MOVREL-NEXT:    flat_load_dword v0, v[0:1]
; MOVREL-NEXT:    s_waitcnt vmcnt(0)
; MOVREL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_extract_v64i32_7:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    global_load_dword v0, v[0:1], off offset:28
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_extract_v64i32_7:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    global_load_b32 v0, v[0:1], off offset:28
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %vec = load <64 x i32>, ptr addrspace(1) %ptr
  %elt = extractelement <64 x i32> %vec, i32 7
  ret i32 %elt
}

; Same pattern with constant index 32: the expected load is at byte offset 128
; (32 * 4 bytes); the MOVREL checks add 0x80 to the pointer before a flat load.
define i32 @v_extract_v64i32_32(ptr addrspace(1) %ptr) {
; GPRIDX-LABEL: v_extract_v64i32_32:
; GPRIDX:       ; %bb.0:
; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT:    global_load_dword v0, v[0:1], off offset:128
; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
;
; MOVREL-LABEL: v_extract_v64i32_32:
; MOVREL:       ; %bb.0:
; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT:    v_add_u32_e32 v0, vcc, 0x80, v0
; MOVREL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; MOVREL-NEXT:    flat_load_dword v0, v[0:1]
; MOVREL-NEXT:    s_waitcnt vmcnt(0)
; MOVREL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_extract_v64i32_32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    global_load_dword v0, v[0:1], off offset:128
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_extract_v64i32_32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    global_load_b32 v0, v[0:1], off offset:128
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %vec = load <64 x i32>, ptr addrspace(1) %ptr
  %elt = extractelement <64 x i32> %vec, i32 32
  ret i32 %elt
}

; Same pattern with constant index 33: the expected load is at byte offset 132
; (33 * 4 bytes); the MOVREL checks add 0x84 to the pointer before a flat load.
define i32 @v_extract_v64i32_33(ptr addrspace(1) %ptr) {
; GPRIDX-LABEL: v_extract_v64i32_33:
; GPRIDX:       ; %bb.0:
; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT:    global_load_dword v0, v[0:1], off offset:132
; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
;
; MOVREL-LABEL: v_extract_v64i32_33:
; MOVREL:       ; %bb.0:
; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT:    v_add_u32_e32 v0, vcc, 0x84, v0
; MOVREL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; MOVREL-NEXT:    flat_load_dword v0, v[0:1]
; MOVREL-NEXT:    s_waitcnt vmcnt(0)
; MOVREL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_extract_v64i32_33:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    global_load_dword v0, v[0:1], off offset:132
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_extract_v64i32_33:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    global_load_b32 v0, v[0:1], off offset:132
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %vec = load <64 x i32>, ptr addrspace(1) %ptr
  %elt = extractelement <64 x i32> %vec, i32 33
  ret i32 %elt
}

; Same pattern with constant index 37: the expected load is at byte offset 148
; (37 * 4 bytes); the MOVREL checks add 0x94 to the pointer before a flat load.
define i32 @v_extract_v64i32_37(ptr addrspace(1) %ptr) {
; GPRIDX-LABEL: v_extract_v64i32_37:
; GPRIDX:       ; %bb.0:
; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT:    global_load_dword v0, v[0:1], off offset:148
; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
;
; MOVREL-LABEL: v_extract_v64i32_37:
; MOVREL:       ; %bb.0:
; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT:    v_add_u32_e32 v0, vcc, 0x94, v0
; MOVREL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; MOVREL-NEXT:    flat_load_dword v0, v[0:1]
; MOVREL-NEXT:    s_waitcnt vmcnt(0)
; MOVREL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_extract_v64i32_37:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    global_load_dword v0, v[0:1], off offset:148
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_extract_v64i32_37:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    global_load_b32 v0, v[0:1], off offset:148
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %vec = load <64 x i32>, ptr addrspace(1) %ptr
  %elt = extractelement <64 x i32> %vec, i32 37
  ret i32 %elt
}