; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=amdgcn-- -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s

; Codegen tests for extracting the leading subvector of a wide vector that is
; produced by a phi of two volatile loads. Every function below has the same
; shape:
;   * branch on %c0 to %T (volatile load from %p0) or %F (volatile load from %p1),
;   * merge the two loaded vectors with a phi in %exit,
;   * keep only the leading elements with a shufflevector,
;   * compare those elements against -1 (or -1.0) and select between two
;     constant vectors.
; The shufflevector masks only index the first operand, so the second operand
; is a pure placeholder; it is spelled poison instead of the deprecated undef.
; The assertions are autogenerated: regenerate them with the script named on
; the first line rather than editing them by hand.

; <8 x i16> source, leading <2 x i16> extracted, signed compare against -1.
define <2 x i16> @extract_2xi16(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i1 %c0) {
; GCN-LABEL: extract_2xi16:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_and_b32_e32 v4, 1, v4
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v4
; GCN-NEXT:    s_xor_b64 s[4:5], vcc, -1
; GCN-NEXT:    ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7
; GCN-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
; GCN-NEXT:    s_xor_b64 s[4:5], exec, s[6:7]
; GCN-NEXT:    s_cbranch_execz .LBB0_2
; GCN-NEXT:  ; %bb.1: ; %F
; GCN-NEXT:    s_mov_b32 s10, 0
; GCN-NEXT:    s_mov_b32 s11, 0xf000
; GCN-NEXT:    s_mov_b32 s8, s10
; GCN-NEXT:    s_mov_b32 s9, s10
; GCN-NEXT:    buffer_load_ushort v0, v[2:3], s[8:11], 0 addr64 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_ushort v1, v[2:3], s[8:11], 0 addr64 offset:2 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_ushort v4, v[2:3], s[8:11], 0 addr64 offset:4 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_ushort v4, v[2:3], s[8:11], 0 addr64 offset:6 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_ushort v4, v[2:3], s[8:11], 0 addr64 offset:8 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_ushort v4, v[2:3], s[8:11], 0 addr64 offset:10 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_ushort v4, v[2:3], s[8:11], 0 addr64 offset:12 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_ushort v2, v[2:3], s[8:11], 0 addr64 offset:14 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GCN-NEXT:    v_or_b32_e32 v4, v0, v1
; GCN-NEXT:    ; implicit-def: $vgpr0
; GCN-NEXT:  .LBB0_2: ; %Flow
; GCN-NEXT:    s_andn2_saveexec_b64 s[4:5], s[4:5]
; GCN-NEXT:    s_cbranch_execz .LBB0_4
; GCN-NEXT:  ; %bb.3: ; %T
; GCN-NEXT:    s_mov_b32 s10, 0
; GCN-NEXT:    s_mov_b32 s11, 0xf000
; GCN-NEXT:    s_mov_b32 s8, s10
; GCN-NEXT:    s_mov_b32 s9, s10
; GCN-NEXT:    buffer_load_ushort v2, v[0:1], s[8:11], 0 addr64 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_ushort v3, v[0:1], s[8:11], 0 addr64 offset:2 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_ushort v4, v[0:1], s[8:11], 0 addr64 offset:4 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_ushort v4, v[0:1], s[8:11], 0 addr64 offset:6 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_ushort v4, v[0:1], s[8:11], 0 addr64 offset:8 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_ushort v4, v[0:1], s[8:11], 0 addr64 offset:10 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_ushort v4, v[0:1], s[8:11], 0 addr64 offset:12 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_ushort v0, v[0:1], s[8:11], 0 addr64 offset:14 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_lshlrev_b32_e32 v0, 16, v3
; GCN-NEXT:    v_or_b32_e32 v4, v2, v0
; GCN-NEXT:  .LBB0_4: ; %exit
; GCN-NEXT:    s_or_b64 exec, exec, s[4:5]
; GCN-NEXT:    v_ashrrev_i32_e32 v0, 16, v4
; GCN-NEXT:    v_bfe_i32 v1, v4, 0, 16
; GCN-NEXT:    v_mov_b32_e32 v2, 0xffff
; GCN-NEXT:    v_mov_b32_e32 v3, 0x8000
; GCN-NEXT:    v_mov_b32_e32 v4, 0xffff8000
; GCN-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v1
; GCN-NEXT:    v_cndmask_b32_e32 v1, v2, v3, vcc
; GCN-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v0
; GCN-NEXT:    v_cndmask_b32_e32 v2, -1, v4, vcc
; GCN-NEXT:    v_lshlrev_b32_e32 v0, 16, v2
; GCN-NEXT:    v_or_b32_e32 v0, v1, v0
; GCN-NEXT:    v_and_b32_e32 v1, 0xffff, v2
; GCN-NEXT:    s_setpc_b64 s[30:31]
  br i1 %c0, label %T, label %F

T:
  %t = load volatile <8 x i16>, ptr addrspace(1) %p0
  br label %exit

F:
  %f = load volatile <8 x i16>, ptr addrspace(1) %p1
  br label %exit

exit:
  %m = phi <8 x i16> [ %t, %T ], [ %f, %F ]
  %v2 = shufflevector <8 x i16> %m, <8 x i16> poison, <2 x i32> <i32 0, i32 1>
  %b2 = icmp sgt <2 x i16> %v2, <i16 -1, i16 -1>
  %r2 = select <2 x i1> %b2, <2 x i16> <i16 -32768, i16 -32768>, <2 x i16> <i16 -1, i16 -1>
  ret <2 x i16> %r2
}

; <8 x i64> source, leading <2 x i64> extracted, signed compare against -1.
define <2 x i64> @extract_2xi64(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i1 %c0) {
; GCN-LABEL: extract_2xi64:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_and_b32_e32 v4, 1, v4
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v4
; GCN-NEXT:    s_xor_b64 s[4:5], vcc, -1
; GCN-NEXT:    ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19
; GCN-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
; GCN-NEXT:    s_xor_b64 s[4:5], exec, s[6:7]
; GCN-NEXT:    s_cbranch_execz .LBB1_2
; GCN-NEXT:  ; %bb.1: ; %F
; GCN-NEXT:    s_mov_b32 s10, 0
; GCN-NEXT:    s_mov_b32 s11, 0xf000
; GCN-NEXT:    s_mov_b32 s8, s10
; GCN-NEXT:    s_mov_b32 s9, s10
; GCN-NEXT:    buffer_load_dwordx4 v[4:7], v[2:3], s[8:11], 0 addr64 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[8:11], v[2:3], s[8:11], 0 addr64 offset:16 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[8:11], v[2:3], s[8:11], 0 addr64 offset:32 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[0:3], v[2:3], s[8:11], 0 addr64 offset:48 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    ; implicit-def: $vgpr0
; GCN-NEXT:  .LBB1_2: ; %Flow
; GCN-NEXT:    s_andn2_saveexec_b64 s[4:5], s[4:5]
; GCN-NEXT:    s_cbranch_execz .LBB1_4
; GCN-NEXT:  ; %bb.3: ; %T
; GCN-NEXT:    s_mov_b32 s10, 0
; GCN-NEXT:    s_mov_b32 s11, 0xf000
; GCN-NEXT:    s_mov_b32 s8, s10
; GCN-NEXT:    s_mov_b32 s9, s10
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[4:7], v[0:1], s[8:11], 0 addr64 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[8:11], v[0:1], s[8:11], 0 addr64 offset:16 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[8:11], v[0:1], s[8:11], 0 addr64 offset:32 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[8:11], 0 addr64 offset:48 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:  .LBB1_4: ; %exit
; GCN-NEXT:    s_or_b64 exec, exec, s[4:5]
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v1, 0xffff8000
; GCN-NEXT:    v_cmp_lt_i64_e32 vcc, -1, v[4:5]
; GCN-NEXT:    v_cndmask_b32_e32 v0, -1, v1, vcc
; GCN-NEXT:    v_cmp_lt_i64_e32 vcc, -1, v[6:7]
; GCN-NEXT:    v_cndmask_b32_e32 v2, -1, v1, vcc
; GCN-NEXT:    v_mov_b32_e32 v1, -1
; GCN-NEXT:    v_mov_b32_e32 v3, -1
; GCN-NEXT:    s_setpc_b64 s[30:31]
  br i1 %c0, label %T, label %F

T:
  %t = load volatile <8 x i64>, ptr addrspace(1) %p0
  br label %exit

F:
  %f = load volatile <8 x i64>, ptr addrspace(1) %p1
  br label %exit

exit:
  %m = phi <8 x i64> [ %t, %T ], [ %f, %F ]
  %v2 = shufflevector <8 x i64> %m, <8 x i64> poison, <2 x i32> <i32 0, i32 1>
  %b2 = icmp sgt <2 x i64> %v2, <i64 -1, i64 -1>
  %r2 = select <2 x i1> %b2, <2 x i64> <i64 -32768, i64 -32768>, <2 x i64> <i64 -1, i64 -1>
  ret <2 x i64> %r2
}

; <8 x i64> source, leading <4 x i64> extracted, signed compare against -1.
define <4 x i64> @extract_4xi64(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i1 %c0) {
; GCN-LABEL: extract_4xi64:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_and_b32_e32 v4, 1, v4
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v4
; GCN-NEXT:    s_xor_b64 s[4:5], vcc, -1
; GCN-NEXT:    ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19
; GCN-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
; GCN-NEXT:    s_xor_b64 s[4:5], exec, s[6:7]
; GCN-NEXT:    s_cbranch_execz .LBB2_2
; GCN-NEXT:  ; %bb.1: ; %F
; GCN-NEXT:    s_mov_b32 s10, 0
; GCN-NEXT:    s_mov_b32 s11, 0xf000
; GCN-NEXT:    s_mov_b32 s8, s10
; GCN-NEXT:    s_mov_b32 s9, s10
; GCN-NEXT:    buffer_load_dwordx4 v[4:7], v[2:3], s[8:11], 0 addr64 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[8:11], v[2:3], s[8:11], 0 addr64 offset:16 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[12:15], v[2:3], s[8:11], 0 addr64 offset:32 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[0:3], v[2:3], s[8:11], 0 addr64 offset:48 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    ; implicit-def: $vgpr0
; GCN-NEXT:  .LBB2_2: ; %Flow
; GCN-NEXT:    s_andn2_saveexec_b64 s[4:5], s[4:5]
; GCN-NEXT:    s_cbranch_execz .LBB2_4
; GCN-NEXT:  ; %bb.3: ; %T
; GCN-NEXT:    s_mov_b32 s10, 0
; GCN-NEXT:    s_mov_b32 s11, 0xf000
; GCN-NEXT:    s_mov_b32 s8, s10
; GCN-NEXT:    s_mov_b32 s9, s10
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[4:7], v[0:1], s[8:11], 0 addr64 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[8:11], v[0:1], s[8:11], 0 addr64 offset:16 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[12:15], v[0:1], s[8:11], 0 addr64 offset:32 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[8:11], 0 addr64 offset:48 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:  .LBB2_4: ; %exit
; GCN-NEXT:    s_or_b64 exec, exec, s[4:5]
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v1, 0xffff8000
; GCN-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[4:5]
; GCN-NEXT:    v_cndmask_b32_e64 v0, v1, -1, vcc
; GCN-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[6:7]
; GCN-NEXT:    v_cndmask_b32_e64 v2, v1, -1, vcc
; GCN-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[8:9]
; GCN-NEXT:    v_cndmask_b32_e64 v4, v1, -1, vcc
; GCN-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[10:11]
; GCN-NEXT:    v_cndmask_b32_e64 v6, v1, -1, vcc
; GCN-NEXT:    v_mov_b32_e32 v1, -1
; GCN-NEXT:    v_mov_b32_e32 v3, -1
; GCN-NEXT:    v_mov_b32_e32 v5, -1
; GCN-NEXT:    v_mov_b32_e32 v7, -1
; GCN-NEXT:    s_setpc_b64 s[30:31]
  br i1 %c0, label %T, label %F

T:
  %t = load volatile <8 x i64>, ptr addrspace(1) %p0
  br label %exit

F:
  %f = load volatile <8 x i64>, ptr addrspace(1) %p1
  br label %exit

exit:
  %m = phi <8 x i64> [ %t, %T ], [ %f, %F ]
  %v2 = shufflevector <8 x i64> %m, <8 x i64> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %b2 = icmp sgt <4 x i64> %v2, <i64 -1, i64 -1, i64 -1, i64 -1>
  %r2 = select <4 x i1> %b2, <4 x i64> <i64 -32768, i64 -32768, i64 -32768, i64 -32768>, <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>
  ret <4 x i64> %r2
}

; <16 x i64> source, leading <8 x i64> extracted, signed compare against -1.
define <8 x i64> @extract_8xi64(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i1 %c0) {
; GCN-LABEL: extract_8xi64:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_and_b32_e32 v4, 1, v4
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v4
; GCN-NEXT:    s_xor_b64 s[4:5], vcc, -1
; GCN-NEXT:    ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35
; GCN-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
; GCN-NEXT:    s_xor_b64 s[4:5], exec, s[6:7]
; GCN-NEXT:    s_cbranch_execz .LBB3_2
; GCN-NEXT:  ; %bb.1: ; %F
; GCN-NEXT:    s_mov_b32 s10, 0
; GCN-NEXT:    s_mov_b32 s11, 0xf000
; GCN-NEXT:    s_mov_b32 s8, s10
; GCN-NEXT:    s_mov_b32 s9, s10
; GCN-NEXT:    buffer_load_dwordx4 v[4:7], v[2:3], s[8:11], 0 addr64 offset:112 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[4:7], v[2:3], s[8:11], 0 addr64 offset:96 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[4:7], v[2:3], s[8:11], 0 addr64 offset:80 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[4:7], v[2:3], s[8:11], 0 addr64 offset:64 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[4:7], v[2:3], s[8:11], 0 addr64 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[8:11], v[2:3], s[8:11], 0 addr64 offset:16 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[12:15], v[2:3], s[8:11], 0 addr64 offset:32 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[16:19], v[2:3], s[8:11], 0 addr64 offset:48 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    ; implicit-def: $vgpr0
; GCN-NEXT:  .LBB3_2: ; %Flow
; GCN-NEXT:    s_andn2_saveexec_b64 s[4:5], s[4:5]
; GCN-NEXT:    s_cbranch_execz .LBB3_4
; GCN-NEXT:  ; %bb.3: ; %T
; GCN-NEXT:    s_mov_b32 s10, 0
; GCN-NEXT:    s_mov_b32 s11, 0xf000
; GCN-NEXT:    s_mov_b32 s8, s10
; GCN-NEXT:    s_mov_b32 s9, s10
; GCN-NEXT:    buffer_load_dwordx4 v[2:5], v[0:1], s[8:11], 0 addr64 offset:112 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[2:5], v[0:1], s[8:11], 0 addr64 offset:96 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[2:5], v[0:1], s[8:11], 0 addr64 offset:80 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[2:5], v[0:1], s[8:11], 0 addr64 offset:64 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[4:7], v[0:1], s[8:11], 0 addr64 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[8:11], v[0:1], s[8:11], 0 addr64 offset:16 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[12:15], v[0:1], s[8:11], 0 addr64 offset:32 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[16:19], v[0:1], s[8:11], 0 addr64 offset:48 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:  .LBB3_4: ; %exit
; GCN-NEXT:    s_or_b64 exec, exec, s[4:5]
; GCN-NEXT:    v_mov_b32_e32 v1, 0xffff8000
; GCN-NEXT:    v_cmp_gt_i64_e32 vcc, 0, v[6:7]
; GCN-NEXT:    v_cmp_gt_i64_e64 s[4:5], 0, v[8:9]
; GCN-NEXT:    v_cmp_gt_i64_e64 s[6:7], 0, v[10:11]
; GCN-NEXT:    v_cmp_gt_i64_e64 s[8:9], 0, v[12:13]
; GCN-NEXT:    v_cmp_gt_i64_e64 s[10:11], 0, v[14:15]
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_gt_i64_e64 s[12:13], 0, v[16:17]
; GCN-NEXT:    v_cmp_gt_i64_e64 s[14:15], 0, v[18:19]
; GCN-NEXT:    v_cmp_gt_i64_e64 s[16:17], 0, v[4:5]
; GCN-NEXT:    v_cndmask_b32_e64 v0, v1, -1, s[16:17]
; GCN-NEXT:    v_cndmask_b32_e64 v2, v1, -1, vcc
; GCN-NEXT:    v_cndmask_b32_e64 v4, v1, -1, s[4:5]
; GCN-NEXT:    v_cndmask_b32_e64 v6, v1, -1, s[6:7]
; GCN-NEXT:    v_cndmask_b32_e64 v8, v1, -1, s[8:9]
; GCN-NEXT:    v_cndmask_b32_e64 v10, v1, -1, s[10:11]
; GCN-NEXT:    v_cndmask_b32_e64 v12, v1, -1, s[12:13]
; GCN-NEXT:    v_cndmask_b32_e64 v14, v1, -1, s[14:15]
; GCN-NEXT:    v_mov_b32_e32 v1, -1
; GCN-NEXT:    v_mov_b32_e32 v3, -1
; GCN-NEXT:    v_mov_b32_e32 v5, -1
; GCN-NEXT:    v_mov_b32_e32 v7, -1
; GCN-NEXT:    v_mov_b32_e32 v9, -1
; GCN-NEXT:    v_mov_b32_e32 v11, -1
; GCN-NEXT:    v_mov_b32_e32 v13, -1
; GCN-NEXT:    v_mov_b32_e32 v15, -1
; GCN-NEXT:    s_setpc_b64 s[30:31]
  br i1 %c0, label %T, label %F

T:
  %t = load volatile <16 x i64>, ptr addrspace(1) %p0
  br label %exit

F:
  %f = load volatile <16 x i64>, ptr addrspace(1) %p1
  br label %exit

exit:
  %m = phi <16 x i64> [ %t, %T ], [ %f, %F ]
  %v2 = shufflevector <16 x i64> %m, <16 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %b2 = icmp sgt <8 x i64> %v2, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
  %r2 = select <8 x i1> %b2, <8 x i64> <i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768>, <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
  ret <8 x i64> %r2
}

; <8 x double> source, leading <2 x double> extracted, ordered greater-than
; compare against -1.0.
define <2 x double> @extract_2xf64(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i1 %c0) {
; GCN-LABEL: extract_2xf64:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_and_b32_e32 v4, 1, v4
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v4
; GCN-NEXT:    s_xor_b64 s[4:5], vcc, -1
; GCN-NEXT:    ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19
; GCN-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
; GCN-NEXT:    s_xor_b64 s[4:5], exec, s[6:7]
; GCN-NEXT:    s_cbranch_execz .LBB4_2
; GCN-NEXT:  ; %bb.1: ; %F
; GCN-NEXT:    s_mov_b32 s10, 0
; GCN-NEXT:    s_mov_b32 s11, 0xf000
; GCN-NEXT:    s_mov_b32 s8, s10
; GCN-NEXT:    s_mov_b32 s9, s10
; GCN-NEXT:    buffer_load_dwordx4 v[4:7], v[2:3], s[8:11], 0 addr64 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[8:11], v[2:3], s[8:11], 0 addr64 offset:16 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[8:11], v[2:3], s[8:11], 0 addr64 offset:32 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[0:3], v[2:3], s[8:11], 0 addr64 offset:48 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    ; implicit-def: $vgpr0
; GCN-NEXT:  .LBB4_2: ; %Flow
; GCN-NEXT:    s_andn2_saveexec_b64 s[4:5], s[4:5]
; GCN-NEXT:    s_cbranch_execz .LBB4_4
; GCN-NEXT:  ; %bb.3: ; %T
; GCN-NEXT:    s_mov_b32 s10, 0
; GCN-NEXT:    s_mov_b32 s11, 0xf000
; GCN-NEXT:    s_mov_b32 s8, s10
; GCN-NEXT:    s_mov_b32 s9, s10
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[4:7], v[0:1], s[8:11], 0 addr64 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[8:11], v[0:1], s[8:11], 0 addr64 offset:16 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[8:11], v[0:1], s[8:11], 0 addr64 offset:32 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[8:11], 0 addr64 offset:48 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:  .LBB4_4: ; %exit
; GCN-NEXT:    s_or_b64 exec, exec, s[4:5]
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v0, 0xbff00000
; GCN-NEXT:    v_cmp_lt_f64_e32 vcc, -1.0, v[4:5]
; GCN-NEXT:    v_cndmask_b32_e64 v1, v0, -2.0, vcc
; GCN-NEXT:    v_cmp_lt_f64_e32 vcc, -1.0, v[6:7]
; GCN-NEXT:    v_cndmask_b32_e64 v3, v0, -2.0, vcc
; GCN-NEXT:    v_mov_b32_e32 v0, 0
; GCN-NEXT:    v_mov_b32_e32 v2, 0
; GCN-NEXT:    s_setpc_b64 s[30:31]
  br i1 %c0, label %T, label %F

T:
  %t = load volatile <8 x double>, ptr addrspace(1) %p0
  br label %exit

F:
  %f = load volatile <8 x double>, ptr addrspace(1) %p1
  br label %exit

exit:
  %m = phi <8 x double> [ %t, %T ], [ %f, %F ]
  %v2 = shufflevector <8 x double> %m, <8 x double> poison, <2 x i32> <i32 0, i32 1>
  %b2 = fcmp ogt <2 x double> %v2, <double -1.0, double -1.0>
  %r2 = select <2 x i1> %b2, <2 x double> <double -2.0, double -2.0>, <2 x double> <double -1.0, double -1.0>
  ret <2 x double> %r2
}

; <8 x double> source, leading <4 x double> extracted, ordered greater-than
; compare against -1.0.
define <4 x double> @extract_4xf64(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i1 %c0) {
; GCN-LABEL: extract_4xf64:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_and_b32_e32 v4, 1, v4
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v4
; GCN-NEXT:    s_xor_b64 s[4:5], vcc, -1
; GCN-NEXT:    ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19
; GCN-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
; GCN-NEXT:    s_xor_b64 s[4:5], exec, s[6:7]
; GCN-NEXT:    s_cbranch_execz .LBB5_2
; GCN-NEXT:  ; %bb.1: ; %F
; GCN-NEXT:    s_mov_b32 s10, 0
; GCN-NEXT:    s_mov_b32 s11, 0xf000
; GCN-NEXT:    s_mov_b32 s8, s10
; GCN-NEXT:    s_mov_b32 s9, s10
; GCN-NEXT:    buffer_load_dwordx4 v[4:7], v[2:3], s[8:11], 0 addr64 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[8:11], v[2:3], s[8:11], 0 addr64 offset:16 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[12:15], v[2:3], s[8:11], 0 addr64 offset:32 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[0:3], v[2:3], s[8:11], 0 addr64 offset:48 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    ; implicit-def: $vgpr0
; GCN-NEXT:  .LBB5_2: ; %Flow
; GCN-NEXT:    s_andn2_saveexec_b64 s[4:5], s[4:5]
; GCN-NEXT:    s_cbranch_execz .LBB5_4
; GCN-NEXT:  ; %bb.3: ; %T
; GCN-NEXT:    s_mov_b32 s10, 0
; GCN-NEXT:    s_mov_b32 s11, 0xf000
; GCN-NEXT:    s_mov_b32 s8, s10
; GCN-NEXT:    s_mov_b32 s9, s10
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[4:7], v[0:1], s[8:11], 0 addr64 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[8:11], v[0:1], s[8:11], 0 addr64 offset:16 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[12:15], v[0:1], s[8:11], 0 addr64 offset:32 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[8:11], 0 addr64 offset:48 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:  .LBB5_4: ; %exit
; GCN-NEXT:    s_or_b64 exec, exec, s[4:5]
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v0, 0xbff00000
; GCN-NEXT:    v_cmp_nlt_f64_e32 vcc, -1.0, v[4:5]
; GCN-NEXT:    v_cndmask_b32_e32 v1, -2.0, v0, vcc
; GCN-NEXT:    v_cmp_nlt_f64_e32 vcc, -1.0, v[6:7]
; GCN-NEXT:    v_cndmask_b32_e32 v3, -2.0, v0, vcc
; GCN-NEXT:    v_cmp_nlt_f64_e32 vcc, -1.0, v[8:9]
; GCN-NEXT:    v_cndmask_b32_e32 v5, -2.0, v0, vcc
; GCN-NEXT:    v_cmp_nlt_f64_e32 vcc, -1.0, v[10:11]
; GCN-NEXT:    v_cndmask_b32_e32 v7, -2.0, v0, vcc
; GCN-NEXT:    v_mov_b32_e32 v0, 0
; GCN-NEXT:    v_mov_b32_e32 v2, 0
; GCN-NEXT:    v_mov_b32_e32 v4, 0
; GCN-NEXT:    v_mov_b32_e32 v6, 0
; GCN-NEXT:    s_setpc_b64 s[30:31]
  br i1 %c0, label %T, label %F

T:
  %t = load volatile <8 x double>, ptr addrspace(1) %p0
  br label %exit

F:
  %f = load volatile <8 x double>, ptr addrspace(1) %p1
  br label %exit

exit:
  %m = phi <8 x double> [ %t, %T ], [ %f, %F ]
  %v2 = shufflevector <8 x double> %m, <8 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %b2 = fcmp ogt <4 x double> %v2, <double -1.0, double -1.0, double -1.0, double -1.0>
  %r2 = select <4 x i1> %b2, <4 x double> <double -2.0, double -2.0, double -2.0, double -2.0>, <4 x double> <double -1.0, double -1.0, double -1.0, double -1.0>
  ret <4 x double> %r2
}

; <16 x double> source, leading <8 x double> extracted, ordered greater-than
; compare against -1.0.
define <8 x double> @extract_8xf64(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i1 %c0) {
; GCN-LABEL: extract_8xf64:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_and_b32_e32 v4, 1, v4
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v4
; GCN-NEXT:    s_xor_b64 s[4:5], vcc, -1
; GCN-NEXT:    ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35
; GCN-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
; GCN-NEXT:    s_xor_b64 s[4:5], exec, s[6:7]
; GCN-NEXT:    s_cbranch_execz .LBB6_2
; GCN-NEXT:  ; %bb.1: ; %F
; GCN-NEXT:    s_mov_b32 s10, 0
; GCN-NEXT:    s_mov_b32 s11, 0xf000
; GCN-NEXT:    s_mov_b32 s8, s10
; GCN-NEXT:    s_mov_b32 s9, s10
; GCN-NEXT:    buffer_load_dwordx4 v[4:7], v[2:3], s[8:11], 0 addr64 offset:112 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[4:7], v[2:3], s[8:11], 0 addr64 offset:96 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[4:7], v[2:3], s[8:11], 0 addr64 offset:80 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[4:7], v[2:3], s[8:11], 0 addr64 offset:64 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[4:7], v[2:3], s[8:11], 0 addr64 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[8:11], v[2:3], s[8:11], 0 addr64 offset:16 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[12:15], v[2:3], s[8:11], 0 addr64 offset:32 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[16:19], v[2:3], s[8:11], 0 addr64 offset:48 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    ; implicit-def: $vgpr0
; GCN-NEXT:  .LBB6_2: ; %Flow
; GCN-NEXT:    s_andn2_saveexec_b64 s[4:5], s[4:5]
; GCN-NEXT:    s_cbranch_execz .LBB6_4
; GCN-NEXT:  ; %bb.3: ; %T
; GCN-NEXT:    s_mov_b32 s10, 0
; GCN-NEXT:    s_mov_b32 s11, 0xf000
; GCN-NEXT:    s_mov_b32 s8, s10
; GCN-NEXT:    s_mov_b32 s9, s10
; GCN-NEXT:    buffer_load_dwordx4 v[2:5], v[0:1], s[8:11], 0 addr64 offset:112 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[2:5], v[0:1], s[8:11], 0 addr64 offset:96 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[2:5], v[0:1], s[8:11], 0 addr64 offset:80 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[2:5], v[0:1], s[8:11], 0 addr64 offset:64 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[4:7], v[0:1], s[8:11], 0 addr64 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[8:11], v[0:1], s[8:11], 0 addr64 offset:16 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[12:15], v[0:1], s[8:11], 0 addr64 offset:32 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    buffer_load_dwordx4 v[16:19], v[0:1], s[8:11], 0 addr64 offset:48 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:  .LBB6_4: ; %exit
; GCN-NEXT:    s_or_b64 exec, exec, s[4:5]
; GCN-NEXT:    v_mov_b32_e32 v0, 0xbff00000
; GCN-NEXT:    v_cmp_nlt_f64_e32 vcc, -1.0, v[6:7]
; GCN-NEXT:    v_cmp_nlt_f64_e64 s[4:5], -1.0, v[8:9]
; GCN-NEXT:    v_cmp_nlt_f64_e64 s[6:7], -1.0, v[10:11]
; GCN-NEXT:    v_cmp_nlt_f64_e64 s[8:9], -1.0, v[12:13]
; GCN-NEXT:    v_cmp_nlt_f64_e64 s[10:11], -1.0, v[14:15]
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_nlt_f64_e64 s[12:13], -1.0, v[16:17]
; GCN-NEXT:    v_cmp_nlt_f64_e64 s[14:15], -1.0, v[18:19]
; GCN-NEXT:    v_cmp_nlt_f64_e64 s[16:17], -1.0, v[4:5]
; GCN-NEXT:    v_cndmask_b32_e64 v1, -2.0, v0, s[16:17]
; GCN-NEXT:    v_cndmask_b32_e32 v3, -2.0, v0, vcc
; GCN-NEXT:    v_cndmask_b32_e64 v5, -2.0, v0, s[4:5]
; GCN-NEXT:    v_cndmask_b32_e64 v7, -2.0, v0, s[6:7]
; GCN-NEXT:    v_cndmask_b32_e64 v9, -2.0, v0, s[8:9]
; GCN-NEXT:    v_cndmask_b32_e64 v11, -2.0, v0, s[10:11]
; GCN-NEXT:    v_cndmask_b32_e64 v13, -2.0, v0, s[12:13]
; GCN-NEXT:    v_cndmask_b32_e64 v15, -2.0, v0, s[14:15]
; GCN-NEXT:    v_mov_b32_e32 v0, 0
; GCN-NEXT:    v_mov_b32_e32 v2, 0
; GCN-NEXT:    v_mov_b32_e32 v4, 0
; GCN-NEXT:    v_mov_b32_e32 v6, 0
; GCN-NEXT:    v_mov_b32_e32 v8, 0
; GCN-NEXT:    v_mov_b32_e32 v10, 0
; GCN-NEXT:    v_mov_b32_e32 v12, 0
; GCN-NEXT:    v_mov_b32_e32 v14, 0
; GCN-NEXT:    s_setpc_b64 s[30:31]
  br i1 %c0, label %T, label %F

T:
  %t = load volatile <16 x double>, ptr addrspace(1) %p0
  br label %exit

F:
  %f = load volatile <16 x double>, ptr addrspace(1) %p1
  br label %exit

exit:
  %m = phi <16 x double> [ %t, %T ], [ %f, %F ]
  %v2 = shufflevector <16 x double> %m, <16 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %b2 = fcmp ogt <8 x double> %v2, <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0>
  %r2 = select <8 x i1> %b2, <8 x double> <double -2.0, double -2.0, double -2.0, double -2.0, double -2.0, double -2.0, double -2.0, double -2.0>, <8 x double> <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0>
  ret <8 x double> %r2
}