; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s

; Each kernel below applies a binary operation to a select and stores the
; result through %p. The check lines expect the generated code to contain a
; select between the already-combined values rather than a select followed by
; the binary operation.

;------------------------------------------------------------------------------
; 'and' with a select of 0 / -1: the checks expect a scalar select whose
; result is copied to a VGPR and stored, with no v_and_b32 before the store.
;------------------------------------------------------------------------------

; GCN-LABEL: {{^}}select_and1:
; GCN: s_cselect_b32 [[SEL:s[0-9]+]], s{{[0-9]+}},
; GCN: v_mov_b32_e32 [[VSEL:v[0-9]+]], [[SEL]]
; GCN-NOT: v_and_b32
; GCN: store_dword v{{[0-9]+}}, [[VSEL]], s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @select_and1(ptr addrspace(1) %p, i32 %x, i32 %y) {
  %cmp = icmp slt i32 %x, 11
  %mask = select i1 %cmp, i32 0, i32 -1
  %val = and i32 %y, %mask
  store i32 %val, ptr addrspace(1) %p, align 4
  ret void
}

; Same as select_and1 with the 'and' operands swapped.
; GCN-LABEL: {{^}}select_and2:
; GCN: s_cselect_b32 [[SEL:s[0-9]+]], s{{[0-9]+}},
; GCN: v_mov_b32_e32 [[VSEL:v[0-9]+]], [[SEL]]
; GCN-NOT: v_and_b32
; GCN: store_dword v{{[0-9]+}}, [[VSEL]], s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @select_and2(ptr addrspace(1) %p, i32 %x, i32 %y) {
  %cmp = icmp slt i32 %x, 11
  %mask = select i1 %cmp, i32 0, i32 -1
  %val = and i32 %mask, %y
  store i32 %val, ptr addrspace(1) %p, align 4
  ret void
}

; Same as select_and1 with the select arms swapped (-1 when true, 0 otherwise).
; GCN-LABEL: {{^}}select_and3:
; GCN: s_cselect_b32 [[SEL:s[0-9]+]], s{{[0-9]+}},
; GCN: v_mov_b32_e32 [[VSEL:v[0-9]+]], [[SEL]]
; GCN-NOT: v_and_b32
; GCN: store_dword v{{[0-9]+}}, [[VSEL]], s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @select_and3(ptr addrspace(1) %p, i32 %x, i32 %y) {
  %cmp = icmp slt i32 %x, 11
  %mask = select i1 %cmp, i32 -1, i32 0
  %val = and i32 %y, %mask
  store i32 %val, ptr addrspace(1) %p, align 4
  ret void
}

; <4 x i32> variant: four scalar selects against 0 are expected, moved into
; consecutive VGPRs and written with one dwordx4 store.
; GCN-LABEL: {{^}}select_and_v4:
; GCN: s_cselect_b32 s[[SEL0:[0-9]+]], s{{[0-9]+}}, 0
; GCN: s_cselect_b32 s[[SEL1:[0-9]+]], s{{[0-9]+}}, 0
; GCN: s_cselect_b32 s[[SEL2:[0-9]+]], s{{[0-9]+}}, 0
; GCN: s_cselect_b32 s[[SEL3:[0-9]+]], s{{[0-9]+}}, 0
; GCN: v_mov_b32_e32 v[[V0:[0-9]+]], s[[SEL3]]
; GCN: v_mov_b32_e32 v[[V1:[0-9]+]], s[[SEL2]]
; GCN: v_mov_b32_e32 v[[V2:[0-9]+]], s[[SEL1]]
; GCN: v_mov_b32_e32 v[[V3:[0-9]+]], s[[SEL0]]
; GCN-NOT: v_and_b32
; GCN: global_store_dwordx4 v{{[0-9]+}}, v[[[V0]]:[[V3]]]
define amdgpu_kernel void @select_and_v4(ptr addrspace(1) %p, i32 %x, <4 x i32> %y) {
  %cmp = icmp slt i32 %x, 11
  %mask = select i1 %cmp, <4 x i32> zeroinitializer, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
  %val = and <4 x i32> %mask, %y
  store <4 x i32> %val, ptr addrspace(1) %p, align 32
  ret void
}

;------------------------------------------------------------------------------
; 'or' with a select of 0 / -1: same shape as the 'and' tests above, with
; v_or_b32 being the instruction that must not appear.
;------------------------------------------------------------------------------

; GCN-LABEL: {{^}}select_or1:
; GCN: s_cselect_b32 [[SEL:s[0-9]+]], s{{[0-9]+}},
; GCN: v_mov_b32_e32 [[VSEL:v[0-9]+]], [[SEL]]
; GCN-NOT: v_or_b32
; GCN: store_dword v{{[0-9]+}}, [[VSEL]], s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @select_or1(ptr addrspace(1) %p, i32 %x, i32 %y) {
  %cmp = icmp slt i32 %x, 11
  %mask = select i1 %cmp, i32 0, i32 -1
  %val = or i32 %y, %mask
  store i32 %val, ptr addrspace(1) %p, align 4
  ret void
}

; Same as select_or1 with the 'or' operands swapped.
; GCN-LABEL: {{^}}select_or2:
; GCN: s_cselect_b32 [[SEL:s[0-9]+]], s{{[0-9]+}},
; GCN: v_mov_b32_e32 [[VSEL:v[0-9]+]], [[SEL]]
; GCN-NOT: v_or_b32
; GCN: store_dword v{{[0-9]+}}, [[VSEL]], s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @select_or2(ptr addrspace(1) %p, i32 %x, i32 %y) {
  %cmp = icmp slt i32 %x, 11
  %mask = select i1 %cmp, i32 0, i32 -1
  %val = or i32 %mask, %y
  store i32 %val, ptr addrspace(1) %p, align 4
  ret void
}

; Same as select_or1 with the select arms swapped (-1 when true, 0 otherwise).
; GCN-LABEL: {{^}}select_or3:
; GCN: s_cselect_b32 [[SEL:s[0-9]+]], s{{[0-9]+}},
; GCN: v_mov_b32_e32 [[VSEL:v[0-9]+]], [[SEL]]
; GCN-NOT: v_or_b32
; GCN: store_dword v{{[0-9]+}}, [[VSEL]], s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @select_or3(ptr addrspace(1) %p, i32 %x, i32 %y) {
  %cmp = icmp slt i32 %x, 11
  %mask = select i1 %cmp, i32 -1, i32 0
  %val = or i32 %y, %mask
  store i32 %val, ptr addrspace(1) %p, align 4
  ret void
}

; <4 x i32> variant: four scalar selects against -1 are expected, and no
; v_or_b32 may follow them before the copies into VGPRs.
; GCN-LABEL: {{^}}select_or_v4:
; GCN: s_cselect_b32 s[[SEL0:[0-9]+]], s{{[0-9]+}}, -1
; GCN: s_cselect_b32 s[[SEL1:[0-9]+]], s{{[0-9]+}}, -1
; GCN: s_cselect_b32 s[[SEL2:[0-9]+]], s{{[0-9]+}}, -1
; GCN: s_cselect_b32 s[[SEL3:[0-9]+]], s{{[0-9]+}}, -1
; GCN-NOT: v_or_b32
; GCN: v_mov_b32_e32 v[[V0:[0-9]+]], s[[SEL3]]
; GCN: v_mov_b32_e32 v[[V1:[0-9]+]], s[[SEL2]]
; GCN: v_mov_b32_e32 v[[V2:[0-9]+]], s[[SEL1]]
; GCN: v_mov_b32_e32 v[[V3:[0-9]+]], s[[SEL0]]
; GCN: global_store_dwordx4 v{{[0-9]+}}, v[[[V0]]:[[V3]]]
define amdgpu_kernel void @select_or_v4(ptr addrspace(1) %p, i32 %x, <4 x i32> %y) {
  %cmp = icmp slt i32 %x, 11
  %mask = select i1 %cmp, <4 x i32> zeroinitializer, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
  %val = or <4 x i32> %mask, %y
  store <4 x i32> %val, ptr addrspace(1) %p, align 32
  ret void
}

;------------------------------------------------------------------------------
; Integer operation with a constant left-hand side and a select of two
; constants on the right: the checks expect a select between the two
; pre-computed results.
;------------------------------------------------------------------------------

; 5 - (-4) = 9 and 5 - 3 = 2.
; GCN-LABEL: {{^}}sel_constants_sub_constant_sel_constants:
; GCN: s_cselect_b32 s{{[0-9]+}}, 9, 2
define amdgpu_kernel void @sel_constants_sub_constant_sel_constants(ptr addrspace(1) %p, i1 %cond) {
  %choice = select i1 %cond, i32 -4, i32 3
  %res = sub i32 5, %choice
  store i32 %res, ptr addrspace(1) %p, align 4
  ret void
}

; i16 version of the test above; the same 9 / 2 pair is expected.
; GCN-LABEL: {{^}}sel_constants_sub_constant_sel_constants_i16:
; GCN: s_cselect_b32 s{{[0-9]+}}, 9, 2
define amdgpu_kernel void @sel_constants_sub_constant_sel_constants_i16(ptr addrspace(1) %p, i1 %cond) {
  %choice = select i1 %cond, i16 -4, i16 3
  %res = sub i16 5, %choice
  store i16 %res, ptr addrspace(1) %p, align 2
  ret void
}

; i16 with negative results: 1 - 3000 = -2999, which is 0xf449 as a 16-bit
; value.
; GCN-LABEL: {{^}}sel_constants_sub_constant_sel_constants_i16_neg:
; GCN: s_cselect_b32 s[[SGPR:[0-9]+]], s[[SGPR]], 0xf449
define amdgpu_kernel void @sel_constants_sub_constant_sel_constants_i16_neg(ptr addrspace(1) %p, i1 %cond) {
  %choice = select i1 %cond, i16 4, i16 3000
  %res = sub i16 1, %choice
  store i16 %res, ptr addrspace(1) %p, align 2
  ret void
}

; <2 x i16>: <5, 7> - <-4, 2> = <9, 5> (0x50009) and
; <5, 7> - <3, 1> = <2, 6> (0x60002), each packed into one 32-bit value.
; GCN-LABEL: {{^}}sel_constants_sub_constant_sel_constants_v2i16:
; GCN-DAG: s_mov_b32 [[T:s[0-9]+]], 0x50009
; GCN: s_cselect_b32 s{{[0-9]+}}, [[T]], 0x60002
define amdgpu_kernel void @sel_constants_sub_constant_sel_constants_v2i16(ptr addrspace(1) %p, i1 %cond) {
  %choice = select i1 %cond, <2 x i16> <i16 -4, i16 2>, <2 x i16> <i16 3, i16 1>
  %res = sub <2 x i16> <i16 5, i16 7>, %choice
  store <2 x i16> %res, ptr addrspace(1) %p, align 4
  ret void
}

; <4 x i32>: true arm gives <9, 5, 6, 7>, false arm gives <2, 6, 10, 14>.
; The checks list the selects from the last element to the first.
; GCN-LABEL: {{^}}sel_constants_sub_constant_sel_constants_v4i32:
; GCN: s_cselect_b32 s[[SEL0:[0-9]+]], 7, 14
; GCN: s_cselect_b32 s[[SEL1:[0-9]+]], 6, 10
; GCN: s_cselect_b32 s[[SEL2:[0-9]+]], 5, 6
; GCN: s_cselect_b32 s[[SEL3:[0-9]+]], 9, 2
; GCN: v_mov_b32_e32 v[[V0:[0-9]+]], s[[SEL3]]
; GCN: v_mov_b32_e32 v[[V1:[0-9]+]], s[[SEL2]]
; GCN: v_mov_b32_e32 v[[V2:[0-9]+]], s[[SEL1]]
; GCN: v_mov_b32_e32 v[[V3:[0-9]+]], s[[SEL0]]
; GCN: global_store_dwordx4 v{{[0-9]+}}, v[[[V0]]:[[V3]]]
define amdgpu_kernel void @sel_constants_sub_constant_sel_constants_v4i32(ptr addrspace(1) %p, i1 %cond) {
  %choice = select i1 %cond, <4 x i32> <i32 -4, i32 2, i32 3, i32 4>, <4 x i32> <i32 3, i32 1, i32 -1, i32 -3>
  %res = sub <4 x i32> <i32 5, i32 7, i32 9, i32 11>, %choice
  store <4 x i32> %res, ptr addrspace(1) %p, align 32
  ret void
}

; 120 sdiv 121 = 0 and 120 sdiv 23 = 5.
; GCN-LABEL: {{^}}sdiv_constant_sel_constants_i64:
; GCN: s_cselect_b32 s{{[0-9]+}}, 0, 5
define amdgpu_kernel void @sdiv_constant_sel_constants_i64(ptr addrspace(1) %p, i1 %cond) {
  %choice = select i1 %cond, i64 121, i64 23
  %res = sdiv i64 120, %choice
  store i64 %res, ptr addrspace(1) %p, align 8
  ret void
}

; 184 sdiv 7 = 26 and 184 sdiv 23 = 8.
; GCN-LABEL: {{^}}sdiv_constant_sel_constants_i32:
; GCN: s_cselect_b32 s{{[0-9]+}}, 26, 8
define amdgpu_kernel void @sdiv_constant_sel_constants_i32(ptr addrspace(1) %p, i1 %cond) {
  %choice = select i1 %cond, i32 7, i32 23
  %res = sdiv i32 184, %choice
  store i32 %res, ptr addrspace(1) %p, align 8
  ret void
}

; -4 is a huge divisor when treated as unsigned, so 120 udiv -4 = 0;
; 120 udiv 23 = 5.
; GCN-LABEL: {{^}}udiv_constant_sel_constants_i64:
; GCN: s_cselect_b32 s{{[0-9]+}}, 0, 5
define amdgpu_kernel void @udiv_constant_sel_constants_i64(ptr addrspace(1) %p, i1 %cond) {
  %choice = select i1 %cond, i64 -4, i64 23
  %res = udiv i64 120, %choice
  store i64 %res, ptr addrspace(1) %p, align 8
  ret void
}

; 33 srem 34 = 33 and 33 srem 15 = 3.
; GCN-LABEL: {{^}}srem_constant_sel_constants:
; GCN: s_cselect_b32 s{{[0-9]+}}, 33, 3
define amdgpu_kernel void @srem_constant_sel_constants(ptr addrspace(1) %p, i1 %cond) {
  %choice = select i1 %cond, i64 34, i64 15
  %res = srem i64 33, %choice
  store i64 %res, ptr addrspace(1) %p, align 8
  ret void
}

; 33 urem 34 = 33 and 33 urem 15 = 3.
; GCN-LABEL: {{^}}urem_constant_sel_constants:
; GCN: s_cselect_b32 s{{[0-9]+}}, 33, 3
define amdgpu_kernel void @urem_constant_sel_constants(ptr addrspace(1) %p, i1 %cond) {
  %choice = select i1 %cond, i64 34, i64 15
  %res = urem i64 33, %choice
  store i64 %res, ptr addrspace(1) %p, align 8
  ret void
}

; 1 shl 2 = 4 and 1 shl 3 = 8.
; GCN-LABEL: {{^}}shl_constant_sel_constants:
; GCN: s_cselect_b32 s{{[0-9]+}}, 4, 8
define amdgpu_kernel void @shl_constant_sel_constants(ptr addrspace(1) %p, i1 %cond) {
  %choice = select i1 %cond, i32 2, i32 3
  %res = shl i32 1, %choice
  store i32 %res, ptr addrspace(1) %p, align 4
  ret void
}

; 64 lshr 2 = 16 and 64 lshr 3 = 8.
; GCN-LABEL: {{^}}lshr_constant_sel_constants:
; GCN: s_cselect_b32 s{{[0-9]+}}, 16, 8
define amdgpu_kernel void @lshr_constant_sel_constants(ptr addrspace(1) %p, i1 %cond) {
  %choice = select i1 %cond, i32 2, i32 3
  %res = lshr i32 64, %choice
  store i32 %res, ptr addrspace(1) %p, align 4
  ret void
}

; 128 ashr 2 = 32 and 128 ashr 3 = 16.
; GCN-LABEL: {{^}}ashr_constant_sel_constants:
; GCN: s_cselect_b32 s{{[0-9]+}}, 32, 16
define amdgpu_kernel void @ashr_constant_sel_constants(ptr addrspace(1) %p, i1 %cond) {
  %choice = select i1 %cond, i32 2, i32 3
  %res = ashr i32 128, %choice
  store i32 %res, ptr addrspace(1) %p, align 4
  ret void
}

;------------------------------------------------------------------------------
; Floating-point operation with a constant left-hand side and a select of two
; constants on the right.
;------------------------------------------------------------------------------

; -1.0 - (-2.0) = 1.0 and -1.0 - 3.0 = -4.0. The expected v_cndmask lists the
; false-arm result (-4.0) before the true-arm result (1.0).
; GCN-LABEL: {{^}}fsub_constant_sel_constants:
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, -4.0, 1.0,
define amdgpu_kernel void @fsub_constant_sel_constants(ptr addrspace(1) %p, i1 %cond) {
  %choice = select i1 %cond, float -2.0, float 3.0
  %res = fsub float -1.0, %choice
  store float %res, ptr addrspace(1) %p, align 4
  ret void
}

; half version: 0x3c00 is 1.0 and 0xc400 is -4.0 in half precision. Both are
; currently expected to be materialized in VGPRs before the v_cndmask.
; GCN-LABEL: {{^}}fsub_constant_sel_constants_f16:
; TODO: It should be possible to fold these constants using OpSel.
; GCN-DAG: v_mov_b32_e32 [[T:v[0-9]+]], 0x3c00
; GCN-DAG: v_mov_b32_e32 [[F:v[0-9]+]], 0xc400
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, [[F]], [[T]],
define amdgpu_kernel void @fsub_constant_sel_constants_f16(ptr addrspace(1) %p, i1 %cond) {
  %choice = select i1 %cond, half -2.0, half 3.0
  %res = fsub half -1.0, %choice
  store half %res, ptr addrspace(1) %p, align 2
  ret void
}

; <2 x half>: true arm gives <1.0, 5.0>, packed as 0x45003c00; false arm gives
; <0.0, -2.0>, whose packed bits 0xc0000000 are matched here as -2.0.
; GCN-LABEL: {{^}}fsub_constant_sel_constants_v2f16:
; GCN: s_cselect_b32 s{{[0-9]+}}, 0x45003c00, -2.0
define amdgpu_kernel void @fsub_constant_sel_constants_v2f16(ptr addrspace(1) %p, i1 %cond) {
  %choice = select i1 %cond, <2 x half> <half -2.0, half -3.0>, <2 x half> <half -1.0, half 4.0>
  %res = fsub <2 x half> <half -1.0, half 2.0>, %choice
  store <2 x half> %res, ptr addrspace(1) %p, align 4
  ret void
}

; <4 x float>: true arm gives <1.0, 5.0, 9.0, 13.0>, false arm gives
; <0.0, 2.0, 4.0, 6.0>. 0x41500000 = 13.0, 0x40c00000 = 6.0, 0x41100000 = 9.0
; and 0x40a00000 = 5.0.
; GCN-LABEL: {{^}}fsub_constant_sel_constants_v4f32:
; GCN: s_mov_b32 [[T0:s[0-9]+]], 0x41500000
; GCN: s_cselect_b32 s[[SEL0:[0-9]+]], [[T0]], 0x40c00000
; GCN: s_cselect_b32 s[[SEL1:[0-9]+]], 0x41100000, 4.0
; GCN: s_cselect_b32 s[[SEL2:[0-9]+]], 0x40a00000, 2.0
; GCN: s_cselect_b32 s[[SEL3:[0-9]+]], 1.0, 0
; GCN: v_mov_b32_e32 v[[V0:[0-9]+]], s[[SEL3]]
; GCN: v_mov_b32_e32 v[[V1:[0-9]+]], s[[SEL2]]
; GCN: v_mov_b32_e32 v[[V2:[0-9]+]], s[[SEL1]]
; GCN: v_mov_b32_e32 v[[V3:[0-9]+]], s[[SEL0]]
; GCN: global_store_dwordx4 v{{[0-9]+}}, v[[[V0]]:[[V3]]]
define amdgpu_kernel void @fsub_constant_sel_constants_v4f32(ptr addrspace(1) %p, i1 %cond) {
  %choice = select i1 %cond, <4 x float> <float -2.0, float -3.0, float -4.0, float -5.0>, <4 x float> <float -1.0, float 0.0, float 1.0, float 2.0>
  %res = fsub <4 x float> <float -1.0, float 2.0, float 5.0, float 8.0>, %choice
  store <4 x float> %res, ptr addrspace(1) %p, align 32
  ret void
}

; 8.0 / -4.0 = -2.0 and 8.0 / 2.0 = 4.0.
; GCN-LABEL: {{^}}fdiv_constant_sel_constants:
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 4.0, -2.0,
define amdgpu_kernel void @fdiv_constant_sel_constants(ptr addrspace(1) %p, i1 %cond) {
  %choice = select i1 %cond, float -4.0, float 2.0
  %res = fdiv float 8.0, %choice
  store float %res, ptr addrspace(1) %p, align 4
  ret void
}

; 5.0 frem -4.0 = 1.0 and 5.0 frem 3.0 = 2.0.
; GCN-LABEL: {{^}}frem_constant_sel_constants:
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 2.0, 1.0,
define amdgpu_kernel void @frem_constant_sel_constants(ptr addrspace(1) %p, i1 %cond) {
  %choice = select i1 %cond, float -4.0, float 3.0
  %res = frem float 5.0, %choice
  store float %res, ptr addrspace(1) %p, align 4
  ret void
}