; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-no-signed-zeros-fp-math < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-no-signed-zeros-fp-math < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s

; Codegen tests for a select whose operands are fabs / fneg results (or
; constants) and whose value feeds an fadd or fmul. Each test pins where the
; abs/neg source modifiers end up in the output: on the v_cndmask operands, or
; on the instruction that consumes the select. Both invocations above (tahiti
; and fiji) share the common prefix; the target-specific prefixes are only
; needed where the expected constant operand differs.
;
; Every kernel reads its inputs with volatile loads and writes its result with
; a volatile store so that no value under test can be dropped.

; fabs on both select operands: plain select, one abs modifier on the add.
; GCN-LABEL: {{^}}add_select_fabs_fabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
; GCN: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Z]]
define amdgpu_kernel void @add_select_fabs_fabs_f32(i32 %c) #0 {
  %x = load volatile float, ptr addrspace(1) undef
  %y = load volatile float, ptr addrspace(1) undef
  %z = load volatile float, ptr addrspace(1) undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %select = select i1 %cmp, float %fabs.x, float %fabs.y
  %add = fadd float %select, %z
  ; Volatile, like every other kernel in this file, so the only user of %add
  ; is never removable.
  store volatile float %add, ptr addrspace(1) undef
  ret void
}

; As above, but fabs(x) has a second fadd user.
; GCN-LABEL: {{^}}add_select_multi_use_lhs_fabs_fabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]
; GCN: buffer_load_dword [[W:v[0-9]+]]

; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Z]]
; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, |[[X]]|, [[W]]
define amdgpu_kernel void @add_select_multi_use_lhs_fabs_fabs_f32(i32 %c) #0 {
  %x = load volatile float, ptr addrspace(1) undef
  %y = load volatile float, ptr addrspace(1) undef
  %z = load volatile float, ptr addrspace(1) undef
  %w = load volatile float, ptr addrspace(1) undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %select = select i1 %cmp, float %fabs.x, float %fabs.y
  %add0 = fadd float %select, %z
  %add1 = fadd float %fabs.x, %w
  store volatile float %add0, ptr addrspace(1) undef
  store volatile float %add1, ptr addrspace(1) undef
  ret void
}

; fabs(x) is also stored directly, so it must be materialized with v_and.
; GCN-LABEL: {{^}}add_select_multi_store_use_lhs_fabs_fabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN-DAG: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
; GCN-DAG: v_add_f32_e64 [[ADD:v[0-9]+]], |[[SELECT]]|, [[Z]]
; GCN-DAG: v_and_b32_e32 [[X_ABS:v[0-9]+]], 0x7fffffff, [[X]]

; GCN: buffer_store_dword [[ADD]]
; GCN: buffer_store_dword [[X_ABS]]
define amdgpu_kernel void @add_select_multi_store_use_lhs_fabs_fabs_f32(i32 %c) #0 {
  %x = load volatile float, ptr addrspace(1) undef
  %y = load volatile float, ptr addrspace(1) undef
  %z = load volatile float, ptr addrspace(1) undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %select = select i1 %cmp, float %fabs.x, float %fabs.y
  %add0 = fadd float %select, %z
  store volatile float %add0, ptr addrspace(1) undef
  store volatile float %fabs.x, ptr addrspace(1) undef
  ret void
}

; Same as the multi-use lhs case, with the extra fadd user on fabs(y).
; GCN-LABEL: {{^}}add_select_multi_use_rhs_fabs_fabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]
; GCN: buffer_load_dword [[W:v[0-9]+]]

; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Z]]
; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, |[[Y]]|, [[W]]
define amdgpu_kernel void @add_select_multi_use_rhs_fabs_fabs_f32(i32 %c) #0 {
  %x = load volatile float, ptr addrspace(1) undef
  %y = load volatile float, ptr addrspace(1) undef
  %z = load volatile float, ptr addrspace(1) undef
  %w = load volatile float, ptr addrspace(1) undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %select = select i1 %cmp, float %fabs.x, float %fabs.y
  %add0 = fadd float %select, %z
  %add1 = fadd float %fabs.y, %w
  store volatile float %add0, ptr addrspace(1) undef
  store volatile float %add1, ptr addrspace(1) undef
  ret void
}

; Only one operand is fabs: the abs modifier stays on the select operand.
; GCN-LABEL: {{^}}add_select_fabs_var_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[Y]], |[[X]]|,
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
define amdgpu_kernel void @add_select_fabs_var_f32(i32 %c) #0 {
  %x = load volatile float, ptr addrspace(1) undef
  %y = load volatile float, ptr addrspace(1) undef
  %z = load volatile float, ptr addrspace(1) undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %select = select i1 %cmp, float %fabs.x, float %y
  %add = fadd float %select, %z
  store volatile float %add, ptr addrspace(1) undef
  ret void
}

; fabs vs. the negative constant -1.0: the abs stays on the select operand.
; GCN-LABEL: {{^}}add_select_fabs_negk_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, |[[X]]|,
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
define amdgpu_kernel void @add_select_fabs_negk_f32(i32 %c) #0 {
  %x = load volatile float, ptr addrspace(1) undef
  %y = load volatile float, ptr addrspace(1) undef
  %cmp = icmp eq i32 %c, 0
  %fabs = call float @llvm.fabs.f32(float %x)
  %select = select i1 %cmp, float %fabs, float -1.0
  %add = fadd float %select, %y
  store volatile float %add, ptr addrspace(1) undef
  ret void
}

; fabs applied to a select of two negative constants.
; FIXME: fabs should fold away
; GCN-LABEL: {{^}}add_select_fabs_negk_negk_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]

; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, -2.0, s
; GCN: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[X]]
define amdgpu_kernel void @add_select_fabs_negk_negk_f32(i32 %c) #0 {
  %x = load volatile float, ptr addrspace(1) undef
  %cmp = icmp eq i32 %c, 0
  %select = select i1 %cmp, float -2.0, float -1.0
  %fabs = call float @llvm.fabs.f32(float %select)
  %add = fadd float %fabs, %x
  store volatile float %add, ptr addrspace(1) undef
  ret void
}

; Baseline with two positive constants and no fabs/fneg at all.
; GCN-LABEL: {{^}}add_select_posk_posk_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]

; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 1.0, 2.0, s
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[X]]
define amdgpu_kernel void @add_select_posk_posk_f32(i32 %c) #0 {
  %x = load volatile float, ptr addrspace(1) undef
  %cmp = icmp eq i32 %c, 0
  %select = select i1 %cmp, float 2.0, float 1.0
  %add = fadd float %select, %x
  store volatile float %add, ptr addrspace(1) undef
  ret void
}

; Constant in the true operand: expects an inverted compare (s_cmp_lg) with
; the constant kept in the first v_cndmask source.
; GCN-LABEL: {{^}}add_select_negk_fabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN-DAG: s_cmp_lg_u32 s{{[0-9]+}}, 0
; GCN: s_cselect_b64 [[VCC:.*]], -1, 0
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, |[[X]]|, [[VCC]]
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
define amdgpu_kernel void @add_select_negk_fabs_f32(i32 %c) #0 {
  %x = load volatile float, ptr addrspace(1) undef
  %y = load volatile float, ptr addrspace(1) undef
  %cmp = icmp eq i32 %c, 0
  %fabs = call float @llvm.fabs.f32(float %x)
  %select = select i1 %cmp, float -1.0, float %fabs
  %add = fadd float %select, %y
  store volatile float %add, ptr addrspace(1) undef
  ret void
}

; Same shape with -1024.0, which is expected to be materialized with v_mov
; (0xc4800000) instead of appearing as an immediate operand.
; GCN-LABEL: {{^}}add_select_negliteralk_fabs_f32:
; GCN-DAG: buffer_load_dword [[X:v[0-9]+]]
; GCN-DAG: buffer_load_dword [[Y:v[0-9]+]]
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xc4800000

; GCN-DAG: s_cmp_lg_u32 s{{[0-9]+}}, 0
; GCN: s_cselect_b64 [[VCC:.*]], -1, 0
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[K]], |[[X]]|, [[VCC]]
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
define amdgpu_kernel void @add_select_negliteralk_fabs_f32(i32 %c) #0 {
  %x = load volatile float, ptr addrspace(1) undef
  %y = load volatile float, ptr addrspace(1) undef
  %cmp = icmp eq i32 %c, 0
  %fabs = call float @llvm.fabs.f32(float %x)
  %select = select i1 %cmp, float -1024.0, float %fabs
  %add = fadd float %select, %y
  store volatile float %add, ptr addrspace(1) undef
  ret void
}

; fabs vs. the positive constant 1.0 in the false operand.
; GCN-LABEL: {{^}}add_select_fabs_posk_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 1.0, |[[X]]|, s{{\[[0-9]+:[0-9]+\]}}
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
define amdgpu_kernel void @add_select_fabs_posk_f32(i32 %c) #0 {
  %x = load volatile float, ptr addrspace(1) undef
  %y = load volatile float, ptr addrspace(1) undef

  %cmp = icmp eq i32 %c, 0
  %fabs = call float @llvm.fabs.f32(float %x)
  %select = select i1 %cmp, float %fabs, float 1.0
  %add = fadd float %select, %y
  store volatile float %add, ptr addrspace(1) undef
  ret void
}

; Positive constant 1.0 in the true operand (compare is expected inverted).
; GCN-LABEL: {{^}}add_select_posk_fabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN-DAG: s_cmp_lg_u32 s{{[0-9]+}}, 0
; GCN: s_cselect_b64 [[VCC:.*]], -1, 0
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 1.0, |[[X]]|, s{{\[[0-9]+:[0-9]+\]}}
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
define amdgpu_kernel void @add_select_posk_fabs_f32(i32 %c) #0 {
  %x = load volatile float, ptr addrspace(1) undef
  %y = load volatile float, ptr addrspace(1) undef
  %cmp = icmp eq i32 %c, 0
  %fabs = call float @llvm.fabs.f32(float %x)
  %select = select i1 %cmp, float 1.0, float %fabs
  %add = fadd float %select, %y
  store volatile float %add, ptr addrspace(1) undef
  ret void
}

; fneg on both select operands: plain select, and the fadd becomes z - select.
; GCN-LABEL: {{^}}add_select_fneg_fneg_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
define amdgpu_kernel void @add_select_fneg_fneg_f32(i32 %c) #0 {
  %x = load volatile float, ptr addrspace(1) undef
  %y = load volatile float, ptr addrspace(1) undef
  %z = load volatile float, ptr addrspace(1) undef
  %cmp = icmp eq i32 %c, 0
  %fneg.x = fsub float -0.0, %x
  %fneg.y = fsub float -0.0, %y
  %select = select i1 %cmp, float %fneg.x, float %fneg.y
  %add = fadd float %select, %z
  store volatile float %add, ptr addrspace(1) undef
  ret void
}

; As above, but fneg(x) has a second fadd user (also turned into a subtract).
; GCN-LABEL: {{^}}add_select_multi_use_lhs_fneg_fneg_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]
; GCN: buffer_load_dword [[W:v[0-9]+]]

; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
; GCN-DAG: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
; GCN-DAG: v_sub_f32_e32 v{{[0-9]+}}, [[W]], [[X]]
define amdgpu_kernel void @add_select_multi_use_lhs_fneg_fneg_f32(i32 %c) #0 {
  %x = load volatile float, ptr addrspace(1) undef
  %y = load volatile float, ptr addrspace(1) undef
  %z = load volatile float, ptr addrspace(1) undef
  %w = load volatile float, ptr addrspace(1) undef
  %cmp = icmp eq i32 %c, 0
  %fneg.x = fsub float -0.0, %x
  %fneg.y = fsub float -0.0, %y
  %select = select i1 %cmp, float %fneg.x, float %fneg.y
  %add0 = fadd float %select, %z
  %add1 = fadd float %fneg.x, %w
  store volatile float %add0, ptr addrspace(1) undef
  store volatile float %add1, ptr addrspace(1) undef
  ret void
}

; fneg(x) is also stored directly, so it must be materialized with v_xor.
; GCN-LABEL: {{^}}add_select_multi_store_use_lhs_fneg_fneg_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN-DAG: v_xor_b32_e32 [[NEG_X:v[0-9]+]], 0x80000000, [[X]]
; GCN-DAG: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
; GCN-DAG: v_sub_f32_e32 [[ADD:v[0-9]+]], [[Z]], [[SELECT]]

; GCN: buffer_store_dword [[ADD]]
; GCN: buffer_store_dword [[NEG_X]]
define amdgpu_kernel void @add_select_multi_store_use_lhs_fneg_fneg_f32(i32 %c) #0 {
  %x = load volatile float, ptr addrspace(1) undef
  %y = load volatile float, ptr addrspace(1) undef
  %z = load volatile float, ptr addrspace(1) undef
  %cmp = icmp eq i32 %c, 0
  %fneg.x = fsub float -0.0, %x
  %fneg.y = fsub float -0.0, %y
  %select = select i1 %cmp, float %fneg.x, float %fneg.y
  %add0 = fadd float %select, %z
  store volatile float %add0, ptr addrspace(1) undef
  store volatile float %fneg.x, ptr addrspace(1) undef
  ret void
}

; Same as the multi-use lhs case, with the extra fadd user on fneg(y).
; GCN-LABEL: {{^}}add_select_multi_use_rhs_fneg_fneg_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]
; GCN: buffer_load_dword [[W:v[0-9]+]]

; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
; GCN-DAG: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
; GCN-DAG: v_sub_f32_e32 v{{[0-9]+}}, [[W]], [[Y]]
define amdgpu_kernel void @add_select_multi_use_rhs_fneg_fneg_f32(i32 %c) #0 {
  %x = load volatile float, ptr addrspace(1) undef
  %y = load volatile float, ptr addrspace(1) undef
  %z = load volatile float, ptr addrspace(1) undef
  %w = load volatile float, ptr addrspace(1) undef
  %cmp = icmp eq i32 %c, 0
  %fneg.x = fsub float -0.0, %x
  %fneg.y = fsub float -0.0, %y
  %select = select i1 %cmp, float %fneg.x, float %fneg.y
  %add0 = fadd float %select, %z
  %add1 = fadd float %fneg.y, %w
  store volatile float %add0, ptr addrspace(1) undef
  store volatile float %add1, ptr addrspace(1) undef
  ret void
}

; Only one operand is fneg: the neg modifier stays on the select operand.
; GCN-LABEL: {{^}}add_select_fneg_var_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[Y]], -[[X]],
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
define amdgpu_kernel void @add_select_fneg_var_f32(i32 %c) #0 {
  %x = load volatile float, ptr addrspace(1) undef
  %y = load volatile float, ptr addrspace(1) undef
  %z = load volatile float, ptr addrspace(1) undef
  %cmp = icmp eq i32 %c, 0
  %fneg.x = fsub float -0.0, %x
  %select = select i1 %cmp, float %fneg.x, float %y
  %add = fadd float %select, %z
  store volatile float %add, ptr addrspace(1) undef
  ret void
}

; fneg vs. -1.0: the constant is expected negated (1.0) in the select and the
; fadd becomes y - select.
; GCN-LABEL: {{^}}add_select_fneg_negk_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 1.0, [[X]], vcc
; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
define amdgpu_kernel void @add_select_fneg_negk_f32(i32 %c) #0 {
  %x = load volatile float, ptr addrspace(1) undef
  %y = load volatile float, ptr addrspace(1) undef
  %cmp = icmp eq i32 %c, 0
  %fneg.x = fsub float -0.0, %x
  %select = select i1 %cmp, float %fneg.x, float -1.0
  %add = fadd float %select, %y
  store volatile float %add, ptr addrspace(1) undef
  ret void
}

; fneg vs. +0.15915494 (1/(2*pi)): the negated constant 0xbe22f983 is expected
; to be materialized with v_mov on both targets.
; GCN-LABEL: {{^}}add_select_fneg_inv2pi_f32:
; GCN-DAG: buffer_load_dword [[X:v[0-9]+]]
; GCN-DAG: buffer_load_dword [[Y:v[0-9]+]]

; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xbe22f983
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K]], [[X]], vcc
; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
define amdgpu_kernel void @add_select_fneg_inv2pi_f32(i32 %c) #0 {
  %x = load volatile float, ptr addrspace(1) undef
  %y = load volatile float, ptr addrspace(1) undef
  %cmp = icmp eq i32 %c, 0
  %fneg.x = fneg float %x
  %select = select i1 %cmp, float %fneg.x, float 0x3FC45F3060000000
  %add = fadd float %select, %y
  store volatile float %add, ptr addrspace(1) undef
  ret void
}

; fneg vs. -0.15915494: after negation the constant is positive. The tahiti
; run expects it in a register (v_mov 0x3e22f983); the fiji run expects it
; directly as the v_cndmask operand 0.15915494.
; GCN-LABEL: {{^}}add_select_fneg_neginv2pi_f32:
; GCN-DAG: buffer_load_dword [[X:v[0-9]+]]
; GCN-DAG: buffer_load_dword [[Y:v[0-9]+]]
; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e22f983

; SI: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K]], [[X]], vcc
; VI: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 0.15915494, [[X]], vcc

; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
define amdgpu_kernel void @add_select_fneg_neginv2pi_f32(i32 %c) #0 {
  %x = load volatile float, ptr addrspace(1) undef
  %y = load volatile float, ptr addrspace(1) undef
  %cmp = icmp eq i32 %c, 0
  %fneg.x = fneg float %x
  %select = select i1 %cmp, float %fneg.x, float 0xBFC45F3060000000
  %add = fadd float %select, %y
  store volatile float %add, ptr addrspace(1) undef
  ret void
}

; Two negative constants and no fneg: constants are expected unchanged.
; GCN-LABEL: {{^}}add_select_negk_negk_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]

; GCN: s_cmp_eq_u32
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, -2.0, s
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[X]]
define amdgpu_kernel void @add_select_negk_negk_f32(i32 %c) #0 {
  %x = load volatile float, ptr addrspace(1) undef
  %cmp = icmp eq i32 %c, 0
  %select = select i1 %cmp, float -2.0, float -1.0
  %add = fadd float %select, %x
  store volatile float %add, ptr addrspace(1) undef
  ret void
}

; Two negative constants (-2048.0 / -4096.0) that are expected to be
; materialized with v_mov and selected unchanged.
; GCN-LABEL: {{^}}add_select_negliteralk_negliteralk_f32:
; GCN-DAG: v_mov_b32_e32 [[K0:v[0-9]+]], 0xc5000000
; GCN-DAG: v_mov_b32_e32 [[K1:v[0-9]+]], 0xc5800000
; GCN-DAG: buffer_load_dword [[X:v[0-9]+]]

; GCN: s_cmp_eq_u32
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K1]], [[K0]], vcc
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[X]]
define amdgpu_kernel void @add_select_negliteralk_negliteralk_f32(i32 %c) #0 {
  %x = load volatile float, ptr addrspace(1) undef
  %cmp = icmp eq i32 %c, 0
  %select = select i1 %cmp, float -2048.0, float -4096.0
  %add = fadd float %select, %x
  store volatile float %add, ptr addrspace(1) undef
  ret void
}

; fneg of a select of two negative constants: expected to fold into a select
; of the positive constants followed by a plain add.
; GCN-LABEL: {{^}}add_select_fneg_negk_negk_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]

; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 1.0, 2.0, s
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[X]]
define amdgpu_kernel void @add_select_fneg_negk_negk_f32(i32 %c) #0 {
  %x = load volatile float, ptr addrspace(1) undef
  %cmp = icmp eq i32 %c, 0
  %select = select i1 %cmp, float -2.0, float -1.0
  %fneg.x = fsub float -0.0, %select
  %add = fadd float %fneg.x, %x
  store volatile float %add, ptr addrspace(1) undef
  ret void
}

; -1.0 in the true operand vs. fneg: inverted compare, constant negated to
; 1.0, and the fadd becomes y - select.
; GCN-LABEL: {{^}}add_select_negk_fneg_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0
; GCN: s_cselect_b64 vcc, -1, 0
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 1.0, [[X]], vcc
; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
define amdgpu_kernel void @add_select_negk_fneg_f32(i32 %c) #0 {
  %x = load volatile float, ptr addrspace(1) undef
  %y = load volatile float, ptr addrspace(1) undef
  %cmp = icmp eq i32 %c, 0
  %fneg.x = fsub float -0.0, %x
  %select = select i1 %cmp, float -1.0, float %fneg.x
  %add = fadd float %select, %y
  store volatile float %add, ptr addrspace(1) undef
  ret void
}

; fneg vs. +1.0: the constant is expected negated (-1.0) in the select and the
; fadd becomes y - select.
; GCN-LABEL: {{^}}add_select_fneg_posk_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -1.0, [[X]], vcc
; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
define amdgpu_kernel void @add_select_fneg_posk_f32(i32 %c) #0 {
  %x = load volatile float, ptr addrspace(1) undef
  %y = load volatile float, ptr addrspace(1) undef
  %cmp = icmp eq i32 %c, 0
  %fneg.x = fsub float -0.0, %x
  %select = select i1 %cmp, float %fneg.x, float 1.0
  %add = fadd float %select, %y
  store volatile float %add, ptr addrspace(1) undef
  ret void
}

; +1.0 in the true operand vs. fneg (compare is expected inverted).
; GCN-LABEL: {{^}}add_select_posk_fneg_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0
; GCN: s_cselect_b64 vcc, -1, 0
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -1.0, [[X]], vcc
; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
define amdgpu_kernel void @add_select_posk_fneg_f32(i32 %c) #0 {
  %x = load volatile float, ptr addrspace(1) undef
  %y = load volatile float, ptr addrspace(1) undef
  %cmp = icmp eq i32 %c, 0
  %fneg.x = fsub float -0.0, %x
  %select = select i1 %cmp, float 1.0, float %fneg.x
  %add = fadd float %select, %y
  store volatile float %add, ptr addrspace(1) undef
  ret void
}

; Mismatched modifiers (-|x| vs. |y|): both stay on the select operands.
; GCN-LABEL: {{^}}add_select_negfabs_fabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], |[[Y]]|, -|[[X]]|,
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
define amdgpu_kernel void @add_select_negfabs_fabs_f32(i32 %c) #0 {
  %x = load volatile float, ptr addrspace(1) undef
  %y = load volatile float, ptr addrspace(1) undef
  %z = load volatile float, ptr addrspace(1) undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %select = select i1 %cmp, float %fneg.fabs.x, float %fabs.y
  %add = fadd float %select, %z
  store volatile float %add, ptr addrspace(1) undef
  ret void
}

; Mismatched modifiers (|x| vs. -|y|): both stay on the select operands.
; GCN-LABEL: {{^}}add_select_fabs_negfabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -|[[Y]]|, |[[X]]|,
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
define amdgpu_kernel void @add_select_fabs_negfabs_f32(i32 %c) #0 {
  %x = load volatile float, ptr addrspace(1) undef
  %y = load volatile float, ptr addrspace(1) undef
  %z = load volatile float, ptr addrspace(1) undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %fneg.fabs.y = fsub float -0.000000e+00, %fabs.y
  %select = select i1 %cmp, float %fabs.x, float %fneg.fabs.y
  %add = fadd float %select, %z
  store volatile float %add, ptr addrspace(1) undef
  ret void
}

; Mismatched modifiers (-x vs. |y|): both stay on the select operands.
; GCN-LABEL: {{^}}add_select_neg_fabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], |[[Y]]|, -[[X]],
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
define amdgpu_kernel void @add_select_neg_fabs_f32(i32 %c) #0 {
  %x = load volatile float, ptr addrspace(1) undef
  %y = load volatile float, ptr addrspace(1) undef
  %z = load volatile float, ptr addrspace(1) undef
  %cmp = icmp eq i32 %c, 0
  %fneg.x = fsub float -0.000000e+00, %x
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %select = select i1 %cmp, float %fneg.x, float %fabs.y
  %add = fadd float %select, %z
  store volatile float %add, ptr addrspace(1) undef
  ret void
}

; Mismatched modifiers (|x| vs. -y): both stay on the select operands.
; GCN-LABEL: {{^}}add_select_fabs_neg_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -[[Y]], |[[X]]|,
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
define amdgpu_kernel void @add_select_fabs_neg_f32(i32 %c) #0 {
  %x = load volatile float, ptr addrspace(1) undef
  %y = load volatile float, ptr addrspace(1) undef
  %z = load volatile float, ptr addrspace(1) undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fneg.y = fsub float -0.000000e+00, %y
  %select = select i1 %cmp, float %fabs.x, float %fneg.y
  %add = fadd float %select, %z
  store volatile float %add, ptr addrspace(1) undef
  ret void
}

; Both operands negated (-x vs. -|y|): the common neg is expected to move to
; the user (z - select) while the abs stays on the select operand.
; GCN-LABEL: {{^}}add_select_neg_negfabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], |[[Y]]|, [[X]],
; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
define amdgpu_kernel void @add_select_neg_negfabs_f32(i32 %c) #0 {
  %x = load volatile float, ptr addrspace(1) undef
  %y = load volatile float, ptr addrspace(1) undef
  %z = load volatile float, ptr addrspace(1) undef
  %cmp = icmp eq i32 %c, 0
  %fneg.x = fsub float -0.000000e+00, %x
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %fneg.fabs.y = fsub float -0.000000e+00, %fabs.y
  %select = select i1 %cmp, float %fneg.x, float %fneg.fabs.y
  %add = fadd float %select, %z
  store volatile float %add, ptr addrspace(1) undef
  ret void
}

; Both operands negated (-y vs. -|x|): same expectation as the previous test
; with the operand roles swapped.
; GCN-LABEL: {{^}}add_select_negfabs_neg_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], |[[X]]|, [[Y]],
; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
define amdgpu_kernel void @add_select_negfabs_neg_f32(i32 %c) #0 {
  %x = load volatile float, ptr addrspace(1) undef
  %y = load volatile float, ptr addrspace(1) undef
  %z = load volatile float, ptr addrspace(1) undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x
  %fneg.y = fsub float -0.000000e+00, %y
  %select = select i1 %cmp, float %fneg.y, float %fneg.fabs.x
  %add = fadd float %select, %z
  store volatile float %add, ptr addrspace(1) undef
  ret void
}

; fmul user, -|x| vs. +4.0: both the constant and the modifiers are expected
; to stay as written on the select operands.
; GCN-LABEL: {{^}}mul_select_negfabs_posk_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 0
; GCN: s_cselect_b64 [[VCC:.*]], -1, 0
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 4.0, -|[[X]]|, [[VCC]]
; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
define amdgpu_kernel void @mul_select_negfabs_posk_f32(i32 %c) #0 {
  %x = load volatile float, ptr addrspace(1) undef
  %y = load volatile float, ptr addrspace(1) undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x
  %select = select i1 %cmp, float %fneg.fabs.x, float 4.0
  %mul = fmul float %select, %y
  store volatile float %mul, ptr addrspace(1) undef
  ret void
}

; fmul user, +4.0 in the true operand vs. -|x| (compare is expected inverted).
; GCN-LABEL: {{^}}mul_select_posk_negfabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN-DAG: s_cmp_lg_u32 s{{[0-9]+}}, 0
; GCN: s_cselect_b64 [[VCC:.*]], -1, 0
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 4.0, -|[[X]]|, [[VCC]]
; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
define amdgpu_kernel void @mul_select_posk_negfabs_f32(i32 %c) #0 {
  %x = load volatile float, ptr addrspace(1) undef
  %y = load volatile float, ptr addrspace(1) undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x
  %select = select i1 %cmp, float 4.0, float %fneg.fabs.x
  %mul = fmul float %select, %y
  store volatile float %mul, ptr addrspace(1) undef
  ret void
}

; fmul user, -|x| vs. -4.0.
; GCN-LABEL: {{^}}mul_select_negfabs_negk_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -4.0, -|[[X]]|, s{{\[[0-9]+:[0-9]+\]}}
; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
define amdgpu_kernel void @mul_select_negfabs_negk_f32(i32 %c) #0 {
  %x = load volatile float, ptr addrspace(1) undef
  %y = load volatile float, ptr addrspace(1) undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x
  %select = select i1 %cmp, float %fneg.fabs.x, float -4.0
  %mul = fmul float %select, %y
  store volatile float %mul, ptr addrspace(1) undef
  ret void
}

; fmul user, -4.0 in the true operand vs. -|x| (compare is expected inverted).
; GCN-LABEL: {{^}}mul_select_negk_negfabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN: s_cmp_lg_u32
; GCN: s_cselect_b64 [[VCC:.*]], -1, 0
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -4.0, -|[[X]]|, [[VCC]]
; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
define amdgpu_kernel void @mul_select_negk_negfabs_f32(i32 %c) #0 {
  %x = load volatile float, ptr addrspace(1) undef
  %y = load volatile float, ptr addrspace(1) undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x
  %select = select i1 %cmp, float -4.0, float %fneg.fabs.x
  %mul = fmul float %select, %y
  store volatile float %mul, ptr addrspace(1) undef
  ret void
}

; --------------------------------------------------------------------------------
; Don't fold if fneg can fold into the source
; --------------------------------------------------------------------------------

; GCN-LABEL: {{^}}select_fneg_posk_src_add_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN: v_sub_f32_e32 [[ADD:v[0-9]+]], -4.0, [[X]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[ADD]], vcc
; GCN-NEXT: buffer_store_dword [[SELECT]]
define amdgpu_kernel void @select_fneg_posk_src_add_f32(i32 %c) #0 {
  %x = load volatile float, ptr addrspace(1) undef
  %y = load volatile float, ptr addrspace(1) undef
  %cmp = icmp eq i32 %c,
0 729 %add = fadd float %x, 4.0 730 %fneg = fsub float -0.0, %add 731 %select = select i1 %cmp, float %fneg, float 2.0 732 store volatile float %select, ptr addrspace(1) undef 733 ret void 734} 735 736; GCN-LABEL: {{^}}select_fneg_posk_src_sub_f32: 737; GCN: buffer_load_dword [[X:v[0-9]+]] 738 739; GCN: v_sub_f32_e32 [[ADD:v[0-9]+]], 4.0, [[X]] 740; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[ADD]], vcc 741; GCN-NEXT: buffer_store_dword [[SELECT]] 742define amdgpu_kernel void @select_fneg_posk_src_sub_f32(i32 %c) #0 { 743 %x = load volatile float, ptr addrspace(1) undef 744 %cmp = icmp eq i32 %c, 0 745 %add = fsub float %x, 4.0 746 %fneg = fsub float -0.0, %add 747 %select = select i1 %cmp, float %fneg, float 2.0 748 store volatile float %select, ptr addrspace(1) undef 749 ret void 750} 751 752; GCN-LABEL: {{^}}select_fneg_posk_src_mul_f32: 753; GCN: buffer_load_dword [[X:v[0-9]+]] 754 755; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[X]] 756; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[MUL]], vcc 757; GCN-NEXT: buffer_store_dword [[SELECT]] 758define amdgpu_kernel void @select_fneg_posk_src_mul_f32(i32 %c) #0 { 759 %x = load volatile float, ptr addrspace(1) undef 760 %cmp = icmp eq i32 %c, 0 761 %mul = fmul float %x, 4.0 762 %fneg = fsub float -0.0, %mul 763 %select = select i1 %cmp, float %fneg, float 2.0 764 store volatile float %select, ptr addrspace(1) undef 765 ret void 766} 767 768; GCN-LABEL: {{^}}select_fneg_posk_src_fma_f32: 769; GCN: buffer_load_dword [[X:v[0-9]+]] 770; GCN: buffer_load_dword [[Z:v[0-9]+]] 771 772; GCN: v_fma_f32 [[FMA:v[0-9]+]], [[X]], -4.0, -[[Z]] 773; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[FMA]], vcc 774; GCN-NEXT: buffer_store_dword [[SELECT]] 775define amdgpu_kernel void @select_fneg_posk_src_fma_f32(i32 %c) #0 { 776 %x = load volatile float, ptr addrspace(1) undef 777 %z = load volatile float, ptr addrspace(1) undef 778 %cmp = icmp eq i32 %c, 0 779 %fma = call float @llvm.fma.f32(float %x, float 4.0, float %z) 
780 %fneg = fsub float -0.0, %fma 781 %select = select i1 %cmp, float %fneg, float 2.0 782 store volatile float %select, ptr addrspace(1) undef 783 ret void 784} 785 786; GCN-LABEL: {{^}}select_fneg_posk_src_fmad_f32: 787; GCN: buffer_load_dword [[X:v[0-9]+]] 788; GCN: buffer_load_dword [[Z:v[0-9]+]] 789 790; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[X]], vcc 791; GCN-NEXT: buffer_store_dword [[SELECT]] 792define amdgpu_kernel void @select_fneg_posk_src_fmad_f32(i32 %c) #0 { 793 %x = load volatile float, ptr addrspace(1) undef 794 %z = load volatile float, ptr addrspace(1) undef 795 %cmp = icmp eq i32 %c, 0 796 %fmad = call float @llvm.fmuladd.f32(float %x, float 4.0, float %z) 797 %fneg = fsub float -0.0, %fmad 798 %select = select i1 %cmp, float %fneg, float 2.0 799 store volatile float %select, ptr addrspace(1) undef 800 ret void 801} 802 803; FIXME: This one should fold to rcp 804; GCN-LABEL: {{^}}select_fneg_posk_src_rcp_f32: 805; GCN: buffer_load_dword [[X:v[0-9]+]] 806 807; GCN: v_rcp_f32_e64 [[RCP:v[0-9]+]], -[[X]] 808; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[RCP]], vcc 809; GCN-NEXT: buffer_store_dword [[SELECT]] 810define amdgpu_kernel void @select_fneg_posk_src_rcp_f32(i32 %c) #0 { 811 %x = load volatile float, ptr addrspace(1) undef 812 %y = load volatile float, ptr addrspace(1) undef 813 %cmp = icmp eq i32 %c, 0 814 %rcp = call float @llvm.amdgcn.rcp.f32(float %x) 815 %fneg = fsub float -0.0, %rcp 816 %select = select i1 %cmp, float %fneg, float 2.0 817 store volatile float %select, ptr addrspace(1) undef 818 ret void 819} 820 821; GCN-LABEL: {{^}}mul_select_negfabs_posk_inv2pi_f32: 822; GCN: buffer_load_dword [[X:v[0-9]+]] 823; GCN: buffer_load_dword [[Y:v[0-9]+]] 824 825; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 0 826; GCN-DAG: s_cselect_b64 [[VCC:.*]], -1, 0 827 828; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e22f983 829; SI: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[K]], -|[[X]]|, [[VCC]] 830; SI: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]] 
831 832; VI: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 0.15915494, -|[[X]]|, [[VCC]] 833; VI: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]] 834define amdgpu_kernel void @mul_select_negfabs_posk_inv2pi_f32(i32 %c) #0 { 835 %x = load volatile float, ptr addrspace(1) undef 836 %y = load volatile float, ptr addrspace(1) undef 837 %cmp = icmp eq i32 %c, 0 838 %fabs.x = call float @llvm.fabs.f32(float %x) 839 %fneg.fabs.x = fneg float %fabs.x 840 %select = select i1 %cmp, float %fneg.fabs.x, float 0x3FC45F3060000000 841 %add = fmul float %select, %y 842 store volatile float %add, ptr addrspace(1) undef 843 ret void 844} 845 846; GCN-LABEL: {{^}}mul_select_posk_inv2pi_negfabs_f32: 847; GCN: buffer_load_dword [[X:v[0-9]+]] 848; GCN: buffer_load_dword [[Y:v[0-9]+]] 849 850; GCN-DAG: s_cmp_lg_u32 s{{[0-9]+}}, 0 851 852; GCN-DAG: s_cselect_b64 [[VCC:.*]], -1, 0 853 854; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e22f983 855; SI: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[K]], -|[[X]]|, [[VCC]] 856; SI: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]] 857 858 859; VI: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 0.15915494, -|[[X]]|, [[VCC]] 860; VI: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]] 861define amdgpu_kernel void @mul_select_posk_inv2pi_negfabs_f32(i32 %c) #0 { 862 %x = load volatile float, ptr addrspace(1) undef 863 %y = load volatile float, ptr addrspace(1) undef 864 %cmp = icmp eq i32 %c, 0 865 %fabs.x = call float @llvm.fabs.f32(float %x) 866 %fneg.fabs.x = fneg float %fabs.x 867 %select = select i1 %cmp, float 0x3FC45F3060000000, float %fneg.fabs.x 868 %add = fmul float %select, %y 869 store volatile float %add, ptr addrspace(1) undef 870 ret void 871} 872 873; GCN-LABEL: {{^}}mul_select_negfabs_negk_inv2pi_f32: 874; GCN: buffer_load_dword [[X:v[0-9]+]] 875; GCN: buffer_load_dword [[Y:v[0-9]+]] 876; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xbe22f983 877; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[K]], -|[[X]]|, s 878; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]] 879define 
amdgpu_kernel void @mul_select_negfabs_negk_inv2pi_f32(i32 %c) #0 { 880 %x = load volatile float, ptr addrspace(1) undef 881 %y = load volatile float, ptr addrspace(1) undef 882 %cmp = icmp eq i32 %c, 0 883 %fabs.x = call float @llvm.fabs.f32(float %x) 884 %fneg.fabs.x = fneg float %fabs.x 885 %select = select i1 %cmp, float %fneg.fabs.x, float 0xBFC45F3060000000 886 %add = fmul float %select, %y 887 store volatile float %add, ptr addrspace(1) undef 888 ret void 889} 890 891; GCN-LABEL: {{^}}mul_select_negk_inv2pi_negfabs_f32: 892; GCN: buffer_load_dword [[X:v[0-9]+]] 893; GCN: buffer_load_dword [[Y:v[0-9]+]] 894 895; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xbe22f983 896; GCN: s_cmp_lg_u32 897; GCN: s_cselect_b64 s[0:1], -1, 0 898; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[K]], -|[[X]]|, s[0:1] 899; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]] 900define amdgpu_kernel void @mul_select_negk_inv2pi_negfabs_f32(i32 %c) #0 { 901 %x = load volatile float, ptr addrspace(1) undef 902 %y = load volatile float, ptr addrspace(1) undef 903 %cmp = icmp eq i32 %c, 0 904 %fabs.x = call float @llvm.fabs.f32(float %x) 905 %fneg.fabs.x = fneg float %fabs.x 906 %select = select i1 %cmp, float 0xBFC45F3060000000, float %fneg.fabs.x 907 %add = fmul float %select, %y 908 store volatile float %add, ptr addrspace(1) undef 909 ret void 910} 911 912; GCN-LABEL: {{^}}mul_select_negfabs_posk_0_f32: 913; GCN: buffer_load_dword [[X:v[0-9]+]] 914; GCN: buffer_load_dword [[Y:v[0-9]+]] 915; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 0 916; GCN: s_cselect_b64 [[VCC:.*]], -1, 0 917; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 0, -|[[X]]|, [[VCC]] 918; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]] 919define amdgpu_kernel void @mul_select_negfabs_posk_0_f32(i32 %c) #0 { 920 %x = load volatile float, ptr addrspace(1) undef 921 %y = load volatile float, ptr addrspace(1) undef 922 %cmp = icmp eq i32 %c, 0 923 %fabs.x = call float @llvm.fabs.f32(float %x) 924 %fneg.fabs.x = fneg float %fabs.x 925 
%select = select i1 %cmp, float %fneg.fabs.x, float 0.0 926 %add = fmul float %select, %y 927 store volatile float %add, ptr addrspace(1) undef 928 ret void 929} 930 931 932; GCN-LABEL: {{^}}mul_select_posk_0_negfabs_f32: 933; GCN: buffer_load_dword [[X:v[0-9]+]] 934; GCN: buffer_load_dword [[Y:v[0-9]+]] 935 936; GCN-DAG: s_cmp_lg_u32 s{{[0-9]+}}, 0 937; GCN: s_cselect_b64 [[VCC:.*]], -1, 0 938; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 0, -|[[X]]|, [[VCC]] 939; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]] 940define amdgpu_kernel void @mul_select_posk_0_negfabs_f32(i32 %c) #0 { 941 %x = load volatile float, ptr addrspace(1) undef 942 %y = load volatile float, ptr addrspace(1) undef 943 %cmp = icmp eq i32 %c, 0 944 %fabs.x = call float @llvm.fabs.f32(float %x) 945 %fneg.fabs.x = fneg float %fabs.x 946 %select = select i1 %cmp, float 0.0, float %fneg.fabs.x 947 %add = fmul float %select, %y 948 store volatile float %add, ptr addrspace(1) undef 949 ret void 950} 951 952; GCN-LABEL: {{^}}mul_select_negfabs_negk_0_f32: 953; GCN: buffer_load_dword [[X:v[0-9]+]] 954; GCN: buffer_load_dword [[Y:v[0-9]+]] 955 956; GCN: v_bfrev_b32_e32 [[NEG0:v[0-9]+]], 1 957; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[NEG0]], -|[[X]]|, s{{\[[0-9]+:[0-9]+\]}} 958; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]] 959define amdgpu_kernel void @mul_select_negfabs_negk_0_f32(i32 %c) #0 { 960 %x = load volatile float, ptr addrspace(1) undef 961 %y = load volatile float, ptr addrspace(1) undef 962 %cmp = icmp eq i32 %c, 0 963 %fabs.x = call float @llvm.fabs.f32(float %x) 964 %fneg.fabs.x = fneg float %fabs.x 965 %select = select i1 %cmp, float %fneg.fabs.x, float -0.0 966 %add = fmul float %select, %y 967 store volatile float %add, ptr addrspace(1) undef 968 ret void 969} 970 971; GCN-LABEL: {{^}}mul_select_negk_0_negfabs_f32: 972; GCN: buffer_load_dword [[X:v[0-9]+]] 973; GCN: buffer_load_dword [[Y:v[0-9]+]] 974 975; GCN: v_bfrev_b32_e32 [[NEG0:v[0-9]+]], 1 976; GCN: s_cmp_lg_u32 977; 
GCN: s_cselect_b64 s[0:1], -1, 0 978; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[NEG0]], -|[[X]]|, s[0:1] 979; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]] 980define amdgpu_kernel void @mul_select_negk_0_negfabs_f32(i32 %c) #0 { 981 %x = load volatile float, ptr addrspace(1) undef 982 %y = load volatile float, ptr addrspace(1) undef 983 %cmp = icmp eq i32 %c, 0 984 %fabs.x = call float @llvm.fabs.f32(float %x) 985 %fneg.fabs.x = fneg float %fabs.x 986 %select = select i1 %cmp, float -0.0, float %fneg.fabs.x 987 %add = fmul float %select, %y 988 store volatile float %add, ptr addrspace(1) undef 989 ret void 990} 991 992 993declare float @llvm.fabs.f32(float) #1 994declare float @llvm.fma.f32(float, float, float) #1 995declare float @llvm.fmuladd.f32(float, float, float) #1 996declare float @llvm.amdgcn.rcp.f32(float) #1 997declare float @llvm.amdgcn.rcp.legacy(float) #1 998declare float @llvm.amdgcn.fmul.legacy(float, float) #1 999 1000attributes #0 = { nounwind } 1001attributes #1 = { nounwind readnone } 1002