; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI %s

; Codegen tests for the llvm.amdgcn.div.scale.f32 / .f64 intrinsics.
;
; Every kernel below calls the intrinsic as div.scale(%a, %b, <i1 select>),
; keeps only element 0 of the returned { value, i1 } pair and stores it to
; %out.  The checks pin the order of the three source operands of the emitted
; v_div_scale_* instruction, where A / B name the register holding the first /
; second intrinsic argument:
;
;   select = false  ->  B, B, A
;   select = true   ->  A, B, A
;
; Kernels whose name ends in _1 pass select = false, those ending in _2 pass
; select = true.
;
; NOTE(review): FileCheck is run with -enable-var-scope, so the [[A]], [[B]],
; [[RESULT0]] ... captures are expected to be local to each label section --
; confirm against the FileCheck documentation before reusing a capture across
; kernels.

declare i32 @llvm.amdgcn.workitem.id.x() #1
declare { float, i1 } @llvm.amdgcn.div.scale.f32(float, float, i1) #1
declare { double, i1 } @llvm.amdgcn.div.scale.f64(double, double, i1) #1
declare float @llvm.fabs.f32(float) #1

; --- Both operands loaded per work-item (volatile loads of in[tid], in[tid+1]) ---

; f32, select = false.
; SI-LABEL: {{^}}test_div_scale_f32_1:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
; SI: buffer_store_dword [[RESULT0]]
; SI: s_endpgm
define amdgpu_kernel void @test_div_scale_f32_1(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
  %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1

  %a = load volatile float, ptr addrspace(1) %gep.0, align 4
  %b = load volatile float, ptr addrspace(1) %gep.1, align 4

  %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
  %result0 = extractvalue { float, i1 } %result, 0
  store float %result0, ptr addrspace(1) %out, align 4
  ret void
}

; f32, select = true.
; SI-LABEL: {{^}}test_div_scale_f32_2:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
; SI: buffer_store_dword [[RESULT0]]
; SI: s_endpgm
define amdgpu_kernel void @test_div_scale_f32_2(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
  %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1

  %a = load volatile float, ptr addrspace(1) %gep.0, align 4
  %b = load volatile float, ptr addrspace(1) %gep.1, align 4

  %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
  %result0 = extractvalue { float, i1 } %result, 0
  store float %result0, ptr addrspace(1) %out, align 4
  ret void
}

; f64, select = false.  The %aptr argument is not referenced by the body.
; SI-LABEL: {{^}}test_div_scale_f64_1:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
; SI: buffer_store_dwordx2 [[RESULT0]]
; SI: s_endpgm
define amdgpu_kernel void @test_div_scale_f64_1(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %in) nounwind {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid
  %gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1

  %a = load volatile double, ptr addrspace(1) %gep.0, align 8
  %b = load volatile double, ptr addrspace(1) %gep.1, align 8

  %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
  %result0 = extractvalue { double, i1 } %result, 0
  store double %result0, ptr addrspace(1) %out, align 8
  ret void
}

; f64, select = true.  The %aptr argument is not referenced by the body.
; SI-LABEL: {{^}}test_div_scale_f64_2:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
; SI: buffer_store_dwordx2 [[RESULT0]]
; SI: s_endpgm
define amdgpu_kernel void @test_div_scale_f64_2(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %in) nounwind {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid
  %gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1

  %a = load volatile double, ptr addrspace(1) %gep.0, align 8
  %b = load volatile double, ptr addrspace(1) %gep.1, align 8

  %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
  %result0 = extractvalue { double, i1 } %result, 0
  store double %result0, ptr addrspace(1) %out, align 8
  ret void
}

; --- f32: one operand is a kernel argument (checked as an s_load_dword ---
; --- result), the other is loaded per work-item from in[tid]           ---

; Numerator %a is the kernel argument, select = false.
; SI-LABEL: {{^}}test_div_scale_f32_scalar_num_1:
; SI-DAG: buffer_load_dword [[B:v[0-9]+]]
; SI-DAG: s_load_dword [[A:s[0-9]+]]
; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
; SI: buffer_store_dword [[RESULT0]]
; SI: s_endpgm
define amdgpu_kernel void @test_div_scale_f32_scalar_num_1(ptr addrspace(1) %out, ptr addrspace(1) %in, float %a) nounwind {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep = getelementptr float, ptr addrspace(1) %in, i32 %tid

  %b = load float, ptr addrspace(1) %gep, align 4

  %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
  %result0 = extractvalue { float, i1 } %result, 0
  store float %result0, ptr addrspace(1) %out, align 4
  ret void
}

; Numerator %a is the kernel argument, select = true.
; SI-LABEL: {{^}}test_div_scale_f32_scalar_num_2:
; SI-DAG: buffer_load_dword [[B:v[0-9]+]]
; SI-DAG: s_load_dword [[A:s[0-9]+]]
; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
; SI: buffer_store_dword [[RESULT0]]
; SI: s_endpgm
define amdgpu_kernel void @test_div_scale_f32_scalar_num_2(ptr addrspace(1) %out, ptr addrspace(1) %in, float %a) nounwind {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep = getelementptr float, ptr addrspace(1) %in, i32 %tid

  %b = load float, ptr addrspace(1) %gep, align 4

  %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
  %result0 = extractvalue { float, i1 } %result, 0
  store float %result0, ptr addrspace(1) %out, align 4
  ret void
}

; Denominator %b is the kernel argument, select = false.
; SI-LABEL: {{^}}test_div_scale_f32_scalar_den_1:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]]
; SI-DAG: s_load_dword [[B:s[0-9]+]]
; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
; SI: buffer_store_dword [[RESULT0]]
; SI: s_endpgm
define amdgpu_kernel void @test_div_scale_f32_scalar_den_1(ptr addrspace(1) %out, ptr addrspace(1) %in, float %b) nounwind {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep = getelementptr float, ptr addrspace(1) %in, i32 %tid

  %a = load float, ptr addrspace(1) %gep, align 4

  %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
  %result0 = extractvalue { float, i1 } %result, 0
  store float %result0, ptr addrspace(1) %out, align 4
  ret void
}

; Denominator %b is the kernel argument, select = true.
; SI-LABEL: {{^}}test_div_scale_f32_scalar_den_2:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]]
; SI-DAG: s_load_dword [[B:s[0-9]+]]
; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
; SI: buffer_store_dword [[RESULT0]]
; SI: s_endpgm
define amdgpu_kernel void @test_div_scale_f32_scalar_den_2(ptr addrspace(1) %out, ptr addrspace(1) %in, float %b) nounwind {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep = getelementptr float, ptr addrspace(1) %in, i32 %tid

  %a = load float, ptr addrspace(1) %gep, align 4

  %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
  %result0 = extractvalue { float, i1 } %result, 0
  store float %result0, ptr addrspace(1) %out, align 4
  ret void
}

; --- f64: one operand is a kernel argument (checked as an s_load_dwordx2 ---
; --- at offset 0xd), the other is loaded per work-item from in[tid]      ---

; Numerator %a is the kernel argument, select = false.
; SI-LABEL: {{^}}test_div_scale_f64_scalar_num_1:
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]]
; SI-DAG: s_load_dwordx2 [[A:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
; SI: buffer_store_dwordx2 [[RESULT0]]
; SI: s_endpgm
define amdgpu_kernel void @test_div_scale_f64_scalar_num_1(ptr addrspace(1) %out, ptr addrspace(1) %in, double %a) nounwind {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep = getelementptr double, ptr addrspace(1) %in, i32 %tid

  %b = load double, ptr addrspace(1) %gep, align 8

  %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
  %result0 = extractvalue { double, i1 } %result, 0
  store double %result0, ptr addrspace(1) %out, align 8
  ret void
}

; Numerator %a is the kernel argument, select = true.
; SI-LABEL: {{^}}test_div_scale_f64_scalar_num_2:
; SI-DAG: s_load_dwordx2 [[A:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]]
; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
; SI: buffer_store_dwordx2 [[RESULT0]]
; SI: s_endpgm
define amdgpu_kernel void @test_div_scale_f64_scalar_num_2(ptr addrspace(1) %out, ptr addrspace(1) %in, double %a) nounwind {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep = getelementptr double, ptr addrspace(1) %in, i32 %tid

  %b = load double, ptr addrspace(1) %gep, align 8

  %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
  %result0 = extractvalue { double, i1 } %result, 0
  store double %result0, ptr addrspace(1) %out, align 8
  ret void
}

; Denominator %b is the kernel argument, select = false.
; SI-LABEL: {{^}}test_div_scale_f64_scalar_den_1:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; SI-DAG: s_load_dwordx2 [[B:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
; SI: buffer_store_dwordx2 [[RESULT0]]
; SI: s_endpgm
define amdgpu_kernel void @test_div_scale_f64_scalar_den_1(ptr addrspace(1) %out, ptr addrspace(1) %in, double %b) nounwind {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep = getelementptr double, ptr addrspace(1) %in, i32 %tid

  %a = load double, ptr addrspace(1) %gep, align 8

  %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
  %result0 = extractvalue { double, i1 } %result, 0
  store double %result0, ptr addrspace(1) %out, align 8
  ret void
}

; Denominator %b is the kernel argument, select = true.
; SI-LABEL: {{^}}test_div_scale_f64_scalar_den_2:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; SI-DAG: s_load_dwordx2 [[B:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
; SI: buffer_store_dwordx2 [[RESULT0]]
; SI: s_endpgm
define amdgpu_kernel void @test_div_scale_f64_scalar_den_2(ptr addrspace(1) %out, ptr addrspace(1) %in, double %b) nounwind {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep = getelementptr double, ptr addrspace(1) %in, i32 %tid

  %a = load double, ptr addrspace(1) %gep, align 8

  %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
  %result0 = extractvalue { double, i1 } %result, 0
  store double %result0, ptr addrspace(1) %out, align 8
  ret void
}

; --- Both operands are kernel arguments, separated by [8 x i32] padding   ---
; --- arguments.  The checks expect exactly one of the two scalar values   ---
; --- to be copied into vector registers (v_mov_b32) before the div_scale. ---

; f32, select = false: %a is the operand copied to a VGPR ([[VA]]).
; SI-LABEL: {{^}}test_div_scale_f32_all_scalar_1:
; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0x1c
; SI: v_mov_b32_e32 [[VA:v[0-9]+]], [[A]]
; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[VA]]
; SI: buffer_store_dword [[RESULT0]]
; SI: s_endpgm
define amdgpu_kernel void @test_div_scale_f32_all_scalar_1(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b) nounwind {
  %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
  %result0 = extractvalue { float, i1 } %result, 0
  store float %result0, ptr addrspace(1) %out, align 4
  ret void
}

; f32, select = true: %b is the operand copied to a VGPR ([[VB]]).
; SI-LABEL: {{^}}test_div_scale_f32_all_scalar_2:
; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0x1c
; SI: v_mov_b32_e32 [[VB:v[0-9]+]], [[B]]
; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[VB]], [[A]]
; SI: buffer_store_dword [[RESULT0]]
; SI: s_endpgm
define amdgpu_kernel void @test_div_scale_f32_all_scalar_2(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b) nounwind {
  %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
  %result0 = extractvalue { float, i1 } %result, 0
  store float %result0, ptr addrspace(1) %out, align 4
  ret void
}

; f64, select = false: both halves of %a are copied to a VGPR pair.
; SI-LABEL: {{^}}test_div_scale_f64_all_scalar_1:
; SI-DAG: s_load_dwordx2 s[[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
; SI-DAG: s_load_dwordx2 [[B:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x1d
; SI-DAG: v_mov_b32_e32 v[[VA_LO:[0-9]+]], s[[A_LO]]
; SI-DAG: v_mov_b32_e32 v[[VA_HI:[0-9]+]], s[[A_HI]]
; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], v[[[VA_LO]]:[[VA_HI]]]
; SI: buffer_store_dwordx2 [[RESULT0]]
; SI: s_endpgm
define amdgpu_kernel void @test_div_scale_f64_all_scalar_1(ptr addrspace(1) %out, [8 x i32], double %a, [8 x i32], double %b) nounwind {
  %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
  %result0 = extractvalue { double, i1 } %result, 0
  store double %result0, ptr addrspace(1) %out, align 8
  ret void
}

; f64, select = true: both halves of %b are copied to a VGPR pair.
; SI-LABEL: {{^}}test_div_scale_f64_all_scalar_2:
; SI-DAG: s_load_dwordx2 [[A:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
; SI-DAG: s_load_dwordx2 s[[[B_LO:[0-9]+]]:[[B_HI:[0-9]+]]], {{s\[[0-9]+:[0-9]+\]}}, 0x1d
; SI-DAG: v_mov_b32_e32 v[[VB_LO:[0-9]+]], s[[B_LO]]
; SI-DAG: v_mov_b32_e32 v[[VB_HI:[0-9]+]], s[[B_HI]]
; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], v[[[VB_LO]]:[[VB_HI]]], [[A]]
; SI: buffer_store_dwordx2 [[RESULT0]]
; SI: s_endpgm
define amdgpu_kernel void @test_div_scale_f64_all_scalar_2(ptr addrspace(1) %out, [8 x i32], double %a, [8 x i32], double %b) nounwind {
  %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
  %result0 = extractvalue { double, i1 } %result, 0
  store double %result0, ptr addrspace(1) %out, align 8
  ret void
}

; --- One operand is a floating-point constant; the checks expect it to ---
; --- appear literally in the instruction's operand list                ---

; Numerator is the constant 1.0, select = false.
; SI-LABEL: {{^}}test_div_scale_f32_inline_imm_num:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[A]], 1.0
; SI: buffer_store_dword [[RESULT0]]
; SI: s_endpgm
define amdgpu_kernel void @test_div_scale_f32_inline_imm_num(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
  %a = load float, ptr addrspace(1) %gep.0, align 4

  %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float 1.0, float %a, i1 false) nounwind readnone
  %result0 = extractvalue { float, i1 } %result, 0
  store float %result0, ptr addrspace(1) %out, align 4
  ret void
}

; Denominator is the constant 2.0, select = false.
; SI-LABEL: {{^}}test_div_scale_f32_inline_imm_den:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], 2.0, 2.0, [[A]]
; SI: buffer_store_dword [[RESULT0]]
; SI: s_endpgm
define amdgpu_kernel void @test_div_scale_f32_inline_imm_den(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
  %a = load float, ptr addrspace(1) %gep.0, align 4

  %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float 2.0, i1 false) nounwind readnone
  %result0 = extractvalue { float, i1 } %result, 0
  store float %result0, ptr addrspace(1) %out, align 4
  ret void
}

; --- fneg / fabs applied to one operand (all with select = false).        ---
; --- fneg is expected to show up as a '-' prefix on the register operand; ---
; --- fabs is expected as a separate v_and_b32 with 0x7fffffff.            ---

; fneg on the numerator.
; SI-LABEL: {{^}}test_div_scale_f32_fneg_num:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], -[[A]]
; SI: buffer_store_dword [[RESULT0]]
; SI: s_endpgm
define amdgpu_kernel void @test_div_scale_f32_fneg_num(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
  %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1

  %a = load volatile float, ptr addrspace(1) %gep.0, align 4
  %b = load volatile float, ptr addrspace(1) %gep.1, align 4

  %a.fneg = fneg float %a

  %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a.fneg, float %b, i1 false) nounwind readnone
  %result0 = extractvalue { float, i1 } %result, 0
  store float %result0, ptr addrspace(1) %out, align 4
  ret void
}

; fabs on the numerator.
; SI-LABEL: {{^}}test_div_scale_f32_fabs_num:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI: v_and_b32_e32 [[ABS_A:v[0-9]+]], 0x7fffffff, [[A]]
; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[ABS_A]]
; SI: buffer_store_dword [[RESULT0]]
; SI: s_endpgm
define amdgpu_kernel void @test_div_scale_f32_fabs_num(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
  %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1

  %a = load volatile float, ptr addrspace(1) %gep.0, align 4
  %b = load volatile float, ptr addrspace(1) %gep.1, align 4

  %a.fabs = call float @llvm.fabs.f32(float %a) nounwind readnone

  %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a.fabs, float %b, i1 false) nounwind readnone
  %result0 = extractvalue { float, i1 } %result, 0
  store float %result0, ptr addrspace(1) %out, align 4
  ret void
}

; fneg on the denominator (negation expected on both uses of B).
; SI-LABEL: {{^}}test_div_scale_f32_fneg_den:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], -[[B]], -[[B]], [[A]]
; SI: buffer_store_dword [[RESULT0]]
; SI: s_endpgm
define amdgpu_kernel void @test_div_scale_f32_fneg_den(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
  %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1

  %a = load volatile float, ptr addrspace(1) %gep.0, align 4
  %b = load volatile float, ptr addrspace(1) %gep.1, align 4

  %b.fneg = fneg float %b

  %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b.fneg, i1 false) nounwind readnone
  %result0 = extractvalue { float, i1 } %result, 0
  store float %result0, ptr addrspace(1) %out, align 4
  ret void
}

; fabs on the denominator.
; SI-LABEL: {{^}}test_div_scale_f32_fabs_den:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI: v_and_b32_e32 [[ABS_B:v[0-9]+]], 0x7fffffff, [[B]]
; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[ABS_B]], [[ABS_B]], [[A]]
; SI: buffer_store_dword [[RESULT0]]
; SI: s_endpgm
define amdgpu_kernel void @test_div_scale_f32_fabs_den(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
  %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1

  %a = load volatile float, ptr addrspace(1) %gep.0, align 4
  %b = load volatile float, ptr addrspace(1) %gep.1, align 4

  %b.fabs = call float @llvm.fabs.f32(float %b) nounwind readnone

  %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b.fabs, i1 false) nounwind readnone
  %result0 = extractvalue { float, i1 } %result, 0
  store float %result0, ptr addrspace(1) %out, align 4
  ret void
}

; --- undef operands (all with select = false).  0x41000000 is the f32 bit ---
; --- pattern of 8.0; 0x40200000:0x00000000 is the f64 bit pattern of 8.0. ---

; Numerator 8.0, denominator undef: the constant is expected in the first
; and last source slots, with an arbitrary VGPR in the middle one.
; SI-LABEL: {{^}}test_div_scale_f32_val_undef_val:
; SI: s_mov_b32 [[K:s[0-9]+]], 0x41000000
; SI: v_div_scale_f32 v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, [[K]], v{{[0-9]+}}, [[K]]
define amdgpu_kernel void @test_div_scale_f32_val_undef_val(ptr addrspace(1) %out) #0 {
  %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float 8.0, float undef, i1 false)
  %result0 = extractvalue { float, i1 } %result, 0
  store float %result0, ptr addrspace(1) %out, align 4
  ret void
}

; Numerator undef, denominator 8.0: the constant is expected in the first
; two source slots, with an arbitrary VGPR in the last one.
; SI-LABEL: {{^}}test_div_scale_f32_undef_val_val:
; SI: s_mov_b32 [[K:s[0-9]+]], 0x41000000
; SI: v_div_scale_f32 v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[K]], v{{[0-9]+}}
define amdgpu_kernel void @test_div_scale_f32_undef_val_val(ptr addrspace(1) %out) #0 {
  %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float undef, float 8.0, i1 false)
  %result0 = extractvalue { float, i1 } %result, 0
  store float %result0, ptr addrspace(1) %out, align 4
  ret void
}

; Both operands undef: no occurrence of "v0" may precede the div_scale, which
; is expected to read s0, s0, v0 directly.
; SI-LABEL: {{^}}test_div_scale_f32_undef_undef_val:
; SI-NOT: v0
; SI: v_div_scale_f32 v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s0, s0, v0
define amdgpu_kernel void @test_div_scale_f32_undef_undef_val(ptr addrspace(1) %out) #0 {
  %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float undef, float undef, i1 false)
  %result0 = extractvalue { float, i1 } %result, 0
  store float %result0, ptr addrspace(1) %out, align 4
  ret void
}

; f64 variant of val_undef_val: the 64-bit constant is built from two s_mov_b32
; and expected in the first and last source slots, with v[0:1] in the middle.
; SI-LABEL: {{^}}test_div_scale_f64_val_undef_val:
; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0{{$}}
; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0x40200000
; SI: v_div_scale_f64 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s[[[K_LO]]:[[K_HI]]], v[0:1], s[[[K_LO]]:[[K_HI]]]
define amdgpu_kernel void @test_div_scale_f64_val_undef_val(ptr addrspace(1) %out) #0 {
  %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double 8.0, double undef, i1 false)
  %result0 = extractvalue { double, i1 } %result, 0
  store double %result0, ptr addrspace(1) %out, align 8
  ret void
}

; #0 is used by the undef kernels above; #1 by the intrinsic declarations.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone speculatable }