; RUN: llc < %s -mtriple=i386-linux-gnu -o - | FileCheck %s

; Two i32 selects are driven by one "is %v1 negative" compare. Lowering the
; CMOV pseudos created for this IR is expected to emit exactly one js.
; CHECK-LABEL: foo1:
; CHECK: js
; CHECK-NOT: js
define i32 @foo1(i32 %v1, i32 %v2, i32 %v3) nounwind {
entry:
  %is.neg = icmp slt i32 %v1, 0
  %rhs = select i1 %is.neg, i32 %v2, i32 %v3
  %lhs = select i1 %is.neg, i32 %v1, i32 %v2
  %diff = sub i32 %lhs, %rhs
  ret i32 %diff
}

; Two i32 selects whose conditions are opposites of each other (slt 0 and
; sge 0 on the same value). This covers the opposite-condition path of the
; CMOV pseudo lowering: exactly one js is expected, and no jns at all.
; CHECK-LABEL: foo11:
; CHECK: js
; CHECK-NOT: js
; CHECK-NOT: jns
define i32 @foo11(i32 %v1, i32 %v2, i32 %v3) nounwind {
entry:
  %is.neg = icmp slt i32 %v1, 0
  %rhs = select i1 %is.neg, i32 %v2, i32 %v3
  %is.nonneg = icmp sge i32 %v1, 0
  %lhs = select i1 %is.nonneg, i32 %v1, i32 %v2
  %diff = sub i32 %lhs, %rhs
  ret i32 %diff
}

; i8 variant: both selects share one compare, and their results are
; sign-extended to i32 before the subtraction. Lowering the CMOV pseudos
; created for this IR is expected to emit exactly one js.
; CHECK-LABEL: foo2:
; CHECK: js
; CHECK-NOT: js
define i32 @foo2(i8 %v1, i8 %v2, i8 %v3) nounwind {
entry:
  %is.neg = icmp slt i8 %v1, 0
  %first = select i1 %is.neg, i8 %v2, i8 %v3
  %second = select i1 %is.neg, i8 %v1, i8 %v2
  %first.ext = sext i8 %first to i32
  %second.ext = sext i8 %second to i32
  %diff = sub i32 %first.ext, %second.ext
  ret i32 %diff
}

; Verifies that lowering the CMOV pseudos created for this IR emits exactly
; one js.
; CHECK-LABEL: foo3:
; CHECK: js
; CHECK-NOT: js
define i32 @foo3(i16 %v1, i16 %v2, i16 %v3) nounwind {
entry:
  ; i16 variant: one compare feeds both selects; the selected values are
  ; sign-extended to i32 before the subtraction.
  %is.neg = icmp slt i16 %v1, 0
  %first = select i1 %is.neg, i16 %v2, i16 %v3
  %second = select i1 %is.neg, i16 %v1, i16 %v2
  %first.ext = sext i16 %first to i32
  %second.ext = sext i16 %second to i32
  %diff = sub i32 %first.ext, %second.ext
  ret i32 %diff
}

; float variant: two selects keyed on the same "is %v1 negative" compare.
; Lowering the CMOV pseudos created for this IR is expected to emit exactly
; one js.
; CHECK-LABEL: foo4:
; CHECK: js
; CHECK-NOT: js
define float @foo4(i32 %v1, float %v2, float %v3, float %v4) nounwind {
entry:
  %is.neg = icmp slt i32 %v1, 0
  %lhs = select i1 %is.neg, float %v2, float %v3
  %rhs = select i1 %is.neg, float %v3, float %v4
  %diff = fsub float %lhs, %rhs
  ret float %diff
}

; double variant: two selects keyed on the same "is %v1 zero" compare.
; Lowering the CMOV pseudos created for this IR is expected to emit exactly
; one je.
; CHECK-LABEL: foo5:
; CHECK: je
; CHECK-NOT: je
define double @foo5(i32 %v1, double %v2, double %v3, double %v4) nounwind {
entry:
  %is.zero = icmp eq i32 %v1, 0
  %lhs = select i1 %is.zero, double %v2, double %v3
  %rhs = select i1 %is.zero, double %v3, double %v4
  %diff = fsub double %lhs, %rhs
  ret double %diff
}

; <4 x float> variant: two vector selects keyed on the same scalar
; "is %v1 zero" compare. Lowering the CMOV pseudos created for this IR is
; expected to emit exactly one je.
; CHECK-LABEL: foo6:
; CHECK: je
; CHECK-NOT: je
define <4 x float> @foo6(i32 %v1, <4 x float> %v2, <4 x float> %v3, <4 x float> %v4) nounwind {
entry:
  %is.zero = icmp eq i32 %v1, 0
  %lhs = select i1 %is.zero, <4 x float> %v2, <4 x float> %v3
  %rhs = select i1 %is.zero, <4 x float> %v3, <4 x float> %v4
  %diff = fsub <4 x float> %lhs, %rhs
  ret <4 x float> %diff
}

; Verifies that lowering the CMOV pseudos created for this IR emits exactly
; one je.
; CHECK-LABEL: foo7:
; CHECK: je
; CHECK-NOT: je
define <2 x double> @foo7(i32 %v1, <2 x double> %v2, <2 x double> %v3, <2 x double> %v4) nounwind {
entry:
  ; Both <2 x double> selects use the same scalar condition (%v1 == 0).
  %cmp = icmp eq i32 %v1, 0
  %t1 = select i1 %cmp, <2 x double> %v2, <2 x double> %v3
  %t2 = select i1 %cmp, <2 x double> %v3, <2 x double> %v4
  %sub = fsub <2 x double> %t1, %t2
  ret <2 x double> %sub
}

; This test checks that only a single ja gets generated in the final code
; for lowering the CMOV pseudos that get created for this IR. This combines
; all the supported types together into one long string of selects based
; on the same condition.
;
; NOTE(review): the i8 parameters %v2 and %v3 are not referenced anywhere in
; the body, so no i8 select is part of this chain.
; CHECK-LABEL: foo8:
; CHECK: ja
; CHECK-NOT: ja
define void @foo8(i32 %v1,
                  i8 %v2, i8 %v3,
                  i16 %v12, i16 %v13,
                  i32 %v22, i32 %v23,
                  float %v32, float %v33,
                  double %v42, double %v43,
                  <4 x float> %v52, <4 x float> %v53,
                  <2 x double> %v62, <2 x double> %v63,
                  <8 x float> %v72, <8 x float> %v73,
                  <4 x double> %v82, <4 x double> %v83,
                  <16 x float> %v92, <16 x float> %v93,
                  <8 x double> %v102, <8 x double> %v103,
                  ptr %dst) nounwind {
entry:
  ; Destination addresses, one per stored result, as byte offsets from %dst.
  %add.ptr11 = getelementptr inbounds i8, ptr %dst, i32 2

  %add.ptr21 = getelementptr inbounds i8, ptr %dst, i32 4

  %add.ptr31 = getelementptr inbounds i8, ptr %dst, i32 8

  %add.ptr41 = getelementptr inbounds i8, ptr %dst, i32 16

  %add.ptr51 = getelementptr inbounds i8, ptr %dst, i32 32

  %add.ptr61 = getelementptr inbounds i8, ptr %dst, i32 48

  %add.ptr71 = getelementptr inbounds i8, ptr %dst, i32 64

  %add.ptr81 = getelementptr inbounds i8, ptr %dst, i32 128

  ; NOTE(review): offsets 64 and 128 are reused here, so the <16 x float> and
  ; <8 x double> stores below target the same addresses as the <8 x float>
  ; and <4 x double> stores. Confirm the overlap is intentional.
  %add.ptr91 = getelementptr inbounds i8, ptr %dst, i32 64

  %add.ptr101 = getelementptr inbounds i8, ptr %dst, i32 128

  ; These operations are necessary, because select of two single use loads
  ; ends up getting optimized into a select of two leas, followed by a
  ; single load of the selected address.
  ; NOTE(review): every operand in this function is an argument, not a load;
  ; the rationale above may predate the current form of the test - confirm.
  %t13 = xor i16 %v13, 11
  %t23 = xor i32 %v23, 1234
  %t33 = fadd float %v33, %v32
  %t43 = fadd double %v43, %v42
  %t53 = fadd <4 x float> %v53, %v52
  %t63 = fadd <2 x double> %v63, %v62
  %t73 = fsub <8 x float> %v73, %v72
  %t83 = fsub <4 x double> %v83, %v82
  %t93 = fsub <16 x float> %v93, %v92
  %t103 = fsub <8 x double> %v103, %v102

  ; One unsigned compare drives every select below; the selects are kept
  ; back to back with nothing in between.
  %cmp = icmp ugt i32 %v1, 31
  %t11 = select i1 %cmp, i16 %v12, i16 %t13
  %t21 = select i1 %cmp, i32 %v22, i32 %t23
  %t31 = select i1 %cmp, float %v32, float %t33
  %t41 = select i1 %cmp, double %v42, double %t43
  %t51 = select i1 %cmp, <4 x float> %v52, <4 x float> %t53
  %t61 = select i1 %cmp, <2 x double> %v62, <2 x double> %t63
  %t71 = select i1 %cmp, <8 x float> %v72, <8 x float> %t73
  %t81 = select i1 %cmp, <4 x double> %v82, <4 x double> %t83
  %t91 = select i1 %cmp, <16 x float> %v92, <16 x float> %t93
  %t101 = select i1 %cmp, <8 x double> %v102, <8 x double> %t103

  ; Store every selected value so that each select has a use.
  store i16 %t11, ptr %add.ptr11, align 2
  store i32 %t21, ptr %add.ptr21, align 4
  store float %t31, ptr %add.ptr31, align 4
  store double %t41, ptr %add.ptr41, align 8
  store <4 x float> %t51, ptr %add.ptr51, align 16
  store <2 x double> %t61, ptr %add.ptr61, align 16
  store <8 x float> %t71, ptr %add.ptr71, align 32
  store <4 x double> %t81, ptr %add.ptr81, align 32
  store <16 x float> %t91, ptr %add.ptr91, align 32
  store <8 x double> %t101, ptr %add.ptr101, align 32

  ret void
}

; This test checks that only a single ja gets generated in the final code
; for lowering the CMOV pseudos that get created for this IR. All of the
; selects are based on the same condition.
; Contrary to my expectations, this doesn't exercise the code for
; CMOV_V8I1, CMOV_V16I1, CMOV_V32I1, or CMOV_V64I1.  Instead the selects all
; get lowered into vector length number of selects, which all eventually turn
; into a huge number of CMOV_GR8, which are all contiguous, so the optimization
; kicks in as long as CMOV_GR8 is supported.  I couldn't find a way to get
; CMOV_V*I1 pseudo-opcodes to get generated.  If a way exists to get CMOV_V*I1
; pseudo-opcodes to be generated, this test should be replaced with one that
; tests those opcodes.
;
; CHECK-LABEL: foo9:
; CHECK: ja
; CHECK-NOT: ja
define void @foo9(i32 %v1,
                  <8 x i1> %v12, <8 x i1> %v13,
                  <16 x i1> %v22, <16 x i1> %v23,
                  <32 x i1> %v32, <32 x i1> %v33,
                  <64 x i1> %v42, <64 x i1> %v43,
                  ptr %dst) nounwind {
entry:

  ; Destination addresses for the three wider results; the <8 x i1> result
  ; is stored directly at %dst.
  %add.ptr21 = getelementptr inbounds i8, ptr %dst, i32 4

  %add.ptr31 = getelementptr inbounds i8, ptr %dst, i32 8

  %add.ptr41 = getelementptr inbounds i8, ptr %dst, i32 16

  ; These operations are necessary, because select of two single use loads
  ; ends up getting optimized into a select of two leas, followed by a
  ; single load of the selected address.
  %t13 = xor <8 x i1> %v13, %v12
  %t23 = xor <16 x i1> %v23, %v22
  %t33 = xor <32 x i1> %v33, %v32
  %t43 = xor <64 x i1> %v43, %v42

  ; One unsigned compare drives all four mask-vector selects.
  %cmp = icmp ugt i32 %v1, 31
  %t11 = select i1 %cmp, <8 x i1> %v12, <8 x i1> %t13
  %t21 = select i1 %cmp, <16 x i1> %v22, <16 x i1> %t23
  %t31 = select i1 %cmp, <32 x i1> %v32, <32 x i1> %t33
  %t41 = select i1 %cmp, <64 x i1> %v42, <64 x i1> %t43

  store <8 x i1> %t11, ptr %dst, align 16
  store <16 x i1> %t21, ptr %add.ptr21, align 4
  store <32 x i1> %t31, ptr %add.ptr31, align 8
  store <64 x i1> %t41, ptr %add.ptr41, align 16

  ret void
}