; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -mtriple=x86_64-unknown-unknown -S | FileCheck %s

; This file exercises instcombine on the X86 XOP intrinsics declared at the
; bottom: the scalar vfrcz.sd / vfrcz.ss calls and the vpcom* integer compares.

;
; vfrcz.sd
;

; The constant inserted into element 1 is expected to be dropped, so the call
; takes %a directly.
define <2 x double> @test_vfrcz_sd(<2 x double> %a) {
; CHECK-LABEL: @test_vfrcz_sd(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> [[A:%.*]])
; CHECK-NEXT:    ret <2 x double> [[TMP1]]
;
  %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
  %2 = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %1)
  ret <2 x double> %2
}

; Only element 0 of the result is extracted; the insert into element 1 of the
; operand is expected to disappear while the call itself is kept.
define double @test_vfrcz_sd_0(double %a) {
; CHECK-LABEL: @test_vfrcz_sd_0(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> poison, double [[A:%.*]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x double> [[TMP2]], i64 0
; CHECK-NEXT:    ret double [[TMP3]]
;
  %1 = insertelement <2 x double> poison, double %a, i32 0
  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
  %3 = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %2)
  %4 = extractelement <2 x double> %3, i32 0
  ret double %4
}

; Extracting element 1 of the result is expected to fold to the constant 0.0,
; removing the call entirely.
define double @test_vfrcz_sd_1(double %a) {
; CHECK-LABEL: @test_vfrcz_sd_1(
; CHECK-NEXT:    ret double 0.000000e+00
;
  %1 = insertelement <2 x double> poison, double %a, i32 0
  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
  %3 = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %2)
  %4 = extractelement <2 x double> %3, i32 1
  ret double %4
}

;
; vfrcz.ss
;

; The constants inserted into elements 1-3 are expected to be dropped, so the
; call takes %a directly.
define <4 x float> @test_vfrcz_ss(<4 x float> %a) {
; CHECK-LABEL: @test_vfrcz_ss(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> [[A:%.*]])
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
  %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
  %4 = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %3)
  ret <4 x float> %4
}

; Only element 0 of the result is extracted; the inserts into elements 1-3 of
; the operand are expected to disappear while the call itself is kept.
define float @test_vfrcz_ss_0(float %a) {
; CHECK-LABEL: @test_vfrcz_ss_0(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i64 0
; CHECK-NEXT:    ret float [[TMP3]]
;
  %1 = insertelement <4 x float> poison, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %4)
  %6 = extractelement <4 x float> %5, i32 0
  ret float %6
}

; Extracting element 3 of the result is expected to fold to the constant 0.0,
; removing the call entirely.
define float @test_vfrcz_ss_3(float %a) {
; CHECK-LABEL: @test_vfrcz_ss_3(
; CHECK-NEXT:    ret float 0.000000e+00
;
  %1 = insertelement <4 x float> poison, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %4)
  %6 = extractelement <4 x float> %5, i32 3
  ret float %6
}

;
; vpcom lt / le on <2 x i64>
;
; Each vpcom* call below is expected to become a generic icmp with the matching
; predicate, followed by a sext of the i1 vector back to the element type.
;

define <2 x i64> @cmp_slt_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: @cmp_slt_v2i64(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <2 x i64> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
; CHECK-NEXT:    ret <2 x i64> [[TMP2]]
;
  %1 = tail call <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i64> %1
}

define <2 x i64> @cmp_ult_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: @cmp_ult_v2i64(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <2 x i64> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
; CHECK-NEXT:    ret <2 x i64> [[TMP2]]
;
  %1 = tail call <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i64> %1
}

define <2 x i64> @cmp_sle_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: @cmp_sle_v2i64(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp sle <2 x i64> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
; CHECK-NEXT:    ret <2 x i64> [[TMP2]]
;
  %1 = tail call <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i64> %1
}

define <2 x i64> @cmp_ule_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: @cmp_ule_v2i64(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule <2 x i64> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
; CHECK-NEXT:    ret <2 x i64> [[TMP2]]
;
  %1 = tail call <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i64> %1
}

;
; vpcom gt / ge on <4 x i32>
;

define <4 x i32> @cmp_sgt_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @cmp_sgt_v4i32(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %1 = tail call <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %1
}

define <4 x i32> @cmp_ugt_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @cmp_ugt_v4i32(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt <4 x i32> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %1 = tail call <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %1
}

define <4 x i32> @cmp_sge_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @cmp_sge_v4i32(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp sge <4 x i32> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %1 = tail call <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %1
}

define <4 x i32> @cmp_uge_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @cmp_uge_v4i32(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp uge <4 x i32> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %1 = tail call <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %1
}

;
; vpcom eq / ne on <8 x i16>
;
; The signed and unsigned intrinsic variants are expected to produce the same
; icmp eq / icmp ne.
;

define <8 x i16> @cmp_seq_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: @cmp_seq_v8i16(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
;
  %1 = tail call <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %1
}

define <8 x i16> @cmp_ueq_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: @cmp_ueq_v8i16(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
;
  %1 = tail call <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %1
}

define <8 x i16> @cmp_sne_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: @cmp_sne_v8i16(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <8 x i16> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
;
  %1 = tail call <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %1
}

define <8 x i16> @cmp_une_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: @cmp_une_v8i16(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <8 x i16> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
;
  %1 = tail call <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %1
}

;
; vpcom true / false on <16 x i8>
;
; These are expected to fold to constants regardless of the operands:
; all-ones for the "true" variants and zeroinitializer for the "false" ones.
;

define <16 x i8> @cmp_strue_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: @cmp_strue_v16i8(
; CHECK-NEXT:    ret <16 x i8> splat (i8 -1)
;
  %1 = tail call <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %1
}

define <16 x i8> @cmp_utrue_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: @cmp_utrue_v16i8(
; CHECK-NEXT:    ret <16 x i8> splat (i8 -1)
;
  %1 = tail call <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %1
}

define <16 x i8> @cmp_sfalse_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: @cmp_sfalse_v16i8(
; CHECK-NEXT:    ret <16 x i8> zeroinitializer
;
  %1 = tail call <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %1
}

define <16 x i8> @cmp_ufalse_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: @cmp_ufalse_v16i8(
; CHECK-NEXT:    ret <16 x i8> zeroinitializer
;
  %1 = tail call <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %1
}

;
; Intrinsic declarations. Not every intrinsic declared here is called by a test
; above; the groups are ordered b/w/d/q, signed variants first.
;

declare <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double>) nounwind readnone
declare <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float>) nounwind readnone

declare <16 x i8> @llvm.x86.xop.vpcomltb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomltw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomltd(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomltub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomltuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomltud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.x86.xop.vpcomleb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomlew(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomled(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomleub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomleuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomleud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.x86.xop.vpcomgtb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomgtw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomgtq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomgtub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomgtuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomgtuq(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.x86.xop.vpcomgeb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomgew(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomgeq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomgeub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomgeuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomgeuq(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.x86.xop.vpcomeqb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomeqd(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomeqq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomequb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomequd(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomequq(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.x86.xop.vpcomneb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomned(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomneq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomneub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomneud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomneuq(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomfalsew(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomfalsed(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomfalseq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomfalseuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomfalseud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomfalseuq(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomtruew(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomtrued(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomtrueq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomtrueuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomtrueud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomtrueuq(<2 x i64>, <2 x i64>) nounwind readnone