; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,SSE2
; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,SSE4
; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,AVX,AVX512
; RUN: opt < %s -passes="default<O3>" -S -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,SSE2
; RUN: opt < %s -passes="default<O3>" -S -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,SSE4
; RUN: opt < %s -passes="default<O3>" -S -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
; RUN: opt < %s -passes="default<O3>" -S -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,AVX,AVX512

; PR34072 - failure to canonicalize to (add (shuffle a, b),(shuffle a, b)) for optimal horizontal add patterns (with undemanded elements)
;
; Every function below follows the same recipe: extract all lanes of %a and %b,
; add adjacent lane pairs as scalars, insert the sums into a fresh vector, and
; finish with a single-source shufflevector whose mask selects (and possibly
; drops or reorders) those sums. The function-name suffix spells that final
; mask lane by lane in hex, with 'u' marking a poison (undemanded) mask lane.
; The expected output is checked per x86-64 micro-architecture level.

;
; v8i16
;

; Identity final mask: all eight pairwise sums are demanded.
; Result lanes 0-3 hold the %a pair sums, lanes 4-7 the %b pair sums.
define <8 x i16> @add_v8i16_01234567(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: @add_v8i16_01234567(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
; CHECK-NEXT:    [[TMP3:%.*]] = add <8 x i16> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
;
  ; Scalar pairwise sums of adjacent %a lanes.
  %a0 = extractelement <8 x i16> %a, i32 0
  %a1 = extractelement <8 x i16> %a, i32 1
  %a2 = extractelement <8 x i16> %a, i32 2
  %a3 = extractelement <8 x i16> %a, i32 3
  %a4 = extractelement <8 x i16> %a, i32 4
  %a5 = extractelement <8 x i16> %a, i32 5
  %a6 = extractelement <8 x i16> %a, i32 6
  %a7 = extractelement <8 x i16> %a, i32 7
  %a01 = add i16 %a0, %a1
  %a23 = add i16 %a2, %a3
  %a45 = add i16 %a4, %a5
  %a67 = add i16 %a6, %a7
  ; Scalar pairwise sums of adjacent %b lanes.
  %b0 = extractelement <8 x i16> %b, i32 0
  %b1 = extractelement <8 x i16> %b, i32 1
  %b2 = extractelement <8 x i16> %b, i32 2
  %b3 = extractelement <8 x i16> %b, i32 3
  %b4 = extractelement <8 x i16> %b, i32 4
  %b5 = extractelement <8 x i16> %b, i32 5
  %b6 = extractelement <8 x i16> %b, i32 6
  %b7 = extractelement <8 x i16> %b, i32 7
  %b01 = add i16 %b0, %b1
  %b23 = add i16 %b2, %b3
  %b45 = add i16 %b4, %b5
  %b67 = add i16 %b6, %b7
  ; Reassemble the sums into one vector, lane by lane.
  %hadd0 = insertelement <8 x i16> poison, i16 %a01, i32 0
  %hadd1 = insertelement <8 x i16> %hadd0, i16 %a23, i32 1
  %hadd2 = insertelement <8 x i16> %hadd1, i16 %a45, i32 2
  %hadd3 = insertelement <8 x i16> %hadd2, i16 %a67, i32 3
  %hadd4 = insertelement <8 x i16> %hadd3, i16 %b01, i32 4
  %hadd5 = insertelement <8 x i16> %hadd4, i16 %b23, i32 5
  %hadd6 = insertelement <8 x i16> %hadd5, i16 %b45, i32 6
  %hadd7 = insertelement <8 x i16> %hadd6, i16 %b67, i32 7
  ; The mask only indexes the first operand (indices 0-7); %a is never selected.
  %result = shufflevector <8 x i16> %hadd7, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %result
}

; Final mask lane 0 is poison, so the %a0 + %a1 sum is not demanded.
define <8 x i16> @add_v8i16_u1234567(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: @add_v8i16_u1234567(
; SSE2-NEXT:    [[SHIFT3:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 7, i32 poison>
; SSE2-NEXT:    [[TMP6:%.*]] = add <8 x i16> [[A]], [[SHIFT3]]
; SSE2-NEXT:    [[TMP7:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> <i32 poison, i32 3, i32 4, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> <i32 poison, i32 2, i32 5, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT:    [[HADD1:%.*]] = add <8 x i16> [[TMP7]], [[TMP4]]
; SSE2-NEXT:    [[HADD3:%.*]] = shufflevector <8 x i16> [[HADD1]], <8 x i16> [[TMP6]], <8 x i32> <i32 poison, i32 1, i32 2, i32 14, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT:    [[TMP3:%.*]] = add <8 x i16> [[TMP1]], [[TMP2]]
; SSE2-NEXT:    [[RESULT:%.*]] = shufflevector <8 x i16> [[HADD3]], <8 x i16> [[TMP3]], <8 x i32> <i32 poison, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; SSE2-NEXT:    ret <8 x i16> [[RESULT]]
;
; SSE4-LABEL: @add_v8i16_u1234567(
; SSE4-NEXT:    [[TMP5:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i32> <i32 poison, i32 2, i32 5, i32 6, i32 8, i32 10, i32 12, i32 14>
; SSE4-NEXT:    [[TMP6:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32> <i32 poison, i32 3, i32 4, i32 7, i32 9, i32 11, i32 13, i32 15>
; SSE4-NEXT:    [[TMP7:%.*]] = add <8 x i16> [[TMP5]], [[TMP6]]
; SSE4-NEXT:    ret <8 x i16> [[TMP7]]
;
; AVX-LABEL: @add_v8i16_u1234567(
; AVX-NEXT:    [[TMP5:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i32> <i32 poison, i32 2, i32 5, i32 6, i32 8, i32 10, i32 12, i32 14>
; AVX-NEXT:    [[TMP6:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32> <i32 poison, i32 3, i32 4, i32 7, i32 9, i32 11, i32 13, i32 15>
; AVX-NEXT:    [[TMP7:%.*]] = add <8 x i16> [[TMP5]], [[TMP6]]
; AVX-NEXT:    ret <8 x i16> [[TMP7]]
;
  ; Scalar pairwise sums of adjacent %a lanes.
  %a0 = extractelement <8 x i16> %a, i32 0
  %a1 = extractelement <8 x i16> %a, i32 1
  %a2 = extractelement <8 x i16> %a, i32 2
  %a3 = extractelement <8 x i16> %a, i32 3
  %a4 = extractelement <8 x i16> %a, i32 4
  %a5 = extractelement <8 x i16> %a, i32 5
  %a6 = extractelement <8 x i16> %a, i32 6
  %a7 = extractelement <8 x i16> %a, i32 7
  %a01 = add i16 %a0, %a1
  %a23 = add i16 %a2, %a3
  %a45 = add i16 %a4, %a5
  %a67 = add i16 %a6, %a7
  ; Scalar pairwise sums of adjacent %b lanes.
  %b0 = extractelement <8 x i16> %b, i32 0
  %b1 = extractelement <8 x i16> %b, i32 1
  %b2 = extractelement <8 x i16> %b, i32 2
  %b3 = extractelement <8 x i16> %b, i32 3
  %b4 = extractelement <8 x i16> %b, i32 4
  %b5 = extractelement <8 x i16> %b, i32 5
  %b6 = extractelement <8 x i16> %b, i32 6
  %b7 = extractelement <8 x i16> %b, i32 7
  %b01 = add i16 %b0, %b1
  %b23 = add i16 %b2, %b3
  %b45 = add i16 %b4, %b5
  %b67 = add i16 %b6, %b7
  ; Reassemble the sums into one vector, lane by lane.
  %hadd0 = insertelement <8 x i16> poison, i16 %a01, i32 0
  %hadd1 = insertelement <8 x i16> %hadd0, i16 %a23, i32 1
  %hadd2 = insertelement <8 x i16> %hadd1, i16 %a45, i32 2
  %hadd3 = insertelement <8 x i16> %hadd2, i16 %a67, i32 3
  %hadd4 = insertelement <8 x i16> %hadd3, i16 %b01, i32 4
  %hadd5 = insertelement <8 x i16> %hadd4, i16 %b23, i32 5
  %hadd6 = insertelement <8 x i16> %hadd5, i16 %b45, i32 6
  %hadd7 = insertelement <8 x i16> %hadd6, i16 %b67, i32 7
  ; Lane 0 of the mask is poison; the remaining lanes pass through in order.
  %result = shufflevector <8 x i16> %hadd7, <8 x i16> %a, <8 x i32> <i32 poison, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %result
}

; Final mask reverses the lanes and leaves result lane 2 poison, so source
; lane 5 (the %b2 + %b3 sum) is not demanded.
define <8 x i16> @add_v8i16_76u43210(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: @add_v8i16_76u43210(
; SSE2-NEXT:    [[SHIFT:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT:    [[TMP1:%.*]] = add <8 x i16> [[A]], [[SHIFT]]
; SSE2-NEXT:    [[SHIFT2:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 5, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT:    [[TMP2:%.*]] = add <8 x i16> [[B]], [[SHIFT2]]
; SSE2-NEXT:    [[SHIFT3:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 6>
; SSE2-NEXT:    [[TMP3:%.*]] = add <8 x i16> [[SHIFT3]], [[B]]
; SSE2-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32> <i32 2, i32 4, i32 6, i32 8, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT:    [[TMP5:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32> <i32 3, i32 5, i32 7, i32 9, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT:    [[TMP6:%.*]] = add <8 x i16> [[TMP4]], [[TMP5]]
; SSE2-NEXT:    [[HADD41:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP6]], <8 x i32> <i32 0, i32 8, i32 9, i32 10, i32 11, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT:    [[HADD6:%.*]] = shufflevector <8 x i16> [[HADD41]], <8 x i16> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 poison, i32 12, i32 poison>
; SSE2-NEXT:    [[RESULT:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[HADD6]], <8 x i32> <i32 7, i32 14, i32 poison, i32 12, i32 11, i32 10, i32 9, i32 8>
; SSE2-NEXT:    ret <8 x i16> [[RESULT]]
;
; SSE4-LABEL: @add_v8i16_76u43210(
; SSE4-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <8 x i32> <i32 0, i32 3, i32 4, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> <i32 1, i32 2, i32 5, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT:    [[HADD22:%.*]] = add <8 x i16> [[TMP1]], [[TMP2]]
; SSE4-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B:%.*]], <8 x i32> <i32 6, i32 8, i32 poison, i32 12, i32 14, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32> <i32 7, i32 9, i32 poison, i32 13, i32 15, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT:    [[TMP5:%.*]] = add <8 x i16> [[TMP3]], [[TMP4]]
; SSE4-NEXT:    [[RESULT:%.*]] = shufflevector <8 x i16> [[TMP5]], <8 x i16> [[HADD22]], <8 x i32> <i32 4, i32 3, i32 poison, i32 1, i32 0, i32 10, i32 9, i32 8>
; SSE4-NEXT:    ret <8 x i16> [[RESULT]]
;
; AVX-LABEL: @add_v8i16_76u43210(
; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <8 x i32> <i32 0, i32 3, i32 4, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; AVX-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> <i32 1, i32 2, i32 5, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; AVX-NEXT:    [[HADD22:%.*]] = add <8 x i16> [[TMP1]], [[TMP2]]
; AVX-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B:%.*]], <8 x i32> <i32 6, i32 8, i32 poison, i32 12, i32 14, i32 poison, i32 poison, i32 poison>
; AVX-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32> <i32 7, i32 9, i32 poison, i32 13, i32 15, i32 poison, i32 poison, i32 poison>
; AVX-NEXT:    [[TMP5:%.*]] = add <8 x i16> [[TMP3]], [[TMP4]]
; AVX-NEXT:    [[RESULT:%.*]] = shufflevector <8 x i16> [[TMP5]], <8 x i16> [[HADD22]], <8 x i32> <i32 4, i32 3, i32 poison, i32 1, i32 0, i32 10, i32 9, i32 8>
; AVX-NEXT:    ret <8 x i16> [[RESULT]]
;
  ; Scalar pairwise sums of adjacent %a lanes.
  %a0 = extractelement <8 x i16> %a, i32 0
  %a1 = extractelement <8 x i16> %a, i32 1
  %a2 = extractelement <8 x i16> %a, i32 2
  %a3 = extractelement <8 x i16> %a, i32 3
  %a4 = extractelement <8 x i16> %a, i32 4
  %a5 = extractelement <8 x i16> %a, i32 5
  %a6 = extractelement <8 x i16> %a, i32 6
  %a7 = extractelement <8 x i16> %a, i32 7
  %a01 = add i16 %a0, %a1
  %a23 = add i16 %a2, %a3
  %a45 = add i16 %a4, %a5
  %a67 = add i16 %a6, %a7
  ; Scalar pairwise sums of adjacent %b lanes.
  %b0 = extractelement <8 x i16> %b, i32 0
  %b1 = extractelement <8 x i16> %b, i32 1
  %b2 = extractelement <8 x i16> %b, i32 2
  %b3 = extractelement <8 x i16> %b, i32 3
  %b4 = extractelement <8 x i16> %b, i32 4
  %b5 = extractelement <8 x i16> %b, i32 5
  %b6 = extractelement <8 x i16> %b, i32 6
  %b7 = extractelement <8 x i16> %b, i32 7
  %b01 = add i16 %b0, %b1
  %b23 = add i16 %b2, %b3
  %b45 = add i16 %b4, %b5
  %b67 = add i16 %b6, %b7
  ; Reassemble the sums into one vector, lane by lane.
  %hadd0 = insertelement <8 x i16> poison, i16 %a01, i32 0
  %hadd1 = insertelement <8 x i16> %hadd0, i16 %a23, i32 1
  %hadd2 = insertelement <8 x i16> %hadd1, i16 %a45, i32 2
  %hadd3 = insertelement <8 x i16> %hadd2, i16 %a67, i32 3
  %hadd4 = insertelement <8 x i16> %hadd3, i16 %b01, i32 4
  %hadd5 = insertelement <8 x i16> %hadd4, i16 %b23, i32 5
  %hadd6 = insertelement <8 x i16> %hadd5, i16 %b45, i32 6
  %hadd7 = insertelement <8 x i16> %hadd6, i16 %b67, i32 7
  ; Reversed lane order with result lane 2 left as poison.
  %result = shufflevector <8 x i16> %hadd7, <8 x i16> %a, <8 x i32> <i32 7, i32 6, i32 poison, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i16> %result
}

;
; v16i16
;

; Identity final mask: all sixteen pairwise sums are demanded.
; Result lanes 0-3 hold sums of %a lanes 0-7, lanes 4-7 sums of %b lanes 0-7,
; lanes 8-11 sums of %a lanes 8-15, and lanes 12-15 sums of %b lanes 8-15.
define <16 x i16> @add_v16i16_0123456789ABCDEF(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: @add_v16i16_0123456789ABCDEF(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
; CHECK-NEXT:    [[TMP3:%.*]] = add <16 x i16> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    ret <16 x i16> [[TMP3]]
;
  ; Scalar pairwise sums of adjacent %a lanes.
  %a0 = extractelement <16 x i16> %a, i32 0
  %a1 = extractelement <16 x i16> %a, i32 1
  %a2 = extractelement <16 x i16> %a, i32 2
  %a3 = extractelement <16 x i16> %a, i32 3
  %a4 = extractelement <16 x i16> %a, i32 4
  %a5 = extractelement <16 x i16> %a, i32 5
  %a6 = extractelement <16 x i16> %a, i32 6
  %a7 = extractelement <16 x i16> %a, i32 7
  %a8 = extractelement <16 x i16> %a, i32 8
  %a9 = extractelement <16 x i16> %a, i32 9
  %aA = extractelement <16 x i16> %a, i32 10
  %aB = extractelement <16 x i16> %a, i32 11
  %aC = extractelement <16 x i16> %a, i32 12
  %aD = extractelement <16 x i16> %a, i32 13
  %aE = extractelement <16 x i16> %a, i32 14
  %aF = extractelement <16 x i16> %a, i32 15
  %a01 = add i16 %a0, %a1
  %a23 = add i16 %a2, %a3
  %a45 = add i16 %a4, %a5
  %a67 = add i16 %a6, %a7
  %a89 = add i16 %a8, %a9
  %aAB = add i16 %aA, %aB
  %aCD = add i16 %aC, %aD
  %aEF = add i16 %aE, %aF
  ; Scalar pairwise sums of adjacent %b lanes.
  %b0 = extractelement <16 x i16> %b, i32 0
  %b1 = extractelement <16 x i16> %b, i32 1
  %b2 = extractelement <16 x i16> %b, i32 2
  %b3 = extractelement <16 x i16> %b, i32 3
  %b4 = extractelement <16 x i16> %b, i32 4
  %b5 = extractelement <16 x i16> %b, i32 5
  %b6 = extractelement <16 x i16> %b, i32 6
  %b7 = extractelement <16 x i16> %b, i32 7
  %b8 = extractelement <16 x i16> %b, i32 8
  %b9 = extractelement <16 x i16> %b, i32 9
  %bA = extractelement <16 x i16> %b, i32 10
  %bB = extractelement <16 x i16> %b, i32 11
  %bC = extractelement <16 x i16> %b, i32 12
  %bD = extractelement <16 x i16> %b, i32 13
  %bE = extractelement <16 x i16> %b, i32 14
  %bF = extractelement <16 x i16> %b, i32 15
  %b01 = add i16 %b0, %b1
  %b23 = add i16 %b2, %b3
  %b45 = add i16 %b4, %b5
  %b67 = add i16 %b6, %b7
  %b89 = add i16 %b8, %b9
  %bAB = add i16 %bA, %bB
  %bCD = add i16 %bC, %bD
  %bEF = add i16 %bE, %bF
  ; Reassemble the sums, interleaving %a and %b in groups of four lanes.
  %hadd0 = insertelement <16 x i16> poison, i16 %a01, i32 0
  %hadd1 = insertelement <16 x i16> %hadd0, i16 %a23, i32 1
  %hadd2 = insertelement <16 x i16> %hadd1, i16 %a45, i32 2
  %hadd3 = insertelement <16 x i16> %hadd2, i16 %a67, i32 3
  %hadd4 = insertelement <16 x i16> %hadd3, i16 %b01, i32 4
  %hadd5 = insertelement <16 x i16> %hadd4, i16 %b23, i32 5
  %hadd6 = insertelement <16 x i16> %hadd5, i16 %b45, i32 6
  %hadd7 = insertelement <16 x i16> %hadd6, i16 %b67, i32 7
  %hadd8 = insertelement <16 x i16> %hadd7, i16 %a89, i32 8
  %hadd9 = insertelement <16 x i16> %hadd8, i16 %aAB, i32 9
  %haddA = insertelement <16 x i16> %hadd9, i16 %aCD, i32 10
  %haddB = insertelement <16 x i16> %haddA, i16 %aEF, i32 11
  %haddC = insertelement <16 x i16> %haddB, i16 %b89, i32 12
  %haddD = insertelement <16 x i16> %haddC, i16 %bAB, i32 13
  %haddE = insertelement <16 x i16> %haddD, i16 %bCD, i32 14
  %haddF = insertelement <16 x i16> %haddE, i16 %bEF, i32 15
  ; The mask only indexes the first operand (indices 0-15); %a is never selected.
  %result = shufflevector <16 x i16> %haddF, <16 x i16> %a, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i16> %result
}

; Final mask lanes 4 and 10 are poison, so the %b0 + %b1 and %aC + %aD sums
; are not demanded.
define <16 x i16> @add_v16i16_0123u56789uBCDEF(<16 x i16> %a, <16 x i16> %b) {
; SSE2-LABEL: @add_v16i16_0123u56789uBCDEF(
; SSE2-NEXT:    [[A8:%.*]] = extractelement <16 x i16> [[A:%.*]], i64 8
; SSE2-NEXT:    [[A9:%.*]] = extractelement <16 x i16> [[A]], i64 9
; SSE2-NEXT:    [[A89:%.*]] = add i16 [[A8]], [[A9]]
; SSE2-NEXT:    [[BC:%.*]] = extractelement <16 x i16> [[B:%.*]], i64 12
; SSE2-NEXT:    [[BD:%.*]] = extractelement <16 x i16> [[B]], i64 13
; SSE2-NEXT:    [[BE:%.*]] = extractelement <16 x i16> [[B]], i64 14
; SSE2-NEXT:    [[BF:%.*]] = extractelement <16 x i16> [[B]], i64 15
; SSE2-NEXT:    [[BCD:%.*]] = add i16 [[BC]], [[BD]]
; SSE2-NEXT:    [[BEF:%.*]] = add i16 [[BE]], [[BF]]
; SSE2-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 18, i32 20, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 poison, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT:    [[TMP3:%.*]] = add <16 x i16> [[TMP1]], [[TMP2]]
; SSE2-NEXT:    [[HADD8:%.*]] = insertelement <16 x i16> [[TMP3]], i16 [[A89]], i64 8
; SSE2-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 10, i32 poison, i32 14, i32 24, i32 26, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT:    [[TMP5:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 11, i32 poison, i32 15, i32 25, i32 27, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT:    [[TMP6:%.*]] = add <16 x i16> [[TMP4]], [[TMP5]]
; SSE2-NEXT:    [[HADDD1:%.*]] = shufflevector <16 x i16> [[HADD8]], <16 x i16> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 5, i32 6, i32 7, i32 8, i32 16, i32 poison, i32 18, i32 19, i32 20, i32 poison, i32 poison>
; SSE2-NEXT:    [[HADDE:%.*]] = insertelement <16 x i16> [[HADDD1]], i16 [[BCD]], i64 14
; SSE2-NEXT:    [[HADDF:%.*]] = insertelement <16 x i16> [[HADDE]], i16 [[BEF]], i64 15
; SSE2-NEXT:    [[RESULT:%.*]] = shufflevector <16 x i16> [[HADDF]], <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 5, i32 6, i32 7, i32 8, i32 9, i32 poison, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSE2-NEXT:    ret <16 x i16> [[RESULT]]
;
; SSE4-LABEL: @add_v16i16_0123u56789uBCDEF(
; SSE4-NEXT:    [[A8:%.*]] = extractelement <16 x i16> [[A:%.*]], i64 8
; SSE4-NEXT:    [[A9:%.*]] = extractelement <16 x i16> [[A]], i64 9
; SSE4-NEXT:    [[A89:%.*]] = add i16 [[A8]], [[A9]]
; SSE4-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 18, i32 20, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 poison, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT:    [[TMP3:%.*]] = add <16 x i16> [[TMP1]], [[TMP2]]
; SSE4-NEXT:    [[HADD8:%.*]] = insertelement <16 x i16> [[TMP3]], i16 [[A89]], i64 8
; SSE4-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> poison, <16 x i32> <i32 10, i32 poison, i32 14, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT:    [[TMP5:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> poison, <16 x i32> <i32 11, i32 poison, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT:    [[TMP6:%.*]] = add <16 x i16> [[TMP4]], [[TMP5]]
; SSE4-NEXT:    [[HADDB2:%.*]] = shufflevector <16 x i16> [[HADD8]], <16 x i16> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 5, i32 6, i32 7, i32 8, i32 16, i32 poison, i32 18, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT:    [[TMP7:%.*]] = shufflevector <16 x i16> [[B]], <16 x i16> poison, <16 x i32> <i32 8, i32 10, i32 12, i32 14, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT:    [[TMP8:%.*]] = shufflevector <16 x i16> [[B]], <16 x i16> poison, <16 x i32> <i32 9, i32 11, i32 13, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT:    [[TMP9:%.*]] = add <16 x i16> [[TMP7]], [[TMP8]]
; SSE4-NEXT:    [[RESULT:%.*]] = shufflevector <16 x i16> [[HADDB2]], <16 x i16> [[TMP9]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 5, i32 6, i32 7, i32 8, i32 9, i32 poison, i32 11, i32 16, i32 17, i32 18, i32 19>
; SSE4-NEXT:    ret <16 x i16> [[RESULT]]
;
; AVX2-LABEL: @add_v16i16_0123u56789uBCDEF(
; AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 18, i32 20, i32 22, i32 9, i32 10, i32 poison, i32 14, i32 24, i32 26, i32 28, i32 30>
; AVX2-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 poison, i32 19, i32 21, i32 23, i32 8, i32 11, i32 poison, i32 15, i32 25, i32 27, i32 29, i32 31>
; AVX2-NEXT:    [[RESULT:%.*]] = add <16 x i16> [[TMP1]], [[TMP2]]
; AVX2-NEXT:    ret <16 x i16> [[RESULT]]
;
; AVX512-LABEL: @add_v16i16_0123u56789uBCDEF(
; AVX512-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 18, i32 20, i32 22, i32 8, i32 10, i32 poison, i32 14, i32 24, i32 26, i32 28, i32 30>
; AVX512-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 poison, i32 19, i32 21, i32 23, i32 9, i32 11, i32 poison, i32 15, i32 25, i32 27, i32 29, i32 31>
; AVX512-NEXT:    [[RESULT:%.*]] = add <16 x i16> [[TMP1]], [[TMP2]]
; AVX512-NEXT:    ret <16 x i16> [[RESULT]]
;
  ; Scalar pairwise sums of adjacent %a lanes.
  %a0 = extractelement <16 x i16> %a, i32 0
  %a1 = extractelement <16 x i16> %a, i32 1
  %a2 = extractelement <16 x i16> %a, i32 2
  %a3 = extractelement <16 x i16> %a, i32 3
  %a4 = extractelement <16 x i16> %a, i32 4
  %a5 = extractelement <16 x i16> %a, i32 5
  %a6 = extractelement <16 x i16> %a, i32 6
  %a7 = extractelement <16 x i16> %a, i32 7
  %a8 = extractelement <16 x i16> %a, i32 8
  %a9 = extractelement <16 x i16> %a, i32 9
  %aA = extractelement <16 x i16> %a, i32 10
  %aB = extractelement <16 x i16> %a, i32 11
  %aC = extractelement <16 x i16> %a, i32 12
  %aD = extractelement <16 x i16> %a, i32 13
  %aE = extractelement <16 x i16> %a, i32 14
  %aF = extractelement <16 x i16> %a, i32 15
  %a01 = add i16 %a0, %a1
  %a23 = add i16 %a2, %a3
  %a45 = add i16 %a4, %a5
  %a67 = add i16 %a6, %a7
  %a89 = add i16 %a8, %a9
  %aAB = add i16 %aA, %aB
  %aCD = add i16 %aC, %aD
  %aEF = add i16 %aE, %aF
  ; Scalar pairwise sums of adjacent %b lanes.
  %b0 = extractelement <16 x i16> %b, i32 0
  %b1 = extractelement <16 x i16> %b, i32 1
  %b2 = extractelement <16 x i16> %b, i32 2
  %b3 = extractelement <16 x i16> %b, i32 3
  %b4 = extractelement <16 x i16> %b, i32 4
  %b5 = extractelement <16 x i16> %b, i32 5
  %b6 = extractelement <16 x i16> %b, i32 6
  %b7 = extractelement <16 x i16> %b, i32 7
  %b8 = extractelement <16 x i16> %b, i32 8
  %b9 = extractelement <16 x i16> %b, i32 9
  %bA = extractelement <16 x i16> %b, i32 10
  %bB = extractelement <16 x i16> %b, i32 11
  %bC = extractelement <16 x i16> %b, i32 12
  %bD = extractelement <16 x i16> %b, i32 13
  %bE = extractelement <16 x i16> %b, i32 14
  %bF = extractelement <16 x i16> %b, i32 15
  %b01 = add i16 %b0, %b1
  %b23 = add i16 %b2, %b3
  %b45 = add i16 %b4, %b5
  %b67 = add i16 %b6, %b7
  %b89 = add i16 %b8, %b9
  %bAB = add i16 %bA, %bB
  %bCD = add i16 %bC, %bD
  %bEF = add i16 %bE, %bF
  ; Reassemble the sums, interleaving %a and %b in groups of four lanes.
  %hadd0 = insertelement <16 x i16> poison, i16 %a01, i32 0
  %hadd1 = insertelement <16 x i16> %hadd0, i16 %a23, i32 1
  %hadd2 = insertelement <16 x i16> %hadd1, i16 %a45, i32 2
  %hadd3 = insertelement <16 x i16> %hadd2, i16 %a67, i32 3
  %hadd4 = insertelement <16 x i16> %hadd3, i16 %b01, i32 4
  %hadd5 = insertelement <16 x i16> %hadd4, i16 %b23, i32 5
  %hadd6 = insertelement <16 x i16> %hadd5, i16 %b45, i32 6
  %hadd7 = insertelement <16 x i16> %hadd6, i16 %b67, i32 7
  %hadd8 = insertelement <16 x i16> %hadd7, i16 %a89, i32 8
  %hadd9 = insertelement <16 x i16> %hadd8, i16 %aAB, i32 9
  %haddA = insertelement <16 x i16> %hadd9, i16 %aCD, i32 10
  %haddB = insertelement <16 x i16> %haddA, i16 %aEF, i32 11
  %haddC = insertelement <16 x i16> %haddB, i16 %b89, i32 12
  %haddD = insertelement <16 x i16> %haddC, i16 %bAB, i32 13
  %haddE = insertelement <16 x i16> %haddD, i16 %bCD, i32 14
  %haddF = insertelement <16 x i16> %haddE, i16 %bEF, i32 15
  ; Lanes 4 and 10 of the mask are poison; the remaining lanes pass through in order.
  %result = shufflevector <16 x i16> %haddF, <16 x i16> %a, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 5, i32 6, i32 7, i32 8, i32 9, i32 poison, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i16> %result
}

; Final mask reverses the lanes and leaves result lanes 2 and 14 poison, so
; source lanes 13 (the %bA + %bB sum) and 1 (the %a2 + %a3 sum) are not
; demanded.
define <16 x i16> @add_v16i16_FEuCBA98765432u0(<16 x i16> %a, <16 x i16> %b) {
; SSE2-LABEL: @add_v16i16_FEuCBA98765432u0(
; SSE2-NEXT:    [[A8:%.*]] = extractelement <16 x i16> [[A:%.*]], i64 8
; SSE2-NEXT:    [[A9:%.*]] = extractelement <16 x i16> [[A]], i64 9
; SSE2-NEXT:    [[A89:%.*]] = add i16 [[A8]], [[A9]]
; SSE2-NEXT:    [[BC:%.*]] = extractelement <16 x i16> [[B:%.*]], i64 12
; SSE2-NEXT:    [[BD:%.*]] = extractelement <16 x i16> [[B]], i64 13
; SSE2-NEXT:    [[BE:%.*]] = extractelement <16 x i16> [[B]], i64 14
; SSE2-NEXT:    [[BF:%.*]] = extractelement <16 x i16> [[B]], i64 15
; SSE2-NEXT:    [[BCD:%.*]] = add i16 [[BC]], [[BD]]
; SSE2-NEXT:    [[BEF:%.*]] = add i16 [[BE]], [[BF]]
; SSE2-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 0, i32 poison, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 1, i32 poison, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT:    [[TMP3:%.*]] = add <16 x i16> [[TMP1]], [[TMP2]]
; SSE2-NEXT:    [[HADD8:%.*]] = insertelement <16 x i16> [[TMP3]], i16 [[A89]], i64 8
; SSE2-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 10, i32 12, i32 14, i32 24, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT:    [[TMP5:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 11, i32 13, i32 15, i32 25, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT:    [[TMP6:%.*]] = add <16 x i16> [[TMP4]], [[TMP5]]
; SSE2-NEXT:    [[HADDC1:%.*]] = shufflevector <16 x i16> [[HADD8]], <16 x i16> [[TMP6]], <16 x i32> <i32 0, i32 poison, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT:    [[HADDE:%.*]] = insertelement <16 x i16> [[HADDC1]], i16 [[BCD]], i64 14
; SSE2-NEXT:    [[HADDF:%.*]] = insertelement <16 x i16> [[HADDE]], i16 [[BEF]], i64 15
; SSE2-NEXT:    [[RESULT:%.*]] = shufflevector <16 x i16> [[HADDF]], <16 x i16> poison, <16 x i32> <i32 15, i32 14, i32 poison, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 poison, i32 0>
; SSE2-NEXT:    ret <16 x i16> [[RESULT]]
;
; SSE4-LABEL: @add_v16i16_FEuCBA98765432u0(
; SSE4-NEXT:    [[A8:%.*]] = extractelement <16 x i16> [[A:%.*]], i64 8
; SSE4-NEXT:    [[A9:%.*]] = extractelement <16 x i16> [[A]], i64 9
; SSE4-NEXT:    [[A89:%.*]] = add i16 [[A8]], [[A9]]
; SSE4-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 poison, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 1, i32 poison, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT:    [[TMP3:%.*]] = add <16 x i16> [[TMP1]], [[TMP2]]
; SSE4-NEXT:    [[HADD8:%.*]] = insertelement <16 x i16> [[TMP3]], i16 [[A89]], i64 8
; SSE4-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> poison, <16 x i32> <i32 11, i32 12, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT:    [[TMP5:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> poison, <16 x i32> <i32 10, i32 13, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT:    [[TMP6:%.*]] = add <16 x i16> [[TMP4]], [[TMP5]]
; SSE4-NEXT:    [[HADDA2:%.*]] = shufflevector <16 x i16> [[HADD8]], <16 x i16> [[TMP6]], <16 x i32> <i32 0, i32 poison, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 16, i32 17, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT:    [[TMP7:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 14, i32 24, i32 poison, i32 28, i32 30, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT:    [[TMP8:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 15, i32 25, i32 poison, i32 29, i32 31, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT:    [[TMP9:%.*]] = add <16 x i16> [[TMP7]], [[TMP8]]
; SSE4-NEXT:    [[RESULT:%.*]] = shufflevector <16 x i16> [[TMP9]], <16 x i16> [[HADDA2]], <16 x i32> <i32 4, i32 3, i32 poison, i32 1, i32 0, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 poison, i32 16>
; SSE4-NEXT:    ret <16 x i16> [[RESULT]]
;
; AVX2-LABEL: @add_v16i16_FEuCBA98765432u0(
; AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 poison, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 9, i32 10, i32 12, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; AVX2-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 1, i32 poison, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 8, i32 11, i32 13, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; AVX2-NEXT:    [[HADDA:%.*]] = add <16 x i16> [[TMP1]], [[TMP2]]
; AVX2-NEXT:    [[TMP3:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 14, i32 24, i32 poison, i32 28, i32 30, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; AVX2-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 15, i32 25, i32 poison, i32 29, i32 31, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; AVX2-NEXT:    [[TMP5:%.*]] = add <16 x i16> [[TMP3]], [[TMP4]]
; AVX2-NEXT:    [[RESULT:%.*]] = shufflevector <16 x i16> [[TMP5]], <16 x i16> [[HADDA]], <16 x i32> <i32 4, i32 3, i32 poison, i32 1, i32 0, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 poison, i32 16>
; AVX2-NEXT:    ret <16 x i16> [[RESULT]]
;
; AVX512-LABEL: @add_v16i16_FEuCBA98765432u0(
; AVX512-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 poison, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 11, i32 12, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; AVX512-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 1, i32 poison, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 9, i32 10, i32 13, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; AVX512-NEXT:    [[HADDA2:%.*]] = add <16 x i16> [[TMP1]], [[TMP2]]
; AVX512-NEXT:    [[TMP3:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 14, i32 24, i32 poison, i32 28, i32 30, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; AVX512-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 15, i32 25, i32 poison, i32 29, i32 31, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; AVX512-NEXT:    [[TMP5:%.*]] = add <16 x i16> [[TMP3]], [[TMP4]]
; AVX512-NEXT:    [[RESULT:%.*]] = shufflevector <16 x i16> [[TMP5]], <16 x i16> [[HADDA2]], <16 x i32> <i32 4, i32 3, i32 poison, i32 1, i32 0, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 poison, i32 16>
; AVX512-NEXT:    ret <16 x i16> [[RESULT]]
;
  ; Scalar pairwise sums of adjacent %a lanes.
  %a0 = extractelement <16 x i16> %a, i32 0
  %a1 = extractelement <16 x i16> %a, i32 1
  %a2 = extractelement <16 x i16> %a, i32 2
  %a3 = extractelement <16 x i16> %a, i32 3
  %a4 = extractelement <16 x i16> %a, i32 4
  %a5 = extractelement <16 x i16> %a, i32 5
  %a6 = extractelement <16 x i16> %a, i32 6
  %a7 = extractelement <16 x i16> %a, i32 7
  %a8 = extractelement <16 x i16> %a, i32 8
  %a9 = extractelement <16 x i16> %a, i32 9
  %aA = extractelement <16 x i16> %a, i32 10
  %aB = extractelement <16 x i16> %a, i32 11
  %aC = extractelement <16 x i16> %a, i32 12
  %aD = extractelement <16 x i16> %a, i32 13
  %aE = extractelement <16 x i16> %a, i32 14
  %aF = extractelement <16 x i16> %a, i32 15
  %a01 = add i16 %a0, %a1
  %a23 = add i16 %a2, %a3
  %a45 = add i16 %a4, %a5
  %a67 = add i16 %a6, %a7
  %a89 = add i16 %a8, %a9
  %aAB = add i16 %aA, %aB
  %aCD = add i16 %aC, %aD
  %aEF = add i16 %aE, %aF
  ; Scalar pairwise sums of adjacent %b lanes.
  %b0 = extractelement <16 x i16> %b, i32 0
  %b1 = extractelement <16 x i16> %b, i32 1
  %b2 = extractelement <16 x i16> %b, i32 2
  %b3 = extractelement <16 x i16> %b, i32 3
  %b4 = extractelement <16 x i16> %b, i32 4
  %b5 = extractelement <16 x i16> %b, i32 5
  %b6 = extractelement <16 x i16> %b, i32 6
  %b7 = extractelement <16 x i16> %b, i32 7
  %b8 = extractelement <16 x i16> %b, i32 8
  %b9 = extractelement <16 x i16> %b, i32 9
  %bA = extractelement <16 x i16> %b, i32 10
  %bB = extractelement <16 x i16> %b, i32 11
  %bC = extractelement <16 x i16> %b, i32 12
  %bD = extractelement <16 x i16> %b, i32 13
  %bE = extractelement <16 x i16> %b, i32 14
  %bF = extractelement <16 x i16> %b, i32 15
  %b01 = add i16 %b0, %b1
  %b23 = add i16 %b2, %b3
  %b45 = add i16 %b4, %b5
  %b67 = add i16 %b6, %b7
  %b89 = add i16 %b8, %b9
  %bAB = add i16 %bA, %bB
  %bCD = add i16 %bC, %bD
  %bEF = add i16 %bE, %bF
  ; Reassemble the sums, interleaving %a and %b in groups of four lanes.
  %hadd0 = insertelement <16 x i16> poison, i16 %a01, i32 0
  %hadd1 = insertelement <16 x i16> %hadd0, i16 %a23, i32 1
  %hadd2 = insertelement <16 x i16> %hadd1, i16 %a45, i32 2
  %hadd3 = insertelement <16 x i16> %hadd2, i16 %a67, i32 3
  %hadd4 = insertelement <16 x i16> %hadd3, i16 %b01, i32 4
  %hadd5 = insertelement <16 x i16> %hadd4, i16 %b23, i32 5
  %hadd6 = insertelement <16 x i16> %hadd5, i16 %b45, i32 6
  %hadd7 = insertelement <16 x i16> %hadd6, i16 %b67, i32 7
  %hadd8 = insertelement <16 x i16> %hadd7, i16 %a89, i32 8
  %hadd9 = insertelement <16 x i16> %hadd8, i16 %aAB, i32 9
  %haddA = insertelement <16 x i16> %hadd9, i16 %aCD, i32 10
  %haddB = insertelement <16 x i16> %haddA, i16 %aEF, i32 11
  %haddC = insertelement <16 x i16> %haddB, i16 %b89, i32 12
  %haddD = insertelement <16 x i16> %haddC, i16 %bAB, i32 13
  %haddE = insertelement <16 x i16> %haddD, i16 %bCD, i32 14
  %haddF = insertelement <16 x i16> %haddE, i16 %bEF, i32 15
  ; Reversed lane order with result lanes 2 and 14 left as poison.
  %result = shufflevector <16 x i16> %haddF, <16 x i16> %a, <16 x i32> <i32 15, i32 14, i32 poison, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 poison, i32 0>
  ret <16 x i16> %result
}

;
; v4i32
;

define <4 x i32> @add_v4i32_0123(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: 
@add_v4i32_0123( 531; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 4, i32 6> 532; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7> 533; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]] 534; CHECK-NEXT: ret <4 x i32> [[TMP3]] 535; 536 %a0 = extractelement <4 x i32> %a, i32 0 537 %a1 = extractelement <4 x i32> %a, i32 1 538 %a2 = extractelement <4 x i32> %a, i32 2 539 %a3 = extractelement <4 x i32> %a, i32 3 540 %a01 = add i32 %a0, %a1 541 %a23 = add i32 %a2, %a3 542 %b0 = extractelement <4 x i32> %b, i32 0 543 %b1 = extractelement <4 x i32> %b, i32 1 544 %b2 = extractelement <4 x i32> %b, i32 2 545 %b3 = extractelement <4 x i32> %b, i32 3 546 %b01 = add i32 %b0, %b1 547 %b23 = add i32 %b2, %b3 548 %hadd0 = insertelement <4 x i32> poison, i32 %a01, i32 0 549 %hadd1 = insertelement <4 x i32> %hadd0, i32 %a23, i32 1 550 %hadd2 = insertelement <4 x i32> %hadd1, i32 %b01, i32 2 551 %hadd3 = insertelement <4 x i32> %hadd2, i32 %b23, i32 3 552 %result = shufflevector <4 x i32> %hadd3, <4 x i32> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 553 ret <4 x i32> %result 554} 555 556define <4 x i32> @add_v4i32_u123(<4 x i32> %a, <4 x i32> %b) { 557; CHECK-LABEL: @add_v4i32_u123( 558; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> <i32 poison, i32 2, i32 5, i32 6> 559; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 poison, i32 3, i32 4, i32 7> 560; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]] 561; CHECK-NEXT: ret <4 x i32> [[TMP4]] 562; 563 %a0 = extractelement <4 x i32> %a, i32 0 564 %a1 = extractelement <4 x i32> %a, i32 1 565 %a2 = extractelement <4 x i32> %a, i32 2 566 %a3 = extractelement <4 x i32> %a, i32 3 567 %a01 = add i32 %a0, %a1 568 %a23 = add i32 %a2, %a3 569 %b0 = extractelement <4 x i32> %b, i32 0 570 %b1 = extractelement <4 
x i32> %b, i32 1 571 %b2 = extractelement <4 x i32> %b, i32 2 572 %b3 = extractelement <4 x i32> %b, i32 3 573 %b01 = add i32 %b0, %b1 574 %b23 = add i32 %b2, %b3 575 %hadd0 = insertelement <4 x i32> poison, i32 %a01, i32 0 576 %hadd1 = insertelement <4 x i32> %hadd0, i32 %a23, i32 1 577 %hadd2 = insertelement <4 x i32> %hadd1, i32 %b01, i32 2 578 %hadd3 = insertelement <4 x i32> %hadd2, i32 %b23, i32 3 579 %result = shufflevector <4 x i32> %hadd3, <4 x i32> %a, <4 x i32> <i32 poison, i32 1, i32 2, i32 3> 580 ret <4 x i32> %result 581} 582
; add_v4i32_0u23: scalarized i32 horizontal add of %a and %b, built as
; <a0+a1, a2+a3, b0+b1, b2+b3>. The final shuffle mask is <0, poison, 2, 3>,
; so result lane 1 (a2+a3) is not demanded. The mask only selects from the
; first shuffle operand; the second operand (%a) is never referenced by it.
583define <4 x i32> @add_v4i32_0u23(<4 x i32> %a, <4 x i32> %b) { 584; CHECK-LABEL: @add_v4i32_0u23( 585; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> <i32 0, i32 poison, i32 5, i32 6> 586; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 1, i32 poison, i32 4, i32 7> 587; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]] 588; CHECK-NEXT: ret <4 x i32> [[TMP4]] 589; 590 %a0 = extractelement <4 x i32> %a, i32 0 591 %a1 = extractelement <4 x i32> %a, i32 1 592 %a2 = extractelement <4 x i32> %a, i32 2 593 %a3 = extractelement <4 x i32> %a, i32 3 594 %a01 = add i32 %a0, %a1 595 %a23 = add i32 %a2, %a3 596 %b0 = extractelement <4 x i32> %b, i32 0 597 %b1 = extractelement <4 x i32> %b, i32 1 598 %b2 = extractelement <4 x i32> %b, i32 2 599 %b3 = extractelement <4 x i32> %b, i32 3 600 %b01 = add i32 %b0, %b1 601 %b23 = add i32 %b2, %b3 602 %hadd0 = insertelement <4 x i32> poison, i32 %a01, i32 0 603 %hadd1 = insertelement <4 x i32> %hadd0, i32 %a23, i32 1 604 %hadd2 = insertelement <4 x i32> %hadd1, i32 %b01, i32 2 605 %hadd3 = insertelement <4 x i32> %hadd2, i32 %b23, i32 3 606 %result = shufflevector <4 x i32> %hadd3, <4 x i32> %a, <4 x i32> <i32 0, i32 poison, i32 2, i32 3> 607 ret <4 x i32> %result 608} 609
; add_v4i32_01u3: same scalarized i32 horizontal add, but the final shuffle
; mask is <0, 1, poison, 3>, so result lane 2 (b0+b1) is not demanded.
; Expectations for this function are split per RUN prefix (SSE2/SSE4/AVX2/AVX512).
610define <4 x i32> @add_v4i32_01u3(<4 x i32> %a, <4 x i32> %b) { 611; SSE2-LABEL: @add_v4i32_01u3( 612; SSE2-NEXT: [[TMP2:%.*]] =
shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 poison, i32 6> 613; SSE2-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 poison, i32 7> 614; SSE2-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]] 615; SSE2-NEXT: ret <4 x i32> [[TMP4]] 616; 617; SSE4-LABEL: @add_v4i32_01u3( 618; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> <i32 1, i32 2, i32 poison, i32 6> 619; SSE4-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 0, i32 3, i32 poison, i32 7> 620; SSE4-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]] 621; SSE4-NEXT: ret <4 x i32> [[TMP4]] 622; 623; AVX2-LABEL: @add_v4i32_01u3( 624; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> <i32 1, i32 2, i32 poison, i32 6> 625; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 0, i32 3, i32 poison, i32 7> 626; AVX2-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]] 627; AVX2-NEXT: ret <4 x i32> [[TMP4]] 628; 629; AVX512-LABEL: @add_v4i32_01u3( 630; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 poison, i32 6> 631; AVX512-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 poison, i32 7> 632; AVX512-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]] 633; AVX512-NEXT: ret <4 x i32> [[TMP4]] 634; 635 %a0 = extractelement <4 x i32> %a, i32 0 636 %a1 = extractelement <4 x i32> %a, i32 1 637 %a2 = extractelement <4 x i32> %a, i32 2 638 %a3 = extractelement <4 x i32> %a, i32 3 639 %a01 = add i32 %a0, %a1 640 %a23 = add i32 %a2, %a3 641 %b0 = extractelement <4 x i32> %b, i32 0 642 %b1 = extractelement <4 x i32> %b, i32 1 643 %b2 = extractelement <4 x i32> %b, i32 2 644 %b3 = extractelement <4 x i32> %b, i32 3 645 %b01 = add i32 %b0, %b1 646 %b23 = add i32 %b2, 
%b3 647 %hadd0 = insertelement <4 x i32> poison, i32 %a01, i32 0 648 %hadd1 = insertelement <4 x i32> %hadd0, i32 %a23, i32 1 649 %hadd2 = insertelement <4 x i32> %hadd1, i32 %b01, i32 2 650 %hadd3 = insertelement <4 x i32> %hadd2, i32 %b23, i32 3 651 %result = shufflevector <4 x i32> %hadd3, <4 x i32> %a, <4 x i32> <i32 0, i32 1, i32 poison, i32 3> 652 ret <4 x i32> %result 653} 654
; add_v4i32_012u: scalarized i32 horizontal add of %a and %b, built as
; <a0+a1, a2+a3, b0+b1, b2+b3>. The final shuffle mask is <0, 1, 2, poison>,
; so result lane 3 (b2+b3) is not demanded. A single CHECK prefix covers
; every RUN configuration for this function.
655define <4 x i32> @add_v4i32_012u(<4 x i32> %a, <4 x i32> %b) { 656; CHECK-LABEL: @add_v4i32_012u( 657; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 4, i32 poison> 658; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 poison> 659; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]] 660; CHECK-NEXT: ret <4 x i32> [[TMP4]] 661; 662 %a0 = extractelement <4 x i32> %a, i32 0 663 %a1 = extractelement <4 x i32> %a, i32 1 664 %a2 = extractelement <4 x i32> %a, i32 2 665 %a3 = extractelement <4 x i32> %a, i32 3 666 %a01 = add i32 %a0, %a1 667 %a23 = add i32 %a2, %a3 668 %b0 = extractelement <4 x i32> %b, i32 0 669 %b1 = extractelement <4 x i32> %b, i32 1 670 %b2 = extractelement <4 x i32> %b, i32 2 671 %b3 = extractelement <4 x i32> %b, i32 3 672 %b01 = add i32 %b0, %b1 673 %b23 = add i32 %b2, %b3 674 %hadd0 = insertelement <4 x i32> poison, i32 %a01, i32 0 675 %hadd1 = insertelement <4 x i32> %hadd0, i32 %a23, i32 1 676 %hadd2 = insertelement <4 x i32> %hadd1, i32 %b01, i32 2 677 %hadd3 = insertelement <4 x i32> %hadd2, i32 %b23, i32 3 678 %result = shufflevector <4 x i32> %hadd3, <4 x i32> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 679 ret <4 x i32> %result 680} 681
; add_v4i32_uu23: same scalarized i32 horizontal add, but the final shuffle
; mask is <poison, poison, 2, 3>: only the two %b sums (b0+b1, b2+b3) are
; demanded, and the expected output reads from [[B]] alone.
682define <4 x i32> @add_v4i32_uu23(<4 x i32> %a, <4 x i32> %b) { 683; CHECK-LABEL: @add_v4i32_uu23( 684; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> poison, <4 x i32> <i32 poison, i32 poison, i32 1, i32 2> 685; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32>
[[B]], <4 x i32> poison, <4 x i32> <i32 poison, i32 poison, i32 0, i32 3> 686; CHECK-NEXT: [[RESULT1:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]] 687; CHECK-NEXT: ret <4 x i32> [[RESULT1]] 688; 689 %a0 = extractelement <4 x i32> %a, i32 0 690 %a1 = extractelement <4 x i32> %a, i32 1 691 %a2 = extractelement <4 x i32> %a, i32 2 692 %a3 = extractelement <4 x i32> %a, i32 3 693 %a01 = add i32 %a0, %a1 694 %a23 = add i32 %a2, %a3 695 %b0 = extractelement <4 x i32> %b, i32 0 696 %b1 = extractelement <4 x i32> %b, i32 1 697 %b2 = extractelement <4 x i32> %b, i32 2 698 %b3 = extractelement <4 x i32> %b, i32 3 699 %b01 = add i32 %b0, %b1 700 %b23 = add i32 %b2, %b3 701 %hadd0 = insertelement <4 x i32> poison, i32 %a01, i32 0 702 %hadd1 = insertelement <4 x i32> %hadd0, i32 %a23, i32 1 703 %hadd2 = insertelement <4 x i32> %hadd1, i32 %b01, i32 2 704 %hadd3 = insertelement <4 x i32> %hadd2, i32 %b23, i32 3 705 %result = shufflevector <4 x i32> %hadd3, <4 x i32> %a, <4 x i32> <i32 poison, i32 poison, i32 2, i32 3> 706 ret <4 x i32> %result 707} 708 709define <4 x i32> @add_v4i32_01uu(<4 x i32> %a, <4 x i32> %b) { 710; CHECK-LABEL: @add_v4i32_01uu( 711; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 poison, i32 poison> 712; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> poison, <4 x i32> <i32 0, i32 3, i32 poison, i32 poison> 713; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]] 714; CHECK-NEXT: ret <4 x i32> [[TMP3]] 715; 716 %a0 = extractelement <4 x i32> %a, i32 0 717 %a1 = extractelement <4 x i32> %a, i32 1 718 %a2 = extractelement <4 x i32> %a, i32 2 719 %a3 = extractelement <4 x i32> %a, i32 3 720 %a01 = add i32 %a0, %a1 721 %a23 = add i32 %a2, %a3 722 %b0 = extractelement <4 x i32> %b, i32 0 723 %b1 = extractelement <4 x i32> %b, i32 1 724 %b2 = extractelement <4 x i32> %b, i32 2 725 %b3 = extractelement <4 x i32> %b, i32 3 726 %b01 = add i32 %b0, %b1 727 %b23 = add i32 %b2, %b3 
728 %hadd0 = insertelement <4 x i32> poison, i32 %a01, i32 0 729 %hadd1 = insertelement <4 x i32> %hadd0, i32 %a23, i32 1 730 %hadd2 = insertelement <4 x i32> %hadd1, i32 %b01, i32 2 731 %hadd3 = insertelement <4 x i32> %hadd2, i32 %b23, i32 3 732 %result = shufflevector <4 x i32> %hadd3, <4 x i32> %a, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison> 733 ret <4 x i32> %result 734} 735 736define <4 x i32> @add_v4i32_32u0(<4 x i32> %a, <4 x i32> %b) { 737; SSE2-LABEL: @add_v4i32_32u0( 738; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> [[A:%.*]], <4 x i32> <i32 2, i32 0, i32 poison, i32 4> 739; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[A]], <4 x i32> <i32 3, i32 1, i32 poison, i32 5> 740; SSE2-NEXT: [[RESULT1:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]] 741; SSE2-NEXT: ret <4 x i32> [[RESULT1]] 742; 743; SSE4-LABEL: @add_v4i32_32u0( 744; SSE4-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> [[A:%.*]], <4 x i32> <i32 2, i32 0, i32 poison, i32 4> 745; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[A]], <4 x i32> <i32 3, i32 1, i32 poison, i32 5> 746; SSE4-NEXT: [[RESULT:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]] 747; SSE4-NEXT: ret <4 x i32> [[RESULT]] 748; 749; AVX2-LABEL: @add_v4i32_32u0( 750; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> [[A:%.*]], <4 x i32> <i32 2, i32 0, i32 poison, i32 4> 751; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[A]], <4 x i32> <i32 3, i32 1, i32 poison, i32 5> 752; AVX2-NEXT: [[RESULT:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]] 753; AVX2-NEXT: ret <4 x i32> [[RESULT]] 754; 755; AVX512-LABEL: @add_v4i32_32u0( 756; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> [[A:%.*]], <4 x i32> <i32 2, i32 0, i32 poison, i32 4> 757; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[A]], <4 x i32> <i32 3, i32 1, i32 poison, i32 5> 758; AVX512-NEXT: [[RESULT1:%.*]] = add <4 x i32> 
[[TMP1]], [[TMP2]] 759; AVX512-NEXT: ret <4 x i32> [[RESULT1]] 760; 761 %a0 = extractelement <4 x i32> %a, i32 0 762 %a1 = extractelement <4 x i32> %a, i32 1 763 %a2 = extractelement <4 x i32> %a, i32 2 764 %a3 = extractelement <4 x i32> %a, i32 3 765 %a01 = add i32 %a0, %a1 766 %a23 = add i32 %a2, %a3 767 %b0 = extractelement <4 x i32> %b, i32 0 768 %b1 = extractelement <4 x i32> %b, i32 1 769 %b2 = extractelement <4 x i32> %b, i32 2 770 %b3 = extractelement <4 x i32> %b, i32 3 771 %b01 = add i32 %b0, %b1 772 %b23 = add i32 %b2, %b3 773 %hadd0 = insertelement <4 x i32> poison, i32 %a01, i32 0 774 %hadd1 = insertelement <4 x i32> %hadd0, i32 %a23, i32 1 775 %hadd2 = insertelement <4 x i32> %hadd1, i32 %b01, i32 2 776 %hadd3 = insertelement <4 x i32> %hadd2, i32 %b23, i32 3 777 %result = shufflevector <4 x i32> %hadd3, <4 x i32> %a, <4 x i32> <i32 3, i32 2, i32 poison, i32 0> 778 ret <4 x i32> %result 779} 780 781; 782; v8i32 783; 784 785define <8 x i32> @add_v8i32_01234567(<8 x i32> %a, <8 x i32> %b) { 786; CHECK-LABEL: @add_v8i32_01234567( 787; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14> 788; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15> 789; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP1]], [[TMP2]] 790; CHECK-NEXT: ret <8 x i32> [[TMP3]] 791; 792 %a0 = extractelement <8 x i32> %a, i32 0 793 %a1 = extractelement <8 x i32> %a, i32 1 794 %a2 = extractelement <8 x i32> %a, i32 2 795 %a3 = extractelement <8 x i32> %a, i32 3 796 %a4 = extractelement <8 x i32> %a, i32 4 797 %a5 = extractelement <8 x i32> %a, i32 5 798 %a6 = extractelement <8 x i32> %a, i32 6 799 %a7 = extractelement <8 x i32> %a, i32 7 800 %a01 = add i32 %a0, %a1 801 %a23 = add i32 %a2, %a3 802 %a45 = add i32 %a4, %a5 803 %a67 = add i32 %a6, %a7 804 %b0 = extractelement <8 x i32> %b, i32 0 805 %b1 
= extractelement <8 x i32> %b, i32 1 806 %b2 = extractelement <8 x i32> %b, i32 2 807 %b3 = extractelement <8 x i32> %b, i32 3 808 %b4 = extractelement <8 x i32> %b, i32 4 809 %b5 = extractelement <8 x i32> %b, i32 5 810 %b6 = extractelement <8 x i32> %b, i32 6 811 %b7 = extractelement <8 x i32> %b, i32 7 812 %b01 = add i32 %b0, %b1 813 %b23 = add i32 %b2, %b3 814 %b45 = add i32 %b4, %b5 815 %b67 = add i32 %b6, %b7 816 %hadd0 = insertelement <8 x i32> poison, i32 %a01, i32 0 817 %hadd1 = insertelement <8 x i32> %hadd0, i32 %a23, i32 1 818 %hadd2 = insertelement <8 x i32> %hadd1, i32 %b01, i32 2 819 %hadd3 = insertelement <8 x i32> %hadd2, i32 %b23, i32 3 820 %hadd4 = insertelement <8 x i32> %hadd3, i32 %a45, i32 4 821 %hadd5 = insertelement <8 x i32> %hadd4, i32 %a67, i32 5 822 %hadd6 = insertelement <8 x i32> %hadd5, i32 %b45, i32 6 823 %hadd7 = insertelement <8 x i32> %hadd6, i32 %b67, i32 7 824 %result = shufflevector <8 x i32> %hadd7, <8 x i32> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 825 ret <8 x i32> %result 826} 827 828define <8 x i32> @add_v8i32_01234u67(<8 x i32> %a, <8 x i32> %b) { 829; SSE2-LABEL: @add_v8i32_01234u67( 830; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 poison, i32 13, i32 14> 831; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 poison, i32 12, i32 15> 832; SSE2-NEXT: [[RESULT:%.*]] = add <8 x i32> [[TMP1]], [[TMP2]] 833; SSE2-NEXT: ret <8 x i32> [[RESULT]] 834; 835; SSE4-LABEL: @add_v8i32_01234u67( 836; SSE4-NEXT: [[A4:%.*]] = extractelement <8 x i32> [[A:%.*]], i64 4 837; SSE4-NEXT: [[A5:%.*]] = extractelement <8 x i32> [[A]], i64 5 838; SSE4-NEXT: [[A45:%.*]] = add i32 [[A4]], [[A5]] 839; SSE4-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 poison, i32 poison, i32 poison, i32 
poison> 840; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 poison, i32 poison, i32 poison, i32 poison> 841; SSE4-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP1]], [[TMP2]] 842; SSE4-NEXT: [[HADD4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[A45]], i64 4 843; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <8 x i32> <i32 5, i32 6, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 844; SSE4-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <8 x i32> <i32 4, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 845; SSE4-NEXT: [[TMP6:%.*]] = add <8 x i32> [[TMP4]], [[TMP5]] 846; SSE4-NEXT: [[RESULT:%.*]] = shufflevector <8 x i32> [[HADD4]], <8 x i32> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 poison, i32 8, i32 9> 847; SSE4-NEXT: ret <8 x i32> [[RESULT]] 848; 849; AVX-LABEL: @add_v8i32_01234u67( 850; AVX-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 poison, i32 13, i32 14> 851; AVX-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 poison, i32 12, i32 15> 852; AVX-NEXT: [[RESULT:%.*]] = add <8 x i32> [[TMP1]], [[TMP2]] 853; AVX-NEXT: ret <8 x i32> [[RESULT]] 854; 855 %a0 = extractelement <8 x i32> %a, i32 0 856 %a1 = extractelement <8 x i32> %a, i32 1 857 %a2 = extractelement <8 x i32> %a, i32 2 858 %a3 = extractelement <8 x i32> %a, i32 3 859 %a4 = extractelement <8 x i32> %a, i32 4 860 %a5 = extractelement <8 x i32> %a, i32 5 861 %a6 = extractelement <8 x i32> %a, i32 6 862 %a7 = extractelement <8 x i32> %a, i32 7 863 %a01 = add i32 %a0, %a1 864 %a23 = add i32 %a2, %a3 865 %a45 = add i32 %a4, %a5 866 %a67 = add i32 %a6, %a7 867 %b0 = extractelement <8 x i32> %b, i32 0 868 %b1 = extractelement <8 x i32> %b, i32 1 869 %b2 = extractelement <8 
x i32> %b, i32 2 870 %b3 = extractelement <8 x i32> %b, i32 3 871 %b4 = extractelement <8 x i32> %b, i32 4 872 %b5 = extractelement <8 x i32> %b, i32 5 873 %b6 = extractelement <8 x i32> %b, i32 6 874 %b7 = extractelement <8 x i32> %b, i32 7 875 %b01 = add i32 %b0, %b1 876 %b23 = add i32 %b2, %b3 877 %b45 = add i32 %b4, %b5 878 %b67 = add i32 %b6, %b7 879 %hadd0 = insertelement <8 x i32> poison, i32 %a01, i32 0 880 %hadd1 = insertelement <8 x i32> %hadd0, i32 %a23, i32 1 881 %hadd2 = insertelement <8 x i32> %hadd1, i32 %b01, i32 2 882 %hadd3 = insertelement <8 x i32> %hadd2, i32 %b23, i32 3 883 %hadd4 = insertelement <8 x i32> %hadd3, i32 %a45, i32 4 884 %hadd5 = insertelement <8 x i32> %hadd4, i32 %a67, i32 5 885 %hadd6 = insertelement <8 x i32> %hadd5, i32 %b45, i32 6 886 %hadd7 = insertelement <8 x i32> %hadd6, i32 %b67, i32 7 887 %result = shufflevector <8 x i32> %hadd7, <8 x i32> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 poison, i32 6, i32 7> 888 ret <8 x i32> %result 889} 890 891; 892; v4f32 893; 894 895define <4 x float> @add_v4f32_0123(<4 x float> %a, <4 x float> %b) { 896; CHECK-LABEL: @add_v4f32_0123( 897; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 4, i32 6> 898; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7> 899; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]] 900; CHECK-NEXT: ret <4 x float> [[TMP3]] 901; 902 %a0 = extractelement <4 x float> %a, i32 0 903 %a1 = extractelement <4 x float> %a, i32 1 904 %a2 = extractelement <4 x float> %a, i32 2 905 %a3 = extractelement <4 x float> %a, i32 3 906 %a01 = fadd float %a0, %a1 907 %a23 = fadd float %a2, %a3 908 %b0 = extractelement <4 x float> %b, i32 0 909 %b1 = extractelement <4 x float> %b, i32 1 910 %b2 = extractelement <4 x float> %b, i32 2 911 %b3 = extractelement <4 x float> %b, i32 3 912 %b01 = fadd float %b0, %b1 913 %b23 = fadd 
float %b2, %b3 914 %hadd0 = insertelement <4 x float> poison, float %a01, i32 0 915 %hadd1 = insertelement <4 x float> %hadd0, float %a23, i32 1 916 %hadd2 = insertelement <4 x float> %hadd1, float %b01, i32 2 917 %hadd3 = insertelement <4 x float> %hadd2, float %b23, i32 3 918 %result = shufflevector <4 x float> %hadd3, <4 x float> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 919 ret <4 x float> %result 920} 921
; add_v4f32_u123: scalarized float horizontal add (fadd) of %a and %b, built
; as <a0+a1, a2+a3, b0+b1, b2+b3>. The final shuffle mask is
; <poison, 1, 2, 3>, so result lane 0 (a0+a1) is not demanded. A single CHECK
; prefix covers every RUN configuration for this function.
922define <4 x float> @add_v4f32_u123(<4 x float> %a, <4 x float> %b) { 923; CHECK-LABEL: @add_v4f32_u123( 924; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i32> <i32 poison, i32 2, i32 5, i32 6> 925; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 poison, i32 3, i32 4, i32 7> 926; CHECK-NEXT: [[TMP4:%.*]] = fadd <4 x float> [[TMP2]], [[TMP3]] 927; CHECK-NEXT: ret <4 x float> [[TMP4]] 928; 929 %a0 = extractelement <4 x float> %a, i32 0 930 %a1 = extractelement <4 x float> %a, i32 1 931 %a2 = extractelement <4 x float> %a, i32 2 932 %a3 = extractelement <4 x float> %a, i32 3 933 %a01 = fadd float %a0, %a1 934 %a23 = fadd float %a2, %a3 935 %b0 = extractelement <4 x float> %b, i32 0 936 %b1 = extractelement <4 x float> %b, i32 1 937 %b2 = extractelement <4 x float> %b, i32 2 938 %b3 = extractelement <4 x float> %b, i32 3 939 %b01 = fadd float %b0, %b1 940 %b23 = fadd float %b2, %b3 941 %hadd0 = insertelement <4 x float> poison, float %a01, i32 0 942 %hadd1 = insertelement <4 x float> %hadd0, float %a23, i32 1 943 %hadd2 = insertelement <4 x float> %hadd1, float %b01, i32 2 944 %hadd3 = insertelement <4 x float> %hadd2, float %b23, i32 3 945 %result = shufflevector <4 x float> %hadd3, <4 x float> %a, <4 x i32> <i32 poison, i32 1, i32 2, i32 3> 946 ret <4 x float> %result 947} 948
; add_v4f32_0u23: same scalarized float horizontal add, but the final shuffle
; mask is <0, poison, 2, 3>, so result lane 1 (a2+a3) is not demanded.
949define <4 x float> @add_v4f32_0u23(<4 x float> %a, <4 x float> %b) { 950; CHECK-LABEL: @add_v4f32_0u23( 951; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float>
[[B:%.*]], <4 x i32> <i32 0, i32 poison, i32 5, i32 6> 952; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 1, i32 poison, i32 4, i32 7> 953; CHECK-NEXT: [[TMP4:%.*]] = fadd <4 x float> [[TMP2]], [[TMP3]] 954; CHECK-NEXT: ret <4 x float> [[TMP4]] 955; 956 %a0 = extractelement <4 x float> %a, i32 0 957 %a1 = extractelement <4 x float> %a, i32 1 958 %a2 = extractelement <4 x float> %a, i32 2 959 %a3 = extractelement <4 x float> %a, i32 3 960 %a01 = fadd float %a0, %a1 961 %a23 = fadd float %a2, %a3 962 %b0 = extractelement <4 x float> %b, i32 0 963 %b1 = extractelement <4 x float> %b, i32 1 964 %b2 = extractelement <4 x float> %b, i32 2 965 %b3 = extractelement <4 x float> %b, i32 3 966 %b01 = fadd float %b0, %b1 967 %b23 = fadd float %b2, %b3 968 %hadd0 = insertelement <4 x float> poison, float %a01, i32 0 969 %hadd1 = insertelement <4 x float> %hadd0, float %a23, i32 1 970 %hadd2 = insertelement <4 x float> %hadd1, float %b01, i32 2 971 %hadd3 = insertelement <4 x float> %hadd2, float %b23, i32 3 972 %result = shufflevector <4 x float> %hadd3, <4 x float> %a, <4 x i32> <i32 0, i32 poison, i32 2, i32 3> 973 ret <4 x float> %result 974} 975 976define <4 x float> @add_v4f32_01u3(<4 x float> %a, <4 x float> %b) { 977; CHECK-LABEL: @add_v4f32_01u3( 978; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 poison, i32 6> 979; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 poison, i32 7> 980; CHECK-NEXT: [[RESULT1:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]] 981; CHECK-NEXT: ret <4 x float> [[RESULT1]] 982; 983 %a0 = extractelement <4 x float> %a, i32 0 984 %a1 = extractelement <4 x float> %a, i32 1 985 %a2 = extractelement <4 x float> %a, i32 2 986 %a3 = extractelement <4 x float> %a, i32 3 987 %a01 = fadd float %a0, %a1 988 %a23 = fadd float %a2, %a3 989 %b0 = extractelement <4 x float> %b, i32 0 990 %b1 
= extractelement <4 x float> %b, i32 1 991 %b2 = extractelement <4 x float> %b, i32 2 992 %b3 = extractelement <4 x float> %b, i32 3 993 %b01 = fadd float %b0, %b1 994 %b23 = fadd float %b2, %b3 995 %hadd0 = insertelement <4 x float> poison, float %a01, i32 0 996 %hadd1 = insertelement <4 x float> %hadd0, float %a23, i32 1 997 %hadd2 = insertelement <4 x float> %hadd1, float %b01, i32 2 998 %hadd3 = insertelement <4 x float> %hadd2, float %b23, i32 3 999 %result = shufflevector <4 x float> %hadd3, <4 x float> %a, <4 x i32> <i32 0, i32 1, i32 poison, i32 3> 1000 ret <4 x float> %result 1001} 1002 1003define <4 x float> @add_v4f32_012u(<4 x float> %a, <4 x float> %b) { 1004; SSE2-LABEL: @add_v4f32_012u( 1005; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 4, i32 poison> 1006; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 poison> 1007; SSE2-NEXT: [[RESULT1:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]] 1008; SSE2-NEXT: ret <4 x float> [[RESULT1]] 1009; 1010; SSE4-LABEL: @add_v4f32_012u( 1011; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i32> <i32 1, i32 2, i32 4, i32 poison> 1012; SSE4-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 0, i32 3, i32 5, i32 poison> 1013; SSE4-NEXT: [[TMP4:%.*]] = fadd <4 x float> [[TMP2]], [[TMP3]] 1014; SSE4-NEXT: ret <4 x float> [[TMP4]] 1015; 1016; AVX2-LABEL: @add_v4f32_012u( 1017; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i32> <i32 1, i32 2, i32 4, i32 poison> 1018; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 0, i32 3, i32 5, i32 poison> 1019; AVX2-NEXT: [[TMP4:%.*]] = fadd <4 x float> [[TMP2]], [[TMP3]] 1020; AVX2-NEXT: ret <4 x float> [[TMP4]] 1021; 1022; AVX512-LABEL: @add_v4f32_012u( 1023; AVX512-NEXT: [[TMP1:%.*]] = 
shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 4, i32 poison> 1024; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 poison> 1025; AVX512-NEXT: [[RESULT1:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]] 1026; AVX512-NEXT: ret <4 x float> [[RESULT1]] 1027; 1028 %a0 = extractelement <4 x float> %a, i32 0 1029 %a1 = extractelement <4 x float> %a, i32 1 1030 %a2 = extractelement <4 x float> %a, i32 2 1031 %a3 = extractelement <4 x float> %a, i32 3 1032 %a01 = fadd float %a0, %a1 1033 %a23 = fadd float %a2, %a3 1034 %b0 = extractelement <4 x float> %b, i32 0 1035 %b1 = extractelement <4 x float> %b, i32 1 1036 %b2 = extractelement <4 x float> %b, i32 2 1037 %b3 = extractelement <4 x float> %b, i32 3 1038 %b01 = fadd float %b0, %b1 1039 %b23 = fadd float %b2, %b3 1040 %hadd0 = insertelement <4 x float> poison, float %a01, i32 0 1041 %hadd1 = insertelement <4 x float> %hadd0, float %a23, i32 1 1042 %hadd2 = insertelement <4 x float> %hadd1, float %b01, i32 2 1043 %hadd3 = insertelement <4 x float> %hadd2, float %b23, i32 3 1044 %result = shufflevector <4 x float> %hadd3, <4 x float> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 1045 ret <4 x float> %result 1046} 1047 1048define <4 x float> @add_v4f32_uu23(<4 x float> %a, <4 x float> %b) { 1049; CHECK-LABEL: @add_v4f32_uu23( 1050; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 1, i32 2> 1051; CHECK-NEXT: [[RESULT1:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 0, i32 3> 1052; CHECK-NEXT: [[RESULT2:%.*]] = fadd <4 x float> [[TMP2]], [[RESULT1]] 1053; CHECK-NEXT: ret <4 x float> [[RESULT2]] 1054; 1055 %a0 = extractelement <4 x float> %a, i32 0 1056 %a1 = extractelement <4 x float> %a, i32 1 1057 %a2 = extractelement <4 x float> %a, i32 2 1058 %a3 = extractelement <4 x float> %a, i32 3 1059 %a01 
= fadd float %a0, %a1 1060 %a23 = fadd float %a2, %a3 1061 %b0 = extractelement <4 x float> %b, i32 0 1062 %b1 = extractelement <4 x float> %b, i32 1 1063 %b2 = extractelement <4 x float> %b, i32 2 1064 %b3 = extractelement <4 x float> %b, i32 3 1065 %b01 = fadd float %b0, %b1 1066 %b23 = fadd float %b2, %b3 1067 %hadd0 = insertelement <4 x float> poison, float %a01, i32 0 1068 %hadd1 = insertelement <4 x float> %hadd0, float %a23, i32 1 1069 %hadd2 = insertelement <4 x float> %hadd1, float %b01, i32 2 1070 %hadd3 = insertelement <4 x float> %hadd2, float %b23, i32 3 1071 %result = shufflevector <4 x float> %hadd3, <4 x float> %a, <4 x i32> <i32 poison, i32 poison, i32 2, i32 3> 1072 ret <4 x float> %result 1073} 1074 1075define <4 x float> @add_v4f32_01uu(<4 x float> %a, <4 x float> %b) { 1076; CHECK-LABEL: @add_v4f32_01uu( 1077; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 2, i32 poison, i32 poison> 1078; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> <i32 0, i32 3, i32 poison, i32 poison> 1079; CHECK-NEXT: [[TMP4:%.*]] = fadd <4 x float> [[TMP2]], [[TMP3]] 1080; CHECK-NEXT: ret <4 x float> [[TMP4]] 1081; 1082 %a0 = extractelement <4 x float> %a, i32 0 1083 %a1 = extractelement <4 x float> %a, i32 1 1084 %a2 = extractelement <4 x float> %a, i32 2 1085 %a3 = extractelement <4 x float> %a, i32 3 1086 %a01 = fadd float %a0, %a1 1087 %a23 = fadd float %a2, %a3 1088 %b0 = extractelement <4 x float> %b, i32 0 1089 %b1 = extractelement <4 x float> %b, i32 1 1090 %b2 = extractelement <4 x float> %b, i32 2 1091 %b3 = extractelement <4 x float> %b, i32 3 1092 %b01 = fadd float %b0, %b1 1093 %b23 = fadd float %b2, %b3 1094 %hadd0 = insertelement <4 x float> poison, float %a01, i32 0 1095 %hadd1 = insertelement <4 x float> %hadd0, float %a23, i32 1 1096 %hadd2 = insertelement <4 x float> %hadd1, float %b01, i32 2 1097 %hadd3 = insertelement <4 x float> %hadd2, float %b23, 
i32 3 1098 %result = shufflevector <4 x float> %hadd3, <4 x float> %a, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison> 1099 ret <4 x float> %result 1100} 1101 1102; 1103; v8f32 1104; 1105 1106define <8 x float> @add_v8f32_01234567(<8 x float> %a, <8 x float> %b) { 1107; CHECK-LABEL: @add_v8f32_01234567( 1108; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14> 1109; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15> 1110; CHECK-NEXT: [[TMP3:%.*]] = fadd <8 x float> [[TMP1]], [[TMP2]] 1111; CHECK-NEXT: ret <8 x float> [[TMP3]] 1112; 1113 %a0 = extractelement <8 x float> %a, i32 0 1114 %a1 = extractelement <8 x float> %a, i32 1 1115 %a2 = extractelement <8 x float> %a, i32 2 1116 %a3 = extractelement <8 x float> %a, i32 3 1117 %a4 = extractelement <8 x float> %a, i32 4 1118 %a5 = extractelement <8 x float> %a, i32 5 1119 %a6 = extractelement <8 x float> %a, i32 6 1120 %a7 = extractelement <8 x float> %a, i32 7 1121 %a01 = fadd float %a0, %a1 1122 %a23 = fadd float %a2, %a3 1123 %a45 = fadd float %a4, %a5 1124 %a67 = fadd float %a6, %a7 1125 %b0 = extractelement <8 x float> %b, i32 0 1126 %b1 = extractelement <8 x float> %b, i32 1 1127 %b2 = extractelement <8 x float> %b, i32 2 1128 %b3 = extractelement <8 x float> %b, i32 3 1129 %b4 = extractelement <8 x float> %b, i32 4 1130 %b5 = extractelement <8 x float> %b, i32 5 1131 %b6 = extractelement <8 x float> %b, i32 6 1132 %b7 = extractelement <8 x float> %b, i32 7 1133 %b01 = fadd float %b0, %b1 1134 %b23 = fadd float %b2, %b3 1135 %b45 = fadd float %b4, %b5 1136 %b67 = fadd float %b6, %b7 1137 %hadd0 = insertelement <8 x float> poison, float %a01, i32 0 1138 %hadd1 = insertelement <8 x float> %hadd0, float %a23, i32 1 1139 %hadd2 = insertelement <8 x float> %hadd1, float %b01, i32 2 1140 %hadd3 = insertelement <8 x 
float> %hadd2, float %b23, i32 3 1141 %hadd4 = insertelement <8 x float> %hadd3, float %a45, i32 4 1142 %hadd5 = insertelement <8 x float> %hadd4, float %a67, i32 5 1143 %hadd6 = insertelement <8 x float> %hadd5, float %b45, i32 6 1144 %hadd7 = insertelement <8 x float> %hadd6, float %b67, i32 7 1145 %result = shufflevector <8 x float> %hadd7, <8 x float> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1146 ret <8 x float> %result 1147} 1148 1149define <8 x float> @add_v8f32_012u4567(<8 x float> %a, <8 x float> %b) { 1150; SSE2-LABEL: @add_v8f32_012u4567( 1151; SSE2-NEXT: [[A6:%.*]] = extractelement <8 x float> [[A:%.*]], i64 6 1152; SSE2-NEXT: [[A7:%.*]] = extractelement <8 x float> [[A]], i64 7 1153; SSE2-NEXT: [[A67:%.*]] = fadd float [[A6]], [[A7]] 1154; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[B:%.*]], <8 x float> poison, <2 x i32> <i32 5, i32 6> 1155; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <2 x i32> <i32 4, i32 7> 1156; SSE2-NEXT: [[TMP3:%.*]] = fadd <2 x float> [[TMP1]], [[TMP2]] 1157; SSE2-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> <i32 0, i32 2, i32 8, i32 poison, i32 4, i32 poison, i32 poison, i32 poison> 1158; SSE2-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 poison, i32 5, i32 poison, i32 poison, i32 poison> 1159; SSE2-NEXT: [[TMP6:%.*]] = fadd <8 x float> [[TMP5]], [[TMP8]] 1160; SSE2-NEXT: [[HADD5:%.*]] = insertelement <8 x float> [[TMP6]], float [[A67]], i64 5 1161; SSE2-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 1162; SSE2-NEXT: [[RESULT:%.*]] = shufflevector <8 x float> [[HADD5]], <8 x float> [[TMP7]], <8 x i32> <i32 0, i32 1, i32 2, i32 poison, i32 4, i32 5, i32 8, i32 9> 1163; SSE2-NEXT: ret <8 x float> [[RESULT]] 1164; 1165; SSE4-LABEL: 
@add_v8f32_012u4567( 1166; SSE4-NEXT: [[A6:%.*]] = extractelement <8 x float> [[A:%.*]], i64 6 1167; SSE4-NEXT: [[A7:%.*]] = extractelement <8 x float> [[A]], i64 7 1168; SSE4-NEXT: [[A67:%.*]] = fadd float [[A6]], [[A7]] 1169; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 poison, i32 4, i32 poison, i32 poison, i32 poison> 1170; SSE4-NEXT: [[TMP7:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 poison, i32 5, i32 poison, i32 poison, i32 poison> 1171; SSE4-NEXT: [[TMP6:%.*]] = fadd <8 x float> [[TMP4]], [[TMP7]] 1172; SSE4-NEXT: [[HADD5:%.*]] = insertelement <8 x float> [[TMP6]], float [[A67]], i64 5 1173; SSE4-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <8 x i32> <i32 5, i32 6, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 1174; SSE4-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <8 x i32> <i32 4, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 1175; SSE4-NEXT: [[TMP9:%.*]] = fadd <8 x float> [[TMP8]], [[TMP5]] 1176; SSE4-NEXT: [[RESULT:%.*]] = shufflevector <8 x float> [[HADD5]], <8 x float> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 2, i32 poison, i32 4, i32 5, i32 8, i32 9> 1177; SSE4-NEXT: ret <8 x float> [[RESULT]] 1178; 1179; AVX-LABEL: @add_v8f32_012u4567( 1180; AVX-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 poison, i32 4, i32 6, i32 13, i32 14> 1181; AVX-NEXT: [[TMP6:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 poison, i32 5, i32 7, i32 12, i32 15> 1182; AVX-NEXT: [[TMP7:%.*]] = fadd <8 x float> [[TMP5]], [[TMP6]] 1183; AVX-NEXT: ret <8 x float> [[TMP7]] 1184; 1185 %a0 = extractelement <8 x float> %a, i32 0 1186 %a1 = extractelement <8 x float> %a, i32 1 1187 %a2 = extractelement <8 x float> %a, i32 2 1188 %a3 
= extractelement <8 x float> %a, i32 3 1189 %a4 = extractelement <8 x float> %a, i32 4 1190 %a5 = extractelement <8 x float> %a, i32 5 1191 %a6 = extractelement <8 x float> %a, i32 6 1192 %a7 = extractelement <8 x float> %a, i32 7 1193 %a01 = fadd float %a0, %a1 1194 %a23 = fadd float %a2, %a3 1195 %a45 = fadd float %a4, %a5 1196 %a67 = fadd float %a6, %a7 1197 %b0 = extractelement <8 x float> %b, i32 0 1198 %b1 = extractelement <8 x float> %b, i32 1 1199 %b2 = extractelement <8 x float> %b, i32 2 1200 %b3 = extractelement <8 x float> %b, i32 3 1201 %b4 = extractelement <8 x float> %b, i32 4 1202 %b5 = extractelement <8 x float> %b, i32 5 1203 %b6 = extractelement <8 x float> %b, i32 6 1204 %b7 = extractelement <8 x float> %b, i32 7 1205 %b01 = fadd float %b0, %b1 1206 %b23 = fadd float %b2, %b3 1207 %b45 = fadd float %b4, %b5 1208 %b67 = fadd float %b6, %b7 1209 %hadd0 = insertelement <8 x float> poison, float %a01, i32 0 1210 %hadd1 = insertelement <8 x float> %hadd0, float %a23, i32 1 1211 %hadd2 = insertelement <8 x float> %hadd1, float %b01, i32 2 1212 %hadd3 = insertelement <8 x float> %hadd2, float %b23, i32 3 1213 %hadd4 = insertelement <8 x float> %hadd3, float %a45, i32 4 1214 %hadd5 = insertelement <8 x float> %hadd4, float %a67, i32 5 1215 %hadd6 = insertelement <8 x float> %hadd5, float %b45, i32 6 1216 %hadd7 = insertelement <8 x float> %hadd6, float %b67, i32 7 1217 %result = shufflevector <8 x float> %hadd7, <8 x float> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 poison, i32 4, i32 5, i32 6, i32 7> 1218 ret <8 x float> %result 1219} 1220 1221define <8 x float> @add_v8f32_76u43210(<8 x float> %a, <8 x float> %b) { 1222; SSE2-LABEL: @add_v8f32_76u43210( 1223; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> 1224; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> 1225; SSE2-NEXT: [[TMP3:%.*]] = fadd <4 x float> 
[[TMP1]], [[TMP2]] 1226; SSE2-NEXT: [[B0:%.*]] = extractelement <8 x float> [[B:%.*]], i64 0 1227; SSE2-NEXT: [[B1:%.*]] = extractelement <8 x float> [[B]], i64 1 1228; SSE2-NEXT: [[B01:%.*]] = fadd float [[B0]], [[B1]] 1229; SSE2-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <2 x i32> <i32 5, i32 6> 1230; SSE2-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <2 x i32> <i32 4, i32 7> 1231; SSE2-NEXT: [[TMP6:%.*]] = fadd <2 x float> [[TMP4]], [[TMP5]] 1232; SSE2-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison> 1233; SSE2-NEXT: [[HADD4:%.*]] = insertelement <8 x float> [[TMP7]], float [[B01]], i64 4 1234; SSE2-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 1235; SSE2-NEXT: [[RESULT:%.*]] = shufflevector <8 x float> [[TMP8]], <8 x float> [[HADD4]], <8 x i32> <i32 1, i32 0, i32 poison, i32 12, i32 11, i32 10, i32 9, i32 8> 1236; SSE2-NEXT: ret <8 x float> [[RESULT]] 1237; 1238; SSE4-LABEL: @add_v8f32_76u43210( 1239; SSE4-NEXT: [[B0:%.*]] = extractelement <8 x float> [[B:%.*]], i64 0 1240; SSE4-NEXT: [[B1:%.*]] = extractelement <8 x float> [[B]], i64 1 1241; SSE4-NEXT: [[B01:%.*]] = fadd float [[B0]], [[B1]] 1242; SSE4-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 poison, i32 poison, i32 poison> 1243; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 poison, i32 poison, i32 poison, i32 poison> 1244; SSE4-NEXT: [[RESULT:%.*]] = fadd <8 x float> [[TMP1]], [[TMP2]] 1245; SSE4-NEXT: [[HADD4:%.*]] = insertelement <8 x float> [[RESULT]], float [[B01]], i64 4 1246; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[B]], <8 x 
float> poison, <8 x i32> <i32 5, i32 6, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 1247; SSE4-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <8 x i32> <i32 4, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 1248; SSE4-NEXT: [[TMP6:%.*]] = fadd <8 x float> [[TMP4]], [[TMP5]] 1249; SSE4-NEXT: [[RESULT1:%.*]] = shufflevector <8 x float> [[TMP6]], <8 x float> [[HADD4]], <8 x i32> <i32 1, i32 0, i32 poison, i32 12, i32 11, i32 10, i32 9, i32 8> 1250; SSE4-NEXT: ret <8 x float> [[RESULT1]] 1251; 1252; AVX-LABEL: @add_v8f32_76u43210( 1253; AVX-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[B:%.*]], <8 x float> [[A:%.*]], <8 x i32> <i32 6, i32 5, i32 poison, i32 0, i32 14, i32 12, i32 10, i32 8> 1254; AVX-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[B]], <8 x float> [[A]], <8 x i32> <i32 7, i32 4, i32 poison, i32 1, i32 15, i32 13, i32 11, i32 9> 1255; AVX-NEXT: [[RESULT:%.*]] = fadd <8 x float> [[TMP1]], [[TMP2]] 1256; AVX-NEXT: ret <8 x float> [[RESULT]] 1257; 1258 %a0 = extractelement <8 x float> %a, i32 0 1259 %a1 = extractelement <8 x float> %a, i32 1 1260 %a2 = extractelement <8 x float> %a, i32 2 1261 %a3 = extractelement <8 x float> %a, i32 3 1262 %a4 = extractelement <8 x float> %a, i32 4 1263 %a5 = extractelement <8 x float> %a, i32 5 1264 %a6 = extractelement <8 x float> %a, i32 6 1265 %a7 = extractelement <8 x float> %a, i32 7 1266 %a01 = fadd float %a0, %a1 1267 %a23 = fadd float %a2, %a3 1268 %a45 = fadd float %a4, %a5 1269 %a67 = fadd float %a6, %a7 1270 %b0 = extractelement <8 x float> %b, i32 0 1271 %b1 = extractelement <8 x float> %b, i32 1 1272 %b2 = extractelement <8 x float> %b, i32 2 1273 %b3 = extractelement <8 x float> %b, i32 3 1274 %b4 = extractelement <8 x float> %b, i32 4 1275 %b5 = extractelement <8 x float> %b, i32 5 1276 %b6 = extractelement <8 x float> %b, i32 6 1277 %b7 = extractelement <8 x float> %b, i32 7 1278 %b01 = fadd float %b0, %b1 1279 %b23 
= fadd float %b2, %b3 1280 %b45 = fadd float %b4, %b5 1281 %b67 = fadd float %b6, %b7 1282 %hadd0 = insertelement <8 x float> poison, float %a01, i32 0 1283 %hadd1 = insertelement <8 x float> %hadd0, float %a23, i32 1 1284 %hadd2 = insertelement <8 x float> %hadd1, float %a45, i32 2 1285 %hadd3 = insertelement <8 x float> %hadd2, float %a67, i32 3 1286 %hadd4 = insertelement <8 x float> %hadd3, float %b01, i32 4 1287 %hadd5 = insertelement <8 x float> %hadd4, float %b23, i32 5 1288 %hadd6 = insertelement <8 x float> %hadd5, float %b45, i32 6 1289 %hadd7 = insertelement <8 x float> %hadd6, float %b67, i32 7 1290 %result = shufflevector <8 x float> %hadd7, <8 x float> %a, <8 x i32> <i32 7, i32 6, i32 poison, i32 4, i32 3, i32 2, i32 1, i32 0> 1291 ret <8 x float> %result 1292} 1293 1294; 1295; v2f64 1296; 1297 1298define <2 x double> @add_v2f64_01(<2 x double> %a, <2 x double> %b) { 1299; CHECK-LABEL: @add_v2f64_01( 1300; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x i32> <i32 0, i32 2> 1301; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 1, i32 3> 1302; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] 1303; CHECK-NEXT: ret <2 x double> [[TMP3]] 1304; 1305 %a0 = extractelement <2 x double> %a, i32 0 1306 %a1 = extractelement <2 x double> %a, i32 1 1307 %a01 = fadd double %a0, %a1 1308 %b0 = extractelement <2 x double> %b, i32 0 1309 %b1 = extractelement <2 x double> %b, i32 1 1310 %b01 = fadd double %b0, %b1 1311 %hadd0 = insertelement <2 x double> poison, double %a01, i32 0 1312 %hadd1 = insertelement <2 x double> %hadd0, double %b01, i32 1 1313 %result = shufflevector <2 x double> %hadd1, <2 x double> %a, <2 x i32> <i32 0, i32 1> 1314 ret <2 x double> %result 1315} 1316 1317define <2 x double> @add_v2f64_u1(<2 x double> %a, <2 x double> %b) { 1318; CHECK-LABEL: @add_v2f64_u1( 1319; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x 
double> poison, <2 x i32> <i32 poison, i32 0> 1320; CHECK-NEXT: [[RESULT1:%.*]] = fadd <2 x double> [[TMP1]], [[B]] 1321; CHECK-NEXT: ret <2 x double> [[RESULT1]] 1322; 1323 %a0 = extractelement <2 x double> %a, i32 0 1324 %a1 = extractelement <2 x double> %a, i32 1 1325 %a01 = fadd double %a0, %a1 1326 %b0 = extractelement <2 x double> %b, i32 0 1327 %b1 = extractelement <2 x double> %b, i32 1 1328 %b01 = fadd double %b0, %b1 1329 %hadd0 = insertelement <2 x double> poison, double %a01, i32 0 1330 %hadd1 = insertelement <2 x double> %hadd0, double %b01, i32 1 1331 %result = shufflevector <2 x double> %hadd1, <2 x double> %a, <2 x i32> <i32 poison, i32 1> 1332 ret <2 x double> %result 1333} 1334 1335define <2 x double> @add_v2f64_0u(<2 x double> %a, <2 x double> %b) { 1336; CHECK-LABEL: @add_v2f64_0u( 1337; CHECK-NEXT: [[RESULT:%.*]] = shufflevector <2 x double> [[TMP1:%.*]], <2 x double> poison, <2 x i32> <i32 1, i32 poison> 1338; CHECK-NEXT: [[RESULT1:%.*]] = fadd <2 x double> [[TMP1]], [[RESULT]] 1339; CHECK-NEXT: ret <2 x double> [[RESULT1]] 1340; 1341 %a0 = extractelement <2 x double> %a, i32 0 1342 %a1 = extractelement <2 x double> %a, i32 1 1343 %a01 = fadd double %a0, %a1 1344 %b0 = extractelement <2 x double> %b, i32 0 1345 %b1 = extractelement <2 x double> %b, i32 1 1346 %b01 = fadd double %b0, %b1 1347 %hadd0 = insertelement <2 x double> poison, double %a01, i32 0 1348 %hadd1 = insertelement <2 x double> %hadd0, double %b01, i32 1 1349 %result = shufflevector <2 x double> %hadd1, <2 x double> %a, <2 x i32> <i32 0, i32 poison> 1350 ret <2 x double> %result 1351} 1352 1353; 1354; v4f64 1355; 1356 1357define <4 x double> @add_v4f64_0123(<4 x double> %a, <4 x double> %b) { 1358; CHECK-LABEL: @add_v4f64_0123( 1359; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 6> 1360; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 5, i32 3, 
i32 7> 1361; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]] 1362; CHECK-NEXT: ret <4 x double> [[TMP3]] 1363; 1364 %a0 = extractelement <4 x double> %a, i32 0 1365 %a1 = extractelement <4 x double> %a, i32 1 1366 %a2 = extractelement <4 x double> %a, i32 2 1367 %a3 = extractelement <4 x double> %a, i32 3 1368 %a01 = fadd double %a0, %a1 1369 %a23 = fadd double %a2, %a3 1370 %b0 = extractelement <4 x double> %b, i32 0 1371 %b1 = extractelement <4 x double> %b, i32 1 1372 %b2 = extractelement <4 x double> %b, i32 2 1373 %b3 = extractelement <4 x double> %b, i32 3 1374 %b01 = fadd double %b0, %b1 1375 %b23 = fadd double %b2, %b3 1376 %hadd0 = insertelement <4 x double> poison, double %a01, i32 0 1377 %hadd1 = insertelement <4 x double> %hadd0, double %b01, i32 1 1378 %hadd2 = insertelement <4 x double> %hadd1, double %a23, i32 2 1379 %hadd3 = insertelement <4 x double> %hadd2, double %b23, i32 3 1380 %result = shufflevector <4 x double> %hadd3, <4 x double> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1381 ret <4 x double> %result 1382} 1383 1384define <4 x double> @add_v4f64_u123(<4 x double> %a, <4 x double> %b) { 1385; SSE2-LABEL: @add_v4f64_u123( 1386; SSE2-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B:%.*]], i64 2 1387; SSE2-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i64 3 1388; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B]], <4 x double> [[A:%.*]], <2 x i32> <i32 0, i32 6> 1389; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> [[A]], <2 x i32> <i32 1, i32 7> 1390; SSE2-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] 1391; SSE2-NEXT: [[B23:%.*]] = fadd double [[B2]], [[B3]] 1392; SSE2-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 poison, i32 0, i32 1, i32 poison> 1393; SSE2-NEXT: [[RESULT:%.*]] = insertelement <4 x double> [[TMP4]], double [[B23]], i64 3 1394; SSE2-NEXT: ret <4 x double> [[RESULT]] 1395; 1396; SSE4-LABEL: @add_v4f64_u123( 
1397; SSE4-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B:%.*]], i64 2 1398; SSE4-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i64 3 1399; SSE4-NEXT: [[B23:%.*]] = fadd double [[B2]], [[B3]] 1400; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> [[A:%.*]], <4 x i32> <i32 poison, i32 0, i32 6, i32 poison> 1401; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[B]], <4 x double> [[A]], <4 x i32> <i32 poison, i32 1, i32 7, i32 poison> 1402; SSE4-NEXT: [[TMP3:%.*]] = fadd <4 x double> [[TMP2]], [[TMP4]] 1403; SSE4-NEXT: [[RESULT:%.*]] = insertelement <4 x double> [[TMP3]], double [[B23]], i64 3 1404; SSE4-NEXT: ret <4 x double> [[RESULT]] 1405; 1406; AVX-LABEL: @add_v4f64_u123( 1407; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> [[A:%.*]], <4 x i32> <i32 poison, i32 0, i32 6, i32 2> 1408; AVX-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[B]], <4 x double> [[A]], <4 x i32> <i32 poison, i32 1, i32 7, i32 3> 1409; AVX-NEXT: [[TMP4:%.*]] = fadd <4 x double> [[TMP2]], [[TMP3]] 1410; AVX-NEXT: ret <4 x double> [[TMP4]] 1411; 1412 %a0 = extractelement <4 x double> %a, i32 0 1413 %a1 = extractelement <4 x double> %a, i32 1 1414 %a2 = extractelement <4 x double> %a, i32 2 1415 %a3 = extractelement <4 x double> %a, i32 3 1416 %a01 = fadd double %a0, %a1 1417 %a23 = fadd double %a2, %a3 1418 %b0 = extractelement <4 x double> %b, i32 0 1419 %b1 = extractelement <4 x double> %b, i32 1 1420 %b2 = extractelement <4 x double> %b, i32 2 1421 %b3 = extractelement <4 x double> %b, i32 3 1422 %b01 = fadd double %b0, %b1 1423 %b23 = fadd double %b2, %b3 1424 %hadd0 = insertelement <4 x double> poison, double %a01, i32 0 1425 %hadd1 = insertelement <4 x double> %hadd0, double %b01, i32 1 1426 %hadd2 = insertelement <4 x double> %hadd1, double %a23, i32 2 1427 %hadd3 = insertelement <4 x double> %hadd2, double %b23, i32 3 1428 %result = shufflevector <4 x double> %hadd3, <4 x double> %a, <4 x i32> <i32 poison, i32 1, 
i32 2, i32 3> 1429 ret <4 x double> %result 1430} 1431 1432define <4 x double> @add_v4f64_0u23(<4 x double> %a, <4 x double> %b) { 1433; SSE2-LABEL: @add_v4f64_0u23( 1434; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> poison, <2 x i32> <i32 1, i32 2> 1435; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <2 x i32> <i32 0, i32 3> 1436; SSE2-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] 1437; SSE2-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B:%.*]], i64 2 1438; SSE2-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i64 3 1439; SSE2-NEXT: [[B23:%.*]] = fadd double [[B2]], [[B3]] 1440; SSE2-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 poison, i32 1, i32 poison> 1441; SSE2-NEXT: [[RESULT:%.*]] = insertelement <4 x double> [[TMP4]], double [[B23]], i64 3 1442; SSE2-NEXT: ret <4 x double> [[RESULT]] 1443; 1444; SSE4-LABEL: @add_v4f64_0u23( 1445; SSE4-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B:%.*]], i64 2 1446; SSE4-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i64 3 1447; SSE4-NEXT: [[B23:%.*]] = fadd double [[B2]], [[B3]] 1448; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> poison, <4 x i32> <i32 1, i32 poison, i32 2, i32 poison> 1449; SSE4-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> <i32 0, i32 poison, i32 3, i32 poison> 1450; SSE4-NEXT: [[TMP4:%.*]] = fadd <4 x double> [[TMP2]], [[TMP3]] 1451; SSE4-NEXT: [[RESULT:%.*]] = insertelement <4 x double> [[TMP4]], double [[B23]], i64 3 1452; SSE4-NEXT: ret <4 x double> [[RESULT]] 1453; 1454; AVX-LABEL: @add_v4f64_0u23( 1455; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 1, i32 poison, i32 2, i32 6> 1456; AVX-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 0, i32 poison, i32 3, i32 7> 1457; AVX-NEXT: 
[[TMP4:%.*]] = fadd <4 x double> [[TMP2]], [[TMP3]] 1458; AVX-NEXT: ret <4 x double> [[TMP4]] 1459; 1460 %a0 = extractelement <4 x double> %a, i32 0 1461 %a1 = extractelement <4 x double> %a, i32 1 1462 %a2 = extractelement <4 x double> %a, i32 2 1463 %a3 = extractelement <4 x double> %a, i32 3 1464 %a01 = fadd double %a0, %a1 1465 %a23 = fadd double %a2, %a3 1466 %b0 = extractelement <4 x double> %b, i32 0 1467 %b1 = extractelement <4 x double> %b, i32 1 1468 %b2 = extractelement <4 x double> %b, i32 2 1469 %b3 = extractelement <4 x double> %b, i32 3 1470 %b01 = fadd double %b0, %b1 1471 %b23 = fadd double %b2, %b3 1472 %hadd0 = insertelement <4 x double> poison, double %a01, i32 0 1473 %hadd1 = insertelement <4 x double> %hadd0, double %b01, i32 1 1474 %hadd2 = insertelement <4 x double> %hadd1, double %a23, i32 2 1475 %hadd3 = insertelement <4 x double> %hadd2, double %b23, i32 3 1476 %result = shufflevector <4 x double> %hadd3, <4 x double> %a, <4 x i32> <i32 0, i32 poison, i32 2, i32 3> 1477 ret <4 x double> %result 1478} 1479 1480define <4 x double> @add_v4f64_01u3(<4 x double> %a, <4 x double> %b) { 1481; SSE2-LABEL: @add_v4f64_01u3( 1482; SSE2-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B:%.*]], i64 2 1483; SSE2-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i64 3 1484; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B]], <2 x i32> <i32 0, i32 4> 1485; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 5> 1486; SSE2-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] 1487; SSE2-NEXT: [[B23:%.*]] = fadd double [[B2]], [[B3]] 1488; SSE2-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison> 1489; SSE2-NEXT: [[RESULT:%.*]] = insertelement <4 x double> [[TMP4]], double [[B23]], i64 3 1490; SSE2-NEXT: ret <4 x double> [[RESULT]] 1491; 1492; SSE4-LABEL: @add_v4f64_01u3( 1493; SSE4-NEXT: 
[[B2:%.*]] = extractelement <4 x double> [[B:%.*]], i64 2 1494; SSE4-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i64 3 1495; SSE4-NEXT: [[B23:%.*]] = fadd double [[B2]], [[B3]] 1496; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B]], <4 x i32> <i32 0, i32 4, i32 poison, i32 poison> 1497; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 5, i32 poison, i32 poison> 1498; SSE4-NEXT: [[TMP3:%.*]] = fadd <4 x double> [[TMP2]], [[TMP4]] 1499; SSE4-NEXT: [[RESULT:%.*]] = insertelement <4 x double> [[TMP3]], double [[B23]], i64 3 1500; SSE4-NEXT: ret <4 x double> [[RESULT]] 1501; 1502; AVX-LABEL: @add_v4f64_01u3( 1503; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 poison, i32 6> 1504; AVX-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 5, i32 poison, i32 7> 1505; AVX-NEXT: [[TMP4:%.*]] = fadd <4 x double> [[TMP2]], [[TMP3]] 1506; AVX-NEXT: ret <4 x double> [[TMP4]] 1507; 1508 %a0 = extractelement <4 x double> %a, i32 0 1509 %a1 = extractelement <4 x double> %a, i32 1 1510 %a2 = extractelement <4 x double> %a, i32 2 1511 %a3 = extractelement <4 x double> %a, i32 3 1512 %a01 = fadd double %a0, %a1 1513 %a23 = fadd double %a2, %a3 1514 %b0 = extractelement <4 x double> %b, i32 0 1515 %b1 = extractelement <4 x double> %b, i32 1 1516 %b2 = extractelement <4 x double> %b, i32 2 1517 %b3 = extractelement <4 x double> %b, i32 3 1518 %b01 = fadd double %b0, %b1 1519 %b23 = fadd double %b2, %b3 1520 %hadd0 = insertelement <4 x double> poison, double %a01, i32 0 1521 %hadd1 = insertelement <4 x double> %hadd0, double %b01, i32 1 1522 %hadd2 = insertelement <4 x double> %hadd1, double %a23, i32 2 1523 %hadd3 = insertelement <4 x double> %hadd2, double %b23, i32 3 1524 %result = shufflevector <4 x double> %hadd3, <4 x double> %a, <4 x i32> <i32 0, i32 1, i32 poison, i32 3> 
1525 ret <4 x double> %result 1526} 1527 1528define <4 x double> @add_v4f64_012u(<4 x double> %a, <4 x double> %b) { 1529; SSE2-LABEL: @add_v4f64_012u( 1530; SSE2-NEXT: [[A2:%.*]] = extractelement <4 x double> [[A:%.*]], i64 2 1531; SSE2-NEXT: [[A3:%.*]] = extractelement <4 x double> [[A]], i64 3 1532; SSE2-NEXT: [[A23:%.*]] = fadd double [[A2]], [[A3]] 1533; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B:%.*]], <2 x i32> <i32 0, i32 4> 1534; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 5> 1535; SSE2-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] 1536; SSE2-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison> 1537; SSE2-NEXT: [[RESULT:%.*]] = insertelement <4 x double> [[TMP4]], double [[A23]], i64 2 1538; SSE2-NEXT: ret <4 x double> [[RESULT]] 1539; 1540; SSE4-LABEL: @add_v4f64_012u( 1541; SSE4-NEXT: [[A2:%.*]] = extractelement <4 x double> [[A:%.*]], i64 2 1542; SSE4-NEXT: [[A3:%.*]] = extractelement <4 x double> [[A]], i64 3 1543; SSE4-NEXT: [[A23:%.*]] = fadd double [[A2]], [[A3]] 1544; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 poison, i32 poison> 1545; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 5, i32 poison, i32 poison> 1546; SSE4-NEXT: [[TMP3:%.*]] = fadd <4 x double> [[TMP2]], [[TMP4]] 1547; SSE4-NEXT: [[RESULT:%.*]] = insertelement <4 x double> [[TMP3]], double [[A23]], i64 2 1548; SSE4-NEXT: ret <4 x double> [[RESULT]] 1549; 1550; AVX-LABEL: @add_v4f64_012u( 1551; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 poison> 1552; AVX-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 poison> 1553; AVX-NEXT: [[TMP4:%.*]] = fadd <4 
x double> [[TMP2]], [[TMP3]] 1554; AVX-NEXT: ret <4 x double> [[TMP4]] 1555; 1556 %a0 = extractelement <4 x double> %a, i32 0 1557 %a1 = extractelement <4 x double> %a, i32 1 1558 %a2 = extractelement <4 x double> %a, i32 2 1559 %a3 = extractelement <4 x double> %a, i32 3 1560 %a01 = fadd double %a0, %a1 1561 %a23 = fadd double %a2, %a3 1562 %b0 = extractelement <4 x double> %b, i32 0 1563 %b1 = extractelement <4 x double> %b, i32 1 1564 %b2 = extractelement <4 x double> %b, i32 2 1565 %b3 = extractelement <4 x double> %b, i32 3 1566 %b01 = fadd double %b0, %b1 1567 %b23 = fadd double %b2, %b3 1568 %hadd0 = insertelement <4 x double> poison, double %a01, i32 0 1569 %hadd1 = insertelement <4 x double> %hadd0, double %b01, i32 1 1570 %hadd2 = insertelement <4 x double> %hadd1, double %a23, i32 2 1571 %hadd3 = insertelement <4 x double> %hadd2, double %b23, i32 3 1572 %result = shufflevector <4 x double> %hadd3, <4 x double> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 1573 ret <4 x double> %result 1574} 1575 1576define <4 x double> @add_v4f64_uu23(<4 x double> %a, <4 x double> %b) { 1577; SSE2-LABEL: @add_v4f64_uu23( 1578; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> <i32 2, i32 6> 1579; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 3, i32 7> 1580; SSE2-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] 1581; SSE2-NEXT: [[RESULT1:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 0, i32 1> 1582; SSE2-NEXT: ret <4 x double> [[RESULT1]] 1583; 1584; SSE4-LABEL: @add_v4f64_uu23( 1585; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 poison, i32 poison, i32 2, i32 6> 1586; SSE4-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 poison, i32 poison, i32 3, i32 7> 1587; SSE4-NEXT: [[RESULT1:%.*]] = fadd <4 x double> 
[[TMP2]], [[TMP3]] 1588; SSE4-NEXT: ret <4 x double> [[RESULT1]] 1589; 1590; AVX-LABEL: @add_v4f64_uu23( 1591; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 poison, i32 poison, i32 2, i32 6> 1592; AVX-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 poison, i32 poison, i32 3, i32 7> 1593; AVX-NEXT: [[RESULT1:%.*]] = fadd <4 x double> [[TMP2]], [[TMP3]] 1594; AVX-NEXT: ret <4 x double> [[RESULT1]] 1595; 1596 %a0 = extractelement <4 x double> %a, i32 0 1597 %a1 = extractelement <4 x double> %a, i32 1 1598 %a2 = extractelement <4 x double> %a, i32 2 1599 %a3 = extractelement <4 x double> %a, i32 3 1600 %a01 = fadd double %a0, %a1 1601 %a23 = fadd double %a2, %a3 1602 %b0 = extractelement <4 x double> %b, i32 0 1603 %b1 = extractelement <4 x double> %b, i32 1 1604 %b2 = extractelement <4 x double> %b, i32 2 1605 %b3 = extractelement <4 x double> %b, i32 3 1606 %b01 = fadd double %b0, %b1 1607 %b23 = fadd double %b2, %b3 1608 %hadd0 = insertelement <4 x double> poison, double %a01, i32 0 1609 %hadd1 = insertelement <4 x double> %hadd0, double %b01, i32 1 1610 %hadd2 = insertelement <4 x double> %hadd1, double %a23, i32 2 1611 %hadd3 = insertelement <4 x double> %hadd2, double %b23, i32 3 1612 %result = shufflevector <4 x double> %hadd3, <4 x double> %a, <4 x i32> <i32 poison, i32 poison, i32 2, i32 3> 1613 ret <4 x double> %result 1614} 1615 ; add_v4f64_01uu builds all four pairwise sums with scalar extractelement/fadd/insertelement (lane 0 = a0+a1, lane 1 = b0+b1, lane 2 = a2+a3, lane 3 = b2+b3) and then keeps only lanes 0 and 1 through the final shufflevector, so lanes 2 and 3 of the result are poison. The autogenerated checks expect two shuffles of %a and %b feeding one vector fadd (2-wide and then re-widened under the SSE2 prefix, 4-wide under the SSE4 and AVX prefixes). 1616define <4 x double> @add_v4f64_01uu(<4 x double> %a, <4 x double> %b) { 1617; SSE2-LABEL: @add_v4f64_01uu( 1618; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> <i32 0, i32 4> 1619; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 5> 1620; SSE2-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] 1621; SSE2-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 
poison> 1622; SSE2-NEXT: ret <4 x double> [[TMP4]] 1623; 1624; SSE4-LABEL: @add_v4f64_01uu( 1625; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 poison, i32 poison> 1626; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 5, i32 poison, i32 poison> 1627; SSE4-NEXT: [[TMP3:%.*]] = fadd <4 x double> [[TMP2]], [[TMP4]] 1628; SSE4-NEXT: ret <4 x double> [[TMP3]] 1629; 1630; AVX-LABEL: @add_v4f64_01uu( 1631; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 poison, i32 poison> 1632; AVX-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 5, i32 poison, i32 poison> 1633; AVX-NEXT: [[TMP3:%.*]] = fadd <4 x double> [[TMP2]], [[TMP4]] 1634; AVX-NEXT: ret <4 x double> [[TMP3]] 1635; 1636 %a0 = extractelement <4 x double> %a, i32 0 1637 %a1 = extractelement <4 x double> %a, i32 1 1638 %a2 = extractelement <4 x double> %a, i32 2 1639 %a3 = extractelement <4 x double> %a, i32 3 1640 %a01 = fadd double %a0, %a1 1641 %a23 = fadd double %a2, %a3 1642 %b0 = extractelement <4 x double> %b, i32 0 1643 %b1 = extractelement <4 x double> %b, i32 1 1644 %b2 = extractelement <4 x double> %b, i32 2 1645 %b3 = extractelement <4 x double> %b, i32 3 1646 %b01 = fadd double %b0, %b1 1647 %b23 = fadd double %b2, %b3 1648 %hadd0 = insertelement <4 x double> poison, double %a01, i32 0 1649 %hadd1 = insertelement <4 x double> %hadd0, double %b01, i32 1 1650 %hadd2 = insertelement <4 x double> %hadd1, double %a23, i32 2 1651 %hadd3 = insertelement <4 x double> %hadd2, double %b23, i32 3 1652 %result = shufflevector <4 x double> %hadd3, <4 x double> %a, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison> 1653 ret <4 x double> %result 1654} 1655 ; add_v4f64_32u0 builds the sums in the order lane 0 = a0+a1, lane 1 = a2+a3, lane 2 = b0+b1, lane 3 = b2+b3, and the final shufflevector mask <3, 2, poison, 0> returns (b2+b3, b0+b1, poison, a0+a1), so the a2+a3 lane is never selected. Under the SSE2 and SSE4 prefixes the autogenerated checks keep a0+a1 as a scalar fadd inserted into lane 3 next to a vector fadd of the %b pairs, while the AVX prefix expects a single 4-wide fadd of two shuffles that read from both %b and %a. 1656define <4 x double> @add_v4f64_32u0(<4 x double> %a, <4 x double> %b) { 1657; SSE2-LABEL: @add_v4f64_32u0( 1658; SSE2-NEXT: 
[[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i64 0 1659; SSE2-NEXT: [[A1:%.*]] = extractelement <4 x double> [[A]], i64 1 1660; SSE2-NEXT: [[A01:%.*]] = fadd double [[A0]], [[A1]] 1661; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 2, i32 0> 1662; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <2 x i32> <i32 3, i32 1> 1663; SSE2-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] 1664; SSE2-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison> 1665; SSE2-NEXT: [[RESULT:%.*]] = insertelement <4 x double> [[TMP4]], double [[A01]], i64 3 1666; SSE2-NEXT: ret <4 x double> [[RESULT]] 1667; 1668; SSE4-LABEL: @add_v4f64_32u0( 1669; SSE4-NEXT: [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i64 0 1670; SSE4-NEXT: [[A1:%.*]] = extractelement <4 x double> [[A]], i64 1 1671; SSE4-NEXT: [[A01:%.*]] = fadd double [[A0]], [[A1]] 1672; SSE4-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <4 x i32> <i32 2, i32 0, i32 poison, i32 poison> 1673; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <4 x i32> <i32 3, i32 1, i32 poison, i32 poison> 1674; SSE4-NEXT: [[RESULT:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]] 1675; SSE4-NEXT: [[RESULT1:%.*]] = insertelement <4 x double> [[RESULT]], double [[A01]], i64 3 1676; SSE4-NEXT: ret <4 x double> [[RESULT1]] 1677; 1678; AVX-LABEL: @add_v4f64_32u0( 1679; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> [[A:%.*]], <4 x i32> <i32 2, i32 0, i32 poison, i32 4> 1680; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> [[A]], <4 x i32> <i32 3, i32 1, i32 poison, i32 5> 1681; AVX-NEXT: [[RESULT:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]] 1682; AVX-NEXT: ret <4 x double> [[RESULT]] 1683; 1684 %a0 = extractelement <4 x double> %a, i32 0 1685 %a1 = extractelement <4 
x double> %a, i32 1 1686 %a2 = extractelement <4 x double> %a, i32 2 1687 %a3 = extractelement <4 x double> %a, i32 3 1688 %a01 = fadd double %a0, %a1 1689 %a23 = fadd double %a2, %a3 1690 %b0 = extractelement <4 x double> %b, i32 0 1691 %b1 = extractelement <4 x double> %b, i32 1 1692 %b2 = extractelement <4 x double> %b, i32 2 1693 %b3 = extractelement <4 x double> %b, i32 3 1694 %b01 = fadd double %b0, %b1 1695 %b23 = fadd double %b2, %b3 1696 %hadd0 = insertelement <4 x double> poison, double %a01, i32 0 1697 %hadd1 = insertelement <4 x double> %hadd0, double %a23, i32 1 1698 %hadd2 = insertelement <4 x double> %hadd1, double %b01, i32 2 1699 %hadd3 = insertelement <4 x double> %hadd2, double %b23, i32 3 1700 %result = shufflevector <4 x double> %hadd3, <4 x double> %a, <4 x i32> <i32 3, i32 2, i32 poison, i32 0> 1701 ret <4 x double> %result 1702} 1703