; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,SSE2
; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,SSE4
; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,AVX,AVX512
; RUN: opt < %s -passes="default<O3>" -S -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,SSE2
; RUN: opt < %s -passes="default<O3>" -S -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,SSE4
; RUN: opt < %s -passes="default<O3>" -S -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
; RUN: opt < %s -passes="default<O3>" -S -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,AVX,AVX512

; PR34072 - failure to canonicalize to (sub (shuffle a, b),(shuffle a, b)) for optimal horizontal sub patterns (with undemanded elements)

; Every test below builds a horizontal subtract from scalar pieces: adjacent
; element pairs of %a and %b are extracted, subtracted (even element minus odd
; element), inserted into a result vector, and finally passed through a
; shufflevector whose mask decides which result lanes are actually demanded
; (a poison mask entry leaves that lane undemanded). The function-name suffix
; spells out that final mask in hex digits, with 'u' for a poison entry.

;
; v8i16
;

; All eight result lanes are demanded (identity result shuffle).
define <8 x i16> @sub_v8i16_01234567(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: @sub_v8i16_01234567(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
; CHECK-NEXT:    [[TMP3:%.*]] = sub <8 x i16> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
;
  %a0 = extractelement <8 x i16> %a, i32 0
  %a1 = extractelement <8 x i16> %a, i32 1
  %a2 = extractelement <8 x i16> %a, i32 2
  %a3 = extractelement <8 x i16> %a, i32 3
  %a4 = extractelement <8 x i16> %a, i32 4
  %a5 = extractelement <8 x i16> %a, i32 5
  %a6 = extractelement <8 x i16> %a, i32 6
  %a7 = extractelement <8 x i16> %a, i32 7
  %a01 = sub i16 %a0, %a1
  %a23 = sub i16 %a2, %a3
  %a45 = sub i16 %a4, %a5
  %a67 = sub i16 %a6, %a7
  %b0 = extractelement <8 x i16> %b, i32 0
  %b1 = extractelement <8 x i16> %b, i32 1
  %b2 = extractelement <8 x i16> %b, i32 2
  %b3 = extractelement <8 x i16> %b, i32 3
  %b4 = extractelement <8 x i16> %b, i32 4
  %b5 = extractelement <8 x i16> %b, i32 5
  %b6 = extractelement <8 x i16> %b, i32 6
  %b7 = extractelement <8 x i16> %b, i32 7
  %b01 = sub i16 %b0, %b1
  %b23 = sub i16 %b2, %b3
  %b45 = sub i16 %b4, %b5
  %b67 = sub i16 %b6, %b7
  %hsub0 = insertelement <8 x i16> poison, i16 %a01, i32 0
  %hsub1 = insertelement <8 x i16> %hsub0, i16 %a23, i32 1
  %hsub2 = insertelement <8 x i16> %hsub1, i16 %a45, i32 2
  %hsub3 = insertelement <8 x i16> %hsub2, i16 %a67, i32 3
  %hsub4 = insertelement <8 x i16> %hsub3, i16 %b01, i32 4
  %hsub5 = insertelement <8 x i16> %hsub4, i16 %b23, i32 5
  %hsub6 = insertelement <8 x i16> %hsub5, i16 %b45, i32 6
  %hsub7 = insertelement <8 x i16> %hsub6, i16 %b67, i32 7
  %result = shufflevector <8 x i16> %hsub7, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %result
}

; Result lane 0 is poison in the final mask, so the %a0 - %a1 pair is undemanded.
define <8 x i16> @sub_v8i16_u1234567(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: @sub_v8i16_u1234567(
; SSE2-NEXT:    [[SHIFT3:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 7, i32 poison>
; SSE2-NEXT:    [[TMP6:%.*]] = sub <8 x i16> [[A]], [[SHIFT3]]
; SSE2-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> <i32 poison, i32 2, i32 4, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT:    [[TMP5:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> <i32 poison, i32 3, i32 5, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT:    [[HSUB22:%.*]] = sub <8 x i16> [[TMP4]], [[TMP5]]
; SSE2-NEXT:    [[HSUB3:%.*]] = shufflevector <8 x i16> [[HSUB22]], <8 x i16> [[TMP6]], <8 x i32> <i32 poison, i32 1, i32 2, i32 14, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT:    [[TMP3:%.*]] = sub <8 x i16> [[TMP1]], [[TMP2]]
; SSE2-NEXT:    [[RESULT:%.*]] = shufflevector <8 x i16> [[HSUB3]], <8 x i16> [[TMP3]], <8 x i32> <i32 poison, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; SSE2-NEXT:    ret <8 x i16> [[RESULT]]
;
; SSE4-LABEL: @sub_v8i16_u1234567(
; SSE4-NEXT:    [[TMP5:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i32> <i32 poison, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; SSE4-NEXT:    [[TMP6:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32> <i32 poison, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
; SSE4-NEXT:    [[TMP7:%.*]] = sub <8 x i16> [[TMP5]], [[TMP6]]
; SSE4-NEXT:    ret <8 x i16> [[TMP7]]
;
; AVX-LABEL: @sub_v8i16_u1234567(
; AVX-NEXT:    [[TMP5:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i32> <i32 poison, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; AVX-NEXT:    [[TMP6:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32> <i32 poison, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
; AVX-NEXT:    [[TMP7:%.*]] = sub <8 x i16> [[TMP5]], [[TMP6]]
; AVX-NEXT:    ret <8 x i16> [[TMP7]]
;
  %a0 = extractelement <8 x i16> %a, i32 0
  %a1 = extractelement <8 x i16> %a, i32 1
  %a2 = extractelement <8 x i16> %a, i32 2
  %a3 = extractelement <8 x i16> %a, i32 3
  %a4 = extractelement <8 x i16> %a, i32 4
  %a5 = extractelement <8 x i16> %a, i32 5
  %a6 = extractelement <8 x i16> %a, i32 6
  %a7 = extractelement <8 x i16> %a, i32 7
  %a01 = sub i16 %a0, %a1
  %a23 = sub i16 %a2, %a3
  %a45 = sub i16 %a4, %a5
  %a67 = sub i16 %a6, %a7
  %b0 = extractelement <8 x i16> %b, i32 0
  %b1 = extractelement <8 x i16> %b, i32 1
  %b2 = extractelement <8 x i16> %b, i32 2
  %b3 = extractelement <8 x i16> %b, i32 3
  %b4 = extractelement <8 x i16> %b, i32 4
  %b5 = extractelement <8 x i16> %b, i32 5
  %b6 = extractelement <8 x i16> %b, i32 6
  %b7 = extractelement <8 x i16> %b, i32 7
  %b01 = sub i16 %b0, %b1
  %b23 = sub i16 %b2, %b3
  %b45 = sub i16 %b4, %b5
  %b67 = sub i16 %b6, %b7
  %hsub0 = insertelement <8 x i16> poison, i16 %a01, i32 0
  %hsub1 = insertelement <8 x i16> %hsub0, i16 %a23, i32 1
  %hsub2 = insertelement <8 x i16> %hsub1, i16 %a45, i32 2
  %hsub3 = insertelement <8 x i16> %hsub2, i16 %a67, i32 3
  %hsub4 = insertelement <8 x i16> %hsub3, i16 %b01, i32 4
  %hsub5 = insertelement <8 x i16> %hsub4, i16 %b23, i32 5
  %hsub6 = insertelement <8 x i16> %hsub5, i16 %b45, i32 6
  %hsub7 = insertelement <8 x i16> %hsub6, i16 %b67, i32 7
  %result = shufflevector <8 x i16> %hsub7, <8 x i16> %a, <8 x i32> <i32 poison, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %result
}

; The final mask reverses the lanes and leaves result lane 2 poison, so
; %hsub7 lane 5 (the %b2 - %b3 pair) is undemanded.
define <8 x i16> @sub_v8i16_76u43210(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: @sub_v8i16_76u43210(
; SSE2-NEXT:    [[SHIFT:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT:    [[TMP1:%.*]] = sub <8 x i16> [[A]], [[SHIFT]]
; SSE2-NEXT:    [[SHIFT2:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 5, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT:    [[TMP2:%.*]] = sub <8 x i16> [[B]], [[SHIFT2]]
; SSE2-NEXT:    [[SHIFT3:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 6>
; SSE2-NEXT:    [[TMP3:%.*]] = sub <8 x i16> [[SHIFT3]], [[B]]
; SSE2-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32> <i32 2, i32 4, i32 6, i32 8, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT:    [[TMP5:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32> <i32 3, i32 5, i32 7, i32 9, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT:    [[TMP6:%.*]] = sub <8 x i16> [[TMP4]], [[TMP5]]
; SSE2-NEXT:    [[HSUB41:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP6]], <8 x i32> <i32 0, i32 8, i32 9, i32 10, i32 11, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT:    [[HSUB6:%.*]] = shufflevector <8 x i16> [[HSUB41]], <8 x i16> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 poison, i32 12, i32 poison>
; SSE2-NEXT:    [[RESULT:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[HSUB6]], <8 x i32> <i32 7, i32 14, i32 poison, i32 12, i32 11, i32 10, i32 9, i32 8>
; SSE2-NEXT:    ret <8 x i16> [[RESULT]]
;
; SSE4-LABEL: @sub_v8i16_76u43210(
; SSE4-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT:    [[HSUB22:%.*]] = sub <8 x i16> [[TMP1]], [[TMP2]]
; SSE4-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B:%.*]], <8 x i32> <i32 6, i32 8, i32 poison, i32 12, i32 14, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32> <i32 7, i32 9, i32 poison, i32 13, i32 15, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT:    [[TMP5:%.*]] = sub <8 x i16> [[TMP3]], [[TMP4]]
; SSE4-NEXT:    [[RESULT:%.*]] = shufflevector <8 x i16> [[TMP5]], <8 x i16> [[HSUB22]], <8 x i32> <i32 4, i32 3, i32 poison, i32 1, i32 0, i32 10, i32 9, i32 8>
; SSE4-NEXT:    ret <8 x i16> [[RESULT]]
;
; AVX-LABEL: @sub_v8i16_76u43210(
; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; AVX-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; AVX-NEXT:    [[HSUB22:%.*]] = sub <8 x i16> [[TMP1]], [[TMP2]]
; AVX-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B:%.*]], <8 x i32> <i32 6, i32 8, i32 poison, i32 12, i32 14, i32 poison, i32 poison, i32 poison>
; AVX-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32> <i32 7, i32 9, i32 poison, i32 13, i32 15, i32 poison, i32 poison, i32 poison>
; AVX-NEXT:    [[TMP5:%.*]] = sub <8 x i16> [[TMP3]], [[TMP4]]
; AVX-NEXT:    [[RESULT:%.*]] = shufflevector <8 x i16> [[TMP5]], <8 x i16> [[HSUB22]], <8 x i32> <i32 4, i32 3, i32 poison, i32 1, i32 0, i32 10, i32 9, i32 8>
; AVX-NEXT:    ret <8 x i16> [[RESULT]]
;
  %a0 = extractelement <8 x i16> %a, i32 0
  %a1 = extractelement <8 x i16> %a, i32 1
  %a2 = extractelement <8 x i16> %a, i32 2
  %a3 = extractelement <8 x i16> %a, i32 3
  %a4 = extractelement <8 x i16> %a, i32 4
  %a5 = extractelement <8 x i16> %a, i32 5
  %a6 = extractelement <8 x i16> %a, i32 6
  %a7 = extractelement <8 x i16> %a, i32 7
  %a01 = sub i16 %a0, %a1
  %a23 = sub i16 %a2, %a3
  %a45 = sub i16 %a4, %a5
  %a67 = sub i16 %a6, %a7
  %b0 = extractelement <8 x i16> %b, i32 0
  %b1 = extractelement <8 x i16> %b, i32 1
  %b2 = extractelement <8 x i16> %b, i32 2
  %b3 = extractelement <8 x i16> %b, i32 3
  %b4 = extractelement <8 x i16> %b, i32 4
  %b5 = extractelement <8 x i16> %b, i32 5
  %b6 = extractelement <8 x i16> %b, i32 6
  %b7 = extractelement <8 x i16> %b, i32 7
  %b01 = sub i16 %b0, %b1
  %b23 = sub i16 %b2, %b3
  %b45 = sub i16 %b4, %b5
  %b67 = sub i16 %b6, %b7
  %hsub0 = insertelement <8 x i16> poison, i16 %a01, i32 0
  %hsub1 = insertelement <8 x i16> %hsub0, i16 %a23, i32 1
  %hsub2 = insertelement <8 x i16> %hsub1, i16 %a45, i32 2
  %hsub3 = insertelement <8 x i16> %hsub2, i16 %a67, i32 3
  %hsub4 = insertelement <8 x i16> %hsub3, i16 %b01, i32 4
  %hsub5 = insertelement <8 x i16> %hsub4, i16 %b23, i32 5
  %hsub6 = insertelement <8 x i16> %hsub5, i16 %b45, i32 6
  %hsub7 = insertelement <8 x i16> %hsub6, i16 %b67, i32 7
  %result = shufflevector <8 x i16> %hsub7, <8 x i16> %a, <8 x i32> <i32 7, i32 6, i32 poison, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i16> %result
}

;
; v16i16
;

; In the v16i16 tests the pair differences are inserted in the order:
; lanes 0-3 from %a elements 0-7, lanes 4-7 from %b elements 0-7,
; lanes 8-11 from %a elements 8-15, lanes 12-15 from %b elements 8-15.

; All sixteen result lanes are demanded (identity result shuffle).
define <16 x i16> @sub_v16i16_0123456789ABCDEF(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: @sub_v16i16_0123456789ABCDEF(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
; CHECK-NEXT:    [[TMP3:%.*]] = sub <16 x i16> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    ret <16 x i16> [[TMP3]]
;
  %a0 = extractelement <16 x i16> %a, i32 0
  %a1 = extractelement <16 x i16> %a, i32 1
  %a2 = extractelement <16 x i16> %a, i32 2
  %a3 = extractelement <16 x i16> %a, i32 3
  %a4 = extractelement <16 x i16> %a, i32 4
  %a5 = extractelement <16 x i16> %a, i32 5
  %a6 = extractelement <16 x i16> %a, i32 6
  %a7 = extractelement <16 x i16> %a, i32 7
  %a8 = extractelement <16 x i16> %a, i32 8
  %a9 = extractelement <16 x i16> %a, i32 9
  %aA = extractelement <16 x i16> %a, i32 10
  %aB = extractelement <16 x i16> %a, i32 11
  %aC = extractelement <16 x i16> %a, i32 12
  %aD = extractelement <16 x i16> %a, i32 13
  %aE = extractelement <16 x i16> %a, i32 14
  %aF = extractelement <16 x i16> %a, i32 15
  %a01 = sub i16 %a0, %a1
  %a23 = sub i16 %a2, %a3
  %a45 = sub i16 %a4, %a5
  %a67 = sub i16 %a6, %a7
  %a89 = sub i16 %a8, %a9
  %aAB = sub i16 %aA, %aB
  %aCD = sub i16 %aC, %aD
  %aEF = sub i16 %aE, %aF
  %b0 = extractelement <16 x i16> %b, i32 0
  %b1 = extractelement <16 x i16> %b, i32 1
  %b2 = extractelement <16 x i16> %b, i32 2
  %b3 = extractelement <16 x i16> %b, i32 3
  %b4 = extractelement <16 x i16> %b, i32 4
  %b5 = extractelement <16 x i16> %b, i32 5
  %b6 = extractelement <16 x i16> %b, i32 6
  %b7 = extractelement <16 x i16> %b, i32 7
  %b8 = extractelement <16 x i16> %b, i32 8
  %b9 = extractelement <16 x i16> %b, i32 9
  %bA = extractelement <16 x i16> %b, i32 10
  %bB = extractelement <16 x i16> %b, i32 11
  %bC = extractelement <16 x i16> %b, i32 12
  %bD = extractelement <16 x i16> %b, i32 13
  %bE = extractelement <16 x i16> %b, i32 14
  %bF = extractelement <16 x i16> %b, i32 15
  %b01 = sub i16 %b0, %b1
  %b23 = sub i16 %b2, %b3
  %b45 = sub i16 %b4, %b5
  %b67 = sub i16 %b6, %b7
  %b89 = sub i16 %b8, %b9
  %bAB = sub i16 %bA, %bB
  %bCD = sub i16 %bC, %bD
  %bEF = sub i16 %bE, %bF
  %hsub0 = insertelement <16 x i16> poison, i16 %a01, i32 0
  %hsub1 = insertelement <16 x i16> %hsub0, i16 %a23, i32 1
  %hsub2 = insertelement <16 x i16> %hsub1, i16 %a45, i32 2
  %hsub3 = insertelement <16 x i16> %hsub2, i16 %a67, i32 3
  %hsub4 = insertelement <16 x i16> %hsub3, i16 %b01, i32 4
  %hsub5 = insertelement <16 x i16> %hsub4, i16 %b23, i32 5
  %hsub6 = insertelement <16 x i16> %hsub5, i16 %b45, i32 6
  %hsub7 = insertelement <16 x i16> %hsub6, i16 %b67, i32 7
  %hsub8 = insertelement <16 x i16> %hsub7, i16 %a89, i32 8
  %hsub9 = insertelement <16 x i16> %hsub8, i16 %aAB, i32 9
  %hsubA = insertelement <16 x i16> %hsub9, i16 %aCD, i32 10
  %hsubB = insertelement <16 x i16> %hsubA, i16 %aEF, i32 11
  %hsubC = insertelement <16 x i16> %hsubB, i16 %b89, i32 12
  %hsubD = insertelement <16 x i16> %hsubC, i16 %bAB, i32 13
  %hsubE = insertelement <16 x i16> %hsubD, i16 %bCD, i32 14
  %hsubF = insertelement <16 x i16> %hsubE, i16 %bEF, i32 15
  %result = shufflevector <16 x i16> %hsubF, <16 x i16> %a, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i16> %result
}

; Result lanes 4 and 10 are poison in the final mask, so the %b0 - %b1 and
; %aC - %aD pairs are undemanded.
define <16 x i16> @sub_v16i16_0123u56789uBCDEF(<16 x i16> %a, <16 x i16> %b) {
; SSE2-LABEL: @sub_v16i16_0123u56789uBCDEF(
; SSE2-NEXT:    [[A8:%.*]] = extractelement <16 x i16> [[A:%.*]], i64 8
; SSE2-NEXT:    [[A9:%.*]] = extractelement <16 x i16> [[A]], i64 9
; SSE2-NEXT:    [[A89:%.*]] = sub i16 [[A8]], [[A9]]
; SSE2-NEXT:    [[BC:%.*]] = extractelement <16 x i16> [[B:%.*]], i64 12
; SSE2-NEXT:    [[BD:%.*]] = extractelement <16 x i16> [[B]], i64 13
; SSE2-NEXT:    [[BE:%.*]] = extractelement <16 x i16> [[B]], i64 14
; SSE2-NEXT:    [[BF:%.*]] = extractelement <16 x i16> [[B]], i64 15
; SSE2-NEXT:    [[BCD:%.*]] = sub i16 [[BC]], [[BD]]
; SSE2-NEXT:    [[BEF:%.*]] = sub i16 [[BE]], [[BF]]
; SSE2-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 18, i32 20, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 poison, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT:    [[TMP3:%.*]] = sub <16 x i16> [[TMP1]], [[TMP2]]
; SSE2-NEXT:    [[HSUB8:%.*]] = insertelement <16 x i16> [[TMP3]], i16 [[A89]], i64 8
; SSE2-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 10, i32 poison, i32 14, i32 24, i32 26, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT:    [[TMP5:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 11, i32 poison, i32 15, i32 25, i32 27, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT:    [[TMP6:%.*]] = sub <16 x i16> [[TMP4]], [[TMP5]]
; SSE2-NEXT:    [[HSUBD1:%.*]] = shufflevector <16 x i16> [[HSUB8]], <16 x i16> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 5, i32 6, i32 7, i32 8, i32 16, i32 poison, i32 18, i32 19, i32 20, i32 poison, i32 poison>
; SSE2-NEXT:    [[HSUBE:%.*]] = insertelement <16 x i16> [[HSUBD1]], i16 [[BCD]], i64 14
; SSE2-NEXT:    [[HSUBF:%.*]] = insertelement <16 x i16> [[HSUBE]], i16 [[BEF]], i64 15
; SSE2-NEXT:    [[RESULT:%.*]] = shufflevector <16 x i16> [[HSUBF]], <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 5, i32 6, i32 7, i32 8, i32 9, i32 poison, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSE2-NEXT:    ret <16 x i16> [[RESULT]]
;
; SSE4-LABEL: @sub_v16i16_0123u56789uBCDEF(
; SSE4-NEXT:    [[A8:%.*]] = extractelement <16 x i16> [[A:%.*]], i64 8
; SSE4-NEXT:    [[A9:%.*]] = extractelement <16 x i16> [[A]], i64 9
; SSE4-NEXT:    [[A89:%.*]] = sub i16 [[A8]], [[A9]]
; SSE4-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 18, i32 20, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 poison, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT:    [[TMP3:%.*]] = sub <16 x i16> [[TMP1]], [[TMP2]]
; SSE4-NEXT:    [[HSUB8:%.*]] = insertelement <16 x i16> [[TMP3]], i16 [[A89]], i64 8
; SSE4-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> poison, <16 x i32> <i32 10, i32 poison, i32 14, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT:    [[TMP5:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> poison, <16 x i32> <i32 11, i32 poison, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT:    [[TMP6:%.*]] = sub <16 x i16> [[TMP4]], [[TMP5]]
; SSE4-NEXT:    [[HSUBB2:%.*]] = shufflevector <16 x i16> [[HSUB8]], <16 x i16> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 5, i32 6, i32 7, i32 8, i32 16, i32 poison, i32 18, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT:    [[TMP7:%.*]] = shufflevector <16 x i16> [[B]], <16 x i16> poison, <16 x i32> <i32 8, i32 10, i32 12, i32 14, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT:    [[TMP8:%.*]] = shufflevector <16 x i16> [[B]], <16 x i16> poison, <16 x i32> <i32 9, i32 11, i32 13, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT:    [[TMP9:%.*]] = sub <16 x i16> [[TMP7]], [[TMP8]]
; SSE4-NEXT:    [[RESULT:%.*]] = shufflevector <16 x i16> [[HSUBB2]], <16 x i16> [[TMP9]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 5, i32 6, i32 7, i32 8, i32 9, i32 poison, i32 11, i32 16, i32 17, i32 18, i32 19>
; SSE4-NEXT:    ret <16 x i16> [[RESULT]]
;
; AVX-LABEL: @sub_v16i16_0123u56789uBCDEF(
; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 18, i32 20, i32 22, i32 8, i32 10, i32 poison, i32 14, i32 24, i32 26, i32 28, i32 30>
; AVX-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 poison, i32 19, i32 21, i32 23, i32 9, i32 11, i32 poison, i32 15, i32 25, i32 27, i32 29, i32 31>
; AVX-NEXT:    [[RESULT:%.*]] = sub <16 x i16> [[TMP1]], [[TMP2]]
; AVX-NEXT:    ret <16 x i16> [[RESULT]]
;
  %a0 = extractelement <16 x i16> %a, i32 0
  %a1 = extractelement <16 x i16> %a, i32 1
  %a2 = extractelement <16 x i16> %a, i32 2
  %a3 = extractelement <16 x i16> %a, i32 3
  %a4 = extractelement <16 x i16> %a, i32 4
  %a5 = extractelement <16 x i16> %a, i32 5
  %a6 = extractelement <16 x i16> %a, i32 6
  %a7 = extractelement <16 x i16> %a, i32 7
  %a8 = extractelement <16 x i16> %a, i32 8
  %a9 = extractelement <16 x i16> %a, i32 9
  %aA = extractelement <16 x i16> %a, i32 10
  %aB = extractelement <16 x i16> %a, i32 11
  %aC = extractelement <16 x i16> %a, i32 12
  %aD = extractelement <16 x i16> %a, i32 13
  %aE = extractelement <16 x i16> %a, i32 14
  %aF = extractelement <16 x i16> %a, i32 15
  %a01 = sub i16 %a0, %a1
  %a23 = sub i16 %a2, %a3
  %a45 = sub i16 %a4, %a5
  %a67 = sub i16 %a6, %a7
  %a89 = sub i16 %a8, %a9
  %aAB = sub i16 %aA, %aB
  %aCD = sub i16 %aC, %aD
  %aEF = sub i16 %aE, %aF
  %b0 = extractelement <16 x i16> %b, i32 0
  %b1 = extractelement <16 x i16> %b, i32 1
  %b2 = extractelement <16 x i16> %b, i32 2
  %b3 = extractelement <16 x i16> %b, i32 3
  %b4 = extractelement <16 x i16> %b, i32 4
  %b5 = extractelement <16 x i16> %b, i32 5
  %b6 = extractelement <16 x i16> %b, i32 6
  %b7 = extractelement <16 x i16> %b, i32 7
  %b8 = extractelement <16 x i16> %b, i32 8
  %b9 = extractelement <16 x i16> %b, i32 9
  %bA = extractelement <16 x i16> %b, i32 10
  %bB = extractelement <16 x i16> %b, i32 11
  %bC = extractelement <16 x i16> %b, i32 12
  %bD = extractelement <16 x i16> %b, i32 13
  %bE = extractelement <16 x i16> %b, i32 14
  %bF = extractelement <16 x i16> %b, i32 15
  %b01 = sub i16 %b0, %b1
  %b23 = sub i16 %b2, %b3
  %b45 = sub i16 %b4, %b5
  %b67 = sub i16 %b6, %b7
  %b89 = sub i16 %b8, %b9
  %bAB = sub i16 %bA, %bB
  %bCD = sub i16 %bC, %bD
  %bEF = sub i16 %bE, %bF
  %hsub0 = insertelement <16 x i16> poison, i16 %a01, i32 0
  %hsub1 = insertelement <16 x i16> %hsub0, i16 %a23, i32 1
  %hsub2 = insertelement <16 x i16> %hsub1, i16 %a45, i32 2
  %hsub3 = insertelement <16 x i16> %hsub2, i16 %a67, i32 3
  %hsub4 = insertelement <16 x i16> %hsub3, i16 %b01, i32 4
  %hsub5 = insertelement <16 x i16> %hsub4, i16 %b23, i32 5
  %hsub6 = insertelement <16 x i16> %hsub5, i16 %b45, i32 6
  %hsub7 = insertelement <16 x i16> %hsub6, i16 %b67, i32 7
  %hsub8 = insertelement <16 x i16> %hsub7, i16 %a89, i32 8
  %hsub9 = insertelement <16 x i16> %hsub8, i16 %aAB, i32 9
  %hsubA = insertelement <16 x i16> %hsub9, i16 %aCD, i32 10
  %hsubB = insertelement <16 x i16> %hsubA, i16 %aEF, i32 11
  %hsubC = insertelement <16 x i16> %hsubB, i16 %b89, i32 12
  %hsubD = insertelement <16 x i16> %hsubC, i16 %bAB, i32 13
  %hsubE = insertelement <16 x i16> %hsubD, i16 %bCD, i32 14
  %hsubF = insertelement <16 x i16> %hsubE, i16 %bEF, i32 15
  %result = shufflevector <16 x i16> %hsubF, <16 x i16> %a, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 5, i32 6, i32 7, i32 8, i32 9, i32 poison, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i16> %result
}

; The final mask reverses the lanes and leaves result lanes 2 and 14 poison,
; so %hsubF lanes 13 (%bA - %bB) and 1 (%a2 - %a3) are undemanded.
define <16 x i16> @sub_v16i16_FEuCBA98765432u0(<16 x i16> %a, <16 x i16> %b) {
; SSE2-LABEL: @sub_v16i16_FEuCBA98765432u0(
; SSE2-NEXT:    [[A8:%.*]] = extractelement <16 x i16> [[A:%.*]], i64 8
; SSE2-NEXT:    [[A9:%.*]] = extractelement <16 x i16> [[A]], i64 9
; SSE2-NEXT:    [[A89:%.*]] = sub i16 [[A8]], [[A9]]
; SSE2-NEXT:    [[BC:%.*]] = extractelement <16 x i16> [[B:%.*]], i64 12
; SSE2-NEXT:    [[BD:%.*]] = extractelement <16 x i16> [[B]], i64 13
; SSE2-NEXT:    [[BE:%.*]] = extractelement <16 x i16> [[B]], i64 14
; SSE2-NEXT:    [[BF:%.*]] = extractelement <16 x i16> [[B]], i64 15
; SSE2-NEXT:    [[BCD:%.*]] = sub i16 [[BC]], [[BD]]
; SSE2-NEXT:    [[BEF:%.*]] = sub i16 [[BE]], [[BF]]
; SSE2-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 0, i32 poison, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 1, i32 poison, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT:    [[TMP3:%.*]] = sub <16 x i16> [[TMP1]], [[TMP2]]
; SSE2-NEXT:    [[HSUB8:%.*]] = insertelement <16 x i16> [[TMP3]], i16 [[A89]], i64 8
; SSE2-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 10, i32 12, i32 14, i32 24, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT:    [[TMP5:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 11, i32 13, i32 15, i32 25, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT:    [[TMP6:%.*]] = sub <16 x i16> [[TMP4]], [[TMP5]]
; SSE2-NEXT:    [[HSUBC1:%.*]] = shufflevector <16 x i16> [[HSUB8]], <16 x i16> [[TMP6]], <16 x i32> <i32 0, i32 poison, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT:    [[HSUBE:%.*]] = insertelement <16 x i16> [[HSUBC1]], i16 [[BCD]], i64 14
; SSE2-NEXT:    [[HSUBF:%.*]] = insertelement <16 x i16> [[HSUBE]], i16 [[BEF]], i64 15
; SSE2-NEXT:    [[RESULT:%.*]] = shufflevector <16 x i16> [[HSUBF]], <16 x i16> poison, <16 x i32> <i32 15, i32 14, i32 poison, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 poison, i32 0>
; SSE2-NEXT:    ret <16 x i16> [[RESULT]]
;
; SSE4-LABEL: @sub_v16i16_FEuCBA98765432u0(
; SSE4-NEXT:    [[A8:%.*]] = extractelement <16 x i16> [[A:%.*]], i64 8
; SSE4-NEXT:    [[A9:%.*]] = extractelement <16 x i16> [[A]], i64 9
; SSE4-NEXT:    [[A89:%.*]] = sub i16 [[A8]], [[A9]]
; SSE4-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 poison, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 1, i32 poison, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT:    [[TMP3:%.*]] = sub <16 x i16> [[TMP1]], [[TMP2]]
; SSE4-NEXT:    [[HSUB8:%.*]] = insertelement <16 x i16> [[TMP3]], i16 [[A89]], i64 8
; SSE4-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> poison, <16 x i32> <i32 10, i32 12, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT:    [[TMP5:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> poison, <16 x i32> <i32 11, i32 13, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT:    [[TMP6:%.*]] = sub <16 x i16> [[TMP4]], [[TMP5]]
; SSE4-NEXT:    [[HSUBA2:%.*]] = shufflevector <16 x i16> [[HSUB8]], <16 x i16> [[TMP6]], <16 x i32> <i32 0, i32 poison, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 16, i32 17, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT:    [[TMP7:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 14, i32 24, i32 poison, i32 28, i32 30, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT:    [[TMP8:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 15, i32 25, i32 poison, i32 29, i32 31, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT:    [[TMP9:%.*]] = sub <16 x i16> [[TMP7]], [[TMP8]]
; SSE4-NEXT:    [[RESULT:%.*]] = shufflevector <16 x i16> [[TMP9]], <16 x i16> [[HSUBA2]], <16 x i32> <i32 4, i32 3, i32 poison, i32 1, i32 0, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 poison, i32 16>
; SSE4-NEXT:    ret <16 x i16> [[RESULT]]
;
; AVX2-LABEL: @sub_v16i16_FEuCBA98765432u0(
; AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 poison, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 10, i32 12, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; AVX2-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 1, i32 poison, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 9, i32 11, i32 13, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; AVX2-NEXT:    [[HSUBA:%.*]] = sub <16 x i16> [[TMP1]], [[TMP2]]
; AVX2-NEXT:    [[TMP3:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 14, i32 24, i32 poison, i32 28, i32 30, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; AVX2-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 15, i32 25, i32 poison, i32 29, i32 31, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; AVX2-NEXT:    [[TMP5:%.*]] = sub <16 x i16> [[TMP3]], [[TMP4]]
; AVX2-NEXT:    [[RESULT:%.*]] = shufflevector <16 x i16> [[TMP5]], <16 x i16> [[HSUBA]], <16 x i32> <i32 4, i32 3, i32 poison, i32 1, i32 0, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 poison, i32 16>
; AVX2-NEXT:    ret <16 x i16> [[RESULT]]
;
; AVX512-LABEL: @sub_v16i16_FEuCBA98765432u0(
; AVX512-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 poison, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 10, i32 12, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; AVX512-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 1, i32 poison, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 9, i32 11, i32 13, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; AVX512-NEXT:    [[HSUBA2:%.*]] = sub <16 x i16> [[TMP1]], [[TMP2]]
; AVX512-NEXT:    [[TMP3:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 14, i32 24, i32 poison, i32 28, i32 30, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; AVX512-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 15, i32 25, i32 poison, i32 29, i32 31, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; AVX512-NEXT:    [[TMP5:%.*]] = sub <16 x i16> [[TMP3]], [[TMP4]]
; AVX512-NEXT:    [[RESULT:%.*]] = shufflevector <16 x i16> [[TMP5]], <16 x i16> [[HSUBA2]], <16 x i32> <i32 4, i32 3, i32 poison, i32 1, i32 0, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 poison, i32 16>
; AVX512-NEXT:    ret <16 x i16> [[RESULT]]
;
  %a0 = extractelement <16 x i16> %a, i32 0
  %a1 = extractelement <16 x i16> %a, i32 1
  %a2 = extractelement <16 x i16> %a, i32 2
  %a3 = extractelement <16 x i16> %a, i32 3
  %a4 = extractelement <16 x i16> %a, i32 4
  %a5 = extractelement <16 x i16> %a, i32 5
  %a6 = extractelement <16 x i16> %a, i32 6
  %a7 = extractelement <16 x i16> %a, i32 7
  %a8 = extractelement <16 x i16> %a, i32 8
  %a9 = extractelement <16 x i16> %a, i32 9
  %aA = extractelement <16 x i16> %a, i32 10
  %aB = extractelement <16 x i16> %a, i32 11
  %aC = extractelement <16 x i16> %a, i32 12
  %aD = extractelement <16 x i16> %a, i32 13
  %aE = extractelement <16 x i16> %a, i32 14
  %aF = extractelement <16 x i16> %a, i32 15
  %a01 = sub i16 %a0, %a1
  %a23 = sub i16 %a2, %a3
  %a45 = sub i16 %a4, %a5
  %a67 = sub i16 %a6, %a7
  %a89 = sub i16 %a8, %a9
  %aAB = sub i16 %aA, %aB
  %aCD = sub i16 %aC, %aD
  %aEF = sub i16 %aE, %aF
  %b0 = extractelement <16 x i16> %b, i32 0
  %b1 = extractelement <16 x i16> %b, i32 1
  %b2 = extractelement <16 x i16> %b, i32 2
  %b3 = extractelement <16 x i16> %b, i32 3
  %b4 = extractelement <16 x i16> %b, i32 4
  %b5 = extractelement <16 x i16> %b, i32 5
  %b6 = extractelement <16 x i16> %b, i32 6
  %b7 = extractelement <16 x i16> %b, i32 7
  %b8 = extractelement <16 x i16> %b, i32 8
  %b9 = extractelement <16 x i16> %b, i32 9
  %bA = extractelement <16 x i16> %b, i32 10
  %bB = extractelement <16 x i16> %b, i32 11
  %bC = extractelement <16 x i16> %b, i32 12
  %bD = extractelement <16 x i16> %b, i32 13
  %bE = extractelement <16 x i16> %b, i32 14
  %bF = extractelement <16 x i16> %b, i32 15
  %b01 = sub i16 %b0, %b1
  %b23 = sub i16 %b2, %b3
  %b45 = sub i16 %b4, %b5
  %b67 = sub i16 %b6, %b7
  %b89 = sub i16 %b8, %b9
  %bAB = sub i16 %bA, %bB
  %bCD = sub i16 %bC, %bD
  %bEF = sub i16 %bE, %bF
  %hsub0 = insertelement <16 x i16> poison, i16 %a01, i32 0
  %hsub1 = insertelement <16 x i16> %hsub0, i16 %a23, i32 1
  %hsub2 = insertelement <16 x i16> %hsub1, i16 %a45, i32 2
  %hsub3 = insertelement <16 x i16> %hsub2, i16 %a67, i32 3
  %hsub4 = insertelement <16 x i16> %hsub3, i16 %b01, i32 4
  %hsub5 = insertelement <16 x i16> %hsub4, i16 %b23, i32 5
  %hsub6 = insertelement <16 x i16> %hsub5, i16 %b45, i32 6
  %hsub7 = insertelement <16 x i16> %hsub6, i16 %b67, i32 7
  %hsub8 = insertelement <16 x i16> %hsub7, i16 %a89, i32 8
  %hsub9 = insertelement <16 x i16> %hsub8, i16 %aAB, i32 9
  %hsubA = insertelement <16 x i16> %hsub9, i16 %aCD, i32 10
  %hsubB = insertelement <16 x i16> %hsubA, i16 %aEF, i32 11
  %hsubC = insertelement <16 x i16> %hsubB, i16 %b89, i32 12
  %hsubD = insertelement <16 x i16> %hsubC, i16 %bAB, i32 13
  %hsubE = insertelement <16 x i16> %hsubD, i16 %bCD, i32 14
  %hsubF = insertelement <16 x i16> %hsubE, i16 %bEF, i32 15
  %result = shufflevector <16 x i16> %hsubF, <16 x i16> %a, <16 x i32> <i32 15, i32 14, i32 poison, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 poison, i32 0>
  ret <16 x i16> %result
}

;
; v4i32
;

define <4 x i32> @sub_v4i32_0123(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @sub_v4i32_0123(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK-NEXT:    [[TMP3:%.*]] = sub <4 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
;
  %a0 = extractelement <4 x i32> %a, i32 0
  %a1 = extractelement <4 x i32> %a, i32 1
  %a2 = extractelement <4 x i32> %a, i32 2
  %a3 = extractelement <4 x i32> %a, i32 3
  %a01 = sub i32 %a0, %a1
  %a23 = sub i32
%a2, %a3 536 %b0 = extractelement <4 x i32> %b, i32 0 537 %b1 = extractelement <4 x i32> %b, i32 1 538 %b2 = extractelement <4 x i32> %b, i32 2 539 %b3 = extractelement <4 x i32> %b, i32 3 540 %b01 = sub i32 %b0, %b1 541 %b23 = sub i32 %b2, %b3 542 %hsub0 = insertelement <4 x i32> poison, i32 %a01, i32 0 543 %hsub1 = insertelement <4 x i32> %hsub0, i32 %a23, i32 1 544 %hsub2 = insertelement <4 x i32> %hsub1, i32 %b01, i32 2 545 %hsub3 = insertelement <4 x i32> %hsub2, i32 %b23, i32 3 546 %result = shufflevector <4 x i32> %hsub3, <4 x i32> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 547 ret <4 x i32> %result 548} 549 550define <4 x i32> @sub_v4i32_u123(<4 x i32> %a, <4 x i32> %b) { 551; CHECK-LABEL: @sub_v4i32_u123( 552; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> <i32 poison, i32 2, i32 4, i32 6> 553; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 poison, i32 3, i32 5, i32 7> 554; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]] 555; CHECK-NEXT: ret <4 x i32> [[TMP4]] 556; 557 %a0 = extractelement <4 x i32> %a, i32 0 558 %a1 = extractelement <4 x i32> %a, i32 1 559 %a2 = extractelement <4 x i32> %a, i32 2 560 %a3 = extractelement <4 x i32> %a, i32 3 561 %a01 = sub i32 %a0, %a1 562 %a23 = sub i32 %a2, %a3 563 %b0 = extractelement <4 x i32> %b, i32 0 564 %b1 = extractelement <4 x i32> %b, i32 1 565 %b2 = extractelement <4 x i32> %b, i32 2 566 %b3 = extractelement <4 x i32> %b, i32 3 567 %b01 = sub i32 %b0, %b1 568 %b23 = sub i32 %b2, %b3 569 %hsub0 = insertelement <4 x i32> poison, i32 %a01, i32 0 570 %hsub1 = insertelement <4 x i32> %hsub0, i32 %a23, i32 1 571 %hsub2 = insertelement <4 x i32> %hsub1, i32 %b01, i32 2 572 %hsub3 = insertelement <4 x i32> %hsub2, i32 %b23, i32 3 573 %result = shufflevector <4 x i32> %hsub3, <4 x i32> %a, <4 x i32> <i32 poison, i32 1, i32 2, i32 3> 574 ret <4 x i32> %result 575} 576 577define <4 x i32> @sub_v4i32_0u23(<4 x i32> %a, <4 x 
i32> %b) { 578; CHECK-LABEL: @sub_v4i32_0u23( 579; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> <i32 0, i32 poison, i32 4, i32 6> 580; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 1, i32 poison, i32 5, i32 7> 581; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]] 582; CHECK-NEXT: ret <4 x i32> [[TMP4]] 583; 584 %a0 = extractelement <4 x i32> %a, i32 0 585 %a1 = extractelement <4 x i32> %a, i32 1 586 %a2 = extractelement <4 x i32> %a, i32 2 587 %a3 = extractelement <4 x i32> %a, i32 3 588 %a01 = sub i32 %a0, %a1 589 %a23 = sub i32 %a2, %a3 590 %b0 = extractelement <4 x i32> %b, i32 0 591 %b1 = extractelement <4 x i32> %b, i32 1 592 %b2 = extractelement <4 x i32> %b, i32 2 593 %b3 = extractelement <4 x i32> %b, i32 3 594 %b01 = sub i32 %b0, %b1 595 %b23 = sub i32 %b2, %b3 596 %hsub0 = insertelement <4 x i32> poison, i32 %a01, i32 0 597 %hsub1 = insertelement <4 x i32> %hsub0, i32 %a23, i32 1 598 %hsub2 = insertelement <4 x i32> %hsub1, i32 %b01, i32 2 599 %hsub3 = insertelement <4 x i32> %hsub2, i32 %b23, i32 3 600 %result = shufflevector <4 x i32> %hsub3, <4 x i32> %a, <4 x i32> <i32 0, i32 poison, i32 2, i32 3> 601 ret <4 x i32> %result 602} 603 604define <4 x i32> @sub_v4i32_01u3(<4 x i32> %a, <4 x i32> %b) { 605; SSE2-LABEL: @sub_v4i32_01u3( 606; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 poison, i32 6> 607; SSE2-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 poison, i32 7> 608; SSE2-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]] 609; SSE2-NEXT: ret <4 x i32> [[TMP4]] 610; 611; SSE4-LABEL: @sub_v4i32_01u3( 612; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 poison, i32 6> 613; SSE4-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 1, i32 3, 
i32 poison, i32 7> 614; SSE4-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]] 615; SSE4-NEXT: ret <4 x i32> [[TMP4]] 616; 617; AVX2-LABEL: @sub_v4i32_01u3( 618; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 poison, i32 6> 619; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 poison, i32 7> 620; AVX2-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]] 621; AVX2-NEXT: ret <4 x i32> [[TMP4]] 622; 623; AVX512-LABEL: @sub_v4i32_01u3( 624; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 poison, i32 6> 625; AVX512-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 poison, i32 7> 626; AVX512-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]] 627; AVX512-NEXT: ret <4 x i32> [[TMP4]] 628; 629 %a0 = extractelement <4 x i32> %a, i32 0 630 %a1 = extractelement <4 x i32> %a, i32 1 631 %a2 = extractelement <4 x i32> %a, i32 2 632 %a3 = extractelement <4 x i32> %a, i32 3 633 %a01 = sub i32 %a0, %a1 634 %a23 = sub i32 %a2, %a3 635 %b0 = extractelement <4 x i32> %b, i32 0 636 %b1 = extractelement <4 x i32> %b, i32 1 637 %b2 = extractelement <4 x i32> %b, i32 2 638 %b3 = extractelement <4 x i32> %b, i32 3 639 %b01 = sub i32 %b0, %b1 640 %b23 = sub i32 %b2, %b3 641 %hsub0 = insertelement <4 x i32> poison, i32 %a01, i32 0 642 %hsub1 = insertelement <4 x i32> %hsub0, i32 %a23, i32 1 643 %hsub2 = insertelement <4 x i32> %hsub1, i32 %b01, i32 2 644 %hsub3 = insertelement <4 x i32> %hsub2, i32 %b23, i32 3 645 %result = shufflevector <4 x i32> %hsub3, <4 x i32> %a, <4 x i32> <i32 0, i32 1, i32 poison, i32 3> 646 ret <4 x i32> %result 647} 648 649define <4 x i32> @sub_v4i32_012u(<4 x i32> %a, <4 x i32> %b) { 650; CHECK-LABEL: @sub_v4i32_012u( 651; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> <i32 0, i32 2, 
i32 4, i32 poison> 652; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 poison> 653; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]] 654; CHECK-NEXT: ret <4 x i32> [[TMP4]] 655; 656 %a0 = extractelement <4 x i32> %a, i32 0 657 %a1 = extractelement <4 x i32> %a, i32 1 658 %a2 = extractelement <4 x i32> %a, i32 2 659 %a3 = extractelement <4 x i32> %a, i32 3 660 %a01 = sub i32 %a0, %a1 661 %a23 = sub i32 %a2, %a3 662 %b0 = extractelement <4 x i32> %b, i32 0 663 %b1 = extractelement <4 x i32> %b, i32 1 664 %b2 = extractelement <4 x i32> %b, i32 2 665 %b3 = extractelement <4 x i32> %b, i32 3 666 %b01 = sub i32 %b0, %b1 667 %b23 = sub i32 %b2, %b3 668 %hsub0 = insertelement <4 x i32> poison, i32 %a01, i32 0 669 %hsub1 = insertelement <4 x i32> %hsub0, i32 %a23, i32 1 670 %hsub2 = insertelement <4 x i32> %hsub1, i32 %b01, i32 2 671 %hsub3 = insertelement <4 x i32> %hsub2, i32 %b23, i32 3 672 %result = shufflevector <4 x i32> %hsub3, <4 x i32> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 673 ret <4 x i32> %result 674} 675 676define <4 x i32> @sub_v4i32_uu23(<4 x i32> %a, <4 x i32> %b) { 677; CHECK-LABEL: @sub_v4i32_uu23( 678; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> poison, <4 x i32> <i32 poison, i32 poison, i32 0, i32 2> 679; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> poison, <4 x i32> <i32 poison, i32 poison, i32 1, i32 3> 680; CHECK-NEXT: [[RESULT1:%.*]] = sub <4 x i32> [[TMP1]], [[TMP2]] 681; CHECK-NEXT: ret <4 x i32> [[RESULT1]] 682; 683 %a0 = extractelement <4 x i32> %a, i32 0 684 %a1 = extractelement <4 x i32> %a, i32 1 685 %a2 = extractelement <4 x i32> %a, i32 2 686 %a3 = extractelement <4 x i32> %a, i32 3 687 %a01 = sub i32 %a0, %a1 688 %a23 = sub i32 %a2, %a3 689 %b0 = extractelement <4 x i32> %b, i32 0 690 %b1 = extractelement <4 x i32> %b, i32 1 691 %b2 = extractelement <4 x i32> %b, i32 2 692 %b3 = extractelement <4 x i32> %b, 
i32 3 693 %b01 = sub i32 %b0, %b1 694 %b23 = sub i32 %b2, %b3 695 %hsub0 = insertelement <4 x i32> poison, i32 %a01, i32 0 696 %hsub1 = insertelement <4 x i32> %hsub0, i32 %a23, i32 1 697 %hsub2 = insertelement <4 x i32> %hsub1, i32 %b01, i32 2 698 %hsub3 = insertelement <4 x i32> %hsub2, i32 %b23, i32 3 699 %result = shufflevector <4 x i32> %hsub3, <4 x i32> %a, <4 x i32> <i32 poison, i32 poison, i32 2, i32 3> 700 ret <4 x i32> %result 701} 702 703define <4 x i32> @sub_v4i32_01uu(<4 x i32> %a, <4 x i32> %b) { 704; CHECK-LABEL: @sub_v4i32_01uu( 705; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> poison, <4 x i32> <i32 0, i32 2, i32 poison, i32 poison> 706; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> poison, <4 x i32> <i32 1, i32 3, i32 poison, i32 poison> 707; CHECK-NEXT: [[TMP3:%.*]] = sub <4 x i32> [[TMP1]], [[TMP2]] 708; CHECK-NEXT: ret <4 x i32> [[TMP3]] 709; 710 %a0 = extractelement <4 x i32> %a, i32 0 711 %a1 = extractelement <4 x i32> %a, i32 1 712 %a2 = extractelement <4 x i32> %a, i32 2 713 %a3 = extractelement <4 x i32> %a, i32 3 714 %a01 = sub i32 %a0, %a1 715 %a23 = sub i32 %a2, %a3 716 %b0 = extractelement <4 x i32> %b, i32 0 717 %b1 = extractelement <4 x i32> %b, i32 1 718 %b2 = extractelement <4 x i32> %b, i32 2 719 %b3 = extractelement <4 x i32> %b, i32 3 720 %b01 = sub i32 %b0, %b1 721 %b23 = sub i32 %b2, %b3 722 %hsub0 = insertelement <4 x i32> poison, i32 %a01, i32 0 723 %hsub1 = insertelement <4 x i32> %hsub0, i32 %a23, i32 1 724 %hsub2 = insertelement <4 x i32> %hsub1, i32 %b01, i32 2 725 %hsub3 = insertelement <4 x i32> %hsub2, i32 %b23, i32 3 726 %result = shufflevector <4 x i32> %hsub3, <4 x i32> %a, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison> 727 ret <4 x i32> %result 728} 729 730define <4 x i32> @sub_v4i32_32u0(<4 x i32> %a, <4 x i32> %b) { 731; SSE2-LABEL: @sub_v4i32_32u0( 732; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> [[A:%.*]], <4 x i32> <i32 2, i32 0, 
i32 poison, i32 4> 733; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[A]], <4 x i32> <i32 3, i32 1, i32 poison, i32 5> 734; SSE2-NEXT: [[RESULT1:%.*]] = sub <4 x i32> [[TMP1]], [[TMP2]] 735; SSE2-NEXT: ret <4 x i32> [[RESULT1]] 736; 737; SSE4-LABEL: @sub_v4i32_32u0( 738; SSE4-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> [[A:%.*]], <4 x i32> <i32 2, i32 0, i32 poison, i32 4> 739; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[A]], <4 x i32> <i32 3, i32 1, i32 poison, i32 5> 740; SSE4-NEXT: [[RESULT:%.*]] = sub <4 x i32> [[TMP1]], [[TMP2]] 741; SSE4-NEXT: ret <4 x i32> [[RESULT]] 742; 743; AVX2-LABEL: @sub_v4i32_32u0( 744; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> [[A:%.*]], <4 x i32> <i32 2, i32 0, i32 poison, i32 4> 745; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[A]], <4 x i32> <i32 3, i32 1, i32 poison, i32 5> 746; AVX2-NEXT: [[RESULT:%.*]] = sub <4 x i32> [[TMP1]], [[TMP2]] 747; AVX2-NEXT: ret <4 x i32> [[RESULT]] 748; 749; AVX512-LABEL: @sub_v4i32_32u0( 750; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> [[A:%.*]], <4 x i32> <i32 2, i32 0, i32 poison, i32 4> 751; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[A]], <4 x i32> <i32 3, i32 1, i32 poison, i32 5> 752; AVX512-NEXT: [[RESULT1:%.*]] = sub <4 x i32> [[TMP1]], [[TMP2]] 753; AVX512-NEXT: ret <4 x i32> [[RESULT1]] 754; 755 %a0 = extractelement <4 x i32> %a, i32 0 756 %a1 = extractelement <4 x i32> %a, i32 1 757 %a2 = extractelement <4 x i32> %a, i32 2 758 %a3 = extractelement <4 x i32> %a, i32 3 759 %a01 = sub i32 %a0, %a1 760 %a23 = sub i32 %a2, %a3 761 %b0 = extractelement <4 x i32> %b, i32 0 762 %b1 = extractelement <4 x i32> %b, i32 1 763 %b2 = extractelement <4 x i32> %b, i32 2 764 %b3 = extractelement <4 x i32> %b, i32 3 765 %b01 = sub i32 %b0, %b1 766 %b23 = sub i32 %b2, %b3 767 %hsub0 = insertelement <4 x i32> poison, i32 %a01, i32 
0 768 %hsub1 = insertelement <4 x i32> %hsub0, i32 %a23, i32 1 769 %hsub2 = insertelement <4 x i32> %hsub1, i32 %b01, i32 2 770 %hsub3 = insertelement <4 x i32> %hsub2, i32 %b23, i32 3 771 %result = shufflevector <4 x i32> %hsub3, <4 x i32> %a, <4 x i32> <i32 3, i32 2, i32 poison, i32 0> 772 ret <4 x i32> %result 773} 774 775; 776; v8i32 777; 778 779define <8 x i32> @sub_v8i32_01234567(<8 x i32> %a, <8 x i32> %b) { 780; CHECK-LABEL: @sub_v8i32_01234567( 781; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14> 782; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15> 783; CHECK-NEXT: [[TMP3:%.*]] = sub <8 x i32> [[TMP1]], [[TMP2]] 784; CHECK-NEXT: ret <8 x i32> [[TMP3]] 785; 786 %a0 = extractelement <8 x i32> %a, i32 0 787 %a1 = extractelement <8 x i32> %a, i32 1 788 %a2 = extractelement <8 x i32> %a, i32 2 789 %a3 = extractelement <8 x i32> %a, i32 3 790 %a4 = extractelement <8 x i32> %a, i32 4 791 %a5 = extractelement <8 x i32> %a, i32 5 792 %a6 = extractelement <8 x i32> %a, i32 6 793 %a7 = extractelement <8 x i32> %a, i32 7 794 %a01 = sub i32 %a0, %a1 795 %a23 = sub i32 %a2, %a3 796 %a45 = sub i32 %a4, %a5 797 %a67 = sub i32 %a6, %a7 798 %b0 = extractelement <8 x i32> %b, i32 0 799 %b1 = extractelement <8 x i32> %b, i32 1 800 %b2 = extractelement <8 x i32> %b, i32 2 801 %b3 = extractelement <8 x i32> %b, i32 3 802 %b4 = extractelement <8 x i32> %b, i32 4 803 %b5 = extractelement <8 x i32> %b, i32 5 804 %b6 = extractelement <8 x i32> %b, i32 6 805 %b7 = extractelement <8 x i32> %b, i32 7 806 %b01 = sub i32 %b0, %b1 807 %b23 = sub i32 %b2, %b3 808 %b45 = sub i32 %b4, %b5 809 %b67 = sub i32 %b6, %b7 810 %hsub0 = insertelement <8 x i32> poison, i32 %a01, i32 0 811 %hsub1 = insertelement <8 x i32> %hsub0, i32 %a23, i32 1 812 %hsub2 = insertelement <8 x i32> %hsub1, i32 %b01, 
i32 2 813 %hsub3 = insertelement <8 x i32> %hsub2, i32 %b23, i32 3 814 %hsub4 = insertelement <8 x i32> %hsub3, i32 %a45, i32 4 815 %hsub5 = insertelement <8 x i32> %hsub4, i32 %a67, i32 5 816 %hsub6 = insertelement <8 x i32> %hsub5, i32 %b45, i32 6 817 %hsub7 = insertelement <8 x i32> %hsub6, i32 %b67, i32 7 818 %result = shufflevector <8 x i32> %hsub7, <8 x i32> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 819 ret <8 x i32> %result 820} 821 822define <8 x i32> @sub_v8i32_01234u67(<8 x i32> %a, <8 x i32> %b) { 823; SSE2-LABEL: @sub_v8i32_01234u67( 824; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 poison, i32 12, i32 14> 825; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 poison, i32 13, i32 15> 826; SSE2-NEXT: [[RESULT:%.*]] = sub <8 x i32> [[TMP1]], [[TMP2]] 827; SSE2-NEXT: ret <8 x i32> [[RESULT]] 828; 829; SSE4-LABEL: @sub_v8i32_01234u67( 830; SSE4-NEXT: [[A4:%.*]] = extractelement <8 x i32> [[A:%.*]], i64 4 831; SSE4-NEXT: [[A5:%.*]] = extractelement <8 x i32> [[A]], i64 5 832; SSE4-NEXT: [[A45:%.*]] = sub i32 [[A4]], [[A5]] 833; SSE4-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 poison, i32 poison, i32 poison, i32 poison> 834; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 poison, i32 poison, i32 poison, i32 poison> 835; SSE4-NEXT: [[TMP3:%.*]] = sub <8 x i32> [[TMP1]], [[TMP2]] 836; SSE4-NEXT: [[HSUB4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[A45]], i64 4 837; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <8 x i32> <i32 4, i32 6, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 838; SSE4-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <8 x i32> <i32 5, 
i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 839; SSE4-NEXT: [[TMP6:%.*]] = sub <8 x i32> [[TMP4]], [[TMP5]] 840; SSE4-NEXT: [[RESULT:%.*]] = shufflevector <8 x i32> [[HSUB4]], <8 x i32> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 poison, i32 8, i32 9> 841; SSE4-NEXT: ret <8 x i32> [[RESULT]] 842; 843; AVX-LABEL: @sub_v8i32_01234u67( 844; AVX-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 poison, i32 12, i32 14> 845; AVX-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 poison, i32 13, i32 15> 846; AVX-NEXT: [[RESULT:%.*]] = sub <8 x i32> [[TMP1]], [[TMP2]] 847; AVX-NEXT: ret <8 x i32> [[RESULT]] 848; 849 %a0 = extractelement <8 x i32> %a, i32 0 850 %a1 = extractelement <8 x i32> %a, i32 1 851 %a2 = extractelement <8 x i32> %a, i32 2 852 %a3 = extractelement <8 x i32> %a, i32 3 853 %a4 = extractelement <8 x i32> %a, i32 4 854 %a5 = extractelement <8 x i32> %a, i32 5 855 %a6 = extractelement <8 x i32> %a, i32 6 856 %a7 = extractelement <8 x i32> %a, i32 7 857 %a01 = sub i32 %a0, %a1 858 %a23 = sub i32 %a2, %a3 859 %a45 = sub i32 %a4, %a5 860 %a67 = sub i32 %a6, %a7 861 %b0 = extractelement <8 x i32> %b, i32 0 862 %b1 = extractelement <8 x i32> %b, i32 1 863 %b2 = extractelement <8 x i32> %b, i32 2 864 %b3 = extractelement <8 x i32> %b, i32 3 865 %b4 = extractelement <8 x i32> %b, i32 4 866 %b5 = extractelement <8 x i32> %b, i32 5 867 %b6 = extractelement <8 x i32> %b, i32 6 868 %b7 = extractelement <8 x i32> %b, i32 7 869 %b01 = sub i32 %b0, %b1 870 %b23 = sub i32 %b2, %b3 871 %b45 = sub i32 %b4, %b5 872 %b67 = sub i32 %b6, %b7 873 %hsub0 = insertelement <8 x i32> poison, i32 %a01, i32 0 874 %hsub1 = insertelement <8 x i32> %hsub0, i32 %a23, i32 1 875 %hsub2 = insertelement <8 x i32> %hsub1, i32 %b01, i32 2 876 %hsub3 = insertelement <8 x i32> %hsub2, i32 %b23, i32 3 
877 %hsub4 = insertelement <8 x i32> %hsub3, i32 %a45, i32 4 878 %hsub5 = insertelement <8 x i32> %hsub4, i32 %a67, i32 5 879 %hsub6 = insertelement <8 x i32> %hsub5, i32 %b45, i32 6 880 %hsub7 = insertelement <8 x i32> %hsub6, i32 %b67, i32 7 881 %result = shufflevector <8 x i32> %hsub7, <8 x i32> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 poison, i32 6, i32 7> 882 ret <8 x i32> %result 883} 884 885; 886; v4f32 887; 888 889define <4 x float> @sub_v4f32_0123(<4 x float> %a, <4 x float> %b) { 890; CHECK-LABEL: @sub_v4f32_0123( 891; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 4, i32 6> 892; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7> 893; CHECK-NEXT: [[TMP3:%.*]] = fsub <4 x float> [[TMP1]], [[TMP2]] 894; CHECK-NEXT: ret <4 x float> [[TMP3]] 895; 896 %a0 = extractelement <4 x float> %a, i32 0 897 %a1 = extractelement <4 x float> %a, i32 1 898 %a2 = extractelement <4 x float> %a, i32 2 899 %a3 = extractelement <4 x float> %a, i32 3 900 %a01 = fsub float %a0, %a1 901 %a23 = fsub float %a2, %a3 902 %b0 = extractelement <4 x float> %b, i32 0 903 %b1 = extractelement <4 x float> %b, i32 1 904 %b2 = extractelement <4 x float> %b, i32 2 905 %b3 = extractelement <4 x float> %b, i32 3 906 %b01 = fsub float %b0, %b1 907 %b23 = fsub float %b2, %b3 908 %hsub0 = insertelement <4 x float> poison, float %a01, i32 0 909 %hsub1 = insertelement <4 x float> %hsub0, float %a23, i32 1 910 %hsub2 = insertelement <4 x float> %hsub1, float %b01, i32 2 911 %hsub3 = insertelement <4 x float> %hsub2, float %b23, i32 3 912 %result = shufflevector <4 x float> %hsub3, <4 x float> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 913 ret <4 x float> %result 914} 915 916define <4 x float> @sub_v4f32_u123(<4 x float> %a, <4 x float> %b) { 917; CHECK-LABEL: @sub_v4f32_u123( 918; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> 
[[B:%.*]], <4 x i32> <i32 poison, i32 2, i32 4, i32 6> 919; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 poison, i32 3, i32 5, i32 7> 920; CHECK-NEXT: [[TMP4:%.*]] = fsub <4 x float> [[TMP2]], [[TMP3]] 921; CHECK-NEXT: ret <4 x float> [[TMP4]] 922; 923 %a0 = extractelement <4 x float> %a, i32 0 924 %a1 = extractelement <4 x float> %a, i32 1 925 %a2 = extractelement <4 x float> %a, i32 2 926 %a3 = extractelement <4 x float> %a, i32 3 927 %a01 = fsub float %a0, %a1 928 %a23 = fsub float %a2, %a3 929 %b0 = extractelement <4 x float> %b, i32 0 930 %b1 = extractelement <4 x float> %b, i32 1 931 %b2 = extractelement <4 x float> %b, i32 2 932 %b3 = extractelement <4 x float> %b, i32 3 933 %b01 = fsub float %b0, %b1 934 %b23 = fsub float %b2, %b3 935 %hsub0 = insertelement <4 x float> poison, float %a01, i32 0 936 %hsub1 = insertelement <4 x float> %hsub0, float %a23, i32 1 937 %hsub2 = insertelement <4 x float> %hsub1, float %b01, i32 2 938 %hsub3 = insertelement <4 x float> %hsub2, float %b23, i32 3 939 %result = shufflevector <4 x float> %hsub3, <4 x float> %a, <4 x i32> <i32 poison, i32 1, i32 2, i32 3> 940 ret <4 x float> %result 941} 942 943define <4 x float> @sub_v4f32_0u23(<4 x float> %a, <4 x float> %b) { 944; CHECK-LABEL: @sub_v4f32_0u23( 945; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i32> <i32 0, i32 poison, i32 4, i32 6> 946; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 1, i32 poison, i32 5, i32 7> 947; CHECK-NEXT: [[TMP4:%.*]] = fsub <4 x float> [[TMP2]], [[TMP3]] 948; CHECK-NEXT: ret <4 x float> [[TMP4]] 949; 950 %a0 = extractelement <4 x float> %a, i32 0 951 %a1 = extractelement <4 x float> %a, i32 1 952 %a2 = extractelement <4 x float> %a, i32 2 953 %a3 = extractelement <4 x float> %a, i32 3 954 %a01 = fsub float %a0, %a1 955 %a23 = fsub float %a2, %a3 956 %b0 = extractelement <4 x float> %b, i32 0 957 %b1 = 
extractelement <4 x float> %b, i32 1 958 %b2 = extractelement <4 x float> %b, i32 2 959 %b3 = extractelement <4 x float> %b, i32 3 960 %b01 = fsub float %b0, %b1 961 %b23 = fsub float %b2, %b3 962 %hsub0 = insertelement <4 x float> poison, float %a01, i32 0 963 %hsub1 = insertelement <4 x float> %hsub0, float %a23, i32 1 964 %hsub2 = insertelement <4 x float> %hsub1, float %b01, i32 2 965 %hsub3 = insertelement <4 x float> %hsub2, float %b23, i32 3 966 %result = shufflevector <4 x float> %hsub3, <4 x float> %a, <4 x i32> <i32 0, i32 poison, i32 2, i32 3> 967 ret <4 x float> %result 968} 969 970define <4 x float> @sub_v4f32_01u3(<4 x float> %a, <4 x float> %b) { 971; CHECK-LABEL: @sub_v4f32_01u3( 972; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 poison, i32 6> 973; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 poison, i32 7> 974; CHECK-NEXT: [[RESULT1:%.*]] = fsub <4 x float> [[TMP1]], [[TMP2]] 975; CHECK-NEXT: ret <4 x float> [[RESULT1]] 976; 977 %a0 = extractelement <4 x float> %a, i32 0 978 %a1 = extractelement <4 x float> %a, i32 1 979 %a2 = extractelement <4 x float> %a, i32 2 980 %a3 = extractelement <4 x float> %a, i32 3 981 %a01 = fsub float %a0, %a1 982 %a23 = fsub float %a2, %a3 983 %b0 = extractelement <4 x float> %b, i32 0 984 %b1 = extractelement <4 x float> %b, i32 1 985 %b2 = extractelement <4 x float> %b, i32 2 986 %b3 = extractelement <4 x float> %b, i32 3 987 %b01 = fsub float %b0, %b1 988 %b23 = fsub float %b2, %b3 989 %hsub0 = insertelement <4 x float> poison, float %a01, i32 0 990 %hsub1 = insertelement <4 x float> %hsub0, float %a23, i32 1 991 %hsub2 = insertelement <4 x float> %hsub1, float %b01, i32 2 992 %hsub3 = insertelement <4 x float> %hsub2, float %b23, i32 3 993 %result = shufflevector <4 x float> %hsub3, <4 x float> %a, <4 x i32> <i32 0, i32 1, i32 poison, i32 3> 994 ret <4 x float> %result 995} 996 
; sub_v4f32_012u: horizontal fsub test on <4 x float>. The scalar body subtracts adjacent element pairs of %a and %b and rebuilds a vector; the final shufflevector mask is <0, 1, 2, poison>, i.e. result lane 3 (the position marked 'u' in the function name) is not demanded.
; All four prefixes (SSE2/SSE4/AVX2/AVX512) expect the same shape: two shufflevectors of %a/%b with masks <0, 2, 4, poison> and <1, 3, 5, poison> feeding a single <4 x float> fsub. The CHECK lines are autogenerated (see the NOTE at the top of the file) - regenerate them with the script instead of editing by hand.
997define <4 x float> @sub_v4f32_012u(<4 x float> %a, <4 x float> %b) { 998; SSE2-LABEL: @sub_v4f32_012u( 999; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 4, i32 poison> 1000; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 poison> 1001; SSE2-NEXT: [[RESULT1:%.*]] = fsub <4 x float> [[TMP1]], [[TMP2]] 1002; SSE2-NEXT: ret <4 x float> [[RESULT1]] 1003; 1004; SSE4-LABEL: @sub_v4f32_012u( 1005; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 4, i32 poison> 1006; SSE4-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 poison> 1007; SSE4-NEXT: [[TMP4:%.*]] = fsub <4 x float> [[TMP2]], [[TMP3]] 1008; SSE4-NEXT: ret <4 x float> [[TMP4]] 1009; 1010; AVX2-LABEL: @sub_v4f32_012u( 1011; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 4, i32 poison> 1012; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 poison> 1013; AVX2-NEXT: [[TMP4:%.*]] = fsub <4 x float> [[TMP2]], [[TMP3]] 1014; AVX2-NEXT: ret <4 x float> [[TMP4]] 1015; 1016; AVX512-LABEL: @sub_v4f32_012u( 1017; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 4, i32 poison> 1018; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 poison> 1019; AVX512-NEXT: [[RESULT1:%.*]] = fsub <4 x float> [[TMP1]], [[TMP2]] 1020; AVX512-NEXT: ret <4 x float> [[RESULT1]] 1021; 1022 %a0 = extractelement <4 x float> %a, i32 0 1023 %a1 = extractelement <4 x float> %a, i32 1 1024 %a2 = extractelement <4 x float> %a, i32 2 1025 %a3 = extractelement <4 x float> %a, i32 3 1026 %a01 = fsub float %a0, %a1 1027 %a23 = fsub float
%a2, %a3
  %b0 = extractelement <4 x float> %b, i32 0
  %b1 = extractelement <4 x float> %b, i32 1
  %b2 = extractelement <4 x float> %b, i32 2
  %b3 = extractelement <4 x float> %b, i32 3
  %b01 = fsub float %b0, %b1
  %b23 = fsub float %b2, %b3
  %hsub0 = insertelement <4 x float> poison, float %a01, i32 0
  %hsub1 = insertelement <4 x float> %hsub0, float %a23, i32 1
  %hsub2 = insertelement <4 x float> %hsub1, float %b01, i32 2
  %hsub3 = insertelement <4 x float> %hsub2, float %b23, i32 3
  %result = shufflevector <4 x float> %hsub3, <4 x float> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  ret <4 x float> %result
}

; Pairwise fsub results are inserted as <a01, a23, b01, b23>; the final mask
; keeps only lanes 2 and 3 (the %b pairs), so the expected IR reads only %b.
define <4 x float> @sub_v4f32_uu23(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @sub_v4f32_uu23(
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 0, i32 2>
; CHECK-NEXT: [[RESULT1:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 1, i32 3>
; CHECK-NEXT: [[RESULT2:%.*]] = fsub <4 x float> [[TMP2]], [[RESULT1]]
; CHECK-NEXT: ret <4 x float> [[RESULT2]]
;
  %a0 = extractelement <4 x float> %a, i32 0
  %a1 = extractelement <4 x float> %a, i32 1
  %a2 = extractelement <4 x float> %a, i32 2
  %a3 = extractelement <4 x float> %a, i32 3
  %a01 = fsub float %a0, %a1
  %a23 = fsub float %a2, %a3
  %b0 = extractelement <4 x float> %b, i32 0
  %b1 = extractelement <4 x float> %b, i32 1
  %b2 = extractelement <4 x float> %b, i32 2
  %b3 = extractelement <4 x float> %b, i32 3
  %b01 = fsub float %b0, %b1
  %b23 = fsub float %b2, %b3
  %hsub0 = insertelement <4 x float> poison, float %a01, i32 0
  %hsub1 = insertelement <4 x float> %hsub0, float %a23, i32 1
  %hsub2 = insertelement <4 x float> %hsub1, float %b01, i32 2
  %hsub3 = insertelement <4 x float> %hsub2, float %b23, i32 3
  %result = shufflevector <4 x float> %hsub3, <4 x float> %a, <4 x i32> <i32 poison, i32 poison, i32 2, i32 3>
  ret <4 x float> %result
}

; Same <a01, a23, b01, b23> construction; the final mask keeps only lanes 0 and
; 1 (the %a pairs), so the expected IR reads only %a.
define <4 x float> @sub_v4f32_01uu(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @sub_v4f32_01uu(
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 0, i32 2, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> <i32 1, i32 3, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP4:%.*]] = fsub <4 x float> [[TMP2]], [[TMP3]]
; CHECK-NEXT: ret <4 x float> [[TMP4]]
;
  %a0 = extractelement <4 x float> %a, i32 0
  %a1 = extractelement <4 x float> %a, i32 1
  %a2 = extractelement <4 x float> %a, i32 2
  %a3 = extractelement <4 x float> %a, i32 3
  %a01 = fsub float %a0, %a1
  %a23 = fsub float %a2, %a3
  %b0 = extractelement <4 x float> %b, i32 0
  %b1 = extractelement <4 x float> %b, i32 1
  %b2 = extractelement <4 x float> %b, i32 2
  %b3 = extractelement <4 x float> %b, i32 3
  %b01 = fsub float %b0, %b1
  %b23 = fsub float %b2, %b3
  %hsub0 = insertelement <4 x float> poison, float %a01, i32 0
  %hsub1 = insertelement <4 x float> %hsub0, float %a23, i32 1
  %hsub2 = insertelement <4 x float> %hsub1, float %b01, i32 2
  %hsub3 = insertelement <4 x float> %hsub2, float %b23, i32 3
  %result = shufflevector <4 x float> %hsub3, <4 x float> %a, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
  ret <4 x float> %result
}

;
; v8f32
;

; All eight lanes demanded. Pair results are inserted in the order
; <a01, a23, b01, b23, a45, a67, b45, b67>; every prefix expects the same
; two-shuffle + vector fsub form.
define <8 x float> @sub_v8f32_01234567(<8 x float> %a, <8 x float> %b) {
; CHECK-LABEL: @sub_v8f32_01234567(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
; CHECK-NEXT: [[TMP3:%.*]] = fsub <8 x float> [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret <8 x float> [[TMP3]]
;
  %a0 = extractelement <8 x float> %a, i32 0
  %a1 = extractelement <8 x float> %a, i32 1
  %a2 = extractelement <8 x float> %a, i32 2
  %a3 = extractelement <8 x float> %a, i32 3
  %a4 = extractelement <8 x float> %a, i32 4
  %a5 = extractelement <8 x float> %a, i32 5
  %a6 = extractelement <8 x float> %a, i32 6
  %a7 = extractelement <8 x float> %a, i32 7
  %a01 = fsub float %a0, %a1
  %a23 = fsub float %a2, %a3
  %a45 = fsub float %a4, %a5
  %a67 = fsub float %a6, %a7
  %b0 = extractelement <8 x float> %b, i32 0
  %b1 = extractelement <8 x float> %b, i32 1
  %b2 = extractelement <8 x float> %b, i32 2
  %b3 = extractelement <8 x float> %b, i32 3
  %b4 = extractelement <8 x float> %b, i32 4
  %b5 = extractelement <8 x float> %b, i32 5
  %b6 = extractelement <8 x float> %b, i32 6
  %b7 = extractelement <8 x float> %b, i32 7
  %b01 = fsub float %b0, %b1
  %b23 = fsub float %b2, %b3
  %b45 = fsub float %b4, %b5
  %b67 = fsub float %b6, %b7
  %hsub0 = insertelement <8 x float> poison, float %a01, i32 0
  %hsub1 = insertelement <8 x float> %hsub0, float %a23, i32 1
  %hsub2 = insertelement <8 x float> %hsub1, float %b01, i32 2
  %hsub3 = insertelement <8 x float> %hsub2, float %b23, i32 3
  %hsub4 = insertelement <8 x float> %hsub3, float %a45, i32 4
  %hsub5 = insertelement <8 x float> %hsub4, float %a67, i32 5
  %hsub6 = insertelement <8 x float> %hsub5, float %b45, i32 6
  %hsub7 = insertelement <8 x float> %hsub6, float %b67, i32 7
  %result = shufflevector <8 x float> %hsub7, <8 x float> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x float> %result
}

; Same construction as above, but lane 3 (the b23 result) is poison in the
; final mask. The expected output is recorded separately for the SSE2, SSE4
; and AVX prefixes; only the AVX form is a single vector fsub.
define <8 x float> @sub_v8f32_012u4567(<8 x float> %a, <8 x float> %b) {
; SSE2-LABEL: @sub_v8f32_012u4567(
; SSE2-NEXT: [[A6:%.*]] = extractelement <8 x float> [[A:%.*]], i64 6
; SSE2-NEXT: [[A7:%.*]] = extractelement <8 x float> [[A]], i64 7
; SSE2-NEXT: [[A67:%.*]] = fsub float [[A6]], [[A7]]
; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[B:%.*]], <8 x float> poison, <2 x i32> <i32 4, i32 6>
; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <2 x i32> <i32 5, i32 7>
; SSE2-NEXT: [[TMP3:%.*]] = fsub <2 x float> [[TMP1]], [[TMP2]]
; SSE2-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> <i32 0, i32 2, i32 8, i32 poison, i32 4, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 poison, i32 5, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT: [[TMP6:%.*]] = fsub <8 x float> [[TMP5]], [[TMP8]]
; SSE2-NEXT: [[HSUB5:%.*]] = insertelement <8 x float> [[TMP6]], float [[A67]], i64 5
; SSE2-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT: [[RESULT:%.*]] = shufflevector <8 x float> [[HSUB5]], <8 x float> [[TMP7]], <8 x i32> <i32 0, i32 1, i32 2, i32 poison, i32 4, i32 5, i32 8, i32 9>
; SSE2-NEXT: ret <8 x float> [[RESULT]]
;
; SSE4-LABEL: @sub_v8f32_012u4567(
; SSE4-NEXT: [[A6:%.*]] = extractelement <8 x float> [[A:%.*]], i64 6
; SSE4-NEXT: [[A7:%.*]] = extractelement <8 x float> [[A]], i64 7
; SSE4-NEXT: [[A67:%.*]] = fsub float [[A6]], [[A7]]
; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 poison, i32 4, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT: [[TMP7:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 poison, i32 5, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT: [[TMP6:%.*]] = fsub <8 x float> [[TMP4]], [[TMP7]]
; SSE4-NEXT: [[HSUB5:%.*]] = insertelement <8 x float> [[TMP6]], float [[A67]], i64 5
; SSE4-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <8 x i32> <i32 4, i32 6, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <8 x i32> <i32 5, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT: [[TMP9:%.*]] = fsub <8 x float> [[TMP8]], [[TMP5]]
; SSE4-NEXT: [[RESULT:%.*]] = shufflevector <8 x float> [[HSUB5]], <8 x float> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 2, i32 poison, i32 4, i32 5, i32 8, i32 9>
; SSE4-NEXT: ret <8 x float> [[RESULT]]
;
; AVX-LABEL: @sub_v8f32_012u4567(
; AVX-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 poison, i32 4, i32 6, i32 12, i32 14>
; AVX-NEXT: [[TMP6:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 poison, i32 5, i32 7, i32 13, i32 15>
; AVX-NEXT: [[TMP7:%.*]] = fsub <8 x float> [[TMP5]], [[TMP6]]
; AVX-NEXT: ret <8 x float> [[TMP7]]
;
  %a0 = extractelement <8 x float> %a, i32 0
  %a1 = extractelement <8 x float> %a, i32 1
  %a2 = extractelement <8 x float> %a, i32 2
  %a3 = extractelement <8 x float> %a, i32 3
  %a4 = extractelement <8 x float> %a, i32 4
  %a5 = extractelement <8 x float> %a, i32 5
  %a6 = extractelement <8 x float> %a, i32 6
  %a7 = extractelement <8 x float> %a, i32 7
  %a01 = fsub float %a0, %a1
  %a23 = fsub float %a2, %a3
  %a45 = fsub float %a4, %a5
  %a67 = fsub float %a6, %a7
  %b0 = extractelement <8 x float> %b, i32 0
  %b1 = extractelement <8 x float> %b, i32 1
  %b2 = extractelement <8 x float> %b, i32 2
  %b3 = extractelement <8 x float> %b, i32 3
  %b4 = extractelement <8 x float> %b, i32 4
  %b5 = extractelement <8 x float> %b, i32 5
  %b6 = extractelement <8 x float> %b, i32 6
  %b7 = extractelement <8 x float> %b, i32 7
  %b01 = fsub float %b0, %b1
  %b23 = fsub float %b2, %b3
  %b45 = fsub float %b4, %b5
  %b67 = fsub float %b6, %b7
  %hsub0 = insertelement <8 x float> poison, float %a01, i32 0
  %hsub1 = insertelement <8 x float> %hsub0, float %a23, i32 1
  %hsub2 = insertelement <8 x float> %hsub1, float %b01, i32 2
  %hsub3 = insertelement <8 x float> %hsub2, float %b23, i32 3
  %hsub4 = insertelement <8 x float> %hsub3, float %a45, i32 4
  %hsub5 = insertelement <8 x float> %hsub4, float %a67, i32 5
  %hsub6 = insertelement <8 x float> %hsub5, float %b45, i32 6
  %hsub7 = insertelement <8 x float> %hsub6, float %b67, i32 7
  %result = shufflevector <8 x float> %hsub7, <8 x float> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 poison, i32 4, i32 5, i32 6, i32 7>
  ret <8 x float> %result
}

; Pair results are inserted as <a01, a23, a45, a67, b01, b23, b45, b67> and the
; final mask reverses the lane order while leaving output lane 2 poison, so
; element 5 (b23) is never demanded.
define <8 x float> @sub_v8f32_76u43210(<8 x float> %a, <8 x float> %b) {
; SSE2-LABEL: @sub_v8f32_76u43210(
; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; SSE2-NEXT: [[TMP3:%.*]] = fsub <4 x float> [[TMP1]], [[TMP2]]
; SSE2-NEXT: [[B0:%.*]] = extractelement <8 x float> [[B:%.*]], i64 0
; SSE2-NEXT: [[B1:%.*]] = extractelement <8 x float> [[B]], i64 1
; SSE2-NEXT: [[B01:%.*]] = fsub float [[B0]], [[B1]]
; SSE2-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <2 x i32> <i32 4, i32 6>
; SSE2-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <2 x i32> <i32 5, i32 7>
; SSE2-NEXT: [[TMP6:%.*]] = fsub <2 x float> [[TMP4]], [[TMP5]]
; SSE2-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT: [[HSUB4:%.*]] = insertelement <8 x float> [[TMP7]], float [[B01]], i64 4
; SSE2-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE2-NEXT: [[RESULT:%.*]] = shufflevector <8 x float> [[TMP8]], <8 x float> [[HSUB4]], <8 x i32> <i32 1, i32 0, i32 poison, i32 12, i32 11, i32 10, i32 9, i32 8>
; SSE2-NEXT: ret <8 x float> [[RESULT]]
;
; SSE4-LABEL: @sub_v8f32_76u43210(
; SSE4-NEXT: [[B0:%.*]] = extractelement <8 x float> [[B:%.*]], i64 0
; SSE4-NEXT: [[B1:%.*]] = extractelement <8 x float> [[B]], i64 1
; SSE4-NEXT: [[B01:%.*]] = fsub float [[B0]], [[B1]]
; SSE4-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT: [[RESULT:%.*]] = fsub <8 x float> [[TMP1]], [[TMP2]]
; SSE4-NEXT: [[HSUB4:%.*]] = insertelement <8 x float> [[RESULT]], float [[B01]], i64 4
; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <8 x i32> <i32 4, i32 6, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <8 x i32> <i32 5, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE4-NEXT: [[TMP6:%.*]] = fsub <8 x float> [[TMP4]], [[TMP5]]
; SSE4-NEXT: [[RESULT1:%.*]] = shufflevector <8 x float> [[TMP6]], <8 x float> [[HSUB4]], <8 x i32> <i32 1, i32 0, i32 poison, i32 12, i32 11, i32 10, i32 9, i32 8>
; SSE4-NEXT: ret <8 x float> [[RESULT1]]
;
; AVX-LABEL: @sub_v8f32_76u43210(
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[B:%.*]], <8 x float> [[A:%.*]], <8 x i32> <i32 6, i32 4, i32 poison, i32 0, i32 14, i32 12, i32 10, i32 8>
; AVX-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[B]], <8 x float> [[A]], <8 x i32> <i32 7, i32 5, i32 poison, i32 1, i32 15, i32 13, i32 11, i32 9>
; AVX-NEXT: [[RESULT:%.*]] = fsub <8 x float> [[TMP1]], [[TMP2]]
; AVX-NEXT: ret <8 x float> [[RESULT]]
;
  %a0 = extractelement <8 x float> %a, i32 0
  %a1 = extractelement <8 x float> %a, i32 1
  %a2 = extractelement <8 x float> %a, i32 2
  %a3 = extractelement <8 x float> %a, i32 3
  %a4 = extractelement <8 x float> %a, i32 4
  %a5 = extractelement <8 x float> %a, i32 5
  %a6 = extractelement <8 x float> %a, i32 6
  %a7 = extractelement <8 x float> %a, i32 7
  %a01 = fsub float %a0, %a1
  %a23 = fsub float %a2, %a3
  %a45 = fsub float %a4, %a5
  %a67 = fsub float %a6, %a7
  %b0 = extractelement <8 x float> %b, i32 0
  %b1 = extractelement <8 x float> %b, i32 1
  %b2 = extractelement <8 x float> %b, i32 2
  %b3 = extractelement <8 x float> %b, i32 3
  %b4 = extractelement <8 x float> %b, i32 4
  %b5 = extractelement <8 x float> %b, i32 5
  %b6 = extractelement <8 x float> %b, i32 6
  %b7 = extractelement <8 x float> %b, i32 7
  %b01 = fsub float %b0, %b1
  %b23 = fsub float %b2, %b3
  %b45 = fsub float %b4, %b5
  %b67 = fsub float %b6, %b7
  %hsub0 = insertelement <8 x float> poison, float %a01, i32 0
  %hsub1 = insertelement <8 x float> %hsub0, float %a23, i32 1
  %hsub2 = insertelement <8 x float> %hsub1, float %a45, i32 2
  %hsub3 = insertelement <8 x float> %hsub2, float %a67, i32 3
  %hsub4 = insertelement <8 x float> %hsub3, float %b01, i32 4
  %hsub5 = insertelement <8 x float> %hsub4, float %b23, i32 5
  %hsub6 = insertelement <8 x float> %hsub5, float %b45, i32 6
  %hsub7 = insertelement <8 x float> %hsub6, float %b67, i32 7
  %result = shufflevector <8 x float> %hsub7, <8 x float> %a, <8 x i32> <i32 7, i32 6, i32 poison, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x float> %result
}

;
; v2f64
;

; Both lanes demanded: result is <a0-a1, b0-b1>.
define <2 x double> @sub_v2f64_01(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @sub_v2f64_01(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x i32> <i32 0, i32 2>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> <i32 1, i32 3>
; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret <2 x double> [[TMP3]]
;
  %a0 = extractelement <2 x double> %a, i32 0
  %a1 = extractelement <2 x double> %a, i32 1
  %a01 = fsub double %a0, %a1
  %b0 = extractelement <2 x double> %b, i32 0
  %b1 = extractelement <2 x double> %b, i32 1
  %b01 = fsub double %b0, %b1
  %hsub0 = insertelement <2 x double> poison, double %a01, i32 0
  %hsub1 = insertelement <2 x double> %hsub0, double %b01, i32 1
  %result = shufflevector <2 x double> %hsub1, <2 x double> %a, <2 x i32> <i32 0, i32 1>
  ret <2 x double> %result
}

; Lane 0 (a01) is poison in the final mask; only b0-b1 in lane 1 is demanded,
; so the expected IR reads only %b.
define <2 x double> @sub_v2f64_u1(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @sub_v2f64_u1(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <2 x i32> <i32 poison, i32 0>
; CHECK-NEXT: [[RESULT1:%.*]] = fsub <2 x double> [[TMP1]], [[B]]
; CHECK-NEXT: ret <2 x double> [[RESULT1]]
;
  %a0 = extractelement <2 x double> %a, i32 0
  %a1 = extractelement <2 x double> %a, i32 1
  %a01 = fsub double %a0, %a1
  %b0 = extractelement <2 x double> %b, i32 0
  %b1 = extractelement <2 x double> %b, i32 1
  %b01 = fsub double %b0, %b1
  %hsub0 = insertelement <2 x double> poison, double %a01, i32 0
  %hsub1 = insertelement <2 x double> %hsub0, double %b01, i32 1
  %result = shufflevector <2 x double> %hsub1, <2 x double> %a, <2 x i32> <i32 poison, i32 1>
  ret <2 x double> %result
}

; Lane 1 (b01) is poison in the final mask; only a0-a1 in lane 0 is demanded.
; The expected IR uses a single vector operand, captured under the name TMP1.
define <2 x double> @sub_v2f64_0u(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @sub_v2f64_0u(
; CHECK-NEXT: [[RESULT:%.*]] = shufflevector <2 x double> [[TMP1:%.*]], <2 x double> poison, <2 x i32> <i32 1, i32 poison>
; CHECK-NEXT: [[RESULT1:%.*]] = fsub <2 x double> [[TMP1]], [[RESULT]]
; CHECK-NEXT: ret <2 x double> [[RESULT1]]
;
  %a0 = extractelement <2 x double> %a, i32 0
  %a1 = extractelement <2 x double> %a, i32 1
  %a01 = fsub double %a0, %a1
  %b0 = extractelement <2 x double> %b, i32 0
  %b1 = extractelement <2 x double> %b, i32 1
  %b01 = fsub double %b0, %b1
  %hsub0 = insertelement <2 x double> poison, double %a01, i32 0
  %hsub1 = insertelement <2 x double> %hsub0, double %b01, i32 1
  %result = shufflevector <2 x double> %hsub1, <2 x double> %a, <2 x i32> <i32 0, i32 poison>
  ret <2 x double> %result
}

;
; v4f64
;

; All four lanes demanded. Pair results are inserted as <a01, b01, a23, b23>.
define <4 x double> @sub_v4f64_0123(<4 x double> %a, <4 x double> %b) {
; CHECK-LABEL: @sub_v4f64_0123(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; CHECK-NEXT: [[TMP3:%.*]] = fsub <4 x double> [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret <4 x double> [[TMP3]]
;
  %a0 = extractelement <4 x double> %a, i32 0
  %a1 = extractelement <4 x double> %a, i32 1
  %a2 = extractelement <4 x double> %a, i32 2
  %a3 = extractelement <4 x double> %a, i32 3
  %a01 = fsub double %a0, %a1
  %a23 = fsub double %a2, %a3
  %b0 = extractelement <4 x double> %b, i32 0
  %b1 = extractelement <4 x double> %b, i32 1
  %b2 = extractelement <4 x double> %b, i32 2
  %b3 = extractelement <4 x double> %b, i32 3
  %b01 = fsub double %b0, %b1
  %b23 = fsub double %b2, %b3
  %hsub0 = insertelement <4 x double> poison, double %a01, i32 0
  %hsub1 = insertelement <4 x double> %hsub0, double %b01, i32 1
  %hsub2 = insertelement <4 x double> %hsub1, double %a23, i32 2
  %hsub3 = insertelement <4 x double> %hsub2, double %b23, i32 3
  %result = shufflevector <4 x double> %hsub3, <4 x double> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x double> %result
}

; <a01, b01, a23, b23> with lane 0 (a01) poison in the final mask. Expected
; output is recorded per prefix; the SSE2 and SSE4 forms keep a scalar b2-b3.
define <4 x double> @sub_v4f64_u123(<4 x double> %a, <4 x double> %b) {
; SSE2-LABEL: @sub_v4f64_u123(
; SSE2-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B:%.*]], i64 2
; SSE2-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i64 3
; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B]], <4 x double> [[A:%.*]], <2 x i32> <i32 0, i32 6>
; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> [[A]], <2 x i32> <i32 1, i32 7>
; SSE2-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]]
; SSE2-NEXT: [[B23:%.*]] = fsub double [[B2]], [[B3]]
; SSE2-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 poison, i32 0, i32 1, i32 poison>
; SSE2-NEXT: [[RESULT:%.*]] = insertelement <4 x double> [[TMP4]], double [[B23]], i64 3
; SSE2-NEXT: ret <4 x double> [[RESULT]]
;
; SSE4-LABEL: @sub_v4f64_u123(
; SSE4-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B:%.*]], i64 2
; SSE4-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i64 3
; SSE4-NEXT: [[B23:%.*]] = fsub double [[B2]], [[B3]]
; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> [[A:%.*]], <4 x i32> <i32 poison, i32 0, i32 6, i32 poison>
; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[B]], <4 x double> [[A]], <4 x i32> <i32 poison, i32 1, i32 7, i32 poison>
; SSE4-NEXT: [[TMP3:%.*]] = fsub <4 x double> [[TMP2]], [[TMP4]]
; SSE4-NEXT: [[RESULT:%.*]] = insertelement <4 x double> [[TMP3]], double [[B23]], i64 3
; SSE4-NEXT: ret <4 x double> [[RESULT]]
;
; AVX-LABEL: @sub_v4f64_u123(
; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> [[A:%.*]], <4 x i32> <i32 poison, i32 0, i32 6, i32 2>
; AVX-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[B]], <4 x double> [[A]], <4 x i32> <i32 poison, i32 1, i32 7, i32 3>
; AVX-NEXT: [[TMP4:%.*]] = fsub <4 x double> [[TMP2]], [[TMP3]]
; AVX-NEXT: ret <4 x double> [[TMP4]]
;
  %a0 = extractelement <4 x double> %a, i32 0
  %a1 = extractelement <4 x double> %a, i32 1
  %a2 = extractelement <4 x double> %a, i32 2
  %a3 = extractelement <4 x double> %a, i32 3
  %a01 = fsub double %a0, %a1
  %a23 = fsub double %a2, %a3
  %b0 = extractelement <4 x double> %b, i32 0
  %b1 = extractelement <4 x double> %b, i32 1
  %b2 = extractelement <4 x double> %b, i32 2
  %b3 = extractelement <4 x double> %b, i32 3
  %b01 = fsub double %b0, %b1
  %b23 = fsub double %b2, %b3
  %hsub0 = insertelement <4 x double> poison, double %a01, i32 0
  %hsub1 = insertelement <4 x double> %hsub0, double %b01, i32 1
  %hsub2 = insertelement <4 x double> %hsub1, double %a23, i32 2
  %hsub3 = insertelement <4 x double> %hsub2, double %b23, i32 3
  %result = shufflevector <4 x double> %hsub3, <4 x double> %a, <4 x i32> <i32 poison, i32 1, i32 2, i32 3>
  ret <4 x double> %result
}

; <a01, b01, a23, b23> with lane 1 (b01) poison in the final mask. Expected
; output is recorded per prefix.
define <4 x double> @sub_v4f64_0u23(<4 x double> %a, <4 x double> %b) {
; SSE2-LABEL: @sub_v4f64_0u23(
; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> poison, <2 x i32> <i32 0, i32 2>
; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <2 x i32> <i32 1, i32 3>
; SSE2-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]]
; SSE2-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B:%.*]], i64 2
; SSE2-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i64 3
; SSE2-NEXT: [[B23:%.*]] = fsub double [[B2]], [[B3]]
; SSE2-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 poison, i32 1, i32 poison>
; SSE2-NEXT: [[RESULT:%.*]] = insertelement <4 x double> [[TMP4]], double [[B23]], i64 3
; SSE2-NEXT: ret <4 x double> [[RESULT]]
;
; SSE4-LABEL: @sub_v4f64_0u23(
; SSE4-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B:%.*]], i64 2
; SSE4-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i64 3
; SSE4-NEXT: [[B23:%.*]] = fsub double [[B2]], [[B3]]
; SSE4-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[TMP2:%.*]], <4 x double> poison, <4 x i32> <i32 1, i32 poison, i32 3, i32 poison>
; SSE4-NEXT: [[TMP4:%.*]] = fsub <4 x double> [[TMP2]], [[TMP3]]
; SSE4-NEXT: [[RESULT:%.*]] = insertelement <4 x double> [[TMP4]], double [[B23]], i64 3
; SSE4-NEXT: ret <4 x double> [[RESULT]]
;
; AVX-LABEL: @sub_v4f64_0u23(
; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 6>
; AVX-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 poison, i32 3, i32 7>
; AVX-NEXT: [[TMP4:%.*]] = fsub <4 x double> [[TMP2]], [[TMP3]]
; AVX-NEXT: ret <4 x double> [[TMP4]]
;
  %a0 = extractelement <4 x double> %a, i32 0
  %a1 = extractelement <4 x double> %a, i32 1
  %a2 = extractelement <4 x double> %a, i32 2
  %a3 = extractelement <4 x double> %a, i32 3
  %a01 = fsub double %a0, %a1
  %a23 = fsub double %a2, %a3
  %b0 = extractelement <4 x double> %b, i32 0
  %b1 = extractelement <4 x double> %b, i32 1
  %b2 = extractelement <4 x double> %b, i32 2
  %b3 = extractelement <4 x double> %b, i32 3
  %b01 = fsub double %b0, %b1
  %b23 = fsub double %b2, %b3
  %hsub0 = insertelement <4 x double> poison, double %a01, i32 0
  %hsub1 = insertelement <4 x double> %hsub0, double %b01, i32 1
  %hsub2 = insertelement <4 x double> %hsub1, double %a23, i32 2
  %hsub3 = insertelement <4 x double> %hsub2, double %b23, i32 3
  %result = shufflevector <4 x double> %hsub3, <4 x double> %a, <4 x i32> <i32 0, i32 poison, i32 2, i32 3>
  ret <4 x double> %result
}

; <a01, b01, a23, b23> with lane 2 (a23) poison in the final mask. Expected
; output is recorded per prefix.
define <4 x double> @sub_v4f64_01u3(<4 x double> %a, <4 x double> %b) {
; SSE2-LABEL: @sub_v4f64_01u3(
; SSE2-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B:%.*]], i64 2
; SSE2-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i64 3
; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B]], <2 x i32> <i32 0, i32 4>
; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 5>
; SSE2-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]]
; SSE2-NEXT: [[B23:%.*]] = fsub double [[B2]], [[B3]]
; SSE2-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; SSE2-NEXT: [[RESULT:%.*]] = insertelement <4 x double> [[TMP4]], double [[B23]], i64 3
; SSE2-NEXT: ret <4 x double> [[RESULT]]
;
; SSE4-LABEL: @sub_v4f64_01u3(
; SSE4-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B:%.*]], i64 2
; SSE4-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i64 3
; SSE4-NEXT: [[B23:%.*]] = fsub double [[B2]], [[B3]]
; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B]], <4 x i32> <i32 0, i32 4, i32 poison, i32 poison>
; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 5, i32 poison, i32 poison>
; SSE4-NEXT: [[TMP3:%.*]] = fsub <4 x double> [[TMP2]], [[TMP4]]
; SSE4-NEXT: [[RESULT:%.*]] = insertelement <4 x double> [[TMP3]], double [[B23]], i64 3
; SSE4-NEXT: ret <4 x double> [[RESULT]]
;
; AVX-LABEL: @sub_v4f64_01u3(
; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 poison, i32 6>
; AVX-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 5, i32 poison, i32 7>
; AVX-NEXT: [[TMP4:%.*]] = fsub <4 x double> [[TMP2]], [[TMP3]]
; AVX-NEXT: ret <4 x double> [[TMP4]]
;
  %a0 = extractelement <4 x double> %a, i32 0
  %a1 = extractelement <4 x double> %a, i32 1
  %a2 = extractelement <4 x double> %a, i32 2
  %a3 = extractelement <4 x double> %a, i32 3
  %a01 = fsub double %a0, %a1
  %a23 = fsub double %a2, %a3
  %b0 = extractelement <4 x double> %b, i32 0
  %b1 = extractelement <4 x double> %b, i32 1
  %b2 = extractelement <4 x double> %b, i32 2
  %b3 = extractelement <4 x double> %b, i32 3
  %b01 = fsub double %b0, %b1
  %b23 = fsub double %b2, %b3
  %hsub0 = insertelement <4 x double> poison, double %a01, i32 0
  %hsub1 = insertelement <4 x double> %hsub0, double %b01, i32 1
  %hsub2 = insertelement <4 x double> %hsub1, double %a23, i32 2
  %hsub3 = insertelement <4 x double> %hsub2, double %b23, i32 3
  %result = shufflevector <4 x double> %hsub3, <4 x double> %a, <4 x i32> <i32 0, i32 1, i32 poison, i32 3>
  ret <4 x double> %result
}

; <a01, b01, a23, b23> with lane 3 (b23) poison in the final mask. Expected
; output is recorded per prefix; the SSE2 and SSE4 forms keep a scalar a2-a3.
define <4 x double> @sub_v4f64_012u(<4 x double> %a, <4 x double> %b) {
; SSE2-LABEL: @sub_v4f64_012u(
; SSE2-NEXT: [[A2:%.*]] = extractelement <4 x double> [[A:%.*]], i64 2
; SSE2-NEXT: [[A3:%.*]] = extractelement <4 x double> [[A]], i64 3
; SSE2-NEXT: [[A23:%.*]] = fsub double [[A2]], [[A3]]
; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B:%.*]], <2 x i32> <i32 0, i32 4>
; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 5>
; SSE2-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]]
; SSE2-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; SSE2-NEXT: [[RESULT:%.*]] = insertelement <4 x double> [[TMP4]], double [[A23]], i64 2
; SSE2-NEXT: ret <4 x double> [[RESULT]]
;
; SSE4-LABEL: @sub_v4f64_012u(
; SSE4-NEXT: [[A2:%.*]] = extractelement <4 x double> [[A:%.*]], i64 2
; SSE4-NEXT: [[A3:%.*]] = extractelement <4 x double> [[A]], i64 3
; SSE4-NEXT: [[A23:%.*]] = fsub double [[A2]], [[A3]]
; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 poison, i32 poison>
; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 5, i32 poison, i32 poison>
; SSE4-NEXT: [[TMP3:%.*]] = fsub <4 x double> [[TMP2]], [[TMP4]]
; SSE4-NEXT: [[RESULT:%.*]] = insertelement <4 x double> [[TMP3]], double [[A23]], i64 2
; SSE4-NEXT: ret <4 x double> [[RESULT]]
;
; AVX-LABEL: @sub_v4f64_012u(
; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 poison>
; AVX-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 poison>
; AVX-NEXT: [[TMP4:%.*]] = fsub <4 x double> [[TMP2]], [[TMP3]]
; AVX-NEXT: ret <4 x double> [[TMP4]]
;
  %a0 = extractelement <4 x double> %a, i32 0
  %a1 = extractelement <4 x double> %a, i32 1
  %a2 = extractelement <4 x double> %a, i32 2
  %a3 = extractelement <4 x double> %a, i32 3
  %a01 = fsub double %a0, %a1
  %a23 = fsub double %a2, %a3
  %b0 = extractelement <4 x double> %b, i32 0
  %b1 = extractelement <4 x double> %b, i32 1
  %b2 = extractelement <4 x double> %b, i32 2
  %b3 = extractelement <4 x double> %b, i32 3
  %b01 = fsub double %b0, %b1
  %b23 = fsub double %b2, %b3
  %hsub0 = insertelement <4 x double> poison, double %a01, i32 0
  %hsub1 = insertelement <4 x double> %hsub0, double %b01, i32 1
  %hsub2 = insertelement <4 x double> %hsub1, double %a23, i32 2
  %hsub3 = insertelement <4 x double> %hsub2, double %b23, i32 3
  %result = shufflevector <4 x double> %hsub3, <4 x double> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  ret <4 x double> %result
}

; <a01, b01, a23, b23> with lanes 0 and 1 poison in the final mask; only the
; upper pair results (a2-a3, b2-b3) are demanded.
define <4 x double> @sub_v4f64_uu23(<4 x double> %a, <4 x double> %b) {
; SSE2-LABEL: @sub_v4f64_uu23(
; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> <i32 2, i32 6>
; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 3, i32 7>
; SSE2-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]]
; SSE2-NEXT: [[RESULT1:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 0, i32 1>
; SSE2-NEXT: ret <4 x double> [[RESULT1]]
;
; SSE4-LABEL: @sub_v4f64_uu23(
; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 poison, i32 poison, i32 2, i32 6>
; SSE4-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 poison, i32 poison, i32 3, i32 7>
; SSE4-NEXT: [[RESULT1:%.*]] = fsub <4 x double> [[TMP2]], [[TMP3]]
; SSE4-NEXT: ret <4 x double> [[RESULT1]]
;
; AVX-LABEL: @sub_v4f64_uu23(
; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 poison, i32 poison, i32 2, i32 6>
; AVX-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 poison, i32 poison, i32 3, i32 7>
; AVX-NEXT: [[RESULT1:%.*]] = fsub <4 x double> [[TMP2]], [[TMP3]]
; AVX-NEXT: ret <4 x double> [[RESULT1]]
;
  %a0 = extractelement <4 x double> %a, i32 0
  %a1 = extractelement <4 x double> %a, i32 1
  %a2 = extractelement <4 x double> %a, i32 2
  %a3 = extractelement <4 x double> %a, i32 3
  %a01 = fsub double %a0, %a1
  %a23 = fsub double %a2, %a3
  %b0 = extractelement <4 x double> %b, i32 0
  %b1 = extractelement <4 x double> %b, i32 1
  %b2 = extractelement <4 x double> %b, i32 2
  %b3 = extractelement <4 x double> %b, i32 3
  %b01 = fsub double %b0, %b1
  %b23 = fsub double %b2, %b3
  %hsub0 = insertelement <4 x double> poison, double %a01, i32 0
  %hsub1 = insertelement <4 x double> %hsub0, double %b01, i32 1
  %hsub2 = insertelement <4 x double> %hsub1, double %a23, i32 2
  %hsub3 = insertelement <4 x double> %hsub2, double %b23, i32 3
  %result = shufflevector <4 x double> %hsub3, <4 x double> %a, <4 x i32> <i32 poison, i32 poison, i32 2, i32 3>
  ret <4 x double> %result
}

; <a01, b01, a23, b23> with lanes 2 and 3 poison in the final mask; only the
; lower pair results (a0-a1, b0-b1) are demanded.
define <4 x double> @sub_v4f64_01uu(<4 x double> %a, <4 x double> %b) {
; SSE2-LABEL: @sub_v4f64_01uu(
; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> <i32 0, i32 4>
; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 5>
; SSE2-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]]
; SSE2-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; SSE2-NEXT: ret <4 x double> [[TMP4]]
;
; SSE4-LABEL: @sub_v4f64_01uu(
; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 poison, i32 poison>
; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 5, i32 poison, i32 poison>
; SSE4-NEXT: [[TMP3:%.*]] = fsub <4 x double> [[TMP2]], [[TMP4]]
; SSE4-NEXT: ret <4 x double> [[TMP3]]
;
; AVX-LABEL: @sub_v4f64_01uu(
; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 poison, i32 poison>
; AVX-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 5, i32 poison, i32 poison>
; AVX-NEXT: [[TMP3:%.*]] = fsub <4 x double> [[TMP2]], [[TMP4]]
; AVX-NEXT: ret <4 x double> [[TMP3]]
;
  %a0 = extractelement <4 x double> %a, i32 0
  %a1 = extractelement <4 x double> %a, i32 1
  %a2 = extractelement <4 x double> %a, i32 2
  %a3 = extractelement <4 x double> %a, i32 3
  %a01 = fsub double %a0, %a1
  %a23 = fsub double %a2, %a3
  %b0 = extractelement <4 x double> %b, i32 0
  %b1 = extractelement <4 x double> %b, i32 1
  %b2 = extractelement <4 x double> %b, i32 2
  %b3 = extractelement <4 x double> %b, i32 3
  %b01 = fsub double %b0, %b1
  %b23 = fsub double %b2, %b3
  %hsub0 = insertelement <4 x double> poison, double %a01, i32 0
  %hsub1 = insertelement <4 x double> %hsub0, double %b01, i32 1
  %hsub2 = insertelement <4 x double> %hsub1, double %a23, i32 2
  %hsub3 = insertelement <4 x double> %hsub2, double %b23, i32 3
  %result = shufflevector <4 x double> %hsub3, <4 x double> %a, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
  ret <4 x double> %result
}

; Here the pair results are inserted as <a01, a23, b01, b23> (not interleaved)
; and the final mask selects <b23, b01, poison, a01>, so a23 is never demanded.
define <4 x double> @sub_v4f64_32u0(<4 x double> %a, <4 x double> %b) {
; SSE2-LABEL: @sub_v4f64_32u0(
; SSE2-NEXT: [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i64 0
; SSE2-NEXT: [[A1:%.*]] = extractelement <4 x double> [[A]], i64 1
; SSE2-NEXT: [[A01:%.*]] = fsub double [[A0]], [[A1]]
; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 2, i32 0>
; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <2 x i32> <i32 3, i32 1>
; SSE2-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]]
; SSE2-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; SSE2-NEXT: [[RESULT:%.*]] = insertelement <4 x double> [[TMP4]], double [[A01]], i64 3
; SSE2-NEXT: ret <4 x double> [[RESULT]]
;
; SSE4-LABEL: @sub_v4f64_32u0(
; SSE4-NEXT: [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i64 0
; SSE4-NEXT: [[A1:%.*]] = extractelement <4 x double> [[A]], i64 1
; SSE4-NEXT: [[A01:%.*]] = fsub double [[A0]], [[A1]]
; SSE4-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <4 x i32> <i32 2, i32 0, i32 poison, i32 poison>
; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <4 x i32> <i32 3, i32 1, i32 poison, i32 poison>
; SSE4-NEXT: [[RESULT:%.*]] = fsub <4 x double> [[TMP1]], [[TMP2]]
; SSE4-NEXT: [[RESULT1:%.*]] = insertelement <4 x double> [[RESULT]], double [[A01]], i64 3
; SSE4-NEXT: ret <4 x double> [[RESULT1]]
;
; AVX-LABEL: @sub_v4f64_32u0(
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> [[A:%.*]], <4 x i32> <i32 2, i32 0, i32 poison, i32 4>
; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> [[A]], <4 x i32> <i32 3, i32 1, i32 poison, i32 5>
; AVX-NEXT: [[RESULT:%.*]] = fsub <4 x double> [[TMP1]], [[TMP2]]
; AVX-NEXT: ret <4 x double> [[RESULT]]
;
  %a0 = extractelement <4 x double> %a, i32 0
  %a1 = extractelement <4 x double> %a, i32 1
  %a2 = extractelement <4 x double> %a, i32 2
  %a3 = extractelement <4 x double> %a, i32 3
  %a01 = fsub double %a0, %a1
  %a23 = fsub double %a2, %a3
  %b0 = extractelement <4 x double> %b, i32 0
  %b1 = extractelement <4 x double> %b, i32 1
  %b2 = extractelement <4 x double> %b, i32 2
  %b3 = extractelement <4 x double> %b, i32 3
  %b01 = fsub double %b0, %b1
  %b23 = fsub double %b2, %b3
  %hsub0 = insertelement <4 x double> poison, double %a01, i32 0
  %hsub1 = insertelement <4 x double> %hsub0, double %a23, i32 1
  %hsub2 = insertelement <4 x double> %hsub1, double %b01, i32 2
  %hsub3 = insertelement <4 x double> %hsub2, double %b23, i32 3
  %result = shufflevector <4 x double> %hsub3, <4 x double> %a, <4 x i32> <i32 3, i32 2, i32 poison, i32 0>
  ret <4 x double> %result
}