; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=SSE2 | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=AVX2 | FileCheck %s --check-prefixes=CHECK,AVX

; Tests for the extractelement -> fneg -> insertelement pattern. Each function
; either expects the scalar sequence to become a vector fneg plus
; shufflevector(s), or expects the input to be left unchanged.

declare void @use(float)

; TODO: The insert is costed as free, so creating a shuffle appears to be a loss.

define <4 x float> @ext0_v4f32(<4 x float> %x, <4 x float> %y) {
; CHECK-LABEL: @ext0_v4f32(
; CHECK-NEXT:    [[TMP1:%.*]] = fneg <4 x float> [[X:%.*]]
; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[TMP1]], <4 x i32> <i32 4, i32 1, i32 2, i32 3>
; CHECK-NEXT:    ret <4 x float> [[R]]
;
  %e = extractelement <4 x float> %x, i32 0
  %n = fneg float %e
  %r = insertelement <4 x float> %y, float %n, i32 0
  ret <4 x float> %r
}

; Source (<2 x float>) is narrower than the destination (<4 x float>), index 0:
; the checks expect the scalar sequence to be left unchanged.

define <4 x float> @ext0_v2f32v4f32(<2 x float> %x, <4 x float> %y) {
; CHECK-LABEL: @ext0_v2f32v4f32(
; CHECK-NEXT:    [[E:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
; CHECK-NEXT:    [[N:%.*]] = fneg float [[E]]
; CHECK-NEXT:    [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[N]], i32 0
; CHECK-NEXT:    ret <4 x float> [[R]]
;
  %e = extractelement <2 x float> %x, i32 0
  %n = fneg float %e
  %r = insertelement <4 x float> %y, float %n, i32 0
  ret <4 x float> %r
}

; Eliminating extract/insert is profitable.

define <4 x float> @ext2_v4f32(<4 x float> %x, <4 x float> %y) {
; CHECK-LABEL: @ext2_v4f32(
; CHECK-NEXT:    [[TMP1:%.*]] = fneg <4 x float> [[X:%.*]]
; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
; CHECK-NEXT:    ret <4 x float> [[R]]
;
  %e = extractelement <4 x float> %x, i32 2
  %n = fneg float %e
  %r = insertelement <4 x float> %y, float %n, i32 2
  ret <4 x float> %r
}

; Narrower source than destination: the checks expect the negated source to be
; resized with one shuffle and then blended into %y with a second shuffle.
; NOTE(review): extract index 2 is out of range for <2 x float>, so %e should be
; poison per the LangRef - confirm this index is intentional.

define <4 x float> @ext2_v2f32v4f32(<2 x float> %x, <4 x float> %y) {
; CHECK-LABEL: @ext2_v2f32v4f32(
; CHECK-NEXT:    [[TMP1:%.*]] = fneg <2 x float> [[X:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 2, i32 poison>
; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
; CHECK-NEXT:    ret <4 x float> [[R]]
;
  %e = extractelement <2 x float> %x, i32 2
  %n = fneg float %e
  %r = insertelement <4 x float> %y, float %n, i32 2
  ret <4 x float> %r
}

; Eliminating extract/insert is still profitable. Flags propagate.

define <2 x double> @ext1_v2f64(<2 x double> %x, <2 x double> %y) {
; CHECK-LABEL: @ext1_v2f64(
; CHECK-NEXT:    [[TMP1:%.*]] = fneg nsz <2 x double> [[X:%.*]]
; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x double> [[Y:%.*]], <2 x double> [[TMP1]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT:    ret <2 x double> [[R]]
;
  %e = extractelement <2 x double> %x, i32 1
  %n = fneg nsz double %e
  %r = insertelement <2 x double> %y, double %n, i32 1
  ret <2 x double> %r
}

; <2 x double> source, <4 x double> destination: the checks expect the fold
; (with the nsz flag kept) for the AVX run only; the SSE run is unchanged.

define <4 x double> @ext1_v2f64v4f64(<2 x double> %x, <4 x double> %y) {
; SSE-LABEL: @ext1_v2f64v4f64(
; SSE-NEXT:    [[E:%.*]] = extractelement <2 x double> [[X:%.*]], i32 1
; SSE-NEXT:    [[N:%.*]] = fneg nsz double [[E]]
; SSE-NEXT:    [[R:%.*]] = insertelement <4 x double> [[Y:%.*]], double [[N]], i32 1
; SSE-NEXT:    ret <4 x double> [[R]]
;
; AVX-LABEL: @ext1_v2f64v4f64(
; AVX-NEXT:    [[TMP1:%.*]] = fneg nsz <2 x double> [[X:%.*]]
; AVX-NEXT:    [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
; AVX-NEXT:    [[R:%.*]] = shufflevector <4 x double> [[Y:%.*]], <4 x double> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
; AVX-NEXT:    ret <4 x double> [[R]]
;
  %e = extractelement <2 x double> %x, i32 1
  %n = fneg nsz double %e
  %r = insertelement <4 x double> %y, double %n, i32 1
  ret <4 x double> %r
}

; Last lane (index 7) of an 8-element vector: the checks expect the fold for
; both RUN configurations.

define <8 x float> @ext7_v8f32(<8 x float> %x, <8 x float> %y) {
; CHECK-LABEL: @ext7_v8f32(
; CHECK-NEXT:    [[TMP1:%.*]] = fneg <8 x float> [[X:%.*]]
; CHECK-NEXT:    [[R:%.*]] = shufflevector <8 x float> [[Y:%.*]], <8 x float> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
; CHECK-NEXT:    ret <8 x float> [[R]]
;
  %e = extractelement <8 x float> %x, i32 7
  %n = fneg float %e
  %r = insertelement <8 x float> %y, float %n, i32 7
  ret <8 x float> %r
}

; Narrower source with different extract (3) and insert (7) indexes: the checks
; expect the scalar sequence to be left unchanged.

define <8 x float> @ext7_v4f32v8f32(<4 x float> %x, <8 x float> %y) {
; CHECK-LABEL: @ext7_v4f32v8f32(
; CHECK-NEXT:    [[E:%.*]] = extractelement <4 x float> [[X:%.*]], i32 3
; CHECK-NEXT:    [[N:%.*]] = fneg float [[E]]
; CHECK-NEXT:    [[R:%.*]] = insertelement <8 x float> [[Y:%.*]], float [[N]], i32 7
; CHECK-NEXT:    ret <8 x float> [[R]]
;
  %e = extractelement <4 x float> %x, i32 3
  %n = fneg float %e
  %r = insertelement <8 x float> %y, float %n, i32 7
  ret <8 x float> %r
}

; Same as above with an extra use of the extracted element.

define <8 x float> @ext7_v8f32_use1(<8 x float> %x, <8 x float> %y) {
; SSE-LABEL: @ext7_v8f32_use1(
; SSE-NEXT:    [[E:%.*]] = extractelement <8 x float> [[X:%.*]], i32 5
; SSE-NEXT:    call void @use(float [[E]])
; SSE-NEXT:    [[N:%.*]] = fneg float [[E]]
; SSE-NEXT:    [[R:%.*]] = insertelement <8 x float> [[Y:%.*]], float [[N]], i32 5
; SSE-NEXT:    ret <8 x float> [[R]]
;
; AVX-LABEL: @ext7_v8f32_use1(
; AVX-NEXT:    [[E:%.*]] = extractelement <8 x float> [[X:%.*]], i32 5
; AVX-NEXT:    call void @use(float [[E]])
; AVX-NEXT:    [[TMP1:%.*]] = fneg <8 x float> [[X]]
; AVX-NEXT:    [[R:%.*]] = shufflevector <8 x float> [[Y:%.*]], <8 x float> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 13, i32 6, i32 7>
; AVX-NEXT:    ret <8 x float> [[R]]
;
  %e = extractelement <8 x float> %x, i32 5
  call void @use(float %e)
  %n = fneg float %e
  %r = insertelement <8 x float> %y, float %n, i32 5
  ret <8 x float> %r
}

; Narrower source with an extra use of the extracted element: the checks expect
; the scalar sequence to be left unchanged.

define <8 x float> @ext7_v4f32v8f32_use1(<4 x float> %x, <8 x float> %y) {
; CHECK-LABEL: @ext7_v4f32v8f32_use1(
; CHECK-NEXT:    [[E:%.*]] = extractelement <4 x float> [[X:%.*]], i32 3
; CHECK-NEXT:    call void @use(float [[E]])
; CHECK-NEXT:    [[N:%.*]] = fneg float [[E]]
; CHECK-NEXT:    [[R:%.*]] = insertelement <8 x float> [[Y:%.*]], float [[N]], i32 3
; CHECK-NEXT:    ret <8 x float> [[R]]
;
  %e = extractelement <4 x float> %x, i32 3
  call void @use(float %e)
  %n = fneg float %e
  %r = insertelement <8 x float> %y, float %n, i32 3
  ret <8 x float> %r
}

; Negative test - the transform is likely not profitable if the fneg has another use.

define <8 x float> @ext7_v8f32_use2(<8 x float> %x, <8 x float> %y) {
; CHECK-LABEL: @ext7_v8f32_use2(
; CHECK-NEXT:    [[E:%.*]] = extractelement <8 x float> [[X:%.*]], i32 3
; CHECK-NEXT:    [[N:%.*]] = fneg float [[E]]
; CHECK-NEXT:    call void @use(float [[N]])
; CHECK-NEXT:    [[R:%.*]] = insertelement <8 x float> [[Y:%.*]], float [[N]], i32 3
; CHECK-NEXT:    ret <8 x float> [[R]]
;
  %e = extractelement <8 x float> %x, i32 3
  %n = fneg float %e
  call void @use(float %n)
  %r = insertelement <8 x float> %y, float %n, i32 3
  ret <8 x float> %r
}

; Narrower source with an extra use of the fneg: the checks expect the scalar
; sequence to be left unchanged.

define <8 x float> @ext7_v4f32v8f32_use2(<4 x float> %x, <8 x float> %y) {
; CHECK-LABEL: @ext7_v4f32v8f32_use2(
; CHECK-NEXT:    [[E:%.*]] = extractelement <4 x float> [[X:%.*]], i32 3
; CHECK-NEXT:    [[N:%.*]] = fneg float [[E]]
; CHECK-NEXT:    call void @use(float [[N]])
; CHECK-NEXT:    [[R:%.*]] = insertelement <8 x float> [[Y:%.*]], float [[N]], i32 3
; CHECK-NEXT:    ret <8 x float> [[R]]
;
  %e = extractelement <4 x float> %x, i32 3
  %n = fneg float %e
  call void @use(float %n)
  %r = insertelement <8 x float> %y, float %n, i32 3
  ret <8 x float> %r
}

; Negative test - can't convert variable index to a shuffle.

define <2 x double> @ext_index_var_v2f64(<2 x double> %x, <2 x double> %y, i32 %index) {
; CHECK-LABEL: @ext_index_var_v2f64(
; CHECK-NEXT:    [[E:%.*]] = extractelement <2 x double> [[X:%.*]], i32 [[INDEX:%.*]]
; CHECK-NEXT:    [[N:%.*]] = fneg nsz double [[E]]
; CHECK-NEXT:    [[R:%.*]] = insertelement <2 x double> [[Y:%.*]], double [[N]], i32 [[INDEX]]
; CHECK-NEXT:    ret <2 x double> [[R]]
;
  %e = extractelement <2 x double> %x, i32 %index
  %n = fneg nsz double %e
  %r = insertelement <2 x double> %y, double %n, i32 %index
  ret <2 x double> %r
}

; Variable index with a narrower source: the checks expect the scalar sequence
; to be left unchanged.

define <4 x double> @ext_index_var_v2f64v4f64(<2 x double> %x, <4 x double> %y, i32 %index) {
; CHECK-LABEL: @ext_index_var_v2f64v4f64(
; CHECK-NEXT:    [[E:%.*]] = extractelement <2 x double> [[X:%.*]], i32 [[INDEX:%.*]]
; CHECK-NEXT:    [[N:%.*]] = fneg nsz double [[E]]
; CHECK-NEXT:    [[R:%.*]] = insertelement <4 x double> [[Y:%.*]], double [[N]], i32 [[INDEX]]
; CHECK-NEXT:    ret <4 x double> [[R]]
;
  %e = extractelement <2 x double> %x, i32 %index
  %n = fneg nsz double %e
  %r = insertelement <4 x double> %y, double %n, i32 %index
  ret <4 x double> %r
}

; Negative test - require same extract/insert index for simple shuffle.
; TODO: We could handle this by adjusting the cost calculation.

define <2 x double> @ext1_v2f64_ins0(<2 x double> %x, <2 x double> %y) {
; CHECK-LABEL: @ext1_v2f64_ins0(
; CHECK-NEXT:    [[E:%.*]] = extractelement <2 x double> [[X:%.*]], i32 1
; CHECK-NEXT:    [[N:%.*]] = fneg nsz double [[E]]
; CHECK-NEXT:    [[R:%.*]] = insertelement <2 x double> [[Y:%.*]], double [[N]], i32 0
; CHECK-NEXT:    ret <2 x double> [[R]]
;
  %e = extractelement <2 x double> %x, i32 1
  %n = fneg nsz double %e
  %r = insertelement <2 x double> %y, double %n, i32 0
  ret <2 x double> %r
}

; Negative test - extract from an index greater than the vector width of the destination
define <2 x double> @ext3_v4f64v2f64(<4 x double> %x, <2 x double> %y) {
; CHECK-LABEL: @ext3_v4f64v2f64(
; CHECK-NEXT:    [[E:%.*]] = extractelement <4 x double> [[X:%.*]], i32 3
; CHECK-NEXT:    [[N:%.*]] = fneg nsz double [[E]]
; CHECK-NEXT:    [[R:%.*]] = insertelement <2 x double> [[Y:%.*]], double [[N]], i32 1
; CHECK-NEXT:    ret <2 x double> [[R]]
;
  %e = extractelement <4 x double> %x, i32 3
  %n = fneg nsz double %e
  %r = insertelement <2 x double> %y, double %n, i32 1
  ret <2 x double> %r
}

; Narrower source with different extract (1) and insert (0) indexes: the checks
; expect the scalar sequence to be left unchanged.

define <4 x double> @ext1_v2f64v4f64_ins0(<2 x double> %x, <4 x double> %y) {
; CHECK-LABEL: @ext1_v2f64v4f64_ins0(
; CHECK-NEXT:    [[E:%.*]] = extractelement <2 x double> [[X:%.*]], i32 1
; CHECK-NEXT:    [[N:%.*]] = fneg nsz double [[E]]
; CHECK-NEXT:    [[R:%.*]] = insertelement <4 x double> [[Y:%.*]], double [[N]], i32 0
; CHECK-NEXT:    ret <4 x double> [[R]]
;
  %e = extractelement <2 x double> %x, i32 1
  %n = fneg nsz double %e
  %r = insertelement <4 x double> %y, double %n, i32 0
  ret <4 x double> %r
}

; Negative test - avoid changing poison ops

define <4 x float> @ext12_v4f32(<4 x float> %x, <4 x float> %y) {
; CHECK-LABEL: @ext12_v4f32(
; CHECK-NEXT:    [[E:%.*]] = extractelement <4 x float> [[X:%.*]], i32 12
; CHECK-NEXT:    [[N:%.*]] = fneg float [[E]]
; CHECK-NEXT:    [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[N]], i32 12
; CHECK-NEXT:    ret <4 x float> [[R]]
;
  %e = extractelement <4 x float> %x, i32 12
  %n = fneg float %e
  %r = insertelement <4 x float> %y, float %n, i32 12
  ret <4 x float> %r
}

; Out-of-range extract (6) and insert (12) indexes with a narrower source: the
; checks expect the scalar sequence to be left unchanged.

define <4 x float> @ext12_v2f32v4f32(<2 x float> %x, <4 x float> %y) {
; CHECK-LABEL: @ext12_v2f32v4f32(
; CHECK-NEXT:    [[E:%.*]] = extractelement <2 x float> [[X:%.*]], i32 6
; CHECK-NEXT:    [[N:%.*]] = fneg float [[E]]
; CHECK-NEXT:    [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[N]], i32 12
; CHECK-NEXT:    ret <4 x float> [[R]]
;
  %e = extractelement <2 x float> %x, i32 6
  %n = fneg float %e
  %r = insertelement <4 x float> %y, float %n, i32 12
  ret <4 x float> %r
}

; This used to crash because we assumed matching a true, unary fneg instruction.

define <2 x float> @ext1_v2f32_fsub(<2 x float> %x) {
; CHECK-LABEL: @ext1_v2f32_fsub(
; CHECK-NEXT:    [[TMP1:%.*]] = fneg <2 x float> [[X:%.*]]
; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[TMP1]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT:    ret <2 x float> [[R]]
;
  %e = extractelement <2 x float> %x, i32 1
  %s = fsub float -0.0, %e
  %r = insertelement <2 x float> %x, float %s, i32 1
  ret <2 x float> %r
}

; This used to crash because we assumed matching a true, unary fneg instruction.

define <2 x float> @ext1_v2f32_fsub_fmf(<2 x float> %x, <2 x float> %y) {
; CHECK-LABEL: @ext1_v2f32_fsub_fmf(
; CHECK-NEXT:    [[TMP1:%.*]] = fneg nnan nsz <2 x float> [[X:%.*]]
; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x float> [[Y:%.*]], <2 x float> [[TMP1]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT:    ret <2 x float> [[R]]
;
  %e = extractelement <2 x float> %x, i32 1
  %s = fsub nsz nnan float 0.0, %e
  %r = insertelement <2 x float> %y, float %s, i32 1
  ret <2 x float> %r
}

; Same fsub form with a narrower source: the checks expect a vector fneg that
; keeps the nnan/nsz flags, followed by two shuffles.

define <4 x float> @ext1_v2f32v4f32_fsub_fmf(<2 x float> %x, <4 x float> %y) {
; CHECK-LABEL: @ext1_v2f32v4f32_fsub_fmf(
; CHECK-NEXT:    [[TMP1:%.*]] = fneg nnan nsz <2 x float> [[X:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
; CHECK-NEXT:    ret <4 x float> [[R]]
;
  %e = extractelement <2 x float> %x, i32 1
  %s = fsub nsz nnan float 0.0, %e
  %r = insertelement <4 x float> %y, float %s, i32 1
  ret <4 x float> %r
}