; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,AVX

; Fold "shuffle (binop (shuffle, shuffle)), undef" --> "binop (shuffle), (shuffle)"

; External sink used by the multi-use tests to give a value a second user.
declare void @use_v4f64(<4 x double>)

; Basic case: the outer permute is merged into both inner shuffle masks.
define <4 x double> @fadd_v4f64(<4 x double> %a, <4 x double> %b) {
; CHECK-LABEL: define <4 x double> @fadd_v4f64(
; CHECK-SAME: <4 x double> [[A:%.*]], <4 x double> [[B:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT:    [[POST:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    ret <4 x double> [[POST]]
;
  %a1 = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %b1 = shufflevector <4 x double> %b, <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
  %op = fadd <4 x double> %a1, %b1
  %post = shufflevector <4 x double> %op, <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  ret <4 x double> %post
}

; The last outer mask index (4) selects from the poison operand, so the folded
; masks carry a poison lane in that position.
define <4 x double> @fadd_v4f64_poison_idx(<4 x double> %a, <4 x double> %b) {
; CHECK-LABEL: define <4 x double> @fadd_v4f64_poison_idx(
; CHECK-SAME: <4 x double> [[A:%.*]], <4 x double> [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 poison>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 poison>
; CHECK-NEXT:    [[POST:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    ret <4 x double> [[POST]]
;
  %a1 = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %b1 = shufflevector <4 x double> %b, <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
  %op = fadd <4 x double> %a1, %b1
  %post = shufflevector <4 x double> %op, <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 4>
  ret <4 x double> %post
}

; The two inner shuffles read sources of different widths
; (<4 x double> for %a, <2 x double> for %b).
define <4 x double> @fadd_v4f64_mixed_types(<4 x double> %a, <2 x double> %b) {
; CHECK-LABEL: define <4 x double> @fadd_v4f64_mixed_types(
; CHECK-SAME: <4 x double> [[A:%.*]], <2 x double> [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x double> [[B]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT:    [[POST:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    ret <4 x double> [[POST]]
;
  %a1 = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %b1 = shufflevector <2 x double> %b, <2 x double> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
  %op = fadd <4 x double> %a1, %b1
  %post = shufflevector <4 x double> %op, <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  ret <4 x double> %post
}

; The binop is narrower (<2 x float>) than both the shuffle source and the
; final result (<4 x float>), and its second operand is a constant.
; Note: despite the "fadd" in its name, this test exercises fmul.
define <4 x float> @fadd_v4f32_mixed_types(<4 x float> %a0) {
; CHECK-LABEL: define <4 x float> @fadd_v4f32_mixed_types(
; CHECK-SAME: <4 x float> [[A0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[A0]], <4 x float> zeroinitializer, <4 x i32> <i32 1, i32 5, i32 poison, i32 poison>
; CHECK-NEXT:    [[POST:%.*]] = fmul <4 x float> [[TMP1]], <float 0.000000e+00, float 0.000000e+00, float undef, float undef>
; CHECK-NEXT:    ret <4 x float> [[POST]]
;
  %pre = shufflevector <4 x float> %a0, <4 x float> zeroinitializer, <2 x i32> <i32 1, i32 5>
  %op = fmul <2 x float> %pre, zeroinitializer
  %post = shufflevector <2 x float> %op, <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
  ret <4 x float> %post
}

; Negative test - multiple use of fadd
define <4 x double> @fadd_v4f64_multiuse_op(<4 x double> %a, <4 x double> %b) {
; CHECK-LABEL: define <4 x double> @fadd_v4f64_multiuse_op(
; CHECK-SAME: <4 x double> [[A:%.*]], <4 x double> [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[A1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT:    [[B1:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
; CHECK-NEXT:    [[OP:%.*]] = fadd <4 x double> [[A1]], [[B1]]
; CHECK-NEXT:    [[POST:%.*]] = shufflevector <4 x double> [[OP]], <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; CHECK-NEXT:    call void @use_v4f64(<4 x double> [[OP]])
; CHECK-NEXT:    ret <4 x double> [[POST]]
;
  %a1 = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %b1 = shufflevector <4 x double> %b, <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
  %op = fadd <4 x double> %a1, %b1
  %post = shufflevector <4 x double> %op, <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  call void @use_v4f64(<4 x double> %op)
  ret <4 x double> %post
}

; Negative test - multiple use of inner shuffle (only fold if the moved shuffle is cheaper).
define <4 x double> @fadd_v4f64_multiuse_shuffle(<4 x double> %a, <4 x double> %b) {
; SSE-LABEL: define <4 x double> @fadd_v4f64_multiuse_shuffle(
; SSE-SAME: <4 x double> [[A:%.*]], <4 x double> [[B:%.*]]) #[[ATTR0]] {
; SSE-NEXT:    [[A1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; SSE-NEXT:    [[POST:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
; SSE-NEXT:    call void @use_v4f64(<4 x double> [[A1]])
; SSE-NEXT:    ret <4 x double> [[POST]]
;
; AVX-LABEL: define <4 x double> @fadd_v4f64_multiuse_shuffle(
; AVX-SAME: <4 x double> [[A:%.*]], <4 x double> [[B:%.*]]) #[[ATTR0]] {
; AVX-NEXT:    [[A1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX-NEXT:    [[B1:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
; AVX-NEXT:    [[OP:%.*]] = fadd <4 x double> [[A1]], [[B1]]
; AVX-NEXT:    [[POST:%.*]] = shufflevector <4 x double> [[OP]], <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; AVX-NEXT:    call void @use_v4f64(<4 x double> [[A1]])
; AVX-NEXT:    ret <4 x double> [[POST]]
;
  %a1 = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %b1 = shufflevector <4 x double> %b, <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
  %op = fadd <4 x double> %a1, %b1
  %post = shufflevector <4 x double> %op, <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  call void @use_v4f64(<4 x double> %a1)
  ret <4 x double> %post
}

; Integer division still folds when every lane of the outer mask is a defined
; index (no poison lane reaches the sdiv operands).
define <4 x i32> @sdiv_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: define <4 x i32> @sdiv_v4i32(
; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 3>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT:    [[POST:%.*]] = sdiv <4 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    ret <4 x i32> [[POST]]
;
  %a1 = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %b1 = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
  %op = sdiv <4 x i32> %a1, %b1
  %post = shufflevector <4 x i32> %op, <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 0>
  ret <4 x i32> %post
}

; Negative test - don't introduce poison element into div/rem instruction
define <4 x i32> @sdiv_v4i32_poison(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: define <4 x i32> @sdiv_v4i32_poison(
; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[A1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT:    [[B1:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
; CHECK-NEXT:    [[OP:%.*]] = sdiv <4 x i32> [[A1]], [[B1]]
; CHECK-NEXT:    [[POST:%.*]] = shufflevector <4 x i32> [[OP]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 poison>
; CHECK-NEXT:    ret <4 x i32> [[POST]]
;
  %a1 = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %b1 = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
  %op = sdiv <4 x i32> %a1, %b1
  %post = shufflevector <4 x i32> %op, <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 poison>
  ret <4 x i32> %post
}

; Negative test - don't introduce poison element into div/rem instruction
define <4 x i32> @sdiv_v4i32_poison_idx(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: define <4 x i32> @sdiv_v4i32_poison_idx(
; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[A1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT:    [[B1:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
; CHECK-NEXT:    [[OP:%.*]] = sdiv <4 x i32> [[A1]], [[B1]]
; CHECK-NEXT:    [[POST:%.*]] = shufflevector <4 x i32> [[OP]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 4>
; CHECK-NEXT:    ret <4 x i32> [[POST]]
;
  %a1 = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %b1 = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
  %op = sdiv <4 x i32> %a1, %b1
  %post = shufflevector <4 x i32> %op, <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 4>
  ret <4 x i32> %post
}