; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core2 | FileCheck %s --check-prefix=SSE3
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx2 | FileCheck %s --check-prefix=AVX2

; Cost-model test for vector arithmetic on x86-64. The file is analyzed three
; times, once per -mcpu value above:
;   * corei7-avx is verified with the default prefix,
;   * core2      is verified with the SSE3 prefix,
;   * core-avx2  is verified with the AVX2 prefix.
; Every instruction operates on undef operands; only the cost reported for
; each instruction is inspected, never a computed value.
;
; NOTE(review): no invocation above passes a check prefix named AVX, so the
; directives spelled with that prefix in @shift and @avx2shift appear to be
; inert. Confirm the expected numbers before wiring that prefix into the
; corei7-avx invocation.

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"

; Integer vector adds at 128-, 256- and 512-bit widths (default prefix only).
define i32 @add(i32 %arg) {
  ;CHECK: cost of 1 {{.*}} add
  %A = add <4 x i32> undef, undef
  ;CHECK: cost of 4 {{.*}} add
  %B = add <8 x i32> undef, undef
  ;CHECK: cost of 1 {{.*}} add
  %C = add <2 x i64> undef, undef
  ;CHECK: cost of 4 {{.*}} add
  %D = add <4 x i64> undef, undef
  ;CHECK: cost of 8 {{.*}} add
  %E = add <8 x i64> undef, undef
  ;CHECK: cost of 0 {{.*}} ret
  ret i32 undef
}


; Bitwise xor at 128- and 256-bit widths; every case is expected to cost 1.
define i32 @xor(i32 %arg) {
  ;CHECK: cost of 1 {{.*}} xor
  %A = xor <4 x i32> undef, undef
  ;CHECK: cost of 1 {{.*}} xor
  %B = xor <8 x i32> undef, undef
  ;CHECK: cost of 1 {{.*}} xor
  %C = xor <2 x i64> undef, undef
  ;CHECK: cost of 1 {{.*}} xor
  %D = xor <4 x i64> undef, undef
  ;CHECK: cost of 0 {{.*}} ret
  ret i32 undef
}

; Integer vector multiplies with 64-bit lanes (default prefix only).
; CHECK: mul
define void @mul() {
  ; A <2 x i32> gets expanded to a <2 x i64> vector.
  ; A <2 x i64> vector multiply is implemented using
  ; 3 PMULUDQ and 2 PADDS and 4 shifts (3 + 2 + 4 = 9).
  ;CHECK: cost of 9 {{.*}} mul
  %A0 = mul <2 x i32> undef, undef
  ;CHECK: cost of 9 {{.*}} mul
  %A1 = mul <2 x i64> undef, undef
  ; The 256-bit case is expected to cost twice the 128-bit one.
  ;CHECK: cost of 18 {{.*}} mul
  %A2 = mul <4 x i64> undef, undef
  ret void
}

; <4 x i32> multiply as seen by the core2 invocation.
; SSE3: sse3mull
define void @sse3mull() {
  ; SSE3: cost of 6 {{.*}} mul
  %A0 = mul <4 x i32> undef, undef
  ret void
  ; The directive below names the following function; presumably it anchors
  ; on that function's header in the analysis output so the match above
  ; cannot run past this function. TODO confirm against the opt output.
  ; SSE3: avx2mull
}

; <4 x i64> multiply as seen by the core-avx2 invocation.
; AVX2: avx2mull
define void @avx2mull() {
  ; AVX2: cost of 9 {{.*}} mul
  %A0 = mul <4 x i64> undef, undef
  ret void
  ; Names the following function, in the same way as the trailing directive
  ; in @sse3mull.
  ; AVX2: fmul
}

; Floating-point vector multiplies at 128- and 256-bit widths.
; CHECK: fmul
define i32 @fmul(i32 %arg) {
  ;CHECK: cost of 2 {{.*}} fmul
  %A = fmul <4 x float> undef, undef
  ;CHECK: cost of 2 {{.*}} fmul
  %B = fmul <8 x float> undef, undef
  ret i32 undef
}

; 128-bit vector shifts by a vector amount.
; NOTE(review): only the AVX2-prefixed directives in this function are tied
; to an invocation visible in this file.
; AVX: shift
; AVX2: shift
define void @shift() {
  ; AVX: cost of 2 {{.*}} shl
  ; AVX2: cost of 1 {{.*}} shl
  %A0 = shl <4 x i32> undef, undef
  ; AVX: cost of 2 {{.*}} shl
  ; AVX2: cost of 1 {{.*}} shl
  %A1 = shl <2 x i64> undef, undef

  ; AVX: cost of 2 {{.*}} lshr
  ; AVX2: cost of 1 {{.*}} lshr
  %B0 = lshr <4 x i32> undef, undef
  ; AVX: cost of 2 {{.*}} lshr
  ; AVX2: cost of 1 {{.*}} lshr
  %B1 = lshr <2 x i64> undef, undef

  ; AVX: cost of 2 {{.*}} ashr
  ; AVX2: cost of 1 {{.*}} ashr
  %C0 = ashr <4 x i32> undef, undef
  ; The 64-bit-lane arithmetic shift is the one case here whose expected
  ; cost is well above 1.
  ; AVX: cost of 6 {{.*}} ashr
  ; AVX2: cost of 20 {{.*}} ashr
  %C1 = ashr <2 x i64> undef, undef

  ret void
}

; 256-bit counterparts of the shifts in @shift.
; NOTE(review): only the AVX2-prefixed directives in this function are tied
; to an invocation visible in this file.
; AVX: avx2shift
; AVX2: avx2shift
define void @avx2shift() {
  ; AVX: cost of 2 {{.*}} shl
  ; AVX2: cost of 1 {{.*}} shl
  %A0 = shl <8 x i32> undef, undef
  ; AVX: cost of 2 {{.*}} shl
  ; AVX2: cost of 1 {{.*}} shl
  %A1 = shl <4 x i64> undef, undef

  ; AVX: cost of 2 {{.*}} lshr
  ; AVX2: cost of 1 {{.*}} lshr
  %B0 = lshr <8 x i32> undef, undef
  ; AVX: cost of 2 {{.*}} lshr
  ; AVX2: cost of 1 {{.*}} lshr
  %B1 = lshr <4 x i64> undef, undef

  ; AVX: cost of 2 {{.*}} ashr
  ; AVX2: cost of 1 {{.*}} ashr
  %C0 = ashr <8 x i32> undef, undef
  ; Expected cost is twice the <2 x i64> figure used in @shift.
  ; AVX: cost of 12 {{.*}} ashr
  ; AVX2: cost of 40 {{.*}} ashr
  %C1 = ashr <4 x i64> undef, undef

  ret void
}