; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=slp-vectorizer -S -mcpu=core-i7 | FileCheck %s
; RUN: opt < %s -passes=slp-vectorizer -slp-threshold=-100 -S -mcpu=core-i7 | FileCheck %s --check-prefix=FORCE_SLP

; Every function below fills lanes of a vector argument through a chain of two
; insertelement instructions, and then adds the partially built vector (first
; insert only) to the fully built one, so the first insertelement has two users.
; The test is run twice: once with the default SLP cost threshold and once with
; -slp-threshold=-100. In both configurations the expected output recorded in
; the assertions is the same instruction sequence as the input.

; Scalars come from two i8 loads (%x and %x + 1) and go into lanes 0 and 1 of a
; <4 x i8> vector.
define <4 x i8> @test(<4 x i8> %v, ptr %x) {
; CHECK-LABEL: @test(
; CHECK-NEXT:    [[X0:%.*]] = load i8, ptr [[X:%.*]], align 4
; CHECK-NEXT:    [[G1:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 1
; CHECK-NEXT:    [[X1:%.*]] = load i8, ptr [[G1]], align 4
; CHECK-NEXT:    [[V0:%.*]] = insertelement <4 x i8> [[V:%.*]], i8 [[X0]], i64 0
; CHECK-NEXT:    [[V1:%.*]] = insertelement <4 x i8> [[V0]], i8 [[X1]], i64 1
; CHECK-NEXT:    [[V2:%.*]] = add <4 x i8> [[V0]], [[V1]]
; CHECK-NEXT:    ret <4 x i8> [[V2]]
;
; FORCE_SLP-LABEL: @test(
; FORCE_SLP-NEXT:    [[X0:%.*]] = load i8, ptr [[X:%.*]], align 4
; FORCE_SLP-NEXT:    [[G1:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 1
; FORCE_SLP-NEXT:    [[X1:%.*]] = load i8, ptr [[G1]], align 4
; FORCE_SLP-NEXT:    [[V0:%.*]] = insertelement <4 x i8> [[V:%.*]], i8 [[X0]], i64 0
; FORCE_SLP-NEXT:    [[V1:%.*]] = insertelement <4 x i8> [[V0]], i8 [[X1]], i64 1
; FORCE_SLP-NEXT:    [[V2:%.*]] = add <4 x i8> [[V0]], [[V1]]
; FORCE_SLP-NEXT:    ret <4 x i8> [[V2]]
;
  %x0 = load i8, ptr %x, align 4
  %g1 = getelementptr inbounds i8, ptr %x, i64 1
  %x1 = load i8, ptr %g1, align 4
  %v0 = insertelement <4 x i8> %v, i8 %x0, i64 0
  %v1 = insertelement <4 x i8> %v0, i8 %x1, i64 1
  %v2 = add <4 x i8> %v0, %v1
  ret <4 x i8> %v2
}

; Scalars come from two consecutive i32 loads that are truncated to i8 and
; inserted into lanes 0 and 1 of a <2 x i8> vector (every lane is written).
define <2 x i8> @test2(<2 x i8> %t6, ptr %t1) {
; CHECK-LABEL: @test2(
; CHECK-NEXT:    [[T3:%.*]] = load i32, ptr [[T1:%.*]], align 4
; CHECK-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 1
; CHECK-NEXT:    [[T5:%.*]] = load i32, ptr [[T4]], align 4
; CHECK-NEXT:    [[T7:%.*]] = trunc i32 [[T3]] to i8
; CHECK-NEXT:    [[T8:%.*]] = insertelement <2 x i8> [[T6:%.*]], i8 [[T7]], i64 0
; CHECK-NEXT:    [[T9:%.*]] = trunc i32 [[T5]] to i8
; CHECK-NEXT:    [[T10:%.*]] = insertelement <2 x i8> [[T8]], i8 [[T9]], i64 1
; CHECK-NEXT:    [[T11:%.*]] = add <2 x i8> [[T10]], [[T8]]
; CHECK-NEXT:    ret <2 x i8> [[T11]]
;
; FORCE_SLP-LABEL: @test2(
; FORCE_SLP-NEXT:    [[T3:%.*]] = load i32, ptr [[T1:%.*]], align 4
; FORCE_SLP-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 1
; FORCE_SLP-NEXT:    [[T5:%.*]] = load i32, ptr [[T4]], align 4
; FORCE_SLP-NEXT:    [[T7:%.*]] = trunc i32 [[T3]] to i8
; FORCE_SLP-NEXT:    [[T8:%.*]] = insertelement <2 x i8> [[T6:%.*]], i8 [[T7]], i64 0
; FORCE_SLP-NEXT:    [[T9:%.*]] = trunc i32 [[T5]] to i8
; FORCE_SLP-NEXT:    [[T10:%.*]] = insertelement <2 x i8> [[T8]], i8 [[T9]], i64 1
; FORCE_SLP-NEXT:    [[T11:%.*]] = add <2 x i8> [[T10]], [[T8]]
; FORCE_SLP-NEXT:    ret <2 x i8> [[T11]]
;
  %t3 = load i32, ptr %t1, align 4
  %t4 = getelementptr inbounds i32, ptr %t1, i64 1
  %t5 = load i32, ptr %t4, align 4
  %t7 = trunc i32 %t3 to i8
  %t8 = insertelement <2 x i8> %t6, i8 %t7, i64 0
  %t9 = trunc i32 %t5 to i8
  %t10 = insertelement <2 x i8> %t8, i8 %t9, i64 1
  %t11 = add <2 x i8> %t10, %t8
  ret <2 x i8> %t11
}

; Same shape as @test2, but the lanes are written in reverse order: the value
; from the first load goes to lane 1 and the value from the second to lane 0.
define <2 x i8> @test_reorder(<2 x i8> %t6, ptr %t1) {
; CHECK-LABEL: @test_reorder(
; CHECK-NEXT:    [[T3:%.*]] = load i32, ptr [[T1:%.*]], align 4
; CHECK-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 1
; CHECK-NEXT:    [[T5:%.*]] = load i32, ptr [[T4]], align 4
; CHECK-NEXT:    [[T7:%.*]] = trunc i32 [[T3]] to i8
; CHECK-NEXT:    [[T8:%.*]] = insertelement <2 x i8> [[T6:%.*]], i8 [[T7]], i64 1
; CHECK-NEXT:    [[T9:%.*]] = trunc i32 [[T5]] to i8
; CHECK-NEXT:    [[T10:%.*]] = insertelement <2 x i8> [[T8]], i8 [[T9]], i64 0
; CHECK-NEXT:    [[T11:%.*]] = add <2 x i8> [[T10]], [[T8]]
; CHECK-NEXT:    ret <2 x i8> [[T11]]
;
; FORCE_SLP-LABEL: @test_reorder(
; FORCE_SLP-NEXT:    [[T3:%.*]] = load i32, ptr [[T1:%.*]], align 4
; FORCE_SLP-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 1
; FORCE_SLP-NEXT:    [[T5:%.*]] = load i32, ptr [[T4]], align 4
; FORCE_SLP-NEXT:    [[T7:%.*]] = trunc i32 [[T3]] to i8
; FORCE_SLP-NEXT:    [[T8:%.*]] = insertelement <2 x i8> [[T6:%.*]], i8 [[T7]], i64 1
; FORCE_SLP-NEXT:    [[T9:%.*]] = trunc i32 [[T5]] to i8
; FORCE_SLP-NEXT:    [[T10:%.*]] = insertelement <2 x i8> [[T8]], i8 [[T9]], i64 0
; FORCE_SLP-NEXT:    [[T11:%.*]] = add <2 x i8> [[T10]], [[T8]]
; FORCE_SLP-NEXT:    ret <2 x i8> [[T11]]
;
  %t3 = load i32, ptr %t1, align 4
  %t4 = getelementptr inbounds i32, ptr %t1, i64 1
  %t5 = load i32, ptr %t4, align 4
  %t7 = trunc i32 %t3 to i8
  %t8 = insertelement <2 x i8> %t6, i8 %t7, i64 1
  %t9 = trunc i32 %t5 to i8
  %t10 = insertelement <2 x i8> %t8, i8 %t9, i64 0
  %t11 = add <2 x i8> %t10, %t8
  ret <2 x i8> %t11
}

; Same scalar chain as @test2, but the destination is a <4 x i8> vector, so
; only lanes 0 and 1 (a sub-range of the vector) are written.
define <4 x i8> @test_subvector(<4 x i8> %t6, ptr %t1) {
; CHECK-LABEL: @test_subvector(
; CHECK-NEXT:    [[T3:%.*]] = load i32, ptr [[T1:%.*]], align 4
; CHECK-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 1
; CHECK-NEXT:    [[T5:%.*]] = load i32, ptr [[T4]], align 4
; CHECK-NEXT:    [[T7:%.*]] = trunc i32 [[T3]] to i8
; CHECK-NEXT:    [[T8:%.*]] = insertelement <4 x i8> [[T6:%.*]], i8 [[T7]], i64 0
; CHECK-NEXT:    [[T9:%.*]] = trunc i32 [[T5]] to i8
; CHECK-NEXT:    [[T10:%.*]] = insertelement <4 x i8> [[T8]], i8 [[T9]], i64 1
; CHECK-NEXT:    [[T11:%.*]] = add <4 x i8> [[T10]], [[T8]]
; CHECK-NEXT:    ret <4 x i8> [[T11]]
;
; FORCE_SLP-LABEL: @test_subvector(
; FORCE_SLP-NEXT:    [[T3:%.*]] = load i32, ptr [[T1:%.*]], align 4
; FORCE_SLP-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 1
; FORCE_SLP-NEXT:    [[T5:%.*]] = load i32, ptr [[T4]], align 4
; FORCE_SLP-NEXT:    [[T7:%.*]] = trunc i32 [[T3]] to i8
; FORCE_SLP-NEXT:    [[T8:%.*]] = insertelement <4 x i8> [[T6:%.*]], i8 [[T7]], i64 0
; FORCE_SLP-NEXT:    [[T9:%.*]] = trunc i32 [[T5]] to i8
; FORCE_SLP-NEXT:    [[T10:%.*]] = insertelement <4 x i8> [[T8]], i8 [[T9]], i64 1
; FORCE_SLP-NEXT:    [[T11:%.*]] = add <4 x i8> [[T10]], [[T8]]
; FORCE_SLP-NEXT:    ret <4 x i8> [[T11]]
;
  %t3 = load i32, ptr %t1, align 4
  %t4 = getelementptr inbounds i32, ptr %t1, i64 1
  %t5 = load i32, ptr %t4, align 4
  %t7 = trunc i32 %t3 to i8
  %t8 = insertelement <4 x i8> %t6, i8 %t7, i64 0
  %t9 = trunc i32 %t5 to i8
  %t10 = insertelement <4 x i8> %t8, i8 %t9, i64 1
  %t11 = add <4 x i8> %t10, %t8
  ret <4 x i8> %t11
}

; Sub-range variant with reversed lane order: the value from the first load
; goes to lane 3 and the value from the second load to lane 2 of a <4 x i8>.
define <4 x i8> @test_subvector_reorder(<4 x i8> %t6, ptr %t1) {
; CHECK-LABEL: @test_subvector_reorder(
; CHECK-NEXT:    [[T3:%.*]] = load i32, ptr [[T1:%.*]], align 4
; CHECK-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 1
; CHECK-NEXT:    [[T5:%.*]] = load i32, ptr [[T4]], align 4
; CHECK-NEXT:    [[T7:%.*]] = trunc i32 [[T3]] to i8
; CHECK-NEXT:    [[T8:%.*]] = insertelement <4 x i8> [[T6:%.*]], i8 [[T7]], i64 3
; CHECK-NEXT:    [[T9:%.*]] = trunc i32 [[T5]] to i8
; CHECK-NEXT:    [[T10:%.*]] = insertelement <4 x i8> [[T8]], i8 [[T9]], i64 2
; CHECK-NEXT:    [[T11:%.*]] = add <4 x i8> [[T10]], [[T8]]
; CHECK-NEXT:    ret <4 x i8> [[T11]]
;
; FORCE_SLP-LABEL: @test_subvector_reorder(
; FORCE_SLP-NEXT:    [[T3:%.*]] = load i32, ptr [[T1:%.*]], align 4
; FORCE_SLP-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 1
; FORCE_SLP-NEXT:    [[T5:%.*]] = load i32, ptr [[T4]], align 4
; FORCE_SLP-NEXT:    [[T7:%.*]] = trunc i32 [[T3]] to i8
; FORCE_SLP-NEXT:    [[T8:%.*]] = insertelement <4 x i8> [[T6:%.*]], i8 [[T7]], i64 3
; FORCE_SLP-NEXT:    [[T9:%.*]] = trunc i32 [[T5]] to i8
; FORCE_SLP-NEXT:    [[T10:%.*]] = insertelement <4 x i8> [[T8]], i8 [[T9]], i64 2
; FORCE_SLP-NEXT:    [[T11:%.*]] = add <4 x i8> [[T10]], [[T8]]
; FORCE_SLP-NEXT:    ret <4 x i8> [[T11]]
;
  %t3 = load i32, ptr %t1, align 4
  %t4 = getelementptr inbounds i32, ptr %t1, i64 1
  %t5 = load i32, ptr %t4, align 4
  %t7 = trunc i32 %t3 to i8
  %t8 = insertelement <4 x i8> %t6, i8 %t7, i64 3
  %t9 = trunc i32 %t5 to i8
  %t10 = insertelement <4 x i8> %t8, i8 %t9, i64 2
  %t11 = add <4 x i8> %t10, %t8
  ret <4 x i8> %t11
}