; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=slp-vectorizer -slp-threshold=-200 -mtriple=x86_64-unknown-linux -mcpu=core-avx2 -S | FileCheck %s

;; Each function below stores four i32 results to consecutive slots of %arr2.
;; Most lanes use one opcode and a single lane uses another, where one of the
;; two opcodes is a division or remainder. Division/remainder by zero is
;; undefined behavior, so the vectorizer must not widen the div/rem over all
;; four lanes and then blend it with the other opcode via a shuffle: that
;; would execute the div/rem on lanes whose divisor may be zero even though
;; the scalar code never divides on those lanes.
;; The negative -slp-threshold asks the pass to vectorize even when the cost
;; model reports no gain, so an unprofitable cost cannot mask a legality bug.

;; add is the main opcode (lanes 0, 1 and 3); sdiv is the alternate one
;; (lane 2 only). Expected output: lanes 0 and 1 are vectorized as <2 x i32>
;; adds, while the sdiv and the trailing add stay scalar.
define void @test_add_sdiv(ptr %arr1, ptr %arr2, i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
; CHECK-LABEL: @test_add_sdiv(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[GEP1_2:%.*]] = getelementptr i32, ptr [[ARR1:%.*]], i32 2
; CHECK-NEXT:    [[GEP1_3:%.*]] = getelementptr i32, ptr [[ARR1]], i32 3
; CHECK-NEXT:    [[GEP2_2:%.*]] = getelementptr i32, ptr [[ARR2:%.*]], i32 2
; CHECK-NEXT:    [[GEP2_3:%.*]] = getelementptr i32, ptr [[ARR2]], i32 3
; CHECK-NEXT:    [[V2:%.*]] = load i32, ptr [[GEP1_2]], align 4
; CHECK-NEXT:    [[V3:%.*]] = load i32, ptr [[GEP1_3]], align 4
; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[A0:%.*]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[A1:%.*]], i32 1
; CHECK-NEXT:    [[TMP2:%.*]] = add nsw <2 x i32> [[TMP1]], <i32 1146, i32 146>
; CHECK-NEXT:    [[Y2:%.*]] = add nsw i32 [[A2:%.*]], 42
; CHECK-NEXT:    [[Y3:%.*]] = add nsw i32 [[A3:%.*]], 0
; CHECK-NEXT:    [[RES2:%.*]] = sdiv i32 [[V2]], [[Y2]]
; CHECK-NEXT:    [[RES3:%.*]] = add nsw i32 [[V3]], [[Y3]]
; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i32>, ptr [[ARR1]], align 4
; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <2 x i32> [[TMP4]], [[TMP2]]
; CHECK-NEXT:    store <2 x i32> [[TMP5]], ptr [[ARR2]], align 4
; CHECK-NEXT:    store i32 [[RES2]], ptr [[GEP2_2]], align 4
; CHECK-NEXT:    store i32 [[RES3]], ptr [[GEP2_3]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %gep1.1 = getelementptr i32, ptr %arr1, i32 1
  %gep1.2 = getelementptr i32, ptr %arr1, i32 2
  %gep1.3 = getelementptr i32, ptr %arr1, i32 3
  %gep2.1 = getelementptr i32, ptr %arr2, i32 1
  %gep2.2 = getelementptr i32, ptr %arr2, i32 2
  %gep2.3 = getelementptr i32, ptr %arr2, i32 3
  %v0 = load i32, ptr %arr1
  %v1 = load i32, ptr %gep1.1
  %v2 = load i32, ptr %gep1.2
  %v3 = load i32, ptr %gep1.3
  %y0 = add nsw i32 %a0, 1146
  %y1 = add nsw i32 %a1, 146
  %y2 = add nsw i32 %a2, 42
  ;; %y3 is zero if %a3 is zero
  %y3 = add nsw i32 %a3, 0
  %res0 = add nsw i32 %v0, %y0
  %res1 = add nsw i32 %v1, %y1
  ;; As such, doing alternate shuffling would be incorrect:
  ;;   %vadd = add nsw %v[0-3], %y[0-3]
  ;;   %vsdiv = sdiv %v[0-3], %y[0-3]
  ;;   %result = shuffle %vadd, %vsdiv, <mask>
  ;; would be illegal, because %vsdiv would also divide lane 3 by %y3.
  %res2 = sdiv i32 %v2, %y2
  %res3 = add nsw i32 %v3, %y3
  store i32 %res0, ptr %arr2
  store i32 %res1, ptr %gep2.1
  store i32 %res2, ptr %gep2.2
  store i32 %res3, ptr %gep2.3
  ret void
}

;; Similar test, but now div/rem is main opcode and not the alternate one. Same issue.
;; urem covers lanes 0-2 and add covers lane 3 only. Expected output: all four
;; lanes stay scalar, since a 4-wide urem would also take the remainder by %y3.
define void @test_urem_add(ptr %arr1, ptr %arr2, i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
; CHECK-LABEL: @test_urem_add(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[GEP1_1:%.*]] = getelementptr i32, ptr [[ARR1:%.*]], i32 1
; CHECK-NEXT:    [[GEP1_2:%.*]] = getelementptr i32, ptr [[ARR1]], i32 2
; CHECK-NEXT:    [[GEP1_3:%.*]] = getelementptr i32, ptr [[ARR1]], i32 3
; CHECK-NEXT:    [[GEP2_1:%.*]] = getelementptr i32, ptr [[ARR2:%.*]], i32 1
; CHECK-NEXT:    [[GEP2_2:%.*]] = getelementptr i32, ptr [[ARR2]], i32 2
; CHECK-NEXT:    [[GEP2_3:%.*]] = getelementptr i32, ptr [[ARR2]], i32 3
; CHECK-NEXT:    [[V0:%.*]] = load i32, ptr [[ARR1]], align 4
; CHECK-NEXT:    [[V1:%.*]] = load i32, ptr [[GEP1_1]], align 4
; CHECK-NEXT:    [[V2:%.*]] = load i32, ptr [[GEP1_2]], align 4
; CHECK-NEXT:    [[V3:%.*]] = load i32, ptr [[GEP1_3]], align 4
; CHECK-NEXT:    [[Y0:%.*]] = add nsw i32 [[A0:%.*]], 1146
; CHECK-NEXT:    [[Y1:%.*]] = add nsw i32 [[A1:%.*]], 146
; CHECK-NEXT:    [[Y2:%.*]] = add nsw i32 [[A2:%.*]], 42
; CHECK-NEXT:    [[Y3:%.*]] = add nsw i32 [[A3:%.*]], 0
; CHECK-NEXT:    [[RES0:%.*]] = urem i32 [[V0]], [[Y0]]
; CHECK-NEXT:    [[RES1:%.*]] = urem i32 [[V1]], [[Y1]]
; CHECK-NEXT:    [[RES2:%.*]] = urem i32 [[V2]], [[Y2]]
; CHECK-NEXT:    [[RES3:%.*]] = add nsw i32 [[V3]], [[Y3]]
; CHECK-NEXT:    store i32 [[RES0]], ptr [[ARR2]], align 4
; CHECK-NEXT:    store i32 [[RES1]], ptr [[GEP2_1]], align 4
; CHECK-NEXT:    store i32 [[RES2]], ptr [[GEP2_2]], align 4
; CHECK-NEXT:    store i32 [[RES3]], ptr [[GEP2_3]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %gep1.1 = getelementptr i32, ptr %arr1, i32 1
  %gep1.2 = getelementptr i32, ptr %arr1, i32 2
  %gep1.3 = getelementptr i32, ptr %arr1, i32 3
  %gep2.1 = getelementptr i32, ptr %arr2, i32 1
  %gep2.2 = getelementptr i32, ptr %arr2, i32 2
  %gep2.3 = getelementptr i32, ptr %arr2, i32 3
  %v0 = load i32, ptr %arr1
  %v1 = load i32, ptr %gep1.1
  %v2 = load i32, ptr %gep1.2
  %v3 = load i32, ptr %gep1.3
  %y0 = add nsw i32 %a0, 1146
  %y1 = add nsw i32 %a1, 146
  %y2 = add nsw i32 %a2, 42
  ;; %y3 is zero if %a3 is zero
  %y3 = add nsw i32 %a3, 0
  %res0 = urem i32 %v0, %y0
  %res1 = urem i32 %v1, %y1
  %res2 = urem i32 %v2, %y2
  ;; As such, doing alternate shuffling would be incorrect:
  ;;   %vurem = urem %v[0-3], %y[0-3]
  ;;   %vadd = add nsw %v[0-3], %y[0-3]
  ;;   %result = shuffle %vurem, %vadd, <mask>
  ;; would be illegal, because %vurem would also take lane 3 modulo %y3.
  %res3 = add nsw i32 %v3, %y3
  store i32 %res0, ptr %arr2
  store i32 %res1, ptr %gep2.1
  store i32 %res2, ptr %gep2.2
  store i32 %res3, ptr %gep2.3
  ret void
}