1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 2; RUN: opt < %s -passes=slp-vectorizer,instcombine -S -mtriple=x86_64-unknown-linux -mattr=+sse2 | FileCheck %s --check-prefixes=SSE 3; RUN: opt < %s -passes=slp-vectorizer,instcombine -S -mtriple=x86_64-unknown-linux -mattr=+avx | FileCheck %s --check-prefixes=AVX 4; RUN: opt < %s -passes=slp-vectorizer,instcombine -S -mtriple=x86_64-unknown-linux -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2 5; RUN: opt < %s -passes=slp-vectorizer,instcombine -S -mtriple=x86_64-unknown-linux -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512F 6; RUN: opt < %s -passes=slp-vectorizer,instcombine -S -mtriple=x86_64-unknown-linux -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX512VL 7 8define void @gather_load(ptr noalias nocapture %0, ptr noalias nocapture readonly %1) { 9; SSE-LABEL: @gather_load( 10; SSE-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1:%.*]], i64 4 11; SSE-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]] 12; SSE-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 44 13; SSE-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]] 14; SSE-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16 15; SSE-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]] 16; SSE-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]] 17; SSE-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0 18; SSE-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP6]], i64 1 19; SSE-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP8]], i64 2 20; SSE-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP9]], i64 3 21; SSE-NEXT: [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], <i32 1, i32 2, i32 3, i32 4> 22; SSE-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] 23; SSE-NEXT: ret void 24; 25; AVX-LABEL: 
@gather_load( 26; AVX-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1:%.*]], i64 4 27; AVX-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]] 28; AVX-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 44 29; AVX-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]] 30; AVX-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16 31; AVX-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]] 32; AVX-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]] 33; AVX-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0 34; AVX-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP6]], i64 1 35; AVX-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP8]], i64 2 36; AVX-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP9]], i64 3 37; AVX-NEXT: [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], <i32 1, i32 2, i32 3, i32 4> 38; AVX-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] 39; AVX-NEXT: ret void 40; 41; AVX2-LABEL: @gather_load( 42; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1:%.*]], i64 4 43; AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]] 44; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 44 45; AVX2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]] 46; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16 47; AVX2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]] 48; AVX2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]] 49; AVX2-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0 50; AVX2-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP6]], i64 1 51; AVX2-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP8]], i64 2 52; AVX2-NEXT: 
[[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP9]], i64 3 53; AVX2-NEXT: [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], <i32 1, i32 2, i32 3, i32 4> 54; AVX2-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] 55; AVX2-NEXT: ret void 56; 57; AVX512F-LABEL: @gather_load( 58; AVX512F-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1:%.*]], i64 4 59; AVX512F-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]] 60; AVX512F-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 44 61; AVX512F-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]] 62; AVX512F-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16 63; AVX512F-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]] 64; AVX512F-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]] 65; AVX512F-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0 66; AVX512F-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP6]], i64 1 67; AVX512F-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP8]], i64 2 68; AVX512F-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP9]], i64 3 69; AVX512F-NEXT: [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], <i32 1, i32 2, i32 3, i32 4> 70; AVX512F-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] 71; AVX512F-NEXT: ret void 72; 73; AVX512VL-LABEL: @gather_load( 74; AVX512VL-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1:%.*]], i64 4 75; AVX512VL-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]] 76; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 44 77; AVX512VL-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]] 78; AVX512VL-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16 79; AVX512VL-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, 
!tbaa [[TBAA0]] 80; AVX512VL-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]] 81; AVX512VL-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0 82; AVX512VL-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP6]], i64 1 83; AVX512VL-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP8]], i64 2 84; AVX512VL-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP9]], i64 3 85; AVX512VL-NEXT: [[TMP14:%.*]] = add nsw <4 x i32> [[TMP13]], <i32 1, i32 2, i32 3, i32 4> 86; AVX512VL-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] 87; AVX512VL-NEXT: ret void 88; 89 %3 = getelementptr inbounds i32, ptr %1, i64 1 90 %4 = load i32, ptr %1, align 4, !tbaa !2 91 %5 = getelementptr inbounds i32, ptr %0, i64 1 92 %6 = getelementptr inbounds i32, ptr %1, i64 11 93 %7 = load i32, ptr %6, align 4, !tbaa !2 94 %8 = getelementptr inbounds i32, ptr %0, i64 2 95 %9 = getelementptr inbounds i32, ptr %1, i64 4 96 %10 = load i32, ptr %9, align 4, !tbaa !2 97 %11 = getelementptr inbounds i32, ptr %0, i64 3 98 %12 = load i32, ptr %3, align 4, !tbaa !2 99 %13 = insertelement <4 x i32> poison, i32 %4, i32 0 100 %14 = insertelement <4 x i32> %13, i32 %7, i32 1 101 %15 = insertelement <4 x i32> %14, i32 %10, i32 2 102 %16 = insertelement <4 x i32> %15, i32 %12, i32 3 103 %17 = add nsw <4 x i32> %16, <i32 1, i32 2, i32 3, i32 4> 104 store <4 x i32> %17, ptr %0, align 4, !tbaa !2 105 ret void 106} 107 108define void @gather_load_2(ptr noalias nocapture %0, ptr noalias nocapture readonly %1) { 109; SSE-LABEL: @gather_load_2( 110; SSE-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1:%.*]], i64 4 111; SSE-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]] 112; SSE-NEXT: [[TMP5:%.*]] = add nsw i32 [[TMP4]], 1 113; SSE-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0:%.*]], i64 4 114; SSE-NEXT: store i32 [[TMP5]], ptr [[TMP0]], align 4, !tbaa 
[[TBAA0]] 115; SSE-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40 116; SSE-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]] 117; SSE-NEXT: [[TMP9:%.*]] = add nsw i32 [[TMP8]], 2 118; SSE-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 8 119; SSE-NEXT: store i32 [[TMP9]], ptr [[TMP6]], align 4, !tbaa [[TBAA0]] 120; SSE-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 12 121; SSE-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4, !tbaa [[TBAA0]] 122; SSE-NEXT: [[TMP13:%.*]] = add nsw i32 [[TMP12]], 3 123; SSE-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 12 124; SSE-NEXT: store i32 [[TMP13]], ptr [[TMP10]], align 4, !tbaa [[TBAA0]] 125; SSE-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 20 126; SSE-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4, !tbaa [[TBAA0]] 127; SSE-NEXT: [[TMP17:%.*]] = add nsw i32 [[TMP16]], 4 128; SSE-NEXT: store i32 [[TMP17]], ptr [[TMP14]], align 4, !tbaa [[TBAA0]] 129; SSE-NEXT: ret void 130; 131; AVX-LABEL: @gather_load_2( 132; AVX-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1:%.*]], i64 4 133; AVX-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]] 134; AVX-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40 135; AVX-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]] 136; AVX-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 12 137; AVX-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]] 138; AVX-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 20 139; AVX-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]] 140; AVX-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0 141; AVX-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP6]], i64 1 142; AVX-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], 
i32 [[TMP8]], i64 2 143; AVX-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP10]], i64 3 144; AVX-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[TMP14]], <i32 1, i32 2, i32 3, i32 4> 145; AVX-NEXT: store <4 x i32> [[TMP15]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] 146; AVX-NEXT: ret void 147; 148; AVX2-LABEL: @gather_load_2( 149; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1:%.*]], i64 4 150; AVX2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]] 151; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40 152; AVX2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]] 153; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 12 154; AVX2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]] 155; AVX2-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 20 156; AVX2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]] 157; AVX2-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0 158; AVX2-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP6]], i64 1 159; AVX2-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP8]], i64 2 160; AVX2-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP10]], i64 3 161; AVX2-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[TMP14]], <i32 1, i32 2, i32 3, i32 4> 162; AVX2-NEXT: store <4 x i32> [[TMP15]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] 163; AVX2-NEXT: ret void 164; 165; AVX512F-LABEL: @gather_load_2( 166; AVX512F-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1:%.*]], i64 4 167; AVX512F-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]] 168; AVX512F-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40 169; AVX512F-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]] 170; AVX512F-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr 
[[TMP1]], i64 12 171; AVX512F-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]] 172; AVX512F-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 20 173; AVX512F-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]] 174; AVX512F-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0 175; AVX512F-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP6]], i64 1 176; AVX512F-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP8]], i64 2 177; AVX512F-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP10]], i64 3 178; AVX512F-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[TMP14]], <i32 1, i32 2, i32 3, i32 4> 179; AVX512F-NEXT: store <4 x i32> [[TMP15]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] 180; AVX512F-NEXT: ret void 181; 182; AVX512VL-LABEL: @gather_load_2( 183; AVX512VL-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1:%.*]], i64 4 184; AVX512VL-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]] 185; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40 186; AVX512VL-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]] 187; AVX512VL-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 12 188; AVX512VL-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]] 189; AVX512VL-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 20 190; AVX512VL-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]] 191; AVX512VL-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0 192; AVX512VL-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP6]], i64 1 193; AVX512VL-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP8]], i64 2 194; AVX512VL-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP10]], i64 3 195; AVX512VL-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[TMP14]], <i32 
1, i32 2, i32 3, i32 4> 196; AVX512VL-NEXT: store <4 x i32> [[TMP15]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] 197; AVX512VL-NEXT: ret void 198; 199 %3 = getelementptr inbounds i32, ptr %1, i64 1 200 %4 = load i32, ptr %3, align 4, !tbaa !2 201 %5 = add nsw i32 %4, 1 202 %6 = getelementptr inbounds i32, ptr %0, i64 1 203 store i32 %5, ptr %0, align 4, !tbaa !2 204 %7 = getelementptr inbounds i32, ptr %1, i64 10 205 %8 = load i32, ptr %7, align 4, !tbaa !2 206 %9 = add nsw i32 %8, 2 207 %10 = getelementptr inbounds i32, ptr %0, i64 2 208 store i32 %9, ptr %6, align 4, !tbaa !2 209 %11 = getelementptr inbounds i32, ptr %1, i64 3 210 %12 = load i32, ptr %11, align 4, !tbaa !2 211 %13 = add nsw i32 %12, 3 212 %14 = getelementptr inbounds i32, ptr %0, i64 3 213 store i32 %13, ptr %10, align 4, !tbaa !2 214 %15 = getelementptr inbounds i32, ptr %1, i64 5 215 %16 = load i32, ptr %15, align 4, !tbaa !2 216 %17 = add nsw i32 %16, 4 217 store i32 %17, ptr %14, align 4, !tbaa !2 218 ret void 219} 220 221 222define void @gather_load_3(ptr noalias nocapture %0, ptr noalias nocapture readonly %1) { 223; SSE-LABEL: @gather_load_3( 224; SSE-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]] 225; SSE-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 1 226; SSE-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0:%.*]], i64 4 227; SSE-NEXT: store i32 [[TMP4]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]] 228; SSE-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 44 229; SSE-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]] 230; SSE-NEXT: [[TMP8:%.*]] = add i32 [[TMP7]], 2 231; SSE-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 8 232; SSE-NEXT: store i32 [[TMP8]], ptr [[TMP5]], align 4, !tbaa [[TBAA0]] 233; SSE-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16 234; SSE-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !tbaa [[TBAA0]] 235; SSE-NEXT: [[TMP12:%.*]] = add i32 
[[TMP11]], 3 236; SSE-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 12 237; SSE-NEXT: store i32 [[TMP12]], ptr [[TMP9]], align 4, !tbaa [[TBAA0]] 238; SSE-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 60 239; SSE-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !tbaa [[TBAA0]] 240; SSE-NEXT: [[TMP16:%.*]] = add i32 [[TMP15]], 4 241; SSE-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 16 242; SSE-NEXT: store i32 [[TMP16]], ptr [[TMP13]], align 4, !tbaa [[TBAA0]] 243; SSE-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 72 244; SSE-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4, !tbaa [[TBAA0]] 245; SSE-NEXT: [[TMP20:%.*]] = add i32 [[TMP19]], 1 246; SSE-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 20 247; SSE-NEXT: store i32 [[TMP20]], ptr [[TMP17]], align 4, !tbaa [[TBAA0]] 248; SSE-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 36 249; SSE-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4, !tbaa [[TBAA0]] 250; SSE-NEXT: [[TMP24:%.*]] = add i32 [[TMP23]], 2 251; SSE-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 24 252; SSE-NEXT: store i32 [[TMP24]], ptr [[TMP21]], align 4, !tbaa [[TBAA0]] 253; SSE-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 24 254; SSE-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4, !tbaa [[TBAA0]] 255; SSE-NEXT: [[TMP28:%.*]] = add i32 [[TMP27]], 3 256; SSE-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 28 257; SSE-NEXT: store i32 [[TMP28]], ptr [[TMP25]], align 4, !tbaa [[TBAA0]] 258; SSE-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 84 259; SSE-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4, !tbaa [[TBAA0]] 260; SSE-NEXT: [[TMP32:%.*]] = add i32 [[TMP31]], 4 261; SSE-NEXT: store i32 [[TMP32]], ptr [[TMP29]], align 4, !tbaa [[TBAA0]] 262; SSE-NEXT: ret void 263; 264; 
AVX-LABEL: @gather_load_3( 265; AVX-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]] 266; AVX-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 44 267; AVX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4, !tbaa [[TBAA0]] 268; AVX-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16 269; AVX-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]] 270; AVX-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 60 271; AVX-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4, !tbaa [[TBAA0]] 272; AVX-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 72 273; AVX-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !tbaa [[TBAA0]] 274; AVX-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 36 275; AVX-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !tbaa [[TBAA0]] 276; AVX-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 24 277; AVX-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !tbaa [[TBAA0]] 278; AVX-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 84 279; AVX-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4, !tbaa [[TBAA0]] 280; AVX-NEXT: [[TMP18:%.*]] = insertelement <8 x i32> poison, i32 [[TMP3]], i64 0 281; AVX-NEXT: [[TMP19:%.*]] = insertelement <8 x i32> [[TMP18]], i32 [[TMP5]], i64 1 282; AVX-NEXT: [[TMP20:%.*]] = insertelement <8 x i32> [[TMP19]], i32 [[TMP7]], i64 2 283; AVX-NEXT: [[TMP21:%.*]] = insertelement <8 x i32> [[TMP20]], i32 [[TMP9]], i64 3 284; AVX-NEXT: [[TMP22:%.*]] = insertelement <8 x i32> [[TMP21]], i32 [[TMP11]], i64 4 285; AVX-NEXT: [[TMP23:%.*]] = insertelement <8 x i32> [[TMP22]], i32 [[TMP13]], i64 5 286; AVX-NEXT: [[TMP24:%.*]] = insertelement <8 x i32> [[TMP23]], i32 [[TMP15]], i64 6 287; AVX-NEXT: [[TMP25:%.*]] = insertelement <8 x i32> [[TMP24]], i32 [[TMP17]], i64 7 288; AVX-NEXT: [[TMP26:%.*]] = add <8 x i32> [[TMP25]], <i32 
1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4> 289; AVX-NEXT: store <8 x i32> [[TMP26]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] 290; AVX-NEXT: ret void 291; 292; AVX2-LABEL: @gather_load_3( 293; AVX2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]] 294; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 44 295; AVX2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4, !tbaa [[TBAA0]] 296; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16 297; AVX2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]] 298; AVX2-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 60 299; AVX2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4, !tbaa [[TBAA0]] 300; AVX2-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 72 301; AVX2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !tbaa [[TBAA0]] 302; AVX2-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 36 303; AVX2-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !tbaa [[TBAA0]] 304; AVX2-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 24 305; AVX2-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !tbaa [[TBAA0]] 306; AVX2-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 84 307; AVX2-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4, !tbaa [[TBAA0]] 308; AVX2-NEXT: [[TMP18:%.*]] = insertelement <8 x i32> poison, i32 [[TMP3]], i64 0 309; AVX2-NEXT: [[TMP19:%.*]] = insertelement <8 x i32> [[TMP18]], i32 [[TMP5]], i64 1 310; AVX2-NEXT: [[TMP20:%.*]] = insertelement <8 x i32> [[TMP19]], i32 [[TMP7]], i64 2 311; AVX2-NEXT: [[TMP21:%.*]] = insertelement <8 x i32> [[TMP20]], i32 [[TMP9]], i64 3 312; AVX2-NEXT: [[TMP22:%.*]] = insertelement <8 x i32> [[TMP21]], i32 [[TMP11]], i64 4 313; AVX2-NEXT: [[TMP23:%.*]] = insertelement <8 x i32> [[TMP22]], i32 [[TMP13]], i64 5 314; AVX2-NEXT: [[TMP24:%.*]] = 
insertelement <8 x i32> [[TMP23]], i32 [[TMP15]], i64 6 315; AVX2-NEXT: [[TMP25:%.*]] = insertelement <8 x i32> [[TMP24]], i32 [[TMP17]], i64 7 316; AVX2-NEXT: [[TMP26:%.*]] = add <8 x i32> [[TMP25]], <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4> 317; AVX2-NEXT: store <8 x i32> [[TMP26]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] 318; AVX2-NEXT: ret void 319; 320; AVX512F-LABEL: @gather_load_3( 321; AVX512F-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1:%.*]], i64 0 322; AVX512F-NEXT: [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer 323; AVX512F-NEXT: [[TMP5:%.*]] = getelementptr i32, <8 x ptr> [[TMP4]], <8 x i64> <i64 0, i64 11, i64 4, i64 15, i64 18, i64 9, i64 6, i64 21> 324; AVX512F-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> splat (i1 true), <8 x i32> poison), !tbaa [[TBAA0]] 325; AVX512F-NEXT: [[TMP7:%.*]] = add <8 x i32> [[TMP6]], <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4> 326; AVX512F-NEXT: store <8 x i32> [[TMP7]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] 327; AVX512F-NEXT: ret void 328; 329; AVX512VL-LABEL: @gather_load_3( 330; AVX512VL-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1:%.*]], i64 0 331; AVX512VL-NEXT: [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer 332; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr i32, <8 x ptr> [[TMP4]], <8 x i64> <i64 0, i64 11, i64 4, i64 15, i64 18, i64 9, i64 6, i64 21> 333; AVX512VL-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> splat (i1 true), <8 x i32> poison), !tbaa [[TBAA0]] 334; AVX512VL-NEXT: [[TMP7:%.*]] = add <8 x i32> [[TMP6]], <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4> 335; AVX512VL-NEXT: store <8 x i32> [[TMP7]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] 336; AVX512VL-NEXT: ret void 337; 338 %3 = load i32, ptr %1, align 4, !tbaa !2 
339 %4 = add i32 %3, 1 340 %5 = getelementptr inbounds i32, ptr %0, i64 1 341 store i32 %4, ptr %0, align 4, !tbaa !2 342 %6 = getelementptr inbounds i32, ptr %1, i64 11 343 %7 = load i32, ptr %6, align 4, !tbaa !2 344 %8 = add i32 %7, 2 345 %9 = getelementptr inbounds i32, ptr %0, i64 2 346 store i32 %8, ptr %5, align 4, !tbaa !2 347 %10 = getelementptr inbounds i32, ptr %1, i64 4 348 %11 = load i32, ptr %10, align 4, !tbaa !2 349 %12 = add i32 %11, 3 350 %13 = getelementptr inbounds i32, ptr %0, i64 3 351 store i32 %12, ptr %9, align 4, !tbaa !2 352 %14 = getelementptr inbounds i32, ptr %1, i64 15 353 %15 = load i32, ptr %14, align 4, !tbaa !2 354 %16 = add i32 %15, 4 355 %17 = getelementptr inbounds i32, ptr %0, i64 4 356 store i32 %16, ptr %13, align 4, !tbaa !2 357 %18 = getelementptr inbounds i32, ptr %1, i64 18 358 %19 = load i32, ptr %18, align 4, !tbaa !2 359 %20 = add i32 %19, 1 360 %21 = getelementptr inbounds i32, ptr %0, i64 5 361 store i32 %20, ptr %17, align 4, !tbaa !2 362 %22 = getelementptr inbounds i32, ptr %1, i64 9 363 %23 = load i32, ptr %22, align 4, !tbaa !2 364 %24 = add i32 %23, 2 365 %25 = getelementptr inbounds i32, ptr %0, i64 6 366 store i32 %24, ptr %21, align 4, !tbaa !2 367 %26 = getelementptr inbounds i32, ptr %1, i64 6 368 %27 = load i32, ptr %26, align 4, !tbaa !2 369 %28 = add i32 %27, 3 370 %29 = getelementptr inbounds i32, ptr %0, i64 7 371 store i32 %28, ptr %25, align 4, !tbaa !2 372 %30 = getelementptr inbounds i32, ptr %1, i64 21 373 %31 = load i32, ptr %30, align 4, !tbaa !2 374 %32 = add i32 %31, 4 375 store i32 %32, ptr %29, align 4, !tbaa !2 376 ret void 377} 378 379define void @gather_load_4(ptr noalias nocapture %t0, ptr noalias nocapture readonly %t1) { 380; SSE-LABEL: @gather_load_4( 381; SSE-NEXT: [[T5:%.*]] = getelementptr inbounds nuw i8, ptr [[T0:%.*]], i64 4 382; SSE-NEXT: [[T6:%.*]] = getelementptr inbounds nuw i8, ptr [[T1:%.*]], i64 44 383; SSE-NEXT: [[T9:%.*]] = getelementptr inbounds nuw i8, ptr [[T0]], 
i64 8 384; SSE-NEXT: [[T10:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 16 385; SSE-NEXT: [[T13:%.*]] = getelementptr inbounds nuw i8, ptr [[T0]], i64 12 386; SSE-NEXT: [[T14:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 60 387; SSE-NEXT: [[T17:%.*]] = getelementptr inbounds nuw i8, ptr [[T0]], i64 16 388; SSE-NEXT: [[T18:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 72 389; SSE-NEXT: [[T21:%.*]] = getelementptr inbounds nuw i8, ptr [[T0]], i64 20 390; SSE-NEXT: [[T22:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 36 391; SSE-NEXT: [[T25:%.*]] = getelementptr inbounds nuw i8, ptr [[T0]], i64 24 392; SSE-NEXT: [[T26:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 24 393; SSE-NEXT: [[T29:%.*]] = getelementptr inbounds nuw i8, ptr [[T0]], i64 28 394; SSE-NEXT: [[T30:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 84 395; SSE-NEXT: [[T3:%.*]] = load i32, ptr [[T1]], align 4, !tbaa [[TBAA0]] 396; SSE-NEXT: [[T7:%.*]] = load i32, ptr [[T6]], align 4, !tbaa [[TBAA0]] 397; SSE-NEXT: [[T11:%.*]] = load i32, ptr [[T10]], align 4, !tbaa [[TBAA0]] 398; SSE-NEXT: [[T15:%.*]] = load i32, ptr [[T14]], align 4, !tbaa [[TBAA0]] 399; SSE-NEXT: [[T19:%.*]] = load i32, ptr [[T18]], align 4, !tbaa [[TBAA0]] 400; SSE-NEXT: [[T23:%.*]] = load i32, ptr [[T22]], align 4, !tbaa [[TBAA0]] 401; SSE-NEXT: [[T27:%.*]] = load i32, ptr [[T26]], align 4, !tbaa [[TBAA0]] 402; SSE-NEXT: [[T31:%.*]] = load i32, ptr [[T30]], align 4, !tbaa [[TBAA0]] 403; SSE-NEXT: [[T4:%.*]] = add i32 [[T3]], 1 404; SSE-NEXT: [[T8:%.*]] = add i32 [[T7]], 2 405; SSE-NEXT: [[T12:%.*]] = add i32 [[T11]], 3 406; SSE-NEXT: [[T16:%.*]] = add i32 [[T15]], 4 407; SSE-NEXT: [[T20:%.*]] = add i32 [[T19]], 1 408; SSE-NEXT: [[T24:%.*]] = add i32 [[T23]], 2 409; SSE-NEXT: [[T28:%.*]] = add i32 [[T27]], 3 410; SSE-NEXT: [[T32:%.*]] = add i32 [[T31]], 4 411; SSE-NEXT: store i32 [[T4]], ptr [[T0]], align 4, !tbaa [[TBAA0]] 412; SSE-NEXT: store i32 [[T8]], ptr [[T5]], align 4, !tbaa 
[[TBAA0]] 413; SSE-NEXT: store i32 [[T12]], ptr [[T9]], align 4, !tbaa [[TBAA0]] 414; SSE-NEXT: store i32 [[T16]], ptr [[T13]], align 4, !tbaa [[TBAA0]] 415; SSE-NEXT: store i32 [[T20]], ptr [[T17]], align 4, !tbaa [[TBAA0]] 416; SSE-NEXT: store i32 [[T24]], ptr [[T21]], align 4, !tbaa [[TBAA0]] 417; SSE-NEXT: store i32 [[T28]], ptr [[T25]], align 4, !tbaa [[TBAA0]] 418; SSE-NEXT: store i32 [[T32]], ptr [[T29]], align 4, !tbaa [[TBAA0]] 419; SSE-NEXT: ret void 420; 421; AVX-LABEL: @gather_load_4( 422; AVX-NEXT: [[T6:%.*]] = getelementptr inbounds nuw i8, ptr [[T1:%.*]], i64 44 423; AVX-NEXT: [[T10:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 16 424; AVX-NEXT: [[T14:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 60 425; AVX-NEXT: [[T18:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 72 426; AVX-NEXT: [[T22:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 36 427; AVX-NEXT: [[T26:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 24 428; AVX-NEXT: [[T30:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 84 429; AVX-NEXT: [[T3:%.*]] = load i32, ptr [[T1]], align 4, !tbaa [[TBAA0]] 430; AVX-NEXT: [[T7:%.*]] = load i32, ptr [[T6]], align 4, !tbaa [[TBAA0]] 431; AVX-NEXT: [[T11:%.*]] = load i32, ptr [[T10]], align 4, !tbaa [[TBAA0]] 432; AVX-NEXT: [[T15:%.*]] = load i32, ptr [[T14]], align 4, !tbaa [[TBAA0]] 433; AVX-NEXT: [[T19:%.*]] = load i32, ptr [[T18]], align 4, !tbaa [[TBAA0]] 434; AVX-NEXT: [[T23:%.*]] = load i32, ptr [[T22]], align 4, !tbaa [[TBAA0]] 435; AVX-NEXT: [[T27:%.*]] = load i32, ptr [[T26]], align 4, !tbaa [[TBAA0]] 436; AVX-NEXT: [[T31:%.*]] = load i32, ptr [[T30]], align 4, !tbaa [[TBAA0]] 437; AVX-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> poison, i32 [[T3]], i64 0 438; AVX-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[T7]], i64 1 439; AVX-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[T11]], i64 2 440; AVX-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[T15]], i64 3 
441; AVX-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[T19]], i64 4 442; AVX-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[T23]], i64 5 443; AVX-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[T27]], i64 6 444; AVX-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[T31]], i64 7 445; AVX-NEXT: [[TMP9:%.*]] = add <8 x i32> [[TMP8]], <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4> 446; AVX-NEXT: store <8 x i32> [[TMP9]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]] 447; AVX-NEXT: ret void 448; 449; AVX2-LABEL: @gather_load_4( 450; AVX2-NEXT: [[T6:%.*]] = getelementptr inbounds nuw i8, ptr [[T1:%.*]], i64 44 451; AVX2-NEXT: [[T10:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 16 452; AVX2-NEXT: [[T14:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 60 453; AVX2-NEXT: [[T18:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 72 454; AVX2-NEXT: [[T22:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 36 455; AVX2-NEXT: [[T26:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 24 456; AVX2-NEXT: [[T30:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 84 457; AVX2-NEXT: [[T3:%.*]] = load i32, ptr [[T1]], align 4, !tbaa [[TBAA0]] 458; AVX2-NEXT: [[T7:%.*]] = load i32, ptr [[T6]], align 4, !tbaa [[TBAA0]] 459; AVX2-NEXT: [[T11:%.*]] = load i32, ptr [[T10]], align 4, !tbaa [[TBAA0]] 460; AVX2-NEXT: [[T15:%.*]] = load i32, ptr [[T14]], align 4, !tbaa [[TBAA0]] 461; AVX2-NEXT: [[T19:%.*]] = load i32, ptr [[T18]], align 4, !tbaa [[TBAA0]] 462; AVX2-NEXT: [[T23:%.*]] = load i32, ptr [[T22]], align 4, !tbaa [[TBAA0]] 463; AVX2-NEXT: [[T27:%.*]] = load i32, ptr [[T26]], align 4, !tbaa [[TBAA0]] 464; AVX2-NEXT: [[T31:%.*]] = load i32, ptr [[T30]], align 4, !tbaa [[TBAA0]] 465; AVX2-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> poison, i32 [[T3]], i64 0 466; AVX2-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[T7]], i64 1 467; AVX2-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], 
i32 [[T11]], i64 2 468; AVX2-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[T15]], i64 3 469; AVX2-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[T19]], i64 4 470; AVX2-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[T23]], i64 5 471; AVX2-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[T27]], i64 6 472; AVX2-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[T31]], i64 7 473; AVX2-NEXT: [[TMP9:%.*]] = add <8 x i32> [[TMP8]], <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4> 474; AVX2-NEXT: store <8 x i32> [[TMP9]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]] 475; AVX2-NEXT: ret void 476; 477; AVX512F-LABEL: @gather_load_4( 478; AVX512F-NEXT: [[TMP1:%.*]] = insertelement <8 x ptr> poison, ptr [[T1:%.*]], i64 0 479; AVX512F-NEXT: [[TMP2:%.*]] = shufflevector <8 x ptr> [[TMP1]], <8 x ptr> poison, <8 x i32> zeroinitializer 480; AVX512F-NEXT: [[TMP3:%.*]] = getelementptr i32, <8 x ptr> [[TMP2]], <8 x i64> <i64 0, i64 11, i64 4, i64 15, i64 18, i64 9, i64 6, i64 21> 481; AVX512F-NEXT: [[TMP4:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP3]], i32 4, <8 x i1> splat (i1 true), <8 x i32> poison), !tbaa [[TBAA0]] 482; AVX512F-NEXT: [[TMP5:%.*]] = add <8 x i32> [[TMP4]], <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4> 483; AVX512F-NEXT: store <8 x i32> [[TMP5]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]] 484; AVX512F-NEXT: ret void 485; 486; AVX512VL-LABEL: @gather_load_4( 487; AVX512VL-NEXT: [[TMP1:%.*]] = insertelement <8 x ptr> poison, ptr [[T1:%.*]], i64 0 488; AVX512VL-NEXT: [[TMP2:%.*]] = shufflevector <8 x ptr> [[TMP1]], <8 x ptr> poison, <8 x i32> zeroinitializer 489; AVX512VL-NEXT: [[TMP3:%.*]] = getelementptr i32, <8 x ptr> [[TMP2]], <8 x i64> <i64 0, i64 11, i64 4, i64 15, i64 18, i64 9, i64 6, i64 21> 490; AVX512VL-NEXT: [[TMP4:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP3]], i32 4, <8 x i1> splat (i1 true), <8 x i32> poison), !tbaa [[TBAA0]] 
491; AVX512VL-NEXT: [[TMP5:%.*]] = add <8 x i32> [[TMP4]], <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4> 492; AVX512VL-NEXT: store <8 x i32> [[TMP5]], ptr [[T0:%.*]], align 4, !tbaa [[TBAA0]] 493; AVX512VL-NEXT: ret void 494; 495 %t5 = getelementptr inbounds i32, ptr %t0, i64 1 496 %t6 = getelementptr inbounds i32, ptr %t1, i64 11 497 %t9 = getelementptr inbounds i32, ptr %t0, i64 2 498 %t10 = getelementptr inbounds i32, ptr %t1, i64 4 499 %t13 = getelementptr inbounds i32, ptr %t0, i64 3 500 %t14 = getelementptr inbounds i32, ptr %t1, i64 15 501 %t17 = getelementptr inbounds i32, ptr %t0, i64 4 502 %t18 = getelementptr inbounds i32, ptr %t1, i64 18 503 %t21 = getelementptr inbounds i32, ptr %t0, i64 5 504 %t22 = getelementptr inbounds i32, ptr %t1, i64 9 505 %t25 = getelementptr inbounds i32, ptr %t0, i64 6 506 %t26 = getelementptr inbounds i32, ptr %t1, i64 6 507 %t29 = getelementptr inbounds i32, ptr %t0, i64 7 508 %t30 = getelementptr inbounds i32, ptr %t1, i64 21 509 510 %t3 = load i32, ptr %t1, align 4, !tbaa !2 511 %t7 = load i32, ptr %t6, align 4, !tbaa !2 512 %t11 = load i32, ptr %t10, align 4, !tbaa !2 513 %t15 = load i32, ptr %t14, align 4, !tbaa !2 514 %t19 = load i32, ptr %t18, align 4, !tbaa !2 515 %t23 = load i32, ptr %t22, align 4, !tbaa !2 516 %t27 = load i32, ptr %t26, align 4, !tbaa !2 517 %t31 = load i32, ptr %t30, align 4, !tbaa !2 518 519 %t4 = add i32 %t3, 1 520 %t8 = add i32 %t7, 2 521 %t12 = add i32 %t11, 3 522 %t16 = add i32 %t15, 4 523 %t20 = add i32 %t19, 1 524 %t24 = add i32 %t23, 2 525 %t28 = add i32 %t27, 3 526 %t32 = add i32 %t31, 4 527 528 store i32 %t4, ptr %t0, align 4, !tbaa !2 529 store i32 %t8, ptr %t5, align 4, !tbaa !2 530 store i32 %t12, ptr %t9, align 4, !tbaa !2 531 store i32 %t16, ptr %t13, align 4, !tbaa !2 532 store i32 %t20, ptr %t17, align 4, !tbaa !2 533 store i32 %t24, ptr %t21, align 4, !tbaa !2 534 store i32 %t28, ptr %t25, align 4, !tbaa !2 535 store i32 %t32, ptr %t29, align 4, !tbaa !2 536 537 ret 
void 538} 539 540
; @gather_load_div: loads eight floats from %1 at element offsets 0, 10, 3, 14,
; 17, 8, 5 and 20, divides each one by the float at element offsets 4, 13, 11,
; 44, 33, 30, 27 and 23 respectively, and stores the eight quotients to
; consecutive float slots starting at %0. Every load and store carries !tbaa !2.
; The assertions below are autogenerated by utils/update_test_checks.py (see the
; file header). What each run line is expected to produce:
;   - SSE run - two <4 x float> fdivs fed by <2 x float> loads and scalar loads.
;   - AVX and AVX2 runs - one <8 x float> fdiv fed by <2 x float> loads and
;     scalar loads.
;   - AVX512F and AVX512VL runs - two llvm.masked.gather calls feeding one
;     <8 x float> fdiv.
541define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture readonly %1) { 542; SSE-LABEL: @gather_load_div( 543; SSE-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]] 544; SSE-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40 545; SSE-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 52 546; SSE-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 12 547; SSE-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 176 548; SSE-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4, !tbaa [[TBAA0]] 549; SSE-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0:%.*]], i64 16 550; SSE-NEXT: [[TMP10:%.*]] = load <2 x float>, ptr [[TMP6]], align 4, !tbaa [[TBAA0]] 551; SSE-NEXT: [[TMP11:%.*]] = load <2 x float>, ptr [[TMP4]], align 4, !tbaa [[TBAA0]] 552; SSE-NEXT: [[TMP12:%.*]] = load <2 x float>, ptr [[TMP5]], align 4, !tbaa [[TBAA0]] 553; SSE-NEXT: [[TMP13:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i64 0 554; SSE-NEXT: [[TMP14:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison> 555; SSE-NEXT: [[TMP15:%.*]] = shufflevector <4 x float> [[TMP13]], <4 x float> [[TMP14]], <4 x i32> <i32 0, i32 4, i32 poison, i32 poison> 556; SSE-NEXT: [[TMP16:%.*]] = shufflevector <2 x float> [[TMP10]], <2 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison> 557; SSE-NEXT: [[TMP17:%.*]] = shufflevector <4 x float> [[TMP15]], <4 x float> [[TMP16]], <4 x i32> <i32 0, i32 1, i32 4, i32 poison> 558; SSE-NEXT: [[TMP18:%.*]] = shufflevector <2 x float> [[TMP12]], <2 x float> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison> 559; SSE-NEXT: [[TMP19:%.*]] = shufflevector <4 x float> [[TMP17]], <4 x float> [[TMP18]], <4 x i32> <i32 0, i32 1, i32 2, i32 5> 560; SSE-NEXT: [[TMP20:%.*]] = shufflevector <2 x float> [[TMP10]], <2 x float> [[TMP12]], <4 
x i32> <i32 1, i32 2, i32 poison, i32 poison> 561; SSE-NEXT: [[TMP21:%.*]] = shufflevector <4 x float> [[TMP20]], <4 x float> [[TMP14]], <4 x i32> <i32 0, i32 1, i32 5, i32 poison> 562; SSE-NEXT: [[TMP22:%.*]] = insertelement <4 x float> [[TMP21]], float [[TMP8]], i64 3 563; SSE-NEXT: [[TMP23:%.*]] = fdiv <4 x float> [[TMP19]], [[TMP22]] 564; SSE-NEXT: store <4 x float> [[TMP23]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]] 565; SSE-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 68 566; SSE-NEXT: [[TMP25:%.*]] = load float, ptr [[TMP24]], align 4, !tbaa [[TBAA0]] 567; SSE-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 132 568; SSE-NEXT: [[TMP27:%.*]] = load float, ptr [[TMP26]], align 4, !tbaa [[TBAA0]] 569; SSE-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 32 570; SSE-NEXT: [[TMP29:%.*]] = load float, ptr [[TMP28]], align 4, !tbaa [[TBAA0]] 571; SSE-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 120 572; SSE-NEXT: [[TMP31:%.*]] = load float, ptr [[TMP30]], align 4, !tbaa [[TBAA0]] 573; SSE-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 20 574; SSE-NEXT: [[TMP33:%.*]] = load float, ptr [[TMP32]], align 4, !tbaa [[TBAA0]] 575; SSE-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 108 576; SSE-NEXT: [[TMP35:%.*]] = load float, ptr [[TMP34]], align 4, !tbaa [[TBAA0]] 577; SSE-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 80 578; SSE-NEXT: [[TMP37:%.*]] = load float, ptr [[TMP36]], align 4, !tbaa [[TBAA0]] 579; SSE-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 92 580; SSE-NEXT: [[TMP39:%.*]] = load float, ptr [[TMP38]], align 4, !tbaa [[TBAA0]] 581; SSE-NEXT: [[TMP40:%.*]] = insertelement <4 x float> poison, float [[TMP25]], i64 0 582; SSE-NEXT: [[TMP41:%.*]] = insertelement <4 x float> [[TMP40]], float [[TMP29]], i64 1 583; SSE-NEXT: [[TMP42:%.*]] = insertelement <4 x float> 
[[TMP41]], float [[TMP33]], i64 2 584; SSE-NEXT: [[TMP43:%.*]] = insertelement <4 x float> [[TMP42]], float [[TMP37]], i64 3 585; SSE-NEXT: [[TMP44:%.*]] = insertelement <4 x float> poison, float [[TMP27]], i64 0 586; SSE-NEXT: [[TMP45:%.*]] = insertelement <4 x float> [[TMP44]], float [[TMP31]], i64 1 587; SSE-NEXT: [[TMP46:%.*]] = insertelement <4 x float> [[TMP45]], float [[TMP35]], i64 2 588; SSE-NEXT: [[TMP47:%.*]] = insertelement <4 x float> [[TMP46]], float [[TMP39]], i64 3 589; SSE-NEXT: [[TMP48:%.*]] = fdiv <4 x float> [[TMP43]], [[TMP47]] 590; SSE-NEXT: store <4 x float> [[TMP48]], ptr [[TMP9]], align 4, !tbaa [[TBAA0]] 591; SSE-NEXT: ret void 592; 593; AVX-LABEL: @gather_load_div( 594; AVX-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]] 595; AVX-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40 596; AVX-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 52 597; AVX-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 12 598; AVX-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 176 599; AVX-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4, !tbaa [[TBAA0]] 600; AVX-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 68 601; AVX-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP9]], align 4, !tbaa [[TBAA0]] 602; AVX-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 132 603; AVX-NEXT: [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4, !tbaa [[TBAA0]] 604; AVX-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 32 605; AVX-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP13]], align 4, !tbaa [[TBAA0]] 606; AVX-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 120 607; AVX-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4, !tbaa [[TBAA0]] 608; AVX-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 20 609; AVX-NEXT: [[TMP18:%.*]] = load float, ptr 
[[TMP17]], align 4, !tbaa [[TBAA0]] 610; AVX-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 108 611; AVX-NEXT: [[TMP20:%.*]] = load float, ptr [[TMP19]], align 4, !tbaa [[TBAA0]] 612; AVX-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 80 613; AVX-NEXT: [[TMP22:%.*]] = load float, ptr [[TMP21]], align 4, !tbaa [[TBAA0]] 614; AVX-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 92 615; AVX-NEXT: [[TMP24:%.*]] = load float, ptr [[TMP23]], align 4, !tbaa [[TBAA0]] 616; AVX-NEXT: [[TMP25:%.*]] = load <2 x float>, ptr [[TMP6]], align 4, !tbaa [[TBAA0]] 617; AVX-NEXT: [[TMP26:%.*]] = load <2 x float>, ptr [[TMP4]], align 4, !tbaa [[TBAA0]] 618; AVX-NEXT: [[TMP27:%.*]] = load <2 x float>, ptr [[TMP5]], align 4, !tbaa [[TBAA0]] 619; AVX-NEXT: [[TMP28:%.*]] = insertelement <8 x float> poison, float [[TMP3]], i64 0 620; AVX-NEXT: [[TMP29:%.*]] = shufflevector <2 x float> [[TMP26]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 621; AVX-NEXT: [[TMP30:%.*]] = shufflevector <8 x float> [[TMP28]], <8 x float> [[TMP29]], <8 x i32> <i32 0, i32 8, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 622; AVX-NEXT: [[TMP31:%.*]] = shufflevector <2 x float> [[TMP25]], <2 x float> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 623; AVX-NEXT: [[TMP32:%.*]] = shufflevector <8 x float> [[TMP30]], <8 x float> [[TMP31]], <8 x i32> <i32 0, i32 1, i32 8, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 624; AVX-NEXT: [[TMP33:%.*]] = shufflevector <2 x float> [[TMP27]], <2 x float> poison, <8 x i32> <i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 625; AVX-NEXT: [[TMP34:%.*]] = shufflevector <8 x float> [[TMP32]], <8 x float> [[TMP33]], <8 x i32> <i32 0, i32 1, i32 2, i32 9, i32 poison, i32 poison, i32 poison, i32 poison> 
626; AVX-NEXT: [[TMP35:%.*]] = insertelement <8 x float> [[TMP34]], float [[TMP10]], i64 4 627; AVX-NEXT: [[TMP36:%.*]] = insertelement <8 x float> [[TMP35]], float [[TMP14]], i64 5 628; AVX-NEXT: [[TMP37:%.*]] = insertelement <8 x float> [[TMP36]], float [[TMP18]], i64 6 629; AVX-NEXT: [[TMP38:%.*]] = insertelement <8 x float> [[TMP37]], float [[TMP22]], i64 7 630; AVX-NEXT: [[TMP39:%.*]] = shufflevector <2 x float> [[TMP25]], <2 x float> [[TMP27]], <8 x i32> <i32 1, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 631; AVX-NEXT: [[TMP40:%.*]] = shufflevector <8 x float> [[TMP39]], <8 x float> [[TMP29]], <8 x i32> <i32 0, i32 1, i32 9, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 632; AVX-NEXT: [[TMP41:%.*]] = insertelement <8 x float> [[TMP40]], float [[TMP8]], i64 3 633; AVX-NEXT: [[TMP42:%.*]] = insertelement <8 x float> [[TMP41]], float [[TMP12]], i64 4 634; AVX-NEXT: [[TMP43:%.*]] = insertelement <8 x float> [[TMP42]], float [[TMP16]], i64 5 635; AVX-NEXT: [[TMP44:%.*]] = insertelement <8 x float> [[TMP43]], float [[TMP20]], i64 6 636; AVX-NEXT: [[TMP45:%.*]] = insertelement <8 x float> [[TMP44]], float [[TMP24]], i64 7 637; AVX-NEXT: [[TMP46:%.*]] = fdiv <8 x float> [[TMP38]], [[TMP45]] 638; AVX-NEXT: store <8 x float> [[TMP46]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] 639; AVX-NEXT: ret void 640; 641; AVX2-LABEL: @gather_load_div( 642; AVX2-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]] 643; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40 644; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 52 645; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 12 646; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 176 647; AVX2-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4, !tbaa [[TBAA0]] 648; AVX2-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 68 649; 
AVX2-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP9]], align 4, !tbaa [[TBAA0]] 650; AVX2-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 132 651; AVX2-NEXT: [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4, !tbaa [[TBAA0]] 652; AVX2-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 32 653; AVX2-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP13]], align 4, !tbaa [[TBAA0]] 654; AVX2-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 120 655; AVX2-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4, !tbaa [[TBAA0]] 656; AVX2-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 20 657; AVX2-NEXT: [[TMP18:%.*]] = load float, ptr [[TMP17]], align 4, !tbaa [[TBAA0]] 658; AVX2-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 108 659; AVX2-NEXT: [[TMP20:%.*]] = load float, ptr [[TMP19]], align 4, !tbaa [[TBAA0]] 660; AVX2-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 80 661; AVX2-NEXT: [[TMP22:%.*]] = load float, ptr [[TMP21]], align 4, !tbaa [[TBAA0]] 662; AVX2-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 92 663; AVX2-NEXT: [[TMP24:%.*]] = load float, ptr [[TMP23]], align 4, !tbaa [[TBAA0]] 664; AVX2-NEXT: [[TMP25:%.*]] = load <2 x float>, ptr [[TMP6]], align 4, !tbaa [[TBAA0]] 665; AVX2-NEXT: [[TMP26:%.*]] = load <2 x float>, ptr [[TMP4]], align 4, !tbaa [[TBAA0]] 666; AVX2-NEXT: [[TMP27:%.*]] = load <2 x float>, ptr [[TMP5]], align 4, !tbaa [[TBAA0]] 667; AVX2-NEXT: [[TMP28:%.*]] = insertelement <8 x float> poison, float [[TMP3]], i64 0 668; AVX2-NEXT: [[TMP29:%.*]] = shufflevector <2 x float> [[TMP26]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 669; AVX2-NEXT: [[TMP30:%.*]] = shufflevector <8 x float> [[TMP28]], <8 x float> [[TMP29]], <8 x i32> <i32 0, i32 8, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 670; 
AVX2-NEXT: [[TMP31:%.*]] = shufflevector <2 x float> [[TMP25]], <2 x float> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 671; AVX2-NEXT: [[TMP32:%.*]] = shufflevector <8 x float> [[TMP30]], <8 x float> [[TMP31]], <8 x i32> <i32 0, i32 1, i32 8, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 672; AVX2-NEXT: [[TMP33:%.*]] = shufflevector <2 x float> [[TMP27]], <2 x float> poison, <8 x i32> <i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 673; AVX2-NEXT: [[TMP34:%.*]] = shufflevector <8 x float> [[TMP32]], <8 x float> [[TMP33]], <8 x i32> <i32 0, i32 1, i32 2, i32 9, i32 poison, i32 poison, i32 poison, i32 poison> 674; AVX2-NEXT: [[TMP35:%.*]] = insertelement <8 x float> [[TMP34]], float [[TMP10]], i64 4 675; AVX2-NEXT: [[TMP36:%.*]] = insertelement <8 x float> [[TMP35]], float [[TMP14]], i64 5 676; AVX2-NEXT: [[TMP37:%.*]] = insertelement <8 x float> [[TMP36]], float [[TMP18]], i64 6 677; AVX2-NEXT: [[TMP38:%.*]] = insertelement <8 x float> [[TMP37]], float [[TMP22]], i64 7 678; AVX2-NEXT: [[TMP39:%.*]] = shufflevector <2 x float> [[TMP25]], <2 x float> [[TMP27]], <8 x i32> <i32 1, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 679; AVX2-NEXT: [[TMP40:%.*]] = shufflevector <8 x float> [[TMP39]], <8 x float> [[TMP29]], <8 x i32> <i32 0, i32 1, i32 9, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 680; AVX2-NEXT: [[TMP41:%.*]] = insertelement <8 x float> [[TMP40]], float [[TMP8]], i64 3 681; AVX2-NEXT: [[TMP42:%.*]] = insertelement <8 x float> [[TMP41]], float [[TMP12]], i64 4 682; AVX2-NEXT: [[TMP43:%.*]] = insertelement <8 x float> [[TMP42]], float [[TMP16]], i64 5 683; AVX2-NEXT: [[TMP44:%.*]] = insertelement <8 x float> [[TMP43]], float [[TMP20]], i64 6 684; AVX2-NEXT: [[TMP45:%.*]] = insertelement <8 x float> [[TMP44]], float [[TMP24]], i64 7 685; AVX2-NEXT: [[TMP46:%.*]] = fdiv <8 x float> [[TMP38]], 
[[TMP45]] 686; AVX2-NEXT: store <8 x float> [[TMP46]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] 687; AVX2-NEXT: ret void 688; 689; AVX512F-LABEL: @gather_load_div( 690; AVX512F-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1:%.*]], i64 0 691; AVX512F-NEXT: [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer 692; AVX512F-NEXT: [[TMP5:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> <i64 4, i64 13, i64 11, i64 44, i64 33, i64 30, i64 27, i64 23> 693; AVX512F-NEXT: [[TMP6:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> <i64 0, i64 10, i64 3, i64 14, i64 17, i64 8, i64 5, i64 20> 694; AVX512F-NEXT: [[TMP7:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP6]], i32 4, <8 x i1> splat (i1 true), <8 x float> poison), !tbaa [[TBAA0]] 695; AVX512F-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP5]], i32 4, <8 x i1> splat (i1 true), <8 x float> poison), !tbaa [[TBAA0]] 696; AVX512F-NEXT: [[TMP9:%.*]] = fdiv <8 x float> [[TMP7]], [[TMP8]] 697; AVX512F-NEXT: store <8 x float> [[TMP9]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] 698; AVX512F-NEXT: ret void 699; 700; AVX512VL-LABEL: @gather_load_div( 701; AVX512VL-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP1:%.*]], i64 0 702; AVX512VL-NEXT: [[TMP4:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <8 x i32> zeroinitializer 703; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> <i64 4, i64 13, i64 11, i64 44, i64 33, i64 30, i64 27, i64 23> 704; AVX512VL-NEXT: [[TMP6:%.*]] = getelementptr float, <8 x ptr> [[TMP4]], <8 x i64> <i64 0, i64 10, i64 3, i64 14, i64 17, i64 8, i64 5, i64 20> 705; AVX512VL-NEXT: [[TMP7:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP6]], i32 4, <8 x i1> splat (i1 true), <8 x float> poison), !tbaa [[TBAA0]] 706; AVX512VL-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 
x ptr> [[TMP5]], i32 4, <8 x i1> splat (i1 true), <8 x float> poison), !tbaa [[TBAA0]] 707; AVX512VL-NEXT: [[TMP9:%.*]] = fdiv <8 x float> [[TMP7]], [[TMP8]] 708; AVX512VL-NEXT: store <8 x float> [[TMP9]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] 709; AVX512VL-NEXT: ret void 710; 711 %3 = load float, ptr %1, align 4, !tbaa !2 712 %4 = getelementptr inbounds float, ptr %1, i64 4 713 %5 = load float, ptr %4, align 4, !tbaa !2 714 %6 = fdiv float %3, %5 715 %7 = getelementptr inbounds float, ptr %0, i64 1 716 store float %6, ptr %0, align 4, !tbaa !2 717 %8 = getelementptr inbounds float, ptr %1, i64 10 718 %9 = load float, ptr %8, align 4, !tbaa !2 719 %10 = getelementptr inbounds float, ptr %1, i64 13 720 %11 = load float, ptr %10, align 4, !tbaa !2 721 %12 = fdiv float %9, %11 722 %13 = getelementptr inbounds float, ptr %0, i64 2 723 store float %12, ptr %7, align 4, !tbaa !2 724 %14 = getelementptr inbounds float, ptr %1, i64 3 725 %15 = load float, ptr %14, align 4, !tbaa !2 726 %16 = getelementptr inbounds float, ptr %1, i64 11 727 %17 = load float, ptr %16, align 4, !tbaa !2 728 %18 = fdiv float %15, %17 729 %19 = getelementptr inbounds float, ptr %0, i64 3 730 store float %18, ptr %13, align 4, !tbaa !2 731 %20 = getelementptr inbounds float, ptr %1, i64 14 732 %21 = load float, ptr %20, align 4, !tbaa !2 733 %22 = getelementptr inbounds float, ptr %1, i64 44 734 %23 = load float, ptr %22, align 4, !tbaa !2 735 %24 = fdiv float %21, %23 736 %25 = getelementptr inbounds float, ptr %0, i64 4 737 store float %24, ptr %19, align 4, !tbaa !2 738 %26 = getelementptr inbounds float, ptr %1, i64 17 739 %27 = load float, ptr %26, align 4, !tbaa !2 740 %28 = getelementptr inbounds float, ptr %1, i64 33 741 %29 = load float, ptr %28, align 4, !tbaa !2 742 %30 = fdiv float %27, %29 743 %31 = getelementptr inbounds float, ptr %0, i64 5 744 store float %30, ptr %25, align 4, !tbaa !2 745 %32 = getelementptr inbounds float, ptr %1, i64 8 746 %33 = load float, ptr %32, 
align 4, !tbaa !2 747 %34 = getelementptr inbounds float, ptr %1, i64 30 748 %35 = load float, ptr %34, align 4, !tbaa !2 749 %36 = fdiv float %33, %35 750 %37 = getelementptr inbounds float, ptr %0, i64 6 751 store float %36, ptr %31, align 4, !tbaa !2 752 %38 = getelementptr inbounds float, ptr %1, i64 5 753 %39 = load float, ptr %38, align 4, !tbaa !2 754 %40 = getelementptr inbounds float, ptr %1, i64 27 755 %41 = load float, ptr %40, align 4, !tbaa !2 756 %42 = fdiv float %39, %41 757 %43 = getelementptr inbounds float, ptr %0, i64 7 758 store float %42, ptr %37, align 4, !tbaa !2 759 %44 = getelementptr inbounds float, ptr %1, i64 20 760 %45 = load float, ptr %44, align 4, !tbaa !2 761 %46 = getelementptr inbounds float, ptr %1, i64 23 762 %47 = load float, ptr %46, align 4, !tbaa !2 763 %48 = fdiv float %45, %47 764 store float %48, ptr %43, align 4, !tbaa !2 765 ret void 766} 767 768!2 = !{!3, !3, i64 0} 769!3 = !{!"short", !4, i64 0} 770!4 = !{!"omnipotent char", !5, i64 0} 771!5 = !{!"Simple C++ TBAA"} 772