; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes='require<profile-summary>,function(codegenprepare)' -mtriple=x86_64-- -mattr=+avx -S < %s | FileCheck %s --check-prefixes=AVX1
; RUN: opt -passes='require<profile-summary>,function(codegenprepare)' -mtriple=x86_64-- -mattr=+avx2 -S < %s | FileCheck %s --check-prefixes=AVX2
; RUN: opt -passes='require<profile-summary>,function(codegenprepare)' -mtriple=x86_64-- -mattr=+avx512bw -S < %s | FileCheck %s --check-prefixes=AVX512BW
; RUN: opt -passes='require<profile-summary>,function(codegenprepare)' -mtriple=x86_64-- -mattr=+avx,+xop -S < %s | FileCheck %s --check-prefixes=XOP
; RUN: opt -passes='require<profile-summary>,function(codegenprepare)' -mtriple=x86_64-- -mattr=+avx2,+xop -S < %s | FileCheck %s --check-prefixes=XOP
; RUN: opt -passes='require<profile-summary>,function(codegenprepare)' -mtriple=x86_64-- -mattr=+avx -S -enable-debugify < %s 2>&1 | FileCheck %s --check-prefix=DEBUG

; Every function below shifts (or funnel-shifts) a vector by an amount that is
; a select between two splat vectors. The assertions pin, per x86 feature set,
; whether CodeGenPrepare keeps the single shift-by-select or rewrites it as one
; shift per splat operand followed by a select of the two results.
; The assertions are autogenerated; regenerate them with
; utils/update_test_checks.py instead of editing them by hand.

; <4 x i32> lshr by select(cond, splat(x), splat(y)).
; The +avx-only run expects two lshr instructions (one per splat) and a select
; of their results; every other run expects the original single lshr.
define <4 x i32> @vector_variable_shift_right_v4i32(<4 x i1> %cond, <4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; AVX1-LABEL: @vector_variable_shift_right_v4i32(
; AVX1-NEXT:    [[SPLAT1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> zeroinitializer
; AVX1-NEXT:    [[SPLAT2:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> undef, <4 x i32> zeroinitializer
; AVX1-NEXT:    [[SEL:%.*]] = select <4 x i1> [[COND:%.*]], <4 x i32> [[SPLAT1]], <4 x i32> [[SPLAT2]]
; AVX1-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> [[Z:%.*]], [[SPLAT1]]
; AVX1-NEXT:    [[TMP2:%.*]] = lshr <4 x i32> [[Z]], [[SPLAT2]]
; AVX1-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[COND]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
; AVX1-NEXT:    ret <4 x i32> [[TMP3]]
;
; AVX2-LABEL: @vector_variable_shift_right_v4i32(
; AVX2-NEXT:    [[SPLAT1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> zeroinitializer
; AVX2-NEXT:    [[SPLAT2:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> undef, <4 x i32> zeroinitializer
; AVX2-NEXT:    [[SEL:%.*]] = select <4 x i1> [[COND:%.*]], <4 x i32> [[SPLAT1]], <4 x i32> [[SPLAT2]]
; AVX2-NEXT:    [[SH:%.*]] = lshr <4 x i32> [[Z:%.*]], [[SEL]]
; AVX2-NEXT:    ret <4 x i32> [[SH]]
;
; AVX512BW-LABEL: @vector_variable_shift_right_v4i32(
; AVX512BW-NEXT:    [[SPLAT1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> zeroinitializer
; AVX512BW-NEXT:    [[SPLAT2:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> undef, <4 x i32> zeroinitializer
; AVX512BW-NEXT:    [[SEL:%.*]] = select <4 x i1> [[COND:%.*]], <4 x i32> [[SPLAT1]], <4 x i32> [[SPLAT2]]
; AVX512BW-NEXT:    [[SH:%.*]] = lshr <4 x i32> [[Z:%.*]], [[SEL]]
; AVX512BW-NEXT:    ret <4 x i32> [[SH]]
;
; XOP-LABEL: @vector_variable_shift_right_v4i32(
; XOP-NEXT:    [[SPLAT1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> zeroinitializer
; XOP-NEXT:    [[SPLAT2:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> undef, <4 x i32> zeroinitializer
; XOP-NEXT:    [[SEL:%.*]] = select <4 x i1> [[COND:%.*]], <4 x i32> [[SPLAT1]], <4 x i32> [[SPLAT2]]
; XOP-NEXT:    [[SH:%.*]] = lshr <4 x i32> [[Z:%.*]], [[SEL]]
; XOP-NEXT:    ret <4 x i32> [[SH]]
;
  %splat1 = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> zeroinitializer
  %splat2 = shufflevector <4 x i32> %y, <4 x i32> undef, <4 x i32> zeroinitializer
  %sel = select <4 x i1> %cond, <4 x i32> %splat1, <4 x i32> %splat2
  %sh = lshr <4 x i32> %z, %sel
  ret <4 x i32> %sh
}

; <16 x i16> variant of the test above.
; The +avx and +avx2 runs expect the split form (two lshr plus select); the
; +avx512bw and XOP runs expect the original single lshr.
define <16 x i16> @vector_variable_shift_right_v16i16(<16 x i1> %cond, <16 x i16> %x, <16 x i16> %y, <16 x i16> %z) {
; AVX1-LABEL: @vector_variable_shift_right_v16i16(
; AVX1-NEXT:    [[SPLAT1:%.*]] = shufflevector <16 x i16> [[X:%.*]], <16 x i16> undef, <16 x i32> zeroinitializer
; AVX1-NEXT:    [[SPLAT2:%.*]] = shufflevector <16 x i16> [[Y:%.*]], <16 x i16> undef, <16 x i32> zeroinitializer
; AVX1-NEXT:    [[SEL:%.*]] = select <16 x i1> [[COND:%.*]], <16 x i16> [[SPLAT1]], <16 x i16> [[SPLAT2]]
; AVX1-NEXT:    [[TMP1:%.*]] = lshr <16 x i16> [[Z:%.*]], [[SPLAT1]]
; AVX1-NEXT:    [[TMP2:%.*]] = lshr <16 x i16> [[Z]], [[SPLAT2]]
; AVX1-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[COND]], <16 x i16> [[TMP1]], <16 x i16> [[TMP2]]
; AVX1-NEXT:    ret <16 x i16> [[TMP3]]
;
; AVX2-LABEL: @vector_variable_shift_right_v16i16(
; AVX2-NEXT:    [[SPLAT1:%.*]] = shufflevector <16 x i16> [[X:%.*]], <16 x i16> undef, <16 x i32> zeroinitializer
; AVX2-NEXT:    [[SPLAT2:%.*]] = shufflevector <16 x i16> [[Y:%.*]], <16 x i16> undef, <16 x i32> zeroinitializer
; AVX2-NEXT:    [[SEL:%.*]] = select <16 x i1> [[COND:%.*]], <16 x i16> [[SPLAT1]], <16 x i16> [[SPLAT2]]
; AVX2-NEXT:    [[TMP1:%.*]] = lshr <16 x i16> [[Z:%.*]], [[SPLAT1]]
; AVX2-NEXT:    [[TMP2:%.*]] = lshr <16 x i16> [[Z]], [[SPLAT2]]
; AVX2-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[COND]], <16 x i16> [[TMP1]], <16 x i16> [[TMP2]]
; AVX2-NEXT:    ret <16 x i16> [[TMP3]]
;
; AVX512BW-LABEL: @vector_variable_shift_right_v16i16(
; AVX512BW-NEXT:    [[SPLAT1:%.*]] = shufflevector <16 x i16> [[X:%.*]], <16 x i16> undef, <16 x i32> zeroinitializer
; AVX512BW-NEXT:    [[SPLAT2:%.*]] = shufflevector <16 x i16> [[Y:%.*]], <16 x i16> undef, <16 x i32> zeroinitializer
; AVX512BW-NEXT:    [[SEL:%.*]] = select <16 x i1> [[COND:%.*]], <16 x i16> [[SPLAT1]], <16 x i16> [[SPLAT2]]
; AVX512BW-NEXT:    [[SH:%.*]] = lshr <16 x i16> [[Z:%.*]], [[SEL]]
; AVX512BW-NEXT:    ret <16 x i16> [[SH]]
;
; XOP-LABEL: @vector_variable_shift_right_v16i16(
; XOP-NEXT:    [[SPLAT1:%.*]] = shufflevector <16 x i16> [[X:%.*]], <16 x i16> undef, <16 x i32> zeroinitializer
; XOP-NEXT:    [[SPLAT2:%.*]] = shufflevector <16 x i16> [[Y:%.*]], <16 x i16> undef, <16 x i32> zeroinitializer
; XOP-NEXT:    [[SEL:%.*]] = select <16 x i1> [[COND:%.*]], <16 x i16> [[SPLAT1]], <16 x i16> [[SPLAT2]]
; XOP-NEXT:    [[SH:%.*]] = lshr <16 x i16> [[Z:%.*]], [[SEL]]
; XOP-NEXT:    ret <16 x i16> [[SH]]
;
  %splat1 = shufflevector <16 x i16> %x, <16 x i16> undef, <16 x i32> zeroinitializer
  %splat2 = shufflevector <16 x i16> %y, <16 x i16> undef, <16 x i32> zeroinitializer
  %sel = select <16 x i1> %cond, <16 x i16> %splat1, <16 x i16> %splat2
  %sh = lshr <16 x i16> %z, %sel
  ret <16 x i16> %sh
}

; <32 x i8> variant of the test above.
; Only the XOP runs expect the original single lshr; the +avx, +avx2 and
; +avx512bw runs all expect the split form (two lshr plus select).
define <32 x i8> @vector_variable_shift_right_v32i8(<32 x i1> %cond, <32 x i8> %x, <32 x i8> %y, <32 x i8> %z) {
; AVX1-LABEL: @vector_variable_shift_right_v32i8(
; AVX1-NEXT:    [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer
; AVX1-NEXT:    [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer
; AVX1-NEXT:    [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]]
; AVX1-NEXT:    [[TMP1:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SPLAT1]]
; AVX1-NEXT:    [[TMP2:%.*]] = lshr <32 x i8> [[Z]], [[SPLAT2]]
; AVX1-NEXT:    [[TMP3:%.*]] = select <32 x i1> [[COND]], <32 x i8> [[TMP1]], <32 x i8> [[TMP2]]
; AVX1-NEXT:    ret <32 x i8> [[TMP3]]
;
; AVX2-LABEL: @vector_variable_shift_right_v32i8(
; AVX2-NEXT:    [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer
; AVX2-NEXT:    [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer
; AVX2-NEXT:    [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]]
; AVX2-NEXT:    [[TMP1:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SPLAT1]]
; AVX2-NEXT:    [[TMP2:%.*]] = lshr <32 x i8> [[Z]], [[SPLAT2]]
; AVX2-NEXT:    [[TMP3:%.*]] = select <32 x i1> [[COND]], <32 x i8> [[TMP1]], <32 x i8> [[TMP2]]
; AVX2-NEXT:    ret <32 x i8> [[TMP3]]
;
; AVX512BW-LABEL: @vector_variable_shift_right_v32i8(
; AVX512BW-NEXT:    [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer
; AVX512BW-NEXT:    [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer
; AVX512BW-NEXT:    [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]]
; AVX512BW-NEXT:    [[TMP1:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SPLAT1]]
; AVX512BW-NEXT:    [[TMP2:%.*]] = lshr <32 x i8> [[Z]], [[SPLAT2]]
; AVX512BW-NEXT:    [[TMP3:%.*]] = select <32 x i1> [[COND]], <32 x i8> [[TMP1]], <32 x i8> [[TMP2]]
; AVX512BW-NEXT:    ret <32 x i8> [[TMP3]]
;
; XOP-LABEL: @vector_variable_shift_right_v32i8(
; XOP-NEXT:    [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer
; XOP-NEXT:    [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer
; XOP-NEXT:    [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]]
; XOP-NEXT:    [[SH:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SEL]]
; XOP-NEXT:    ret <32 x i8> [[SH]]
;
  %splat1 = shufflevector <32 x i8> %x, <32 x i8> undef, <32 x i32> zeroinitializer
  %splat2 = shufflevector <32 x i8> %y, <32 x i8> undef, <32 x i32> zeroinitializer
  %sel = select <32 x i1> %cond, <32 x i8> %splat1, <32 x i8> %splat2
  %sh = lshr <32 x i8> %z, %sel
  ret <32 x i8> %sh
}

; PR37428 - https://bugs.llvm.org/show_bug.cgi?id=37428
; The splat shift amounts are built in vector.ph, outside the loop, and the shl
; that uses their select sits inside vector.body. The +avx-only run expects the
; two splat shuffles to be recreated inside the loop, each feeding its own shl,
; with a select of the two results. Every other run expects the loop body to
; keep the single shl by the select.

define void @vector_variable_shift_left_loop(ptr nocapture %arr, ptr nocapture readonly %control, i32 %count, i32 %amt0, i32 %amt1, i32 %x) {
; AVX1-LABEL: @vector_variable_shift_left_loop(
; AVX1-NEXT:  entry:
; AVX1-NEXT:    [[CMP16:%.*]] = icmp sgt i32 [[COUNT:%.*]], 0
; AVX1-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[COUNT]] to i64
; AVX1-NEXT:    br i1 [[CMP16]], label [[VECTOR_PH:%.*]], label [[EXIT:%.*]]
; AVX1:       vector.ph:
; AVX1-NEXT:    [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 4294967292
; AVX1-NEXT:    [[SPLATINSERT18:%.*]] = insertelement <4 x i32> undef, i32 [[AMT0:%.*]], i32 0
; AVX1-NEXT:    [[SPLAT1:%.*]] = shufflevector <4 x i32> [[SPLATINSERT18]], <4 x i32> undef, <4 x i32> zeroinitializer
; AVX1-NEXT:    [[SPLATINSERT20:%.*]] = insertelement <4 x i32> undef, i32 [[AMT1:%.*]], i32 0
; AVX1-NEXT:    [[SPLAT2:%.*]] = shufflevector <4 x i32> [[SPLATINSERT20]], <4 x i32> undef, <4 x i32> zeroinitializer
; AVX1-NEXT:    [[SPLATINSERT22:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 0
; AVX1-NEXT:    [[SPLAT3:%.*]] = shufflevector <4 x i32> [[SPLATINSERT22]], <4 x i32> undef, <4 x i32> zeroinitializer
; AVX1-NEXT:    br label [[VECTOR_BODY:%.*]]
; AVX1:       vector.body:
; AVX1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[CONTROL:%.*]], i64 [[INDEX]]
; AVX1-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; AVX1-NEXT:    [[TMP2:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], zeroinitializer
; AVX1-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[SPLAT1]], <4 x i32> [[SPLAT2]]
; AVX1-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i32> [[SPLATINSERT18]], <4 x i32> undef, <4 x i32> zeroinitializer
; AVX1-NEXT:    [[TMP5:%.*]] = shl <4 x i32> [[SPLAT3]], [[TMP4]]
; AVX1-NEXT:    [[TMP6:%.*]] = shufflevector <4 x i32> [[SPLATINSERT20]], <4 x i32> undef, <4 x i32> zeroinitializer
; AVX1-NEXT:    [[TMP7:%.*]] = shl <4 x i32> [[SPLAT3]], [[TMP6]]
; AVX1-NEXT:    [[TMP8:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP5]], <4 x i32> [[TMP7]]
; AVX1-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[ARR:%.*]], i64 [[INDEX]]
; AVX1-NEXT:    store <4 x i32> [[TMP8]], ptr [[TMP9]], align 4
; AVX1-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
; AVX1-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; AVX1-NEXT:    br i1 [[TMP11]], label [[EXIT]], label [[VECTOR_BODY]]
; AVX1:       exit:
; AVX1-NEXT:    ret void
;
; AVX2-LABEL: @vector_variable_shift_left_loop(
; AVX2-NEXT:  entry:
; AVX2-NEXT:    [[CMP16:%.*]] = icmp sgt i32 [[COUNT:%.*]], 0
; AVX2-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[COUNT]] to i64
; AVX2-NEXT:    br i1 [[CMP16]], label [[VECTOR_PH:%.*]], label [[EXIT:%.*]]
; AVX2:       vector.ph:
; AVX2-NEXT:    [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 4294967292
; AVX2-NEXT:    [[SPLATINSERT18:%.*]] = insertelement <4 x i32> undef, i32 [[AMT0:%.*]], i32 0
; AVX2-NEXT:    [[SPLAT1:%.*]] = shufflevector <4 x i32> [[SPLATINSERT18]], <4 x i32> undef, <4 x i32> zeroinitializer
; AVX2-NEXT:    [[SPLATINSERT20:%.*]] = insertelement <4 x i32> undef, i32 [[AMT1:%.*]], i32 0
; AVX2-NEXT:    [[SPLAT2:%.*]] = shufflevector <4 x i32> [[SPLATINSERT20]], <4 x i32> undef, <4 x i32> zeroinitializer
; AVX2-NEXT:    [[SPLATINSERT22:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 0
; AVX2-NEXT:    [[SPLAT3:%.*]] = shufflevector <4 x i32> [[SPLATINSERT22]], <4 x i32> undef, <4 x i32> zeroinitializer
; AVX2-NEXT:    br label [[VECTOR_BODY:%.*]]
; AVX2:       vector.body:
; AVX2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX2-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[CONTROL:%.*]], i64 [[INDEX]]
; AVX2-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; AVX2-NEXT:    [[TMP2:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], zeroinitializer
; AVX2-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[SPLAT1]], <4 x i32> [[SPLAT2]]
; AVX2-NEXT:    [[TMP4:%.*]] = shl <4 x i32> [[SPLAT3]], [[TMP3]]
; AVX2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[ARR:%.*]], i64 [[INDEX]]
; AVX2-NEXT:    store <4 x i32> [[TMP4]], ptr [[TMP5]], align 4
; AVX2-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
; AVX2-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; AVX2-NEXT:    br i1 [[TMP7]], label [[EXIT]], label [[VECTOR_BODY]]
; AVX2:       exit:
; AVX2-NEXT:    ret void
;
; AVX512BW-LABEL: @vector_variable_shift_left_loop(
; AVX512BW-NEXT:  entry:
; AVX512BW-NEXT:    [[CMP16:%.*]] = icmp sgt i32 [[COUNT:%.*]], 0
; AVX512BW-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[COUNT]] to i64
; AVX512BW-NEXT:    br i1 [[CMP16]], label [[VECTOR_PH:%.*]], label [[EXIT:%.*]]
; AVX512BW:       vector.ph:
; AVX512BW-NEXT:    [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 4294967292
; AVX512BW-NEXT:    [[SPLATINSERT18:%.*]] = insertelement <4 x i32> undef, i32 [[AMT0:%.*]], i32 0
; AVX512BW-NEXT:    [[SPLAT1:%.*]] = shufflevector <4 x i32> [[SPLATINSERT18]], <4 x i32> undef, <4 x i32> zeroinitializer
; AVX512BW-NEXT:    [[SPLATINSERT20:%.*]] = insertelement <4 x i32> undef, i32 [[AMT1:%.*]], i32 0
; AVX512BW-NEXT:    [[SPLAT2:%.*]] = shufflevector <4 x i32> [[SPLATINSERT20]], <4 x i32> undef, <4 x i32> zeroinitializer
; AVX512BW-NEXT:    [[SPLATINSERT22:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 0
; AVX512BW-NEXT:    [[SPLAT3:%.*]] = shufflevector <4 x i32> [[SPLATINSERT22]], <4 x i32> undef, <4 x i32> zeroinitializer
; AVX512BW-NEXT:    br label [[VECTOR_BODY:%.*]]
; AVX512BW:       vector.body:
; AVX512BW-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX512BW-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[CONTROL:%.*]], i64 [[INDEX]]
; AVX512BW-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; AVX512BW-NEXT:    [[TMP2:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], zeroinitializer
; AVX512BW-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[SPLAT1]], <4 x i32> [[SPLAT2]]
; AVX512BW-NEXT:    [[TMP4:%.*]] = shl <4 x i32> [[SPLAT3]], [[TMP3]]
; AVX512BW-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[ARR:%.*]], i64 [[INDEX]]
; AVX512BW-NEXT:    store <4 x i32> [[TMP4]], ptr [[TMP5]], align 4
; AVX512BW-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
; AVX512BW-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; AVX512BW-NEXT:    br i1 [[TMP7]], label [[EXIT]], label [[VECTOR_BODY]]
; AVX512BW:       exit:
; AVX512BW-NEXT:    ret void
;
; XOP-LABEL: @vector_variable_shift_left_loop(
; XOP-NEXT:  entry:
; XOP-NEXT:    [[CMP16:%.*]] = icmp sgt i32 [[COUNT:%.*]], 0
; XOP-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[COUNT]] to i64
; XOP-NEXT:    br i1 [[CMP16]], label [[VECTOR_PH:%.*]], label [[EXIT:%.*]]
; XOP:       vector.ph:
; XOP-NEXT:    [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 4294967292
; XOP-NEXT:    [[SPLATINSERT18:%.*]] = insertelement <4 x i32> undef, i32 [[AMT0:%.*]], i32 0
; XOP-NEXT:    [[SPLAT1:%.*]] = shufflevector <4 x i32> [[SPLATINSERT18]], <4 x i32> undef, <4 x i32> zeroinitializer
; XOP-NEXT:    [[SPLATINSERT20:%.*]] = insertelement <4 x i32> undef, i32 [[AMT1:%.*]], i32 0
; XOP-NEXT:    [[SPLAT2:%.*]] = shufflevector <4 x i32> [[SPLATINSERT20]], <4 x i32> undef, <4 x i32> zeroinitializer
; XOP-NEXT:    [[SPLATINSERT22:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 0
; XOP-NEXT:    [[SPLAT3:%.*]] = shufflevector <4 x i32> [[SPLATINSERT22]], <4 x i32> undef, <4 x i32> zeroinitializer
; XOP-NEXT:    br label [[VECTOR_BODY:%.*]]
; XOP:       vector.body:
; XOP-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; XOP-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[CONTROL:%.*]], i64 [[INDEX]]
; XOP-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; XOP-NEXT:    [[TMP2:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], zeroinitializer
; XOP-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[SPLAT1]], <4 x i32> [[SPLAT2]]
; XOP-NEXT:    [[TMP4:%.*]] = shl <4 x i32> [[SPLAT3]], [[TMP3]]
; XOP-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[ARR:%.*]], i64 [[INDEX]]
; XOP-NEXT:    store <4 x i32> [[TMP4]], ptr [[TMP5]], align 4
; XOP-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
; XOP-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; XOP-NEXT:    br i1 [[TMP7]], label [[EXIT]], label [[VECTOR_BODY]]
; XOP:       exit:
; XOP-NEXT:    ret void
;
entry:
  %cmp16 = icmp sgt i32 %count, 0
  %wide.trip.count = zext i32 %count to i64
  br i1 %cmp16, label %vector.ph, label %exit

vector.ph:
  %n.vec = and i64 %wide.trip.count, 4294967292
  %splatinsert18 = insertelement <4 x i32> undef, i32 %amt0, i32 0
  %splat1 = shufflevector <4 x i32> %splatinsert18, <4 x i32> undef, <4 x i32> zeroinitializer
  %splatinsert20 = insertelement <4 x i32> undef, i32 %amt1, i32 0
  %splat2 = shufflevector <4 x i32> %splatinsert20, <4 x i32> undef, <4 x i32> zeroinitializer
  %splatinsert22 = insertelement <4 x i32> undef, i32 %x, i32 0
  %splat3 = shufflevector <4 x i32> %splatinsert22, <4 x i32> undef, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i8, ptr %control, i64 %index
  %wide.load = load <4 x i8>, ptr %0, align 1
  %1 = icmp eq <4 x i8> %wide.load, zeroinitializer
  %2 = select <4 x i1> %1, <4 x i32> %splat1, <4 x i32> %splat2
  %3 = shl <4 x i32> %splat3, %2
  %4 = getelementptr inbounds i32, ptr %arr, i64 %index
  store <4 x i32> %3, ptr %4, align 4
  %index.next = add i64 %index, 4
  %5 = icmp eq i64 %index.next, %n.vec
  br i1 %5, label %exit, label %vector.body

exit:
  ret void
}

; PR37426 - https://bugs.llvm.org/show_bug.cgi?id=37426
; If we don't have real vector shift instructions (AVX1), convert the funnel
; shift into 2 funnel shifts and sink the splat shuffles into the loop.
; The other runs expect the single fshl call on the selected amount to remain.

define void @fancierRotate2(ptr %arr, ptr %control, i32 %rot0, i32 %rot1) {
; AVX1-LABEL: @fancierRotate2(
; AVX1-NEXT:  entry:
; AVX1-NEXT:    [[I0:%.*]] = insertelement <8 x i32> undef, i32 [[ROT0:%.*]], i32 0
; AVX1-NEXT:    [[S0:%.*]] = shufflevector <8 x i32> [[I0]], <8 x i32> undef, <8 x i32> zeroinitializer
; AVX1-NEXT:    [[I1:%.*]] = insertelement <8 x i32> undef, i32 [[ROT1:%.*]], i32 0
; AVX1-NEXT:    [[S1:%.*]] = shufflevector <8 x i32> [[I1]], <8 x i32> undef, <8 x i32> zeroinitializer
; AVX1-NEXT:    br label [[LOOP:%.*]]
; AVX1:       loop:
; AVX1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ]
; AVX1-NEXT:    [[T0:%.*]] = getelementptr inbounds i8, ptr [[CONTROL:%.*]], i64 [[INDEX]]
; AVX1-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[T0]], align 1
; AVX1-NEXT:    [[T2:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer
; AVX1-NEXT:    [[SHAMT:%.*]] = select <8 x i1> [[T2]], <8 x i32> [[S0]], <8 x i32> [[S1]]
; AVX1-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, ptr [[ARR:%.*]], i64 [[INDEX]]
; AVX1-NEXT:    [[WIDE_LOAD21:%.*]] = load <8 x i32>, ptr [[T4]], align 4
; AVX1-NEXT:    [[TMP0:%.*]] = shufflevector <8 x i32> [[I0]], <8 x i32> undef, <8 x i32> zeroinitializer
; AVX1-NEXT:    [[TMP1:%.*]] = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> [[WIDE_LOAD21]], <8 x i32> [[WIDE_LOAD21]], <8 x i32> [[TMP0]])
; AVX1-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[I1]], <8 x i32> undef, <8 x i32> zeroinitializer
; AVX1-NEXT:    [[TMP3:%.*]] = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> [[WIDE_LOAD21]], <8 x i32> [[WIDE_LOAD21]], <8 x i32> [[TMP2]])
; AVX1-NEXT:    [[TMP4:%.*]] = select <8 x i1> [[T2]], <8 x i32> [[TMP1]], <8 x i32> [[TMP3]]
; AVX1-NEXT:    store <8 x i32> [[TMP4]], ptr [[T4]], align 4
; AVX1-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 8
; AVX1-NEXT:    [[T7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; AVX1-NEXT:    br i1 [[T7]], label [[EXIT:%.*]], label [[LOOP]]
; AVX1:       exit:
; AVX1-NEXT:    ret void
;
; AVX2-LABEL: @fancierRotate2(
; AVX2-NEXT:  entry:
; AVX2-NEXT:    [[I0:%.*]] = insertelement <8 x i32> undef, i32 [[ROT0:%.*]], i32 0
; AVX2-NEXT:    [[S0:%.*]] = shufflevector <8 x i32> [[I0]], <8 x i32> undef, <8 x i32> zeroinitializer
; AVX2-NEXT:    [[I1:%.*]] = insertelement <8 x i32> undef, i32 [[ROT1:%.*]], i32 0
; AVX2-NEXT:    [[S1:%.*]] = shufflevector <8 x i32> [[I1]], <8 x i32> undef, <8 x i32> zeroinitializer
; AVX2-NEXT:    br label [[LOOP:%.*]]
; AVX2:       loop:
; AVX2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ]
; AVX2-NEXT:    [[T0:%.*]] = getelementptr inbounds i8, ptr [[CONTROL:%.*]], i64 [[INDEX]]
; AVX2-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[T0]], align 1
; AVX2-NEXT:    [[T2:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer
; AVX2-NEXT:    [[SHAMT:%.*]] = select <8 x i1> [[T2]], <8 x i32> [[S0]], <8 x i32> [[S1]]
; AVX2-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, ptr [[ARR:%.*]], i64 [[INDEX]]
; AVX2-NEXT:    [[WIDE_LOAD21:%.*]] = load <8 x i32>, ptr [[T4]], align 4
; AVX2-NEXT:    [[ROT:%.*]] = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> [[WIDE_LOAD21]], <8 x i32> [[WIDE_LOAD21]], <8 x i32> [[SHAMT]])
; AVX2-NEXT:    store <8 x i32> [[ROT]], ptr [[T4]], align 4
; AVX2-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 8
; AVX2-NEXT:    [[T7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; AVX2-NEXT:    br i1 [[T7]], label [[EXIT:%.*]], label [[LOOP]]
; AVX2:       exit:
; AVX2-NEXT:    ret void
;
; AVX512BW-LABEL: @fancierRotate2(
; AVX512BW-NEXT:  entry:
; AVX512BW-NEXT:    [[I0:%.*]] = insertelement <8 x i32> undef, i32 [[ROT0:%.*]], i32 0
; AVX512BW-NEXT:    [[S0:%.*]] = shufflevector <8 x i32> [[I0]], <8 x i32> undef, <8 x i32> zeroinitializer
; AVX512BW-NEXT:    [[I1:%.*]] = insertelement <8 x i32> undef, i32 [[ROT1:%.*]], i32 0
; AVX512BW-NEXT:    [[S1:%.*]] = shufflevector <8 x i32> [[I1]], <8 x i32> undef, <8 x i32> zeroinitializer
; AVX512BW-NEXT:    br label [[LOOP:%.*]]
; AVX512BW:       loop:
; AVX512BW-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ]
; AVX512BW-NEXT:    [[T0:%.*]] = getelementptr inbounds i8, ptr [[CONTROL:%.*]], i64 [[INDEX]]
; AVX512BW-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[T0]], align 1
; AVX512BW-NEXT:    [[T2:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer
; AVX512BW-NEXT:    [[SHAMT:%.*]] = select <8 x i1> [[T2]], <8 x i32> [[S0]], <8 x i32> [[S1]]
; AVX512BW-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, ptr [[ARR:%.*]], i64 [[INDEX]]
; AVX512BW-NEXT:    [[WIDE_LOAD21:%.*]] = load <8 x i32>, ptr [[T4]], align 4
; AVX512BW-NEXT:    [[ROT:%.*]] = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> [[WIDE_LOAD21]], <8 x i32> [[WIDE_LOAD21]], <8 x i32> [[SHAMT]])
; AVX512BW-NEXT:    store <8 x i32> [[ROT]], ptr [[T4]], align 4
; AVX512BW-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 8
; AVX512BW-NEXT:    [[T7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; AVX512BW-NEXT:    br i1 [[T7]], label [[EXIT:%.*]], label [[LOOP]]
; AVX512BW:       exit:
; AVX512BW-NEXT:    ret void
;
; XOP-LABEL: @fancierRotate2(
; XOP-NEXT:  entry:
; XOP-NEXT:    [[I0:%.*]] = insertelement <8 x i32> undef, i32 [[ROT0:%.*]], i32 0
; XOP-NEXT:    [[S0:%.*]] = shufflevector <8 x i32> [[I0]], <8 x i32> undef, <8 x i32> zeroinitializer
; XOP-NEXT:    [[I1:%.*]] = insertelement <8 x i32> undef, i32 [[ROT1:%.*]], i32 0
; XOP-NEXT:    [[S1:%.*]] = shufflevector <8 x i32> [[I1]], <8 x i32> undef, <8 x i32> zeroinitializer
; XOP-NEXT:    br label [[LOOP:%.*]]
; XOP:       loop:
; XOP-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ]
; XOP-NEXT:    [[T0:%.*]] = getelementptr inbounds i8, ptr [[CONTROL:%.*]], i64 [[INDEX]]
; XOP-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[T0]], align 1
; XOP-NEXT:    [[T2:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer
; XOP-NEXT:    [[SHAMT:%.*]] = select <8 x i1> [[T2]], <8 x i32> [[S0]], <8 x i32> [[S1]]
; XOP-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, ptr [[ARR:%.*]], i64 [[INDEX]]
; XOP-NEXT:    [[WIDE_LOAD21:%.*]] = load <8 x i32>, ptr [[T4]], align 4
; XOP-NEXT:    [[ROT:%.*]] = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> [[WIDE_LOAD21]], <8 x i32> [[WIDE_LOAD21]], <8 x i32> [[SHAMT]])
; XOP-NEXT:    store <8 x i32> [[ROT]], ptr [[T4]], align 4
; XOP-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 8
; XOP-NEXT:    [[T7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; XOP-NEXT:    br i1 [[T7]], label [[EXIT:%.*]], label [[LOOP]]
; XOP:       exit:
; XOP-NEXT:    ret void
;
entry:
  %i0 = insertelement <8 x i32> undef, i32 %rot0, i32 0
  %s0 = shufflevector <8 x i32> %i0, <8 x i32> undef, <8 x i32> zeroinitializer
  %i1 = insertelement <8 x i32> undef, i32 %rot1, i32 0
  %s1 = shufflevector <8 x i32> %i1, <8 x i32> undef, <8 x i32> zeroinitializer
  br label %loop

loop:
  %index = phi i64 [ 0, %entry ], [ %index.next, %loop ]
  %t0 = getelementptr inbounds i8, ptr %control, i64 %index
  %wide.load = load <8 x i8>, ptr %t0, align 1
  %t2 = icmp eq <8 x i8> %wide.load, zeroinitializer
  %shamt = select <8 x i1> %t2, <8 x i32> %s0, <8 x i32> %s1
  %t4 = getelementptr inbounds i32, ptr %arr, i64 %index
  %wide.load21 = load <8 x i32>, ptr %t4, align 4
  %rot = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %wide.load21, <8 x i32> %wide.load21, <8 x i32> %shamt)
  store <8 x i32> %rot, ptr %t4, align 4
  %index.next = add i64 %index, 8
  %t7 = icmp eq i64 %index.next, 1024
  br i1 %t7, label %exit, label %loop

exit:
  ret void
}

; NOTE(review): no definition of attribute group #1 is visible in this file;
; confirm whether the reference is intentional.
declare <8 x i32> @llvm.fshl.v8i32(<8 x i32>, <8 x i32>, <8 x i32>) #1

; Check that every instruction inserted by CodeGenPrepare has a debug location.
; DEBUG: CheckModuleDebugify: PASS