; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX512
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=znver4 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512VBMI2
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX512

@a64 = common global [8 x i64] zeroinitializer, align 64
@b64 = common global [8 x i64] zeroinitializer, align 64
@c64 = common global [8 x i64] zeroinitializer, align 64
@d64 = common global [8 x i64] zeroinitializer, align 64
@a32 = common global [16 x i32] zeroinitializer, align 64
@b32 = common global [16 x i32] zeroinitializer, align 64
@c32 = common global [16 x i32] zeroinitializer, align 64
@d32 = common global [16 x i32] zeroinitializer, align 64
@a16 = common global [32 x i16] zeroinitializer, align 64
@b16 = common global [32 x i16] zeroinitializer, align 64
@c16 = common global [32 x i16] zeroinitializer, align 64
@d16 = common global [32 x i16] zeroinitializer, align 64
@a8 = common global [64 x i8] zeroinitializer, align 64
@b8 = common global [64 x i8] zeroinitializer, align 64
@c8 = common global [64 x i8] zeroinitializer, align 64
@d8 = common global [64 x i8] zeroinitializer, align 64

declare i64 @llvm.fshr.i64(i64, i64, i64)
declare i32 @llvm.fshr.i32(i32, i32, i32)
declare i16 @llvm.fshr.i16(i16, i16, i16)
declare i8 @llvm.fshr.i8 (i8 , i8 , i8 )

define void @fshr_v8i64() {
; SSE-LABEL: @fshr_v8i64(
; SSE-NEXT:    [[A0:%.*]] = load i64, ptr @a64, align 8
; SSE-NEXT:    [[A1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
; SSE-NEXT:    [[A2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
; SSE-NEXT:    [[A3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
; SSE-NEXT:    [[A4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
; SSE-NEXT:    [[A5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
; SSE-NEXT:    [[A6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
; SSE-NEXT:    [[A7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
; SSE-NEXT:    [[B0:%.*]] = load i64, ptr @b64, align 8
; SSE-NEXT:    [[B1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
; SSE-NEXT:    [[B2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
; SSE-NEXT:    [[B3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
; SSE-NEXT:    [[B4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
; SSE-NEXT:    [[B5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
; SSE-NEXT:    [[B6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
; SSE-NEXT:    [[B7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
; SSE-NEXT:    [[R0:%.*]] = call i64 @llvm.fshr.i64(i64 [[A0]], i64 [[A0]], i64 [[B0]])
; SSE-NEXT:    [[R1:%.*]] = call i64 @llvm.fshr.i64(i64 [[A1]], i64 [[A1]], i64 [[B1]])
; SSE-NEXT:    [[R2:%.*]] = call i64 @llvm.fshr.i64(i64 [[A2]], i64 [[A2]], i64 [[B2]])
; SSE-NEXT:    [[R3:%.*]] = call i64 @llvm.fshr.i64(i64 [[A3]], i64 [[A3]], i64 [[B3]])
; SSE-NEXT:    [[R4:%.*]] = call i64 @llvm.fshr.i64(i64 [[A4]], i64 [[A4]], i64 [[B4]])
; SSE-NEXT:    [[R5:%.*]] = call i64 @llvm.fshr.i64(i64 [[A5]], i64 [[A5]], i64 [[B5]])
; SSE-NEXT:    [[R6:%.*]] = call i64 @llvm.fshr.i64(i64 [[A6]], i64 [[A6]], i64 [[B6]])
; SSE-NEXT:    [[R7:%.*]] = call i64 @llvm.fshr.i64(i64 [[A7]], i64 [[A7]], i64 [[B7]])
; SSE-NEXT:    store i64 [[R0]], ptr @d64, align 8
; SSE-NEXT:    store i64 [[R1]], ptr getelementptr inbounds ([8 x i64], ptr @d64, i32 0, i64 1), align 8
; SSE-NEXT:    store i64 [[R2]], ptr getelementptr inbounds ([8 x i64], ptr @d64, i32 0, i64 2), align 8
; SSE-NEXT:    store i64 [[R3]], ptr getelementptr inbounds ([8 x i64], ptr @d64, i32 0, i64 3), align 8
; SSE-NEXT:    store i64 [[R4]], ptr getelementptr inbounds ([8 x i64], ptr @d64, i32 0, i64 4), align 8
; SSE-NEXT:    store i64 [[R5]], ptr getelementptr inbounds ([8 x i64], ptr @d64, i32 0, i64 5), align 8
; SSE-NEXT:    store i64 [[R6]], ptr getelementptr inbounds ([8 x i64], ptr @d64, i32 0, i64 6), align 8
; SSE-NEXT:    store i64 [[R7]], ptr getelementptr inbounds ([8 x i64], ptr @d64, i32 0, i64 7), align 8
; SSE-NEXT:    ret void
;
; AVX1-LABEL: @fshr_v8i64(
; AVX1-NEXT:    [[A0:%.*]] = load i64, ptr @a64, align 8
; AVX1-NEXT:    [[A1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
; AVX1-NEXT:    [[A2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
; AVX1-NEXT:    [[A3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
; AVX1-NEXT:    [[A4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
; AVX1-NEXT:    [[A5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
; AVX1-NEXT:    [[A6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
; AVX1-NEXT:    [[A7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
; AVX1-NEXT:    [[B0:%.*]] = load i64, ptr @b64, align 8
; AVX1-NEXT:    [[B1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
; AVX1-NEXT:    [[B2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
; AVX1-NEXT:    [[B3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
; AVX1-NEXT:    [[B4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
; AVX1-NEXT:    [[B5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
; AVX1-NEXT:    [[B6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
; AVX1-NEXT:    [[B7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
; AVX1-NEXT:    [[R0:%.*]] = call i64 @llvm.fshr.i64(i64 [[A0]], i64 [[A0]], i64 [[B0]])
; AVX1-NEXT:    [[R1:%.*]] = call i64 @llvm.fshr.i64(i64 [[A1]], i64 [[A1]], i64 [[B1]])
; AVX1-NEXT:    [[R2:%.*]] = call i64 @llvm.fshr.i64(i64 [[A2]], i64 [[A2]], i64 [[B2]])
; AVX1-NEXT:    [[R3:%.*]] = call i64 @llvm.fshr.i64(i64 [[A3]], i64 [[A3]], i64 [[B3]])
; AVX1-NEXT:    [[R4:%.*]] = call i64 @llvm.fshr.i64(i64 [[A4]], i64 [[A4]], i64 [[B4]])
; AVX1-NEXT:    [[R5:%.*]] = call i64 @llvm.fshr.i64(i64 [[A5]], i64 [[A5]], i64 [[B5]])
; AVX1-NEXT:    [[R6:%.*]] = call i64 @llvm.fshr.i64(i64 [[A6]], i64 [[A6]], i64 [[B6]])
; AVX1-NEXT:    [[R7:%.*]] = call i64 @llvm.fshr.i64(i64 [[A7]], i64 [[A7]], i64 [[B7]])
; AVX1-NEXT:    store i64 [[R0]], ptr @d64, align 8
; AVX1-NEXT:    store i64 [[R1]], ptr getelementptr inbounds ([8 x i64], ptr @d64, i32 0, i64 1), align 8
; AVX1-NEXT:    store i64 [[R2]], ptr getelementptr inbounds ([8 x i64], ptr @d64, i32 0, i64 2), align 8
; AVX1-NEXT:    store i64 [[R3]], ptr getelementptr inbounds ([8 x i64], ptr @d64, i32 0, i64 3), align 8
; AVX1-NEXT:    store i64 [[R4]], ptr getelementptr inbounds ([8 x i64], ptr @d64, i32 0, i64 4), align 8
; AVX1-NEXT:    store i64 [[R5]], ptr getelementptr inbounds ([8 x i64], ptr @d64, i32 0, i64 5), align 8
; AVX1-NEXT:    store i64 [[R6]], ptr getelementptr inbounds ([8 x i64], ptr @d64, i32 0, i64 6), align 8
; AVX1-NEXT:    store i64 [[R7]], ptr getelementptr inbounds ([8 x i64], ptr @d64, i32 0, i64 7), align 8
; AVX1-NEXT:    ret void
;
; AVX2-LABEL: @fshr_v8i64(
; AVX2-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @a64, align 8
; AVX2-NEXT:    [[TMP2:%.*]] = load <4 x i64>, ptr @b64, align 8
; AVX2-NEXT:    [[TMP3:%.*]] = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> [[TMP1]], <4 x i64> [[TMP1]], <4 x i64> [[TMP2]])
; AVX2-NEXT:    store <4 x i64> [[TMP3]], ptr @d64, align 8
; AVX2-NEXT:    [[TMP4:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
; AVX2-NEXT:    [[TMP5:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
; AVX2-NEXT:    [[TMP6:%.*]] = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> [[TMP4]], <4 x i64> [[TMP4]], <4 x i64> [[TMP5]])
; AVX2-NEXT:    store <4 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @d64, i32 0, i64 4), align 8
; AVX2-NEXT:    ret void
;
; AVX256-LABEL: @fshr_v8i64(
; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @a64, align 8
; AVX256-NEXT:    [[TMP2:%.*]] = load <4 x i64>, ptr @b64, align 8
; AVX256-NEXT:    [[TMP3:%.*]] = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> [[TMP1]], <4 x i64> [[TMP1]], <4 x i64> [[TMP2]])
; AVX256-NEXT:    store <4 x i64> [[TMP3]], ptr @d64, align 8
; AVX256-NEXT:    [[TMP4:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
; AVX256-NEXT:    [[TMP5:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
; AVX256-NEXT:    [[TMP6:%.*]] = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> [[TMP4]], <4 x i64> [[TMP4]], <4 x i64> [[TMP5]])
; AVX256-NEXT:    store <4 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @d64, i32 0, i64 4), align 8
; AVX256-NEXT:    ret void
;
; AVX512-LABEL: @fshr_v8i64(
; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @a64, align 8
; AVX512-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr @b64, align 8
; AVX512-NEXT:    [[TMP3:%.*]] = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP1]], <8 x i64> [[TMP2]])
; AVX512-NEXT:    store <8 x i64> [[TMP3]], ptr @d64, align 8
; AVX512-NEXT:    ret void
;
; AVX512VBMI2-LABEL: @fshr_v8i64(
; AVX512VBMI2-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @a64, align 8
; AVX512VBMI2-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr @b64, align 8
; AVX512VBMI2-NEXT:    [[TMP3:%.*]] = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP1]], <8 x i64> [[TMP2]])
; AVX512VBMI2-NEXT:    store <8 x i64> [[TMP3]], ptr @d64, align 8
; AVX512VBMI2-NEXT:    ret void
;
  %a0 = load i64, ptr @a64, align 8
  %a1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
  %a2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
  %a3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
  %a4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
  %a5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
  %a6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
  %a7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
  %b0 = load i64, ptr @b64, align 8
  %b1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
  %b2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
  %b3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
  %b4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
  %b5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
  %b6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
  %b7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
  %r0 = call i64 @llvm.fshr.i64(i64 %a0, i64 %a0, i64 %b0)
  %r1 = call i64 @llvm.fshr.i64(i64 %a1, i64 %a1, i64 %b1)
  %r2 = call i64 @llvm.fshr.i64(i64 %a2, i64 %a2, i64 %b2)
  %r3 = call i64 @llvm.fshr.i64(i64 %a3, i64 %a3, i64 %b3)
  %r4 = call i64 @llvm.fshr.i64(i64 %a4, i64 %a4, i64 %b4)
  %r5 = call i64 @llvm.fshr.i64(i64 %a5, i64 %a5, i64 %b5)
  %r6 = call i64 @llvm.fshr.i64(i64 %a6, i64 %a6, i64 %b6)
  %r7 = call i64 @llvm.fshr.i64(i64 %a7, i64 %a7, i64 %b7)
  store i64 %r0, ptr @d64, align 8
  store i64 %r1, ptr getelementptr inbounds ([8 x i64], ptr @d64, i32 0, i64 1), align 8
  store i64 %r2, ptr getelementptr inbounds ([8 x i64], ptr @d64, i32 0, i64 2), align 8
  store i64 %r3, ptr getelementptr inbounds ([8 x i64], ptr @d64, i32 0, i64 3), align 8
  store i64 %r4, ptr getelementptr inbounds ([8 x i64], ptr @d64, i32 0, i64 4), align 8
  store i64 %r5, ptr getelementptr inbounds ([8 x i64], ptr @d64, i32 0, i64 5), align 8
  store i64 %r6, ptr getelementptr inbounds ([8 x i64], ptr @d64, i32 0, i64 6), align 8
  store i64 %r7, ptr getelementptr inbounds ([8 x i64], ptr @d64, i32 0, i64 7), align 8
  ret void
}

define void @fshr_v16i32() {
; SSE-LABEL: @fshr_v16i32(
; SSE-NEXT:    [[A0:%.*]] = load i32, ptr @a32, align 4
; SSE-NEXT:    [[A1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1), align 4
; SSE-NEXT:    [[A2:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2), align 4
; SSE-NEXT:    [[A3:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3), align 4
; SSE-NEXT:    [[A4:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
; SSE-NEXT:    [[A5:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5), align 4
; SSE-NEXT:    [[A6:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6), align 4
; SSE-NEXT:    [[A7:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7), align 4
; SSE-NEXT:    [[A8:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
; SSE-NEXT:    [[A9:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9), align 4
; SSE-NEXT:    [[A10:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4
; SSE-NEXT:    [[A11:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4
; SSE-NEXT:    [[A12:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
; SSE-NEXT:    [[A13:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4
; SSE-NEXT:    [[A14:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4
; SSE-NEXT:    [[A15:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4
; SSE-NEXT:    [[B0:%.*]] = load i32, ptr @b32, align 4
; SSE-NEXT:    [[B1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1), align 4
; SSE-NEXT:    [[B2:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 2), align 4
; SSE-NEXT:    [[B3:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 3), align 4
; SSE-NEXT:    [[B4:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4
; SSE-NEXT:    [[B5:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 5), align 4
; SSE-NEXT:    [[B6:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 6), align 4
; SSE-NEXT:    [[B7:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 7), align 4
; SSE-NEXT:    [[B8:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
; SSE-NEXT:    [[B9:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 9), align 4
; SSE-NEXT:    [[B10:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 10), align 4
; SSE-NEXT:    [[B11:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 11), align 4
; SSE-NEXT:    [[B12:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
; SSE-NEXT:    [[B13:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 13), align 4
; SSE-NEXT:    [[B14:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 14), align 4
; SSE-NEXT:    [[B15:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 15), align 4
; SSE-NEXT:    [[R0:%.*]] = call i32 @llvm.fshr.i32(i32 [[A0]], i32 [[A0]], i32 [[B0]])
; SSE-NEXT:    [[R1:%.*]] = call i32 @llvm.fshr.i32(i32 [[A1]], i32 [[A1]], i32 [[B1]])
; SSE-NEXT:    [[R2:%.*]] = call i32 @llvm.fshr.i32(i32 [[A2]], i32 [[A2]], i32 [[B2]])
; SSE-NEXT:    [[R3:%.*]] = call i32 @llvm.fshr.i32(i32 [[A3]], i32 [[A3]], i32 [[B3]])
; SSE-NEXT:    [[R4:%.*]] = call i32 @llvm.fshr.i32(i32 [[A4]], i32 [[A4]], i32 [[B4]])
; SSE-NEXT:    [[R5:%.*]] = call i32 @llvm.fshr.i32(i32 [[A5]], i32 [[A5]], i32 [[B5]])
; SSE-NEXT:    [[R6:%.*]] = call i32 @llvm.fshr.i32(i32 [[A6]], i32 [[A6]], i32 [[B6]])
; SSE-NEXT:    [[R7:%.*]] = call i32 @llvm.fshr.i32(i32 [[A7]], i32 [[A7]], i32 [[B7]])
; SSE-NEXT:    [[R8:%.*]] = call i32 @llvm.fshr.i32(i32 [[A8]], i32 [[A8]], i32 [[B8]])
; SSE-NEXT:    [[R9:%.*]] = call i32 @llvm.fshr.i32(i32 [[A9]], i32 [[A9]], i32 [[B9]])
; SSE-NEXT:    [[R10:%.*]] = call i32 @llvm.fshr.i32(i32 [[A10]], i32 [[A10]], i32 [[B10]])
; SSE-NEXT:    [[R11:%.*]] = call i32 @llvm.fshr.i32(i32 [[A11]], i32 [[A11]], i32 [[B11]])
; SSE-NEXT:    [[R12:%.*]] = call i32 @llvm.fshr.i32(i32 [[A12]], i32 [[A12]], i32 [[B12]])
; SSE-NEXT:    [[R13:%.*]] = call i32 @llvm.fshr.i32(i32 [[A13]], i32 [[A13]], i32 [[B13]])
; SSE-NEXT:    [[R14:%.*]] = call i32 @llvm.fshr.i32(i32 [[A14]], i32 [[A14]], i32 [[B14]])
; SSE-NEXT:    [[R15:%.*]] = call i32 @llvm.fshr.i32(i32 [[A15]], i32 [[A15]], i32 [[B15]])
; SSE-NEXT:    store i32 [[R0]], ptr @d32, align 4
; SSE-NEXT:    store i32 [[R1]], ptr getelementptr inbounds ([16 x i32], ptr @d32, i32 0, i64 1), align 4
; SSE-NEXT:    store i32 [[R2]], ptr getelementptr inbounds ([16 x i32], ptr @d32, i32 0, i64 2), align 4
; SSE-NEXT:    store i32 [[R3]], ptr getelementptr inbounds ([16 x i32], ptr @d32, i32 0, i64 3), align 4
; SSE-NEXT:    store i32 [[R4]], ptr getelementptr inbounds ([16 x i32], ptr @d32, i32 0, i64 4), align 4
; SSE-NEXT:    store i32 [[R5]], ptr getelementptr inbounds ([16 x i32], ptr @d32, i32 0, i64 5), align 4
; SSE-NEXT:    store i32 [[R6]], ptr getelementptr inbounds ([16 x i32], ptr @d32, i32 0, i64 6), align 4
; SSE-NEXT:    store i32 [[R7]], ptr getelementptr inbounds ([16 x i32], ptr @d32, i32 0, i64 7), align 4
; SSE-NEXT:    store i32 [[R8]], ptr getelementptr inbounds ([16 x i32], ptr @d32, i32 0, i64 8), align 4
; SSE-NEXT:    store i32 [[R9]], ptr getelementptr inbounds ([16 x i32], ptr @d32, i32 0, i64 9), align 4
; SSE-NEXT:    store i32 [[R10]], ptr getelementptr inbounds ([16 x i32], ptr @d32, i32 0, i64 10), align 4
; SSE-NEXT:    store i32 [[R11]], ptr getelementptr inbounds ([16 x i32], ptr @d32, i32 0, i64 11), align 4
; SSE-NEXT:    store i32 [[R12]], ptr getelementptr inbounds ([16 x i32], ptr @d32, i32 0, i64 12), align 4
; SSE-NEXT:    store i32 [[R13]], ptr getelementptr inbounds ([16 x i32], ptr @d32, i32 0, i64 13), align 4
; SSE-NEXT:    store i32 [[R14]], ptr getelementptr inbounds ([16 x i32], ptr @d32, i32 0, i64 14), align 4
; SSE-NEXT:    store i32 [[R15]], ptr getelementptr inbounds ([16 x i32], ptr @d32, i32 0, i64 15), align 4
; SSE-NEXT:    ret void
;
; AVX1-LABEL: @fshr_v16i32(
; AVX1-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
; AVX1-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr @b32, align 4
; AVX1-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
; AVX1-NEXT:    store <4 x i32> [[TMP3]], ptr @d32, align 4
; AVX1-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
; AVX1-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4
; AVX1-NEXT:    [[TMP6:%.*]] = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> [[TMP4]], <4 x i32> [[TMP4]], <4 x i32> [[TMP5]])
; AVX1-NEXT:    store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @d32, i32 0, i64 4), align 4
; AVX1-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
; AVX1-NEXT:    [[TMP8:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
; AVX1-NEXT:    [[TMP9:%.*]] = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> [[TMP7]], <4 x i32> [[TMP7]], <4 x i32> [[TMP8]])
; AVX1-NEXT:    store <4 x i32> [[TMP9]], ptr getelementptr inbounds ([16 x i32], ptr @d32, i32 0, i64 8), align 4
; AVX1-NEXT:    [[TMP10:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
; AVX1-NEXT:    [[TMP11:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
; AVX1-NEXT:    [[TMP12:%.*]] = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> [[TMP10]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]])
; AVX1-NEXT:    store <4 x i32> [[TMP12]], ptr getelementptr inbounds ([16 x i32], ptr @d32, i32 0, i64 12), align 4
; AVX1-NEXT:    ret void
;
; AVX2-LABEL: @fshr_v16i32(
; AVX2-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4
; AVX2-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr @b32, align 4
; AVX2-NEXT:    [[TMP3:%.*]] = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> [[TMP1]], <8 x i32> [[TMP1]], <8 x i32> [[TMP2]])
; AVX2-NEXT:    store <8 x i32> [[TMP3]], ptr @d32, align 4
; AVX2-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
; AVX2-NEXT:    [[TMP5:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
; AVX2-NEXT:    [[TMP6:%.*]] = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> [[TMP4]], <8 x i32> [[TMP4]], <8 x i32> [[TMP5]])
; AVX2-NEXT:    store <8 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @d32, i32 0, i64 8), align 4
; AVX2-NEXT:    ret void
;
; AVX256-LABEL: @fshr_v16i32(
; AVX256-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4
; AVX256-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr @b32, align 4
; AVX256-NEXT:    [[TMP3:%.*]] = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> [[TMP1]], <8 x i32> [[TMP1]], <8 x i32> [[TMP2]])
; AVX256-NEXT:    store <8 x i32> [[TMP3]], ptr @d32, align 4
; AVX256-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
; AVX256-NEXT:    [[TMP5:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
; AVX256-NEXT:    [[TMP6:%.*]] = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> [[TMP4]], <8 x i32> [[TMP4]], <8 x i32> [[TMP5]])
; AVX256-NEXT:    store <8 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @d32, i32 0, i64 8), align 4
; AVX256-NEXT:    ret void
;
; AVX512-LABEL: @fshr_v16i32(
; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4
; AVX512-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr @b32, align 4
; AVX512-NEXT:    [[TMP3:%.*]] = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> [[TMP1]], <16 x i32> [[TMP1]], <16 x i32> [[TMP2]])
; AVX512-NEXT:    store <16 x i32> [[TMP3]], ptr @d32, align 4
; AVX512-NEXT:    ret void
;
; AVX512VBMI2-LABEL: @fshr_v16i32(
; AVX512VBMI2-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4
; AVX512VBMI2-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr @b32, align 4
; AVX512VBMI2-NEXT:    [[TMP3:%.*]] = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> [[TMP1]], <16 x i32> [[TMP1]], <16 x i32> [[TMP2]])
; AVX512VBMI2-NEXT:    store <16 x i32> [[TMP3]], ptr @d32, align 4
; AVX512VBMI2-NEXT:    ret void
;
  %a0 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 0 ), align 4
  %a1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1 ), align 4
  %a2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2 ), align 4
  %a3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3 ), align 4
  %a4 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4 ), align 4
  %a5 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5 ), align 4
  %a6 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6 ), align 4
  %a7 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7 ), align 4
  %a8 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8 ), align 4
  %a9 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9 ), align 4
  %a10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4
  %a11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4
  %a12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
  %a13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4
  %a14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4
  %a15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4
  %b0 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 0 ), align 4
  %b1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1 ), align 4
  %b2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 2 ), align 4
  %b3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 3 ), align 4
  %b4 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4 ), align 4
  %b5 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 5 ), align 4
  %b6 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 6 ), align 4
  %b7 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 7 ), align 4
  %b8 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8 ), align 4
  %b9 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 9 ), align 4
  %b10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 10), align 4
  %b11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 11), align 4
  %b12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
  %b13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 13), align 4
  %b14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 14), align 4
  %b15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 15), align 4
  %r0 = call i32 @llvm.fshr.i32(i32 %a0 , i32 %a0 , i32 %b0 )
  %r1 = call i32 @llvm.fshr.i32(i32 %a1 , i32 %a1 , i32 %b1 )
  %r2 = call i32 @llvm.fshr.i32(i32 %a2 , i32 %a2 , i32 %b2 )
  %r3 = call i32 @llvm.fshr.i32(i32 %a3 , i32 %a3 , i32 %b3 )
  %r4 = call i32 @llvm.fshr.i32(i32 %a4 , i32 %a4 , i32 %b4 )
  %r5 = call i32 @llvm.fshr.i32(i32 %a5 , i32 %a5 , i32 %b5 )
  %r6 = call i32 @llvm.fshr.i32(i32 %a6 , i32 %a6 , i32 %b6 )
  %r7 = call i32 @llvm.fshr.i32(i32 %a7 , i32 %a7 , i32 %b7 )
  %r8 = call i32 @llvm.fshr.i32(i32 %a8 , i32 %a8 , i32 %b8 )
  %r9 = call i32 @llvm.fshr.i32(i32 %a9 , i32 %a9 , i32 %b9 )
  %r10 = call i32 @llvm.fshr.i32(i32 %a10, i32 %a10, i32 %b10)
  %r11 = call i32 @llvm.fshr.i32(i32 %a11, i32 %a11, i32 %b11)
  %r12 = call i32 @llvm.fshr.i32(i32 %a12, i32 %a12, i32 %b12)
  %r13 = call i32 @llvm.fshr.i32(i32 %a13, i32 %a13, i32 %b13)
  %r14 = call i32 @llvm.fshr.i32(i32 %a14, i32 %a14, i32 %b14)
  %r15 = call i32 @llvm.fshr.i32(i32 %a15, i32 %a15, i32 %b15)
  store i32 %r0 , ptr getelementptr inbounds ([16 x i32], ptr @d32, i32 0, i64 0 ), align 4
  store i32 %r1 , ptr getelementptr inbounds ([16 x i32], ptr @d32, i32 0, i64 1 ), align 4
  store i32 %r2 , ptr getelementptr inbounds ([16 x i32], ptr @d32, i32 0, i64 2 ), align 4
  store i32 %r3 , ptr getelementptr inbounds ([16 x i32], ptr @d32, i32 0, i64 3 ), align 4
  store i32 %r4 , ptr getelementptr inbounds ([16 x i32], ptr @d32, i32 0, i64 4 ), align 4
  store i32 %r5 , ptr getelementptr inbounds ([16 x i32], ptr @d32, i32 0, i64 5 ), align 4
  store i32 %r6 , ptr getelementptr inbounds ([16 x i32], ptr @d32, i32 0, i64 6 ), align 4
  store i32 %r7 , ptr getelementptr inbounds ([16 x i32], ptr @d32, i32 0, i64 7 ), align 4
  store i32 %r8 , ptr getelementptr inbounds ([16 x i32], ptr @d32, i32 0, i64 8 ), align 4
  store i32 %r9 , ptr getelementptr inbounds ([16 x i32], ptr @d32, i32 0, i64 9 ), align 4
  store i32 %r10, ptr getelementptr inbounds ([16 x i32], ptr @d32, i32 0, i64 10), align 4
  store i32 %r11, ptr getelementptr inbounds ([16 x i32], ptr @d32, i32 0, i64 11), align 4
  store i32 %r12, ptr getelementptr inbounds ([16 x i32], ptr @d32, i32 0, i64 12), align 4
  store i32 %r13, ptr getelementptr inbounds ([16 x i32], ptr @d32, i32 0, i64 13), align 4
  store i32 %r14, ptr getelementptr inbounds ([16 x i32], ptr @d32, i32 0, i64 14), align 4
  store i32 %r15, ptr getelementptr inbounds ([16 x i32], ptr @d32, i32 0, i64 15), align 4
  ret void
}

define void @fshr_v32i16() {
; SSE-LABEL: @fshr_v32i16(
; SSE-NEXT:    [[A0:%.*]] = load i16, ptr @a16, align 2
; SSE-NEXT:    [[A1:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 1), align 2
; SSE-NEXT:    [[A2:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 2), align 2
; SSE-NEXT:    [[A3:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 3), align 2
; SSE-NEXT:    [[A4:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 4), align 2
; SSE-NEXT:    [[A5:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 5), align 2
; SSE-NEXT:    [[A6:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 6), align 2
; SSE-NEXT:    [[A7:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 7), align 2
; SSE-NEXT:    [[A8:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8), align 2
; SSE-NEXT:    [[A9:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 9), align 2
; SSE-NEXT:    [[A10:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 10), align 2
; SSE-NEXT:    [[A11:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 11), align 2
; SSE-NEXT:    [[A12:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 12), align 2
; SSE-NEXT:    [[A13:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 13), align 2
; SSE-NEXT:    [[A14:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 14), align 2
; SSE-NEXT:    [[A15:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 15), align 2
; SSE-NEXT:    [[A16:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
; SSE-NEXT:    [[A17:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 17), align 2
; SSE-NEXT:    [[A18:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 18), align 2
; SSE-NEXT:    [[A19:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 19), align 2
; SSE-NEXT:    [[A20:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 20), align 2
; SSE-NEXT:    [[A21:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 21), align 2
; SSE-NEXT:    [[A22:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 22), align 2
; SSE-NEXT:    [[A23:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 23), align 2
; SSE-NEXT:    [[A24:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
; SSE-NEXT:    [[A25:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 25), align 2
; SSE-NEXT:    [[A26:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 26), align 2
; SSE-NEXT:    [[A27:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 27), align 2
; SSE-NEXT:    [[A28:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 28), align 2
; SSE-NEXT:    [[A29:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 29), align 2
; SSE-NEXT:    [[A30:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 30), align 2
; SSE-NEXT:    [[A31:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 31), align 2
; SSE-NEXT:    [[B0:%.*]] = load i16, ptr @b16, align 2
; SSE-NEXT:    [[B1:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 1), align 2
; SSE-NEXT:    [[B2:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 2), align 2
; SSE-NEXT:    [[B3:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 3), align 2
; SSE-NEXT:    [[B4:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 4), align 2
; SSE-NEXT:    [[B5:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 5), align 2
; SSE-NEXT:    [[B6:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 6), align 2
; SSE-NEXT:    [[B7:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 7), align 2
; SSE-NEXT:    [[B8:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8), align 2
; SSE-NEXT:    [[B9:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 9), align 2
; SSE-NEXT:    [[B10:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 10), align 2
; SSE-NEXT:    [[B11:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 11), align 2
; SSE-NEXT:    [[B12:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 12), align 2
; SSE-NEXT:    [[B13:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 13), align 2
; SSE-NEXT:    [[B14:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 14), align 2
; SSE-NEXT:    [[B15:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 15), align 2
; SSE-NEXT:    [[B16:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
; SSE-NEXT:    [[B17:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 17), align 2
; SSE-NEXT:    [[B18:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 18), align 2
; SSE-NEXT:    [[B19:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 19), align 2
; SSE-NEXT:    [[B20:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 20), align 2
; SSE-NEXT:    [[B21:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 21), align 2
; SSE-NEXT:    [[B22:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 22), align 2
; SSE-NEXT:    [[B23:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 23), align 2
; SSE-NEXT:    [[B24:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
; SSE-NEXT:    [[B25:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 25), align 2
; SSE-NEXT:    [[B26:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 26), align 2
; SSE-NEXT:    [[B27:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 27), align 2
; SSE-NEXT:    [[B28:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 28), align 2
; SSE-NEXT:    [[B29:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 29), align 2
; SSE-NEXT:    [[B30:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 30), align 2
; SSE-NEXT:    [[B31:%.*]] = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 31), align 2
; SSE-NEXT:    [[R0:%.*]] = call i16 @llvm.fshr.i16(i16 [[A0]], i16 [[A0]], i16 [[B0]])
; SSE-NEXT:    [[R1:%.*]] = call i16 @llvm.fshr.i16(i16 [[A1]], i16 [[A1]], i16 [[B1]])
; SSE-NEXT:    [[R2:%.*]] = call i16 @llvm.fshr.i16(i16 [[A2]], i16 [[A2]], i16 [[B2]])
; SSE-NEXT:    [[R3:%.*]] = call i16 @llvm.fshr.i16(i16 [[A3]], i16 [[A3]], i16 [[B3]])
; SSE-NEXT:    [[R4:%.*]] = call i16 @llvm.fshr.i16(i16 [[A4]], i16 [[A4]], i16 [[B4]])
; SSE-NEXT:    [[R5:%.*]] = call i16 @llvm.fshr.i16(i16 [[A5]], i16 [[A5]], i16 [[B5]])
; SSE-NEXT:    [[R6:%.*]] = call i16 @llvm.fshr.i16(i16 [[A6]], i16 [[A6]], i16 [[B6]])
; SSE-NEXT:    [[R7:%.*]] = call i16 @llvm.fshr.i16(i16 [[A7]], i16 [[A7]], i16 [[B7]])
; SSE-NEXT:    [[R8:%.*]] = call i16 @llvm.fshr.i16(i16 [[A8]], i16 [[A8]], i16 [[B8]])
; SSE-NEXT:    [[R9:%.*]] = call i16 @llvm.fshr.i16(i16 [[A9]], i16 [[A9]], i16 [[B9]])
; SSE-NEXT:    [[R10:%.*]] = call i16 @llvm.fshr.i16(i16 [[A10]], i16 [[A10]], i16 [[B10]])
; SSE-NEXT:    [[R11:%.*]] = call i16 @llvm.fshr.i16(i16 [[A11]], i16 [[A11]], i16 [[B11]])
; SSE-NEXT:    [[R12:%.*]] = call i16 @llvm.fshr.i16(i16 [[A12]], i16 [[A12]], i16 [[B12]])
; SSE-NEXT:    [[R13:%.*]] = call i16 @llvm.fshr.i16(i16 [[A13]], i16 [[A13]], i16 [[B13]])
; SSE-NEXT:    [[R14:%.*]] = call i16 @llvm.fshr.i16(i16 [[A14]], i16 [[A14]], i16 [[B14]])
; SSE-NEXT:    [[R15:%.*]] = call i16 @llvm.fshr.i16(i16 [[A15]], i16 [[A15]], i16 [[B15]])
; SSE-NEXT:    [[R16:%.*]] = call i16 @llvm.fshr.i16(i16 [[A16]], i16 [[A16]], i16 [[B16]])
; SSE-NEXT:    [[R17:%.*]] = call i16 @llvm.fshr.i16(i16 [[A17]], i16 [[A17]], i16 [[B17]])
; SSE-NEXT:    [[R18:%.*]] = call i16 @llvm.fshr.i16(i16 [[A18]], i16 [[A18]], i16 [[B18]])
; SSE-NEXT:    [[R19:%.*]] = call i16 @llvm.fshr.i16(i16 [[A19]], i16 [[A19]], i16 [[B19]])
; SSE-NEXT:    [[R20:%.*]] = call i16 @llvm.fshr.i16(i16 [[A20]], i16 [[A20]], i16 [[B20]])
; SSE-NEXT:    [[R21:%.*]] = call i16 @llvm.fshr.i16(i16 [[A21]], i16 [[A21]], i16 [[B21]])
; SSE-NEXT:    [[R22:%.*]] = call i16 @llvm.fshr.i16(i16 [[A22]], i16 [[A22]], i16 [[B22]])
; SSE-NEXT:    [[R23:%.*]] = call i16 @llvm.fshr.i16(i16 [[A23]], i16 [[A23]], i16 [[B23]])
; SSE-NEXT:    [[R24:%.*]] = call i16 @llvm.fshr.i16(i16 [[A24]], i16 [[A24]], i16 [[B24]])
; SSE-NEXT:    [[R25:%.*]] = call i16 @llvm.fshr.i16(i16 [[A25]], i16 [[A25]], i16 [[B25]])
; SSE-NEXT:    [[R26:%.*]] = call i16 @llvm.fshr.i16(i16 [[A26]], i16 [[A26]], i16 [[B26]])
; SSE-NEXT:    [[R27:%.*]] = call i16 @llvm.fshr.i16(i16 [[A27]], i16 [[A27]], i16 [[B27]])
; SSE-NEXT:    [[R28:%.*]] = call i16 @llvm.fshr.i16(i16 [[A28]], i16 [[A28]], i16 [[B28]])
; SSE-NEXT:    [[R29:%.*]] = call i16 @llvm.fshr.i16(i16 [[A29]], i16 [[A29]], i16 [[B29]])
; SSE-NEXT:    [[R30:%.*]] = call i16 @llvm.fshr.i16(i16 [[A30]], i16 [[A30]], i16 [[B30]])
; SSE-NEXT:    [[R31:%.*]] = call i16 @llvm.fshr.i16(i16 [[A31]], i16 [[A31]], i16 [[B31]])
; SSE-NEXT:    store i16 [[R0]], ptr @d16, align 2
; SSE-NEXT:    store i16 [[R1]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 1), align 2
; SSE-NEXT:    store i16 [[R2]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 2), align 2
; SSE-NEXT:    store i16 [[R3]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 3), align 2
; SSE-NEXT:    store i16 [[R4]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 4), align 2
; SSE-NEXT:    store i16 [[R5]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 5), align 2
; SSE-NEXT:    store i16 [[R6]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 6), align 2
; SSE-NEXT:    store i16 [[R7]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 7), align 2
; SSE-NEXT:    store i16 [[R8]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 8), align 2
; SSE-NEXT:    store i16 [[R9]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 9), align 2
; SSE-NEXT:    store i16 [[R10]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 10), align 2
; SSE-NEXT:    store i16 [[R11]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 11), align 2
; SSE-NEXT:    store i16 [[R12]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 12), align 2
; SSE-NEXT:    store i16 [[R13]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 13), align 2
; SSE-NEXT:    store i16 [[R14]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 14), align 2
; SSE-NEXT:    store i16 [[R15]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 15), align 2
; SSE-NEXT:    store i16 [[R16]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 16), align 2
; SSE-NEXT:    store i16 [[R17]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 17), align 2
; SSE-NEXT:    store i16 [[R18]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 18), align 2
; SSE-NEXT:    store i16 [[R19]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 19), align 2
; SSE-NEXT:    store i16 [[R20]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 20), align 2
; SSE-NEXT:    store i16 [[R21]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 21), align 2
; SSE-NEXT:    store i16 [[R22]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 22), align 2
; SSE-NEXT:    store i16 [[R23]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 23), align 2
; SSE-NEXT:    store i16 [[R24]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 24), align 2
; SSE-NEXT:    store i16 [[R25]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 25), align 2
; SSE-NEXT:    store i16 [[R26]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 26), align 2
; SSE-NEXT:    store i16 [[R27]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 27), align 2
; SSE-NEXT:    store i16 [[R28]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 28), align 2
; SSE-NEXT:    store i16 [[R29]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 29), align 2
; SSE-NEXT:    store i16 [[R30]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 30), align 2
; SSE-NEXT:    store i16 [[R31]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 31), align 2
; SSE-NEXT:    ret void
;
; AVX-LABEL: @fshr_v32i16(
; AVX-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @a16, align 2
; AVX-NEXT:    [[TMP2:%.*]] = load <16 x i16>, ptr @b16, align 2
; AVX-NEXT:    [[TMP3:%.*]] = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> [[TMP1]], <16 x i16> [[TMP1]], <16 x i16> [[TMP2]])
; AVX-NEXT:    store <16 x i16> [[TMP3]], ptr @d16, align 2
; AVX-NEXT:    [[TMP4:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
; AVX-NEXT:    [[TMP5:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
; AVX-NEXT:    [[TMP6:%.*]] = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> [[TMP4]], <16 x i16> [[TMP4]], <16 x i16> [[TMP5]])
; AVX-NEXT:    store <16 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 16), align 2
; AVX-NEXT:    ret void
;
; AVX512-LABEL: @fshr_v32i16(
; AVX512-NEXT:    [[TMP1:%.*]] = load <32 x i16>, ptr @a16, align 2
; AVX512-NEXT:    [[TMP2:%.*]] = load <32 x i16>, ptr @b16, align 2
; AVX512-NEXT:    [[TMP3:%.*]] = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> [[TMP1]], <32 x i16> [[TMP1]], <32 x i16> [[TMP2]])
; AVX512-NEXT:    store <32 x i16> [[TMP3]], ptr @d16, align 2
; AVX512-NEXT:    ret void
;
; AVX512VBMI2-LABEL: @fshr_v32i16(
; AVX512VBMI2-NEXT:    [[TMP1:%.*]] = load <32 x i16>, ptr @a16, align 2
; AVX512VBMI2-NEXT:    [[TMP2:%.*]] = load <32 x i16>, ptr @b16, align 2
; AVX512VBMI2-NEXT:    [[TMP3:%.*]] = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> [[TMP1]], <32 x i16> [[TMP1]], <32 x i16> [[TMP2]])
; AVX512VBMI2-NEXT:    store <32 x i16> [[TMP3]], ptr @d16, align 2
; AVX512VBMI2-NEXT:    ret void
;
  %a0 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 0 ), align 2
  %a1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 1 ), align 2
  %a2 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 2 ), align 2
  %a3 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 3 ), align 2
  %a4 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 4 ), align 2
  %a5 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 5 ), align 2
  %a6 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 6 ), align 2
  %a7 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 7 ), align 2
  %a8 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8 ), align 2
  %a9 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 9 ), align 2
  %a10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 10), align 2
  %a11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 11), align 2
  %a12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 12), align 2
  %a13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 13), align 2
  %a14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 14), align 2
  %a15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 15), align 2
  %a16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
  %a17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 17), align 2
  %a18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 18), align 2
  %a19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 19), align 2
  %a20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 20), align 2
  %a21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 21), align 2
  %a22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 22), align 2
  %a23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 23), align 2
  %a24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
  %a25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 25), align 2
  %a26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 26), align 2
  %a27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 27), align 2
  %a28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 28), align 2
  %a29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 29), align 2
  %a30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 30), align 2
  %a31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 31), align 2
  %b0 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 0 ), align 2
  %b1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 1 ), align 2
  %b2 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 2 ), align 2
  %b3 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 3 ), align 2
  %b4 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 4 ), align 2
  %b5 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 5 ), align 2
  %b6 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 6 ), align 2
  %b7 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 7 ), align 2
  %b8 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8 ), align 2
  %b9 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 9 ), align 2
  %b10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 10), align 2
  %b11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 11), align 2
  %b12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 12), align 2
  %b13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 13), align 2
  %b14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 14), align 2
  %b15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 15), align 2
  %b16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
  %b17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 17), align 2
  %b18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 18), align 2
  %b19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 19), align 2
  %b20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 20), align 2
  %b21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 21), align 2
  %b22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 22), align 2
  %b23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 23), align 2
  %b24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
  %b25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 25), align 2
  %b26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 26), align 2
  %b27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 27), align 2
  %b28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 28), align 2
  %b29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 29), align 2
  %b30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 30), align 2
  %b31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 31), align 2
  %r0 = call i16 @llvm.fshr.i16(i16 %a0 , i16 %a0 , i16 %b0 )
  %r1 = call i16 @llvm.fshr.i16(i16 %a1 , i16 %a1 , i16 %b1 )
  %r2 = call i16 @llvm.fshr.i16(i16 %a2 , i16 %a2 , i16 %b2 )
  %r3 = call i16 @llvm.fshr.i16(i16 %a3 , i16 %a3 , i16 %b3 )
  %r4 = call i16 @llvm.fshr.i16(i16 %a4 , i16 %a4 , i16 %b4 )
  %r5 = call i16 @llvm.fshr.i16(i16 %a5 , i16 %a5 , i16 %b5 )
  %r6 = call i16 @llvm.fshr.i16(i16 %a6 , i16 %a6 , i16 %b6 )
  %r7 = call i16 @llvm.fshr.i16(i16 %a7 , i16 %a7 , i16 %b7 )
  %r8 = call i16 @llvm.fshr.i16(i16 %a8 , i16 %a8 , i16 %b8 )
  %r9 = call i16 @llvm.fshr.i16(i16 %a9 , i16 %a9 , i16 %b9 )
  %r10 = call i16 @llvm.fshr.i16(i16 %a10, i16 %a10, i16 %b10)
  %r11 = call i16 @llvm.fshr.i16(i16 %a11, i16 %a11, i16 %b11)
  %r12 = call i16 @llvm.fshr.i16(i16 %a12, i16 %a12, i16 %b12)
  %r13 = call i16 @llvm.fshr.i16(i16 %a13, i16 %a13, i16 %b13)
  %r14 = call i16 @llvm.fshr.i16(i16 %a14, i16 %a14, i16 %b14)
  %r15 = call i16 @llvm.fshr.i16(i16 %a15, i16 %a15, i16 %b15)
  %r16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 %b16)
  %r17 = call i16 @llvm.fshr.i16(i16 %a17, i16 %a17, i16 %b17)
  %r18 = call i16 @llvm.fshr.i16(i16 %a18, i16 %a18, i16 %b18)
  %r19 = call i16 @llvm.fshr.i16(i16 %a19, i16 %a19, i16 %b19)
  %r20 = call i16 @llvm.fshr.i16(i16 %a20, i16 %a20, i16 %b20)
  %r21 = call i16 @llvm.fshr.i16(i16 %a21, i16 %a21, i16 %b21)
  %r22 = call i16 @llvm.fshr.i16(i16 %a22, i16 %a22, i16 %b22)
  %r23 = call i16 @llvm.fshr.i16(i16 %a23, i16 %a23, i16 %b23)
  %r24 = call i16 @llvm.fshr.i16(i16 %a24, i16 %a24, i16 %b24)
  %r25 = call i16 @llvm.fshr.i16(i16 %a25, i16 %a25, i16 %b25)
  %r26 = call i16 @llvm.fshr.i16(i16 %a26, i16 %a26, i16 %b26)
  %r27 = call i16 @llvm.fshr.i16(i16 %a27, i16 %a27, i16 %b27)
  %r28 = call i16 @llvm.fshr.i16(i16 %a28, i16 %a28, i16 %b28)
  %r29 = call i16 @llvm.fshr.i16(i16 %a29, i16 %a29, i16 %b29)
  %r30 = call i16 @llvm.fshr.i16(i16 %a30, i16 %a30, i16 %b30)
  %r31 = call i16 @llvm.fshr.i16(i16 %a31, i16 %a31, i16 %b31)
  store i16 %r0 , ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 0 ), align 2
  store i16 %r1 , ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 1 ), align 2
  store i16 %r2 , ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 2 ), align 2
  store i16 %r3 , ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 3 ), align 2
  store i16 %r4 , ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 4 ), align 2
  store i16 %r5 , ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 5 ), align 2
  store i16 %r6 , ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 6 ), align 2
  store i16 %r7 , ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 7 ), align 2
  store i16 %r8 , ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 8 ), align 2
  store i16 %r9 , ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 9 ), align 2
  store i16 %r10, ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 10), align 2
  store i16 %r11, ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 11), align 2
  store i16 %r12, ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 12), align 2
  store i16 %r13, ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 13), align 2
  store i16 %r14, ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 14), align 2
  store i16 %r15, ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 15), align 2
  store i16 %r16, ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 16), align 2
  store i16 %r17, ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 17), align 2
  store i16 %r18, ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 18), align 2
  store i16 %r19, ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 19), align 2
  store i16 %r20, ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 20), align 2
  store i16 %r21, ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 21), align 2
  store i16 %r22, ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 22), align 2
  store i16 %r23, ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 23), align 2
  store i16 %r24, ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 24), align 2
  store i16 %r25, ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 25), align 2
  store i16 %r26, ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 26), align 2
  store i16 %r27, ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 27), align 2
  store i16 %r28, ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 28), align 2
  store i16 %r29, ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 29), align 2
  store i16 %r30, ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 30), align 2
  store i16 %r31, ptr getelementptr inbounds ([32 x i16], ptr @d16, i32 0, i64 31), align 2
  ret void
}

define void @fshr_v64i8() {
; SSE-LABEL: @fshr_v64i8(
; SSE-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @a8, align 1
; SSE-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr @b8, align 1
; SSE-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
; SSE-NEXT:    store <16 x i8> [[TMP3]], ptr @d8, align 1
; SSE-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
; SSE-NEXT:    [[TMP5:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
; SSE-NEXT:    [[TMP6:%.*]] = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> [[TMP4]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]])
; SSE-NEXT:    store <16 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 16), align 1
; SSE-NEXT:    [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
; SSE-NEXT:    [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
; SSE-NEXT:    [[TMP9:%.*]] = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> [[TMP7]], <16 x i8> [[TMP7]], <16 x i8> [[TMP8]])
; SSE-NEXT:    store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 32), align 1
; SSE-NEXT:    [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
; SSE-NEXT:    [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
; SSE-NEXT:    [[TMP12:%.*]] = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> [[TMP10]], <16 x i8> [[TMP10]], <16 x i8> [[TMP11]])
; SSE-NEXT:    store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 48), align 1
; SSE-NEXT:    ret void
;
; AVX-LABEL: @fshr_v64i8(
; AVX-NEXT:    [[TMP1:%.*]] = load <32 x i8>, ptr @a8, align 1
; AVX-NEXT:    [[TMP2:%.*]] = load <32 x i8>, ptr @b8, align 1
; AVX-NEXT:    [[TMP3:%.*]] = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> [[TMP1]], <32 x i8> [[TMP1]], <32 x i8> [[TMP2]])
; AVX-NEXT:    store <32 x i8> [[TMP3]], ptr @d8, align 1
; AVX-NEXT:    [[TMP4:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
; AVX-NEXT:    [[TMP5:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
; AVX-NEXT:    [[TMP6:%.*]] = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> [[TMP4]], <32 x i8> [[TMP4]], <32 x i8> [[TMP5]])
; AVX-NEXT:    store <32 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 32), align 1
; AVX-NEXT:    ret void
;
; AVX512-LABEL: @fshr_v64i8(
; AVX512-NEXT:    [[TMP1:%.*]] = load <64 x i8>, ptr @a8, align 1
; AVX512-NEXT:    [[TMP2:%.*]] = load <64 x i8>, ptr @b8, align 1
; AVX512-NEXT:    [[TMP3:%.*]] = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> [[TMP1]], <64 x i8> [[TMP1]], <64 x i8> [[TMP2]])
; AVX512-NEXT:    store <64 x i8> [[TMP3]], ptr @d8, align 1
; AVX512-NEXT:    ret void
;
; AVX512VBMI2-LABEL: @fshr_v64i8(
; AVX512VBMI2-NEXT:    [[TMP1:%.*]] = load <64 x i8>, ptr @a8, align 1
; AVX512VBMI2-NEXT:    [[TMP2:%.*]] = load <64 x i8>, ptr @b8, align 1
; AVX512VBMI2-NEXT:    [[TMP3:%.*]] = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> [[TMP1]], <64 x i8> [[TMP1]], <64 x i8> [[TMP2]])
; AVX512VBMI2-NEXT:    store <64 x i8> [[TMP3]], ptr @d8, align 1
; AVX512VBMI2-NEXT:    ret void
;
  %a0 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 0 ), align 1
  %a1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 1 ), align 1
  %a2 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 2 ), align 1
  %a3 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 3 ), align 1
  %a4 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 4 ), align 1
  %a5 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 5 ), align 1
  %a6 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 6 ), align 1
  %a7 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 7 ), align 1
  %a8 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 8 ), align 1
  %a9 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 9 ), align 1
  %a10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 10), align 1
  %a11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 11), align 1
  %a12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 12), align 1
  %a13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 13), align 1
  %a14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 14), align 1
  %a15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 15), align 1
  %a16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
  %a17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 17), align 1
  %a18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 18), align 1
  %a19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 19), align 1
  %a20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 20), align 1
  %a21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 21), align 1
  %a22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 22), align 1
  %a23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 23), align 1
  %a24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 24), align 1
  %a25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 25), align 1
  %a26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 26), align 1
  %a27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 27), align 1
  %a28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 28), align 1
  %a29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 29), align 1
  %a30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 30), align 1
  %a31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 31), align 1
  %a32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
  %a33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 33), align 1
  %a34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 34), align 1
  %a35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 35), align 1
  %a36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 36), align 1
  %a37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 37), align 1
  %a38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 38), align 1
  %a39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 39), align 1
  %a40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 40), align 1
  %a41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 41), align 1
  %a42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 42), align 1
42), align 1 741 %a43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 43), align 1 742 %a44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 44), align 1 743 %a45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 45), align 1 744 %a46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 46), align 1 745 %a47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 47), align 1 746 %a48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1 747 %a49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 49), align 1 748 %a50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 50), align 1 749 %a51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 51), align 1 750 %a52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 52), align 1 751 %a53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 53), align 1 752 %a54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 54), align 1 753 %a55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 55), align 1 754 %a56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 56), align 1 755 %a57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 57), align 1 756 %a58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 58), align 1 757 %a59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 59), align 1 758 %a60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 60), align 1 759 %a61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 61), align 1 760 %a62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 62), align 1 761 %a63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 63), align 1 762 %b0 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 0 ), align 1 763 %b1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 1 ), align 1 764 %b2 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 2 ), align 1 765 %b3 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 3 ), align 1 766 %b4 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 4 ), align 1 767 %b5 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 5 ), align 1 768 %b6 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 6 ), align 1 769 %b7 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 7 ), align 1 770 %b8 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 8 ), align 1 771 %b9 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 9 ), align 1 772 %b10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 10), align 1 773 %b11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 11), align 1 774 %b12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 12), align 1 775 %b13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 13), align 1 776 %b14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 14), align 1 777 %b15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 15), align 1 778 %b16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1 779 %b17 = load i8, ptr getelementptr inbounds ([64 x 
i8], ptr @b8, i32 0, i64 17), align 1 780 %b18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 18), align 1 781 %b19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 19), align 1 782 %b20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 20), align 1 783 %b21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 21), align 1 784 %b22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 22), align 1 785 %b23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 23), align 1 786 %b24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 24), align 1 787 %b25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 25), align 1 788 %b26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 26), align 1 789 %b27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 27), align 1 790 %b28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 28), align 1 791 %b29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 29), align 1 792 %b30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 30), align 1 793 %b31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 31), align 1 794 %b32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1 795 %b33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 33), align 1 796 %b34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 34), align 1 797 %b35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 35), align 1 798 %b36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 36), align 1 799 %b37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 37), align 1 800 %b38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 38), align 1 801 %b39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 39), align 1 802 %b40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 40), align 1 803 %b41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 41), align 1 804 %b42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 42), align 1 805 %b43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 43), align 1 806 %b44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 44), align 1 807 %b45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 45), align 1 808 %b46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 46), align 1 809 %b47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 47), align 1 810 %b48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1 811 %b49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 49), align 1 812 %b50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 50), align 1 813 %b51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 51), align 1 814 %b52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 52), align 1 815 %b53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 53), align 1 816 %b54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 54), align 1 817 %b55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 55), align 1 818 %b56 = load i8, 
ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 56), align 1 819 %b57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 57), align 1 820 %b58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 58), align 1 821 %b59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 59), align 1 822 %b60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 60), align 1 823 %b61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 61), align 1 824 %b62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 62), align 1 825 %b63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 63), align 1 826 %r0 = call i8 @llvm.fshr.i8(i8 %a0 , i8 %a0 , i8 %b0 ) 827 %r1 = call i8 @llvm.fshr.i8(i8 %a1 , i8 %a1 , i8 %b1 ) 828 %r2 = call i8 @llvm.fshr.i8(i8 %a2 , i8 %a2 , i8 %b2 ) 829 %r3 = call i8 @llvm.fshr.i8(i8 %a3 , i8 %a3 , i8 %b3 ) 830 %r4 = call i8 @llvm.fshr.i8(i8 %a4 , i8 %a4 , i8 %b4 ) 831 %r5 = call i8 @llvm.fshr.i8(i8 %a5 , i8 %a5 , i8 %b5 ) 832 %r6 = call i8 @llvm.fshr.i8(i8 %a6 , i8 %a6 , i8 %b6 ) 833 %r7 = call i8 @llvm.fshr.i8(i8 %a7 , i8 %a7 , i8 %b7 ) 834 %r8 = call i8 @llvm.fshr.i8(i8 %a8 , i8 %a8 , i8 %b8 ) 835 %r9 = call i8 @llvm.fshr.i8(i8 %a9 , i8 %a9 , i8 %b9 ) 836 %r10 = call i8 @llvm.fshr.i8(i8 %a10, i8 %a10, i8 %b10) 837 %r11 = call i8 @llvm.fshr.i8(i8 %a11, i8 %a11, i8 %b11) 838 %r12 = call i8 @llvm.fshr.i8(i8 %a12, i8 %a12, i8 %b12) 839 %r13 = call i8 @llvm.fshr.i8(i8 %a13, i8 %a13, i8 %b13) 840 %r14 = call i8 @llvm.fshr.i8(i8 %a14, i8 %a14, i8 %b14) 841 %r15 = call i8 @llvm.fshr.i8(i8 %a15, i8 %a15, i8 %b15) 842 %r16 = call i8 @llvm.fshr.i8(i8 %a16, i8 %a16, i8 %b16) 843 %r17 = call i8 @llvm.fshr.i8(i8 %a17, i8 %a17, i8 %b17) 844 %r18 = call i8 @llvm.fshr.i8(i8 %a18, i8 %a18, i8 %b18) 845 %r19 = call i8 @llvm.fshr.i8(i8 %a19, i8 %a19, i8 %b19) 846 %r20 = call i8 @llvm.fshr.i8(i8 %a20, i8 %a20, i8 %b20) 847 %r21 = call i8 @llvm.fshr.i8(i8 %a21, i8 %a21, i8 %b21) 848 %r22 = call i8 @llvm.fshr.i8(i8 %a22, i8 %a22, i8 %b22) 849 %r23 = call i8 @llvm.fshr.i8(i8 %a23, i8 %a23, i8 %b23) 850 %r24 = call i8 @llvm.fshr.i8(i8 %a24, i8 %a24, i8 %b24) 851 %r25 = call i8 @llvm.fshr.i8(i8 %a25, i8 %a25, i8 %b25) 852 %r26 = call i8 @llvm.fshr.i8(i8 %a26, i8 %a26, i8 %b26) 853 %r27 = call i8 @llvm.fshr.i8(i8 %a27, i8 %a27, i8 %b27) 854 %r28 = call i8 @llvm.fshr.i8(i8 %a28, i8 %a28, i8 %b28) 855 %r29 = call i8 @llvm.fshr.i8(i8 %a29, i8 %a29, i8 %b29) 856 %r30 = call i8 @llvm.fshr.i8(i8 %a30, i8 %a30, i8 %b30) 857 %r31 = call i8 @llvm.fshr.i8(i8 %a31, i8 %a31, i8 %b31) 858 %r32 = call i8 @llvm.fshr.i8(i8 %a32, i8 %a32, i8 %b32) 859 %r33 = call i8 @llvm.fshr.i8(i8 %a33, i8 %a33, i8 %b33) 860 %r34 = call i8 @llvm.fshr.i8(i8 %a34, i8 %a34, i8 %b34) 861 %r35 = call i8 @llvm.fshr.i8(i8 %a35, i8 %a35, i8 %b35) 862 %r36 = call i8 @llvm.fshr.i8(i8 %a36, i8 %a36, i8 %b36) 863 %r37 = call i8 @llvm.fshr.i8(i8 %a37, i8 %a37, i8 %b37) 864 %r38 = call i8 @llvm.fshr.i8(i8 %a38, i8 %a38, i8 %b38) 865 %r39 = call i8 @llvm.fshr.i8(i8 %a39, i8 %a39, i8 %b39) 866 %r40 = call i8 @llvm.fshr.i8(i8 %a40, i8 %a40, i8 %b40) 867 %r41 = call i8 @llvm.fshr.i8(i8 %a41, i8 %a41, i8 %b41) 868 %r42 = call i8 @llvm.fshr.i8(i8 %a42, i8 %a42, i8 %b42) 869 %r43 = call i8 @llvm.fshr.i8(i8 %a43, i8 %a43, i8 %b43) 870 %r44 = call i8 @llvm.fshr.i8(i8 %a44, i8 %a44, i8 %b44) 871 %r45 = call i8 @llvm.fshr.i8(i8 %a45, i8 %a45, i8 %b45) 872 %r46 = call i8 @llvm.fshr.i8(i8 %a46, i8 %a46, i8 %b46) 873 %r47 = call i8 
@llvm.fshr.i8(i8 %a47, i8 %a47, i8 %b47) 874 %r48 = call i8 @llvm.fshr.i8(i8 %a48, i8 %a48, i8 %b48) 875 %r49 = call i8 @llvm.fshr.i8(i8 %a49, i8 %a49, i8 %b49) 876 %r50 = call i8 @llvm.fshr.i8(i8 %a50, i8 %a50, i8 %b50) 877 %r51 = call i8 @llvm.fshr.i8(i8 %a51, i8 %a51, i8 %b51) 878 %r52 = call i8 @llvm.fshr.i8(i8 %a52, i8 %a52, i8 %b52) 879 %r53 = call i8 @llvm.fshr.i8(i8 %a53, i8 %a53, i8 %b53) 880 %r54 = call i8 @llvm.fshr.i8(i8 %a54, i8 %a54, i8 %b54) 881 %r55 = call i8 @llvm.fshr.i8(i8 %a55, i8 %a55, i8 %b55) 882 %r56 = call i8 @llvm.fshr.i8(i8 %a56, i8 %a56, i8 %b56) 883 %r57 = call i8 @llvm.fshr.i8(i8 %a57, i8 %a57, i8 %b57) 884 %r58 = call i8 @llvm.fshr.i8(i8 %a58, i8 %a58, i8 %b58) 885 %r59 = call i8 @llvm.fshr.i8(i8 %a59, i8 %a59, i8 %b59) 886 %r60 = call i8 @llvm.fshr.i8(i8 %a60, i8 %a60, i8 %b60) 887 %r61 = call i8 @llvm.fshr.i8(i8 %a61, i8 %a61, i8 %b61) 888 %r62 = call i8 @llvm.fshr.i8(i8 %a62, i8 %a62, i8 %b62) 889 %r63 = call i8 @llvm.fshr.i8(i8 %a63, i8 %a63, i8 %b63) 890 store i8 %r0 , ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 0 ), align 1 891 store i8 %r1 , ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 1 ), align 1 892 store i8 %r2 , ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 2 ), align 1 893 store i8 %r3 , ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 3 ), align 1 894 store i8 %r4 , ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 4 ), align 1 895 store i8 %r5 , ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 5 ), align 1 896 store i8 %r6 , ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 6 ), align 1 897 store i8 %r7 , ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 7 ), align 1 898 store i8 %r8 , ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 8 ), align 1 899 store i8 %r9 , ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 9 ), align 1 900 store i8 %r10, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 10), align 1 901 store i8 %r11, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 11), align 1 902 store i8 %r12, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 12), align 1 903 store i8 %r13, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 13), align 1 904 store i8 %r14, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 14), align 1 905 store i8 %r15, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 15), align 1 906 store i8 %r16, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 16), align 1 907 store i8 %r17, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 17), align 1 908 store i8 %r18, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 18), align 1 909 store i8 %r19, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 19), align 1 910 store i8 %r20, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 20), align 1 911 store i8 %r21, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 21), align 1 912 store i8 %r22, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 22), align 1 913 store i8 %r23, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 23), align 1 914 store i8 %r24, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 24), align 1 915 store i8 %r25, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 25), align 1 916 store i8 %r26, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 26), align 1 917 store i8 %r27, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 27), align 1 918 
store i8 %r28, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 28), align 1 919 store i8 %r29, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 29), align 1 920 store i8 %r30, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 30), align 1 921 store i8 %r31, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 31), align 1 922 store i8 %r32, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 32), align 1 923 store i8 %r33, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 33), align 1 924 store i8 %r34, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 34), align 1 925 store i8 %r35, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 35), align 1 926 store i8 %r36, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 36), align 1 927 store i8 %r37, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 37), align 1 928 store i8 %r38, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 38), align 1 929 store i8 %r39, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 39), align 1 930 store i8 %r40, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 40), align 1 931 store i8 %r41, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 41), align 1 932 store i8 %r42, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 42), align 1 933 store i8 %r43, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 43), align 1 934 store i8 %r44, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 44), align 1 935 store i8 %r45, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 45), align 1 936 store i8 %r46, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 46), align 1 937 store i8 %r47, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 47), align 1 938 store i8 %r48, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 48), align 1 939 store i8 %r49, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 49), align 1 940 store i8 %r50, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 50), align 1 941 store i8 %r51, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 51), align 1 942 store i8 %r52, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 52), align 1 943 store i8 %r53, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 53), align 1 944 store i8 %r54, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 54), align 1 945 store i8 %r55, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 55), align 1 946 store i8 %r56, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 56), align 1 947 store i8 %r57, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 57), align 1 948 store i8 %r58, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 58), align 1 949 store i8 %r59, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 59), align 1 950 store i8 %r60, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 60), align 1 951 store i8 %r61, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 61), align 1 952 store i8 %r62, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 62), align 1 953 store i8 %r63, ptr getelementptr inbounds ([64 x i8], ptr @d8, i32 0, i64 63), align 1 954 ret void 955} 956 957define void @fshr_v2i32() { 958; CHECK-LABEL: @fshr_v2i32( 959; CHECK-NEXT: [[A0:%.*]] = load i32, ptr @a32, align 4 960; CHECK-NEXT: [[A1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1), align 4 961; CHECK-NEXT: [[B0:%.*]] 
= load i32, ptr @b32, align 4 962; CHECK-NEXT: [[B1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1), align 4 963; CHECK-NEXT: [[R0:%.*]] = call i32 @llvm.fshr.i32(i32 [[A0]], i32 [[A0]], i32 [[B0]]) 964; CHECK-NEXT: [[R1:%.*]] = call i32 @llvm.fshr.i32(i32 [[A1]], i32 [[A1]], i32 [[B1]]) 965; CHECK-NEXT: store i32 [[R0]], ptr @d32, align 4 966; CHECK-NEXT: store i32 [[R1]], ptr getelementptr inbounds ([16 x i32], ptr @d32, i32 0, i64 1), align 4 967; CHECK-NEXT: ret void 968; 969; AVX512VBMI2-LABEL: @fshr_v2i32( 970; AVX512VBMI2-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr @a32, align 4 971; AVX512VBMI2-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr @b32, align 4 972; AVX512VBMI2-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> [[TMP2]]) 973; AVX512VBMI2-NEXT: store <2 x i32> [[TMP3]], ptr @d32, align 4 974; AVX512VBMI2-NEXT: ret void 975; 976 %a0 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 0 ), align 4 977 %a1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1 ), align 4 978 %b0 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 0 ), align 4 979 %b1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1 ), align 4 980 %r0 = call i32 @llvm.fshr.i32(i32 %a0 , i32 %a0 , i32 %b0 ) 981 %r1 = call i32 @llvm.fshr.i32(i32 %a1 , i32 %a1 , i32 %b1 ) 982 store i32 %r0 , ptr getelementptr inbounds ([16 x i32], ptr @d32, i32 0, i64 0 ), align 4 983 store i32 %r1 , ptr getelementptr inbounds ([16 x i32], ptr @d32, i32 0, i64 1 ), align 4 984 ret void 985} 986 987; PR63980 988define void @fshr_v2i32_uniformconst() { 989; CHECK-LABEL: @fshr_v2i32_uniformconst( 990; CHECK-NEXT: [[A0:%.*]] = load i32, ptr @a32, align 4 991; CHECK-NEXT: [[A1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1), align 4 992; CHECK-NEXT: [[R0:%.*]] = call i32 @llvm.fshr.i32(i32 [[A0]], i32 [[A0]], i32 1) 993; CHECK-NEXT: [[R1:%.*]] = call i32 @llvm.fshr.i32(i32 [[A1]], i32 [[A1]], i32 1) 994; CHECK-NEXT: store i32 [[R0]], ptr @d32, align 4 995; CHECK-NEXT: store i32 [[R1]], ptr getelementptr inbounds ([16 x i32], ptr @d32, i32 0, i64 1), align 4 996; CHECK-NEXT: ret void 997; 998; AVX512VBMI2-LABEL: @fshr_v2i32_uniformconst( 999; AVX512VBMI2-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr @a32, align 4 1000; AVX512VBMI2-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> splat (i32 1)) 1001; AVX512VBMI2-NEXT: store <2 x i32> [[TMP2]], ptr @d32, align 4 1002; AVX512VBMI2-NEXT: ret void 1003; 1004 %a0 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 0 ), align 4 1005 %a1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1 ), align 4 1006 %r0 = call i32 @llvm.fshr.i32(i32 %a0 , i32 %a0 , i32 1 ) 1007 %r1 = call i32 @llvm.fshr.i32(i32 %a1 , i32 %a1 , i32 1 ) 1008 store i32 %r0 , ptr getelementptr inbounds ([16 x i32], ptr @d32, i32 0, i64 0 ), align 4 1009 store i32 %r1 , ptr getelementptr inbounds ([16 x i32], ptr @d32, i32 0, i64 1 ), align 4 1010 ret void 1011} 1012