; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=bdver2 -passes=slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=XOP
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=bdver4 -passes=slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=XOP

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

@src64 = common global [4 x i64] zeroinitializer, align 32
@dst64 = common global [4 x i64] zeroinitializer, align 32
@src32 = common global [8 x i32] zeroinitializer, align 32
@dst32 = common global [8 x i32] zeroinitializer, align 32
@src16 = common global [16 x i16] zeroinitializer, align 32
@dst16 = common global [16 x i16] zeroinitializer, align 32
@src8 = common global [32 x i8] zeroinitializer, align 32
@dst8 = common global [32 x i8] zeroinitializer, align 32

declare i64 @llvm.bitreverse.i64(i64)
declare i32 @llvm.bitreverse.i32(i32)
declare i16 @llvm.bitreverse.i16(i16)
declare i8 @llvm.bitreverse.i8(i8)
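
; Two consecutive i64 elements fit in one 128-bit register on every tested
; configuration, so all RUN lines share a single set of assertions under the
; common CHECK prefix.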
define void @bitreverse_2i64() #0 {
; CHECK-LABEL: @bitreverse_2i64(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> [[TMP1]])
; CHECK-NEXT:    store <2 x i64> [[TMP2]], ptr @dst64, align 8
; CHECK-NEXT:    ret void
;
  %ld0 = load i64, ptr @src64, align 8
  %ld1 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
  %bitreverse0 = call i64 @llvm.bitreverse.i64(i64 %ld0)
  %bitreverse1 = call i64 @llvm.bitreverse.i64(i64 %ld1)
  store i64 %bitreverse0, ptr @dst64, align 8
  store i64 %bitreverse1, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
  ret void
}
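
; Four i64 elements need 256 bits. SSE targets should split the work into two
; <2 x i64> halves, while AVX and XOP targets should emit a single
; @llvm.bitreverse.v4i64 call.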
define void @bitreverse_4i64() #0 {
; SSE-LABEL: @bitreverse_4i64(
; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 4
; SSE-NEXT:    [[TMP2:%.*]] = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> [[TMP1]])
; SSE-NEXT:    store <2 x i64> [[TMP2]], ptr @dst64, align 4
; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
; SSE-NEXT:    [[TMP4:%.*]] = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> [[TMP3]])
; SSE-NEXT:    store <2 x i64> [[TMP4]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
; SSE-NEXT:    ret void
;
; AVX-LABEL: @bitreverse_4i64(
; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 4
; AVX-NEXT:    [[TMP2:%.*]] = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> [[TMP1]])
; AVX-NEXT:    store <4 x i64> [[TMP2]], ptr @dst64, align 4
; AVX-NEXT:    ret void
;
; XOP-LABEL: @bitreverse_4i64(
; XOP-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 4
; XOP-NEXT:    [[TMP2:%.*]] = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> [[TMP1]])
; XOP-NEXT:    store <4 x i64> [[TMP2]], ptr @dst64, align 4
; XOP-NEXT:    ret void
;
  %ld0 = load i64, ptr @src64, align 4
  %ld1 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 1), align 4
  %ld2 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
  %ld3 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 3), align 4
  %bitreverse0 = call i64 @llvm.bitreverse.i64(i64 %ld0)
  %bitreverse1 = call i64 @llvm.bitreverse.i64(i64 %ld1)
  %bitreverse2 = call i64 @llvm.bitreverse.i64(i64 %ld2)
  %bitreverse3 = call i64 @llvm.bitreverse.i64(i64 %ld3)
  store i64 %bitreverse0, ptr @dst64, align 4
  store i64 %bitreverse1, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 1), align 4
  store i64 %bitreverse2, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
  store i64 %bitreverse3, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 3), align 4
  ret void
}
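
; Four i32 elements fill exactly one 128-bit vector, so every target should
; produce a single @llvm.bitreverse.v4i32 call.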
define void @bitreverse_4i32() #0 {
; CHECK-LABEL: @bitreverse_4i32(
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 4
; CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> [[TMP1]])
; CHECK-NEXT:    store <4 x i32> [[TMP2]], ptr @dst32, align 4
; CHECK-NEXT:    ret void
;
  %ld0 = load i32, ptr @src32, align 4
  %ld1 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 4
  %ld2 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 4
  %ld3 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 4
  %bitreverse0 = call i32 @llvm.bitreverse.i32(i32 %ld0)
  %bitreverse1 = call i32 @llvm.bitreverse.i32(i32 %ld1)
  %bitreverse2 = call i32 @llvm.bitreverse.i32(i32 %ld2)
  %bitreverse3 = call i32 @llvm.bitreverse.i32(i32 %ld3)
  store i32 %bitreverse0, ptr @dst32, align 4
  store i32 %bitreverse1, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 4
  store i32 %bitreverse2, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 4
  store i32 %bitreverse3, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 4
  ret void
}
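
; Eight i32 elements span 256 bits. Expect two <4 x i32> ops on SSE and one
; <8 x i32> op on AVX and XOP.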
define void @bitreverse_8i32() #0 {
; SSE-LABEL: @bitreverse_8i32(
; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 2
; SSE-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> [[TMP1]])
; SSE-NEXT:    store <4 x i32> [[TMP2]], ptr @dst32, align 2
; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 4), align 2
; SSE-NEXT:    [[TMP4:%.*]] = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> [[TMP3]])
; SSE-NEXT:    store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 4), align 2
; SSE-NEXT:    ret void
;
; AVX-LABEL: @bitreverse_8i32(
; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @src32, align 2
; AVX-NEXT:    [[TMP2:%.*]] = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> [[TMP1]])
; AVX-NEXT:    store <8 x i32> [[TMP2]], ptr @dst32, align 2
; AVX-NEXT:    ret void
;
; XOP-LABEL: @bitreverse_8i32(
; XOP-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @src32, align 2
; XOP-NEXT:    [[TMP2:%.*]] = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> [[TMP1]])
; XOP-NEXT:    store <8 x i32> [[TMP2]], ptr @dst32, align 2
; XOP-NEXT:    ret void
;
  %ld0 = load i32, ptr @src32, align 2
  %ld1 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 2
  %ld2 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 2
  %ld3 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 2
  %ld4 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 4), align 2
  %ld5 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 5), align 2
  %ld6 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 6), align 2
  %ld7 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 7), align 2
  %bitreverse0 = call i32 @llvm.bitreverse.i32(i32 %ld0)
  %bitreverse1 = call i32 @llvm.bitreverse.i32(i32 %ld1)
  %bitreverse2 = call i32 @llvm.bitreverse.i32(i32 %ld2)
  %bitreverse3 = call i32 @llvm.bitreverse.i32(i32 %ld3)
  %bitreverse4 = call i32 @llvm.bitreverse.i32(i32 %ld4)
  %bitreverse5 = call i32 @llvm.bitreverse.i32(i32 %ld5)
  %bitreverse6 = call i32 @llvm.bitreverse.i32(i32 %ld6)
  %bitreverse7 = call i32 @llvm.bitreverse.i32(i32 %ld7)
  store i32 %bitreverse0, ptr @dst32, align 2
  store i32 %bitreverse1, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 2
  store i32 %bitreverse2, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 2
  store i32 %bitreverse3, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 2
  store i32 %bitreverse4, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 4), align 2
  store i32 %bitreverse5, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 5), align 2
  store i32 %bitreverse6, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 6), align 2
  store i32 %bitreverse7, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 7), align 2
  ret void
}
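
; Eight i16 elements are one 128-bit vector; a single @llvm.bitreverse.v8i16
; call is expected on every target.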
define void @bitreverse_8i16() #0 {
; CHECK-LABEL: @bitreverse_8i16(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 2
; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> [[TMP1]])
; CHECK-NEXT:    store <8 x i16> [[TMP2]], ptr @dst16, align 2
; CHECK-NEXT:    ret void
;
  %ld0 = load i16, ptr @src16, align 2
  %ld1 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 1), align 2
  %ld2 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 2), align 2
  %ld3 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 3), align 2
  %ld4 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 4), align 2
  %ld5 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 5), align 2
  %ld6 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 6), align 2
  %ld7 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 7), align 2
  %bitreverse0 = call i16 @llvm.bitreverse.i16(i16 %ld0)
  %bitreverse1 = call i16 @llvm.bitreverse.i16(i16 %ld1)
  %bitreverse2 = call i16 @llvm.bitreverse.i16(i16 %ld2)
  %bitreverse3 = call i16 @llvm.bitreverse.i16(i16 %ld3)
  %bitreverse4 = call i16 @llvm.bitreverse.i16(i16 %ld4)
  %bitreverse5 = call i16 @llvm.bitreverse.i16(i16 %ld5)
  %bitreverse6 = call i16 @llvm.bitreverse.i16(i16 %ld6)
  %bitreverse7 = call i16 @llvm.bitreverse.i16(i16 %ld7)
  store i16 %bitreverse0, ptr @dst16, align 2
  store i16 %bitreverse1, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 1), align 2
  store i16 %bitreverse2, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 2), align 2
  store i16 %bitreverse3, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 3), align 2
  store i16 %bitreverse4, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 4), align 2
  store i16 %bitreverse5, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 5), align 2
  store i16 %bitreverse6, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 6), align 2
  store i16 %bitreverse7, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 7), align 2
  ret void
}
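
; Sixteen i16 elements span 256 bits. Expect two <8 x i16> halves on SSE and a
; single <16 x i16> op on AVX and XOP.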
define void @bitreverse_16i16() #0 {
; SSE-LABEL: @bitreverse_16i16(
; SSE-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 2
; SSE-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> [[TMP1]])
; SSE-NEXT:    store <8 x i16> [[TMP2]], ptr @dst16, align 2
; SSE-NEXT:    [[TMP3:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 8), align 2
; SSE-NEXT:    [[TMP4:%.*]] = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> [[TMP3]])
; SSE-NEXT:    store <8 x i16> [[TMP4]], ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 8), align 2
; SSE-NEXT:    ret void
;
; AVX-LABEL: @bitreverse_16i16(
; AVX-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @src16, align 2
; AVX-NEXT:    [[TMP2:%.*]] = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> [[TMP1]])
; AVX-NEXT:    store <16 x i16> [[TMP2]], ptr @dst16, align 2
; AVX-NEXT:    ret void
;
; XOP-LABEL: @bitreverse_16i16(
; XOP-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @src16, align 2
; XOP-NEXT:    [[TMP2:%.*]] = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> [[TMP1]])
; XOP-NEXT:    store <16 x i16> [[TMP2]], ptr @dst16, align 2
; XOP-NEXT:    ret void
;
  %ld0 = load i16, ptr @src16, align 2
  %ld1 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 1), align 2
  %ld2 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 2), align 2
  %ld3 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 3), align 2
  %ld4 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 4), align 2
  %ld5 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 5), align 2
  %ld6 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 6), align 2
  %ld7 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 7), align 2
  %ld8 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 8), align 2
  %ld9 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 9), align 2
  %ld10 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 10), align 2
  %ld11 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 11), align 2
  %ld12 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 12), align 2
  %ld13 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 13), align 2
  %ld14 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 14), align 2
  %ld15 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 15), align 2
  %bitreverse0 = call i16 @llvm.bitreverse.i16(i16 %ld0)
  %bitreverse1 = call i16 @llvm.bitreverse.i16(i16 %ld1)
  %bitreverse2 = call i16 @llvm.bitreverse.i16(i16 %ld2)
  %bitreverse3 = call i16 @llvm.bitreverse.i16(i16 %ld3)
  %bitreverse4 = call i16 @llvm.bitreverse.i16(i16 %ld4)
  %bitreverse5 = call i16 @llvm.bitreverse.i16(i16 %ld5)
  %bitreverse6 = call i16 @llvm.bitreverse.i16(i16 %ld6)
  %bitreverse7 = call i16 @llvm.bitreverse.i16(i16 %ld7)
  %bitreverse8 = call i16 @llvm.bitreverse.i16(i16 %ld8)
  %bitreverse9 = call i16 @llvm.bitreverse.i16(i16 %ld9)
  %bitreverse10 = call i16 @llvm.bitreverse.i16(i16 %ld10)
  %bitreverse11 = call i16 @llvm.bitreverse.i16(i16 %ld11)
  %bitreverse12 = call i16 @llvm.bitreverse.i16(i16 %ld12)
  %bitreverse13 = call i16 @llvm.bitreverse.i16(i16 %ld13)
  %bitreverse14 = call i16 @llvm.bitreverse.i16(i16 %ld14)
  %bitreverse15 = call i16 @llvm.bitreverse.i16(i16 %ld15)
  store i16 %bitreverse0 , ptr @dst16, align 2
  store i16 %bitreverse1 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 1), align 2
  store i16 %bitreverse2 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 2), align 2
  store i16 %bitreverse3 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 3), align 2
  store i16 %bitreverse4 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 4), align 2
  store i16 %bitreverse5 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 5), align 2
  store i16 %bitreverse6 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 6), align 2
  store i16 %bitreverse7 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 7), align 2
  store i16 %bitreverse8 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 8), align 2
  store i16 %bitreverse9 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 9), align 2
  store i16 %bitreverse10, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 10), align 2
  store i16 %bitreverse11, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 11), align 2
  store i16 %bitreverse12, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 12), align 2
  store i16 %bitreverse13, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 13), align 2
  store i16 %bitreverse14, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 14), align 2
  store i16 %bitreverse15, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 15), align 2
  ret void
}
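
; Sixteen i8 elements are one 128-bit vector; all targets should vectorize to
; a single @llvm.bitreverse.v16i8 call.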
define void @bitreverse_16i8() #0 {
; CHECK-LABEL: @bitreverse_16i8(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @src8, align 1
; CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> [[TMP1]])
; CHECK-NEXT:    store <16 x i8> [[TMP2]], ptr @dst8, align 1
; CHECK-NEXT:    ret void
;
  %ld0 = load i8, ptr @src8, align 1
  %ld1 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 1), align 1
  %ld2 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 2), align 1
  %ld3 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 3), align 1
  %ld4 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 4), align 1
  %ld5 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 5), align 1
  %ld6 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 6), align 1
  %ld7 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 7), align 1
  %ld8 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 8), align 1
  %ld9 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 9), align 1
  %ld10 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 10), align 1
  %ld11 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 11), align 1
  %ld12 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 12), align 1
  %ld13 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 13), align 1
  %ld14 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 14), align 1
  %ld15 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 15), align 1
  %bitreverse0 = call i8 @llvm.bitreverse.i8(i8 %ld0)
  %bitreverse1 = call i8 @llvm.bitreverse.i8(i8 %ld1)
  %bitreverse2 = call i8 @llvm.bitreverse.i8(i8 %ld2)
  %bitreverse3 = call i8 @llvm.bitreverse.i8(i8 %ld3)
  %bitreverse4 = call i8 @llvm.bitreverse.i8(i8 %ld4)
  %bitreverse5 = call i8 @llvm.bitreverse.i8(i8 %ld5)
  %bitreverse6 = call i8 @llvm.bitreverse.i8(i8 %ld6)
  %bitreverse7 = call i8 @llvm.bitreverse.i8(i8 %ld7)
  %bitreverse8 = call i8 @llvm.bitreverse.i8(i8 %ld8)
  %bitreverse9 = call i8 @llvm.bitreverse.i8(i8 %ld9)
  %bitreverse10 = call i8 @llvm.bitreverse.i8(i8 %ld10)
  %bitreverse11 = call i8 @llvm.bitreverse.i8(i8 %ld11)
  %bitreverse12 = call i8 @llvm.bitreverse.i8(i8 %ld12)
  %bitreverse13 = call i8 @llvm.bitreverse.i8(i8 %ld13)
  %bitreverse14 = call i8 @llvm.bitreverse.i8(i8 %ld14)
  %bitreverse15 = call i8 @llvm.bitreverse.i8(i8 %ld15)
  store i8 %bitreverse0 , ptr @dst8, align 1
  store i8 %bitreverse1 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 1), align 1
  store i8 %bitreverse2 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 2), align 1
  store i8 %bitreverse3 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 3), align 1
  store i8 %bitreverse4 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 4), align 1
  store i8 %bitreverse5 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 5), align 1
  store i8 %bitreverse6 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 6), align 1
  store i8 %bitreverse7 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 7), align 1
  store i8 %bitreverse8 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 8), align 1
  store i8 %bitreverse9 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 9), align 1
  store i8 %bitreverse10, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 10), align 1
  store i8 %bitreverse11, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 11), align 1
  store i8 %bitreverse12, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 12), align 1
  store i8 %bitreverse13, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 13), align 1
  store i8 %bitreverse14, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 14), align 1
  store i8 %bitreverse15, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 15), align 1
  ret void
}
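
; Thirty-two i8 elements span 256 bits. Expect two <16 x i8> halves on SSE and
; a single <32 x i8> op on AVX and XOP.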
define void @bitreverse_32i8() #0 {
; SSE-LABEL: @bitreverse_32i8(
; SSE-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @src8, align 1
; SSE-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> [[TMP1]])
; SSE-NEXT:    store <16 x i8> [[TMP2]], ptr @dst8, align 1
; SSE-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 16), align 1
; SSE-NEXT:    [[TMP4:%.*]] = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> [[TMP3]])
; SSE-NEXT:    store <16 x i8> [[TMP4]], ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 16), align 1
; SSE-NEXT:    ret void
;
; AVX-LABEL: @bitreverse_32i8(
; AVX-NEXT:    [[TMP1:%.*]] = load <32 x i8>, ptr @src8, align 1
; AVX-NEXT:    [[TMP2:%.*]] = call <32 x i8> @llvm.bitreverse.v32i8(<32 x i8> [[TMP1]])
; AVX-NEXT:    store <32 x i8> [[TMP2]], ptr @dst8, align 1
; AVX-NEXT:    ret void
;
; XOP-LABEL: @bitreverse_32i8(
; XOP-NEXT:    [[TMP1:%.*]] = load <32 x i8>, ptr @src8, align 1
; XOP-NEXT:    [[TMP2:%.*]] = call <32 x i8> @llvm.bitreverse.v32i8(<32 x i8> [[TMP1]])
; XOP-NEXT:    store <32 x i8> [[TMP2]], ptr @dst8, align 1
; XOP-NEXT:    ret void
;
  %ld0 = load i8, ptr @src8, align 1
  %ld1 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 1), align 1
  %ld2 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 2), align 1
  %ld3 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 3), align 1
  %ld4 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 4), align 1
  %ld5 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 5), align 1
  %ld6 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 6), align 1
  %ld7 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 7), align 1
  %ld8 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 8), align 1
  %ld9 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 9), align 1
  %ld10 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 10), align 1
  %ld11 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 11), align 1
  %ld12 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 12), align 1
  %ld13 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 13), align 1
  %ld14 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 14), align 1
  %ld15 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 15), align 1
  %ld16 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 16), align 1
  %ld17 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 17), align 1
  %ld18 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 18), align 1
  %ld19 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 19), align 1
  %ld20 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 20), align 1
  %ld21 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 21), align 1
  %ld22 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 22), align 1
  %ld23 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 23), align 1
  %ld24 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 24), align 1
  %ld25 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 25), align 1
  %ld26 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 26), align 1
  %ld27 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 27), align 1
  %ld28 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 28), align 1
  %ld29 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 29), align 1
  %ld30 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 30), align 1
  %ld31 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 31), align 1
  %bitreverse0 = call i8 @llvm.bitreverse.i8(i8 %ld0)
  %bitreverse1 = call i8 @llvm.bitreverse.i8(i8 %ld1)
  %bitreverse2 = call i8 @llvm.bitreverse.i8(i8 %ld2)
  %bitreverse3 = call i8 @llvm.bitreverse.i8(i8 %ld3)
  %bitreverse4 = call i8 @llvm.bitreverse.i8(i8 %ld4)
  %bitreverse5 = call i8 @llvm.bitreverse.i8(i8 %ld5)
  %bitreverse6 = call i8 @llvm.bitreverse.i8(i8 %ld6)
  %bitreverse7 = call i8 @llvm.bitreverse.i8(i8 %ld7)
  %bitreverse8 = call i8 @llvm.bitreverse.i8(i8 %ld8)
  %bitreverse9 = call i8 @llvm.bitreverse.i8(i8 %ld9)
  %bitreverse10 = call i8 @llvm.bitreverse.i8(i8 %ld10)
  %bitreverse11 = call i8 @llvm.bitreverse.i8(i8 %ld11)
  %bitreverse12 = call i8 @llvm.bitreverse.i8(i8 %ld12)
  %bitreverse13 = call i8 @llvm.bitreverse.i8(i8 %ld13)
  %bitreverse14 = call i8 @llvm.bitreverse.i8(i8 %ld14)
  %bitreverse15 = call i8 @llvm.bitreverse.i8(i8 %ld15)
  %bitreverse16 = call i8 @llvm.bitreverse.i8(i8 %ld16)
  %bitreverse17 = call i8 @llvm.bitreverse.i8(i8 %ld17)
  %bitreverse18 = call i8 @llvm.bitreverse.i8(i8 %ld18)
  %bitreverse19 = call i8 @llvm.bitreverse.i8(i8 %ld19)
  %bitreverse20 = call i8 @llvm.bitreverse.i8(i8 %ld20)
  %bitreverse21 = call i8 @llvm.bitreverse.i8(i8 %ld21)
  %bitreverse22 = call i8 @llvm.bitreverse.i8(i8 %ld22)
  %bitreverse23 = call i8 @llvm.bitreverse.i8(i8 %ld23)
  %bitreverse24 = call i8 @llvm.bitreverse.i8(i8 %ld24)
  %bitreverse25 = call i8 @llvm.bitreverse.i8(i8 %ld25)
  %bitreverse26 = call i8 @llvm.bitreverse.i8(i8 %ld26)
  %bitreverse27 = call i8 @llvm.bitreverse.i8(i8 %ld27)
  %bitreverse28 = call i8 @llvm.bitreverse.i8(i8 %ld28)
  %bitreverse29 = call i8 @llvm.bitreverse.i8(i8 %ld29)
  %bitreverse30 = call i8 @llvm.bitreverse.i8(i8 %ld30)
  %bitreverse31 = call i8 @llvm.bitreverse.i8(i8 %ld31)
  store i8 %bitreverse0 , ptr @dst8, align 1
  store i8 %bitreverse1 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 1), align 1
  store i8 %bitreverse2 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 2), align 1
  store i8 %bitreverse3 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 3), align 1
  store i8 %bitreverse4 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 4), align 1
  store i8 %bitreverse5 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 5), align 1
  store i8 %bitreverse6 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 6), align 1
  store i8 %bitreverse7 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 7), align 1
  store i8 %bitreverse8 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 8), align 1
  store i8 %bitreverse9 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 9), align 1
  store i8 %bitreverse10, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 10), align 1
  store i8 %bitreverse11, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 11), align 1
  store i8 %bitreverse12, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 12), align 1
  store i8 %bitreverse13, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 13), align 1
  store i8 %bitreverse14, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 14), align 1
  store i8 %bitreverse15, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 15), align 1
  store i8 %bitreverse16, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 16), align 1
  store i8 %bitreverse17, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 17), align 1
  store i8 %bitreverse18, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 18), align 1
  store i8 %bitreverse19, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 19), align 1
  store i8 %bitreverse20, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 20), align 1
  store i8 %bitreverse21, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 21), align 1
  store i8 %bitreverse22, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 22), align 1
  store i8 %bitreverse23, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 23), align 1
  store i8 %bitreverse24, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 24), align 1
  store i8 %bitreverse25, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 25), align 1
  store i8 %bitreverse26, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 26), align 1
  store i8 %bitreverse27, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 27), align 1
  store i8 %bitreverse28, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 28), align 1
  store i8 %bitreverse29, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 29), align 1
  store i8 %bitreverse30, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 30), align 1
  store i8 %bitreverse31, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 31), align 1
  ret void
}

attributes #0 = { nounwind }