; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=icelake-server -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

@src64 = common global [4 x i64] zeroinitializer, align 32
@dst64 = common global [4 x i64] zeroinitializer, align 32
@src32 = common global [8 x i32] zeroinitializer, align 32
@dst32 = common global [8 x i32] zeroinitializer, align 32
@src16 = common global [16 x i16] zeroinitializer, align 32
@dst16 = common global [16 x i16] zeroinitializer, align 32
@src8 = common global [32 x i8] zeroinitializer, align 32
@dst8 = common global [32 x i8] zeroinitializer, align 32

declare i64 @llvm.ctlz.i64(i64, i1)
declare i32 @llvm.ctlz.i32(i32, i1)
declare i16 @llvm.ctlz.i16(i16, i1)
declare i8 @llvm.ctlz.i8(i8, i1)

;
; CTLZ
;

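; The second i1 operand of llvm.ctlz is the is_zero_poison flag; the tests in
; this section pass false (written "i1 0" in the scalar IR, printed "i1 false"
; in the checks), so a zero input is well defined. Per the CHECK lines below,
; 64-bit ctlz only vectorizes on AVX512 (where AVX512CD's vplzcntq is
; available); SSE, AVX1 and AVX2 keep the scalar calls.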
define void @ctlz_2i64() #0 {
; SSE-LABEL: @ctlz_2i64(
; SSE-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 8
; SSE-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
; SSE-NEXT:    [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 false)
; SSE-NEXT:    [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 false)
; SSE-NEXT:    store i64 [[CTLZ0]], ptr @dst64, align 8
; SSE-NEXT:    store i64 [[CTLZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
; SSE-NEXT:    ret void
;
; AVX1-LABEL: @ctlz_2i64(
; AVX1-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 8
; AVX1-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
; AVX1-NEXT:    [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 false)
; AVX1-NEXT:    [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 false)
; AVX1-NEXT:    store i64 [[CTLZ0]], ptr @dst64, align 8
; AVX1-NEXT:    store i64 [[CTLZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
; AVX1-NEXT:    ret void
;
; AVX2-LABEL: @ctlz_2i64(
; AVX2-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 8
; AVX2-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
; AVX2-NEXT:    [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 false)
; AVX2-NEXT:    [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 false)
; AVX2-NEXT:    store i64 [[CTLZ0]], ptr @dst64, align 8
; AVX2-NEXT:    store i64 [[CTLZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
; AVX2-NEXT:    ret void
;
; AVX512-LABEL: @ctlz_2i64(
; AVX512-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 8
; AVX512-NEXT:    [[TMP2:%.*]] = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> [[TMP1]], i1 false)
; AVX512-NEXT:    store <2 x i64> [[TMP2]], ptr @dst64, align 8
; AVX512-NEXT:    ret void
;
  %ld0 = load i64, ptr @src64, align 8
  %ld1 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
  %ctlz0 = call i64 @llvm.ctlz.i64(i64 %ld0, i1 0)
  %ctlz1 = call i64 @llvm.ctlz.i64(i64 %ld1, i1 0)
  store i64 %ctlz0, ptr @dst64, align 8
  store i64 %ctlz1, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
  ret void
}

define void @ctlz_4i64() #0 {
; SSE-LABEL: @ctlz_4i64(
; SSE-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 4
; SSE-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 1), align 4
; SSE-NEXT:    [[LD2:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
; SSE-NEXT:    [[LD3:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 3), align 4
; SSE-NEXT:    [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 false)
; SSE-NEXT:    [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 false)
; SSE-NEXT:    [[CTLZ2:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD2]], i1 false)
; SSE-NEXT:    [[CTLZ3:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD3]], i1 false)
; SSE-NEXT:    store i64 [[CTLZ0]], ptr @dst64, align 4
; SSE-NEXT:    store i64 [[CTLZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 1), align 4
; SSE-NEXT:    store i64 [[CTLZ2]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
; SSE-NEXT:    store i64 [[CTLZ3]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 3), align 4
; SSE-NEXT:    ret void
;
; AVX1-LABEL: @ctlz_4i64(
; AVX1-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 4
; AVX1-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 1), align 4
; AVX1-NEXT:    [[LD2:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
; AVX1-NEXT:    [[LD3:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 3), align 4
; AVX1-NEXT:    [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 false)
; AVX1-NEXT:    [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 false)
; AVX1-NEXT:    [[CTLZ2:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD2]], i1 false)
; AVX1-NEXT:    [[CTLZ3:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD3]], i1 false)
; AVX1-NEXT:    store i64 [[CTLZ0]], ptr @dst64, align 4
; AVX1-NEXT:    store i64 [[CTLZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 1), align 4
; AVX1-NEXT:    store i64 [[CTLZ2]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
; AVX1-NEXT:    store i64 [[CTLZ3]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 3), align 4
; AVX1-NEXT:    ret void
;
; AVX2-LABEL: @ctlz_4i64(
; AVX2-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 4
; AVX2-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 1), align 4
; AVX2-NEXT:    [[LD2:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
; AVX2-NEXT:    [[LD3:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 3), align 4
; AVX2-NEXT:    [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 false)
; AVX2-NEXT:    [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 false)
; AVX2-NEXT:    [[CTLZ2:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD2]], i1 false)
; AVX2-NEXT:    [[CTLZ3:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD3]], i1 false)
; AVX2-NEXT:    store i64 [[CTLZ0]], ptr @dst64, align 4
; AVX2-NEXT:    store i64 [[CTLZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 1), align 4
; AVX2-NEXT:    store i64 [[CTLZ2]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
; AVX2-NEXT:    store i64 [[CTLZ3]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 3), align 4
; AVX2-NEXT:    ret void
;
; AVX512-LABEL: @ctlz_4i64(
; AVX512-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 4
; AVX512-NEXT:    [[TMP2:%.*]] = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> [[TMP1]], i1 false)
; AVX512-NEXT:    store <4 x i64> [[TMP2]], ptr @dst64, align 4
; AVX512-NEXT:    ret void
;
  %ld0 = load i64, ptr @src64, align 4
  %ld1 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 1), align 4
  %ld2 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
  %ld3 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 3), align 4
  %ctlz0 = call i64 @llvm.ctlz.i64(i64 %ld0, i1 0)
  %ctlz1 = call i64 @llvm.ctlz.i64(i64 %ld1, i1 0)
  %ctlz2 = call i64 @llvm.ctlz.i64(i64 %ld2, i1 0)
  %ctlz3 = call i64 @llvm.ctlz.i64(i64 %ld3, i1 0)
  store i64 %ctlz0, ptr @dst64, align 4
  store i64 %ctlz1, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 1), align 4
  store i64 %ctlz2, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
  store i64 %ctlz3, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 3), align 4
  ret void
}

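; 32-bit elements: per the CHECK lines, <4 x i32> ctlz is formed on AVX2 and
; AVX512 but stays scalar on SSE and AVX1, presumably because the
; shuffle-based vector ctlz expansion only beats scalar lzcnt/bsr in the cost
; model from AVX2 onwards.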
define void @ctlz_4i32() #0 {
; SSE-LABEL: @ctlz_4i32(
; SSE-NEXT:    [[LD0:%.*]] = load i32, ptr @src32, align 4
; SSE-NEXT:    [[LD1:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 4
; SSE-NEXT:    [[LD2:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 4
; SSE-NEXT:    [[LD3:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 4
; SSE-NEXT:    [[CTLZ0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD0]], i1 false)
; SSE-NEXT:    [[CTLZ1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD1]], i1 false)
; SSE-NEXT:    [[CTLZ2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD2]], i1 false)
; SSE-NEXT:    [[CTLZ3:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD3]], i1 false)
; SSE-NEXT:    store i32 [[CTLZ0]], ptr @dst32, align 4
; SSE-NEXT:    store i32 [[CTLZ1]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 4
; SSE-NEXT:    store i32 [[CTLZ2]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 4
; SSE-NEXT:    store i32 [[CTLZ3]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 4
; SSE-NEXT:    ret void
;
; AVX1-LABEL: @ctlz_4i32(
; AVX1-NEXT:    [[LD0:%.*]] = load i32, ptr @src32, align 4
; AVX1-NEXT:    [[LD1:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 4
; AVX1-NEXT:    [[LD2:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 4
; AVX1-NEXT:    [[LD3:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 4
; AVX1-NEXT:    [[CTLZ0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD0]], i1 false)
; AVX1-NEXT:    [[CTLZ1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD1]], i1 false)
; AVX1-NEXT:    [[CTLZ2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD2]], i1 false)
; AVX1-NEXT:    [[CTLZ3:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD3]], i1 false)
; AVX1-NEXT:    store i32 [[CTLZ0]], ptr @dst32, align 4
; AVX1-NEXT:    store i32 [[CTLZ1]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 4
; AVX1-NEXT:    store i32 [[CTLZ2]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 4
; AVX1-NEXT:    store i32 [[CTLZ3]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 4
; AVX1-NEXT:    ret void
;
; AVX2-LABEL: @ctlz_4i32(
; AVX2-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 4
; AVX2-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[TMP1]], i1 false)
; AVX2-NEXT:    store <4 x i32> [[TMP2]], ptr @dst32, align 4
; AVX2-NEXT:    ret void
;
; AVX512-LABEL: @ctlz_4i32(
; AVX512-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 4
; AVX512-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[TMP1]], i1 false)
; AVX512-NEXT:    store <4 x i32> [[TMP2]], ptr @dst32, align 4
; AVX512-NEXT:    ret void
;
  %ld0 = load i32, ptr @src32, align 4
  %ld1 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 4
  %ld2 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 4
  %ld3 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 4
  %ctlz0 = call i32 @llvm.ctlz.i32(i32 %ld0, i1 0)
  %ctlz1 = call i32 @llvm.ctlz.i32(i32 %ld1, i1 0)
  %ctlz2 = call i32 @llvm.ctlz.i32(i32 %ld2, i1 0)
  %ctlz3 = call i32 @llvm.ctlz.i32(i32 %ld3, i1 0)
  store i32 %ctlz0, ptr @dst32, align 4
  store i32 %ctlz1, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 4
  store i32 %ctlz2, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 4
  store i32 %ctlz3, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 4
  ret void
}

define void @ctlz_8i32() #0 {
; SSE-LABEL: @ctlz_8i32(
; SSE-NEXT:    [[LD0:%.*]] = load i32, ptr @src32, align 2
; SSE-NEXT:    [[LD1:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 2
; SSE-NEXT:    [[LD2:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 2
; SSE-NEXT:    [[LD3:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 2
; SSE-NEXT:    [[LD4:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 4), align 2
; SSE-NEXT:    [[LD5:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 5), align 2
; SSE-NEXT:    [[LD6:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 6), align 2
; SSE-NEXT:    [[LD7:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 7), align 2
; SSE-NEXT:    [[CTLZ0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD0]], i1 false)
; SSE-NEXT:    [[CTLZ1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD1]], i1 false)
; SSE-NEXT:    [[CTLZ2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD2]], i1 false)
; SSE-NEXT:    [[CTLZ3:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD3]], i1 false)
; SSE-NEXT:    [[CTLZ4:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD4]], i1 false)
; SSE-NEXT:    [[CTLZ5:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD5]], i1 false)
; SSE-NEXT:    [[CTLZ6:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD6]], i1 false)
; SSE-NEXT:    [[CTLZ7:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD7]], i1 false)
; SSE-NEXT:    store i32 [[CTLZ0]], ptr @dst32, align 2
; SSE-NEXT:    store i32 [[CTLZ1]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 2
; SSE-NEXT:    store i32 [[CTLZ2]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 2
; SSE-NEXT:    store i32 [[CTLZ3]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 2
; SSE-NEXT:    store i32 [[CTLZ4]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 4), align 2
; SSE-NEXT:    store i32 [[CTLZ5]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 5), align 2
; SSE-NEXT:    store i32 [[CTLZ6]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 6), align 2
; SSE-NEXT:    store i32 [[CTLZ7]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 7), align 2
; SSE-NEXT:    ret void
;
; AVX1-LABEL: @ctlz_8i32(
; AVX1-NEXT:    [[LD0:%.*]] = load i32, ptr @src32, align 2
; AVX1-NEXT:    [[LD1:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 2
; AVX1-NEXT:    [[LD2:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 2
; AVX1-NEXT:    [[LD3:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 2
; AVX1-NEXT:    [[LD4:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 4), align 2
; AVX1-NEXT:    [[LD5:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 5), align 2
; AVX1-NEXT:    [[LD6:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 6), align 2
; AVX1-NEXT:    [[LD7:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 7), align 2
; AVX1-NEXT:    [[CTLZ0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD0]], i1 false)
; AVX1-NEXT:    [[CTLZ1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD1]], i1 false)
; AVX1-NEXT:    [[CTLZ2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD2]], i1 false)
; AVX1-NEXT:    [[CTLZ3:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD3]], i1 false)
; AVX1-NEXT:    [[CTLZ4:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD4]], i1 false)
; AVX1-NEXT:    [[CTLZ5:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD5]], i1 false)
; AVX1-NEXT:    [[CTLZ6:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD6]], i1 false)
; AVX1-NEXT:    [[CTLZ7:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD7]], i1 false)
; AVX1-NEXT:    store i32 [[CTLZ0]], ptr @dst32, align 2
; AVX1-NEXT:    store i32 [[CTLZ1]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 2
; AVX1-NEXT:    store i32 [[CTLZ2]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 2
; AVX1-NEXT:    store i32 [[CTLZ3]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 2
; AVX1-NEXT:    store i32 [[CTLZ4]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 4), align 2
; AVX1-NEXT:    store i32 [[CTLZ5]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 5), align 2
; AVX1-NEXT:    store i32 [[CTLZ6]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 6), align 2
; AVX1-NEXT:    store i32 [[CTLZ7]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 7), align 2
; AVX1-NEXT:    ret void
;
; AVX2-LABEL: @ctlz_8i32(
; AVX2-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @src32, align 2
; AVX2-NEXT:    [[TMP2:%.*]] = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> [[TMP1]], i1 false)
; AVX2-NEXT:    store <8 x i32> [[TMP2]], ptr @dst32, align 2
; AVX2-NEXT:    ret void
;
; AVX512-LABEL: @ctlz_8i32(
; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @src32, align 2
; AVX512-NEXT:    [[TMP2:%.*]] = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> [[TMP1]], i1 false)
; AVX512-NEXT:    store <8 x i32> [[TMP2]], ptr @dst32, align 2
; AVX512-NEXT:    ret void
;
  %ld0 = load i32, ptr @src32, align 2
  %ld1 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 2
  %ld2 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 2
  %ld3 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 2
  %ld4 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 4), align 2
  %ld5 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 5), align 2
  %ld6 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 6), align 2
  %ld7 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 7), align 2
  %ctlz0 = call i32 @llvm.ctlz.i32(i32 %ld0, i1 0)
  %ctlz1 = call i32 @llvm.ctlz.i32(i32 %ld1, i1 0)
  %ctlz2 = call i32 @llvm.ctlz.i32(i32 %ld2, i1 0)
  %ctlz3 = call i32 @llvm.ctlz.i32(i32 %ld3, i1 0)
  %ctlz4 = call i32 @llvm.ctlz.i32(i32 %ld4, i1 0)
  %ctlz5 = call i32 @llvm.ctlz.i32(i32 %ld5, i1 0)
  %ctlz6 = call i32 @llvm.ctlz.i32(i32 %ld6, i1 0)
  %ctlz7 = call i32 @llvm.ctlz.i32(i32 %ld7, i1 0)
  store i32 %ctlz0, ptr @dst32, align 2
  store i32 %ctlz1, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 2
  store i32 %ctlz2, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 2
  store i32 %ctlz3, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 2
  store i32 %ctlz4, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 4), align 2
  store i32 %ctlz5, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 5), align 2
  store i32 %ctlz6, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 6), align 2
  store i32 %ctlz7, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 7), align 2
  ret void
}

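; 16-bit elements: an <8 x i16> ctlz fits in one XMM register and is
; vectorized on every run line, so a single CHECK block covers all prefixes.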
define void @ctlz_8i16() #0 {
; CHECK-LABEL: @ctlz_8i16(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 2
; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[TMP1]], i1 false)
; CHECK-NEXT:    store <8 x i16> [[TMP2]], ptr @dst16, align 2
; CHECK-NEXT:    ret void
;
  %ld0 = load i16, ptr @src16, align 2
  %ld1 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 1), align 2
  %ld2 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 2), align 2
  %ld3 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 3), align 2
  %ld4 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 4), align 2
  %ld5 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 5), align 2
  %ld6 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 6), align 2
  %ld7 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 7), align 2
  %ctlz0 = call i16 @llvm.ctlz.i16(i16 %ld0, i1 0)
  %ctlz1 = call i16 @llvm.ctlz.i16(i16 %ld1, i1 0)
  %ctlz2 = call i16 @llvm.ctlz.i16(i16 %ld2, i1 0)
  %ctlz3 = call i16 @llvm.ctlz.i16(i16 %ld3, i1 0)
  %ctlz4 = call i16 @llvm.ctlz.i16(i16 %ld4, i1 0)
  %ctlz5 = call i16 @llvm.ctlz.i16(i16 %ld5, i1 0)
  %ctlz6 = call i16 @llvm.ctlz.i16(i16 %ld6, i1 0)
  %ctlz7 = call i16 @llvm.ctlz.i16(i16 %ld7, i1 0)
  store i16 %ctlz0, ptr @dst16, align 2
  store i16 %ctlz1, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 1), align 2
  store i16 %ctlz2, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 2), align 2
  store i16 %ctlz3, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 3), align 2
  store i16 %ctlz4, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 4), align 2
  store i16 %ctlz5, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 5), align 2
  store i16 %ctlz6, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 6), align 2
  store i16 %ctlz7, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 7), align 2
  ret void
}

define void @ctlz_16i16() #0 {
; SSE-LABEL: @ctlz_16i16(
; SSE-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 2
; SSE-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[TMP1]], i1 false)
; SSE-NEXT:    store <8 x i16> [[TMP2]], ptr @dst16, align 2
; SSE-NEXT:    [[TMP3:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 8), align 2
; SSE-NEXT:    [[TMP4:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[TMP3]], i1 false)
; SSE-NEXT:    store <8 x i16> [[TMP4]], ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 8), align 2
; SSE-NEXT:    ret void
;
; AVX-LABEL: @ctlz_16i16(
; AVX-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @src16, align 2
; AVX-NEXT:    [[TMP2:%.*]] = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> [[TMP1]], i1 false)
; AVX-NEXT:    store <16 x i16> [[TMP2]], ptr @dst16, align 2
; AVX-NEXT:    ret void
;
  %ld0 = load i16, ptr @src16, align 2
  %ld1 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 1), align 2
  %ld2 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 2), align 2
  %ld3 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 3), align 2
  %ld4 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 4), align 2
  %ld5 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 5), align 2
  %ld6 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 6), align 2
  %ld7 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 7), align 2
  %ld8 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 8), align 2
  %ld9 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 9), align 2
  %ld10 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 10), align 2
  %ld11 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 11), align 2
  %ld12 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 12), align 2
  %ld13 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 13), align 2
  %ld14 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 14), align 2
  %ld15 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 15), align 2
  %ctlz0 = call i16 @llvm.ctlz.i16(i16 %ld0, i1 0)
  %ctlz1 = call i16 @llvm.ctlz.i16(i16 %ld1, i1 0)
  %ctlz2 = call i16 @llvm.ctlz.i16(i16 %ld2, i1 0)
  %ctlz3 = call i16 @llvm.ctlz.i16(i16 %ld3, i1 0)
  %ctlz4 = call i16 @llvm.ctlz.i16(i16 %ld4, i1 0)
  %ctlz5 = call i16 @llvm.ctlz.i16(i16 %ld5, i1 0)
  %ctlz6 = call i16 @llvm.ctlz.i16(i16 %ld6, i1 0)
  %ctlz7 = call i16 @llvm.ctlz.i16(i16 %ld7, i1 0)
  %ctlz8 = call i16 @llvm.ctlz.i16(i16 %ld8, i1 0)
  %ctlz9 = call i16 @llvm.ctlz.i16(i16 %ld9, i1 0)
  %ctlz10 = call i16 @llvm.ctlz.i16(i16 %ld10, i1 0)
  %ctlz11 = call i16 @llvm.ctlz.i16(i16 %ld11, i1 0)
  %ctlz12 = call i16 @llvm.ctlz.i16(i16 %ld12, i1 0)
  %ctlz13 = call i16 @llvm.ctlz.i16(i16 %ld13, i1 0)
  %ctlz14 = call i16 @llvm.ctlz.i16(i16 %ld14, i1 0)
  %ctlz15 = call i16 @llvm.ctlz.i16(i16 %ld15, i1 0)
  store i16 %ctlz0 , ptr @dst16, align 2
  store i16 %ctlz1 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 1), align 2
  store i16 %ctlz2 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 2), align 2
  store i16 %ctlz3 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 3), align 2
  store i16 %ctlz4 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 4), align 2
  store i16 %ctlz5 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 5), align 2
  store i16 %ctlz6 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 6), align 2
  store i16 %ctlz7 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 7), align 2
  store i16 %ctlz8 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 8), align 2
  store i16 %ctlz9 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 9), align 2
  store i16 %ctlz10, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 10), align 2
  store i16 %ctlz11, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 11), align 2
  store i16 %ctlz12, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 12), align 2
  store i16 %ctlz13, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 13), align 2
  store i16 %ctlz14, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 14), align 2
  store i16 %ctlz15, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 15), align 2
  ret void
}

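; 8-bit elements vectorize everywhere as well: the <16 x i8> case below gets a
; single CHECK block shared by every run line.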
define void @ctlz_16i8() #0 {
; CHECK-LABEL: @ctlz_16i8(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @src8, align 1
; CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> [[TMP1]], i1 false)
; CHECK-NEXT:    store <16 x i8> [[TMP2]], ptr @dst8, align 1
; CHECK-NEXT:    ret void
;
  %ld0 = load i8, ptr @src8, align 1
  %ld1 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 1), align 1
  %ld2 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 2), align 1
  %ld3 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 3), align 1
  %ld4 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 4), align 1
  %ld5 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 5), align 1
  %ld6 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 6), align 1
  %ld7 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 7), align 1
  %ld8 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 8), align 1
  %ld9 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 9), align 1
  %ld10 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 10), align 1
  %ld11 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 11), align 1
  %ld12 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 12), align 1
  %ld13 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 13), align 1
  %ld14 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 14), align 1
  %ld15 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 15), align 1
  %ctlz0 = call i8 @llvm.ctlz.i8(i8 %ld0, i1 0)
  %ctlz1 = call i8 @llvm.ctlz.i8(i8 %ld1, i1 0)
  %ctlz2 = call i8 @llvm.ctlz.i8(i8 %ld2, i1 0)
  %ctlz3 = call i8 @llvm.ctlz.i8(i8 %ld3, i1 0)
  %ctlz4 = call i8 @llvm.ctlz.i8(i8 %ld4, i1 0)
  %ctlz5 = call i8 @llvm.ctlz.i8(i8 %ld5, i1 0)
  %ctlz6 = call i8 @llvm.ctlz.i8(i8 %ld6, i1 0)
  %ctlz7 = call i8 @llvm.ctlz.i8(i8 %ld7, i1 0)
  %ctlz8 = call i8 @llvm.ctlz.i8(i8 %ld8, i1 0)
  %ctlz9 = call i8 @llvm.ctlz.i8(i8 %ld9, i1 0)
  %ctlz10 = call i8 @llvm.ctlz.i8(i8 %ld10, i1 0)
  %ctlz11 = call i8 @llvm.ctlz.i8(i8 %ld11, i1 0)
  %ctlz12 = call i8 @llvm.ctlz.i8(i8 %ld12, i1 0)
  %ctlz13 = call i8 @llvm.ctlz.i8(i8 %ld13, i1 0)
  %ctlz14 = call i8 @llvm.ctlz.i8(i8 %ld14, i1 0)
  %ctlz15 = call i8 @llvm.ctlz.i8(i8 %ld15, i1 0)
  store i8 %ctlz0 , ptr @dst8, align 1
  store i8 %ctlz1 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 1), align 1
  store i8 %ctlz2 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 2), align 1
  store i8 %ctlz3 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 3), align 1
  store i8 %ctlz4 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 4), align 1
  store i8 %ctlz5 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 5), align 1
  store i8 %ctlz6 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 6), align 1
  store i8 %ctlz7 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 7), align 1
  store i8 %ctlz8 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 8), align 1
  store i8 %ctlz9 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 9), align 1
  store i8 %ctlz10, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 10), align 1
  store i8 %ctlz11, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 11), align 1
  store i8 %ctlz12, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 12), align 1
  store i8 %ctlz13, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 13), align 1
  store i8 %ctlz14, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 14), align 1
  store i8 %ctlz15, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 15), align 1
  ret void
}

define void @ctlz_32i8() #0 {
; SSE-LABEL: @ctlz_32i8(
; SSE-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @src8, align 1
; SSE-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> [[TMP1]], i1 false)
; SSE-NEXT:    store <16 x i8> [[TMP2]], ptr @dst8, align 1
; SSE-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 16), align 1
; SSE-NEXT:    [[TMP4:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> [[TMP3]], i1 false)
; SSE-NEXT:    store <16 x i8> [[TMP4]], ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 16), align 1
; SSE-NEXT:    ret void
;
; AVX-LABEL: @ctlz_32i8(
; AVX-NEXT:    [[TMP1:%.*]] = load <32 x i8>, ptr @src8, align 1
; AVX-NEXT:    [[TMP2:%.*]] = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> [[TMP1]], i1 false)
; AVX-NEXT:    store <32 x i8> [[TMP2]], ptr @dst8, align 1
; AVX-NEXT:    ret void
;
  %ld0 = load i8, ptr @src8, align 1
  %ld1 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 1), align 1
  %ld2 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 2), align 1
  %ld3 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 3), align 1
  %ld4 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 4), align 1
  %ld5 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 5), align 1
  %ld6 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 6), align 1
  %ld7 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 7), align 1
  %ld8 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 8), align 1
  %ld9 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 9), align 1
  %ld10 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 10), align 1
  %ld11 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 11), align 1
  %ld12 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 12), align 1
  %ld13 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 13), align 1
  %ld14 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 14), align 1
  %ld15 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 15), align 1
  %ld16 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 16), align 1
  %ld17 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 17), align 1
  %ld18 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 18), align 1
  %ld19 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 19), align 1
  %ld20 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 20), align 1
  %ld21 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 21), align 1
  %ld22 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 22), align 1
  %ld23 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 23), align 1
  %ld24 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 24), align 1
  %ld25 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 25), align 1
  %ld26 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 26), align 1
  %ld27 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 27), align 1
  %ld28 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 28), align 1
  %ld29 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 29), align 1
  %ld30 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 30), align 1
  %ld31 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 31), align 1
  %ctlz0 = call i8 @llvm.ctlz.i8(i8 %ld0, i1 0)
  %ctlz1 = call i8 @llvm.ctlz.i8(i8 %ld1, i1 0)
  %ctlz2 = call i8 @llvm.ctlz.i8(i8 %ld2, i1 0)
  %ctlz3 = call i8 @llvm.ctlz.i8(i8 %ld3, i1 0)
  %ctlz4 = call i8 @llvm.ctlz.i8(i8 %ld4, i1 0)
  %ctlz5 = call i8 @llvm.ctlz.i8(i8 %ld5, i1 0)
  %ctlz6 = call i8 @llvm.ctlz.i8(i8 %ld6, i1 0)
  %ctlz7 = call i8 @llvm.ctlz.i8(i8 %ld7, i1 0)
  %ctlz8 = call i8 @llvm.ctlz.i8(i8 %ld8, i1 0)
  %ctlz9 = call i8 @llvm.ctlz.i8(i8 %ld9, i1 0)
  %ctlz10 = call i8 @llvm.ctlz.i8(i8 %ld10, i1 0)
  %ctlz11 = call i8 @llvm.ctlz.i8(i8 %ld11, i1 0)
  %ctlz12 = call i8 @llvm.ctlz.i8(i8 %ld12, i1 0)
  %ctlz13 = call i8 @llvm.ctlz.i8(i8 %ld13, i1 0)
  %ctlz14 = call i8 @llvm.ctlz.i8(i8 %ld14, i1 0)
  %ctlz15 = call i8 @llvm.ctlz.i8(i8 %ld15, i1 0)
  %ctlz16 = call i8 @llvm.ctlz.i8(i8 %ld16, i1 0)
  %ctlz17 = call i8 @llvm.ctlz.i8(i8 %ld17, i1 0)
  %ctlz18 = call i8 @llvm.ctlz.i8(i8 %ld18, i1 0)
  %ctlz19 = call i8 @llvm.ctlz.i8(i8 %ld19, i1 0)
  %ctlz20 = call i8 @llvm.ctlz.i8(i8 %ld20, i1 0)
  %ctlz21 = call i8 @llvm.ctlz.i8(i8 %ld21, i1 0)
  %ctlz22 = call i8 @llvm.ctlz.i8(i8 %ld22, i1 0)
  %ctlz23 = call i8 @llvm.ctlz.i8(i8 %ld23, i1 0)
  %ctlz24 = call i8 @llvm.ctlz.i8(i8 %ld24, i1 0)
  %ctlz25 = call i8 @llvm.ctlz.i8(i8 %ld25, i1 0)
  %ctlz26 = call i8 @llvm.ctlz.i8(i8 %ld26, i1 0)
  %ctlz27 = call i8 @llvm.ctlz.i8(i8 %ld27, i1 0)
  %ctlz28 = call i8 @llvm.ctlz.i8(i8 %ld28, i1 0)
  %ctlz29 = call i8 @llvm.ctlz.i8(i8 %ld29, i1 0)
  %ctlz30 = call i8 @llvm.ctlz.i8(i8 %ld30, i1 0)
  %ctlz31 = call i8 @llvm.ctlz.i8(i8 %ld31, i1 0)
  store i8 %ctlz0 , ptr @dst8, align 1
  store i8 %ctlz1 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 1), align 1
  store i8 %ctlz2 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 2), align 1
  store i8 %ctlz3 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 3), align 1
  store i8 %ctlz4 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 4), align 1
  store i8 %ctlz5 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 5), align 1
  store i8 %ctlz6 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 6), align 1
  store i8 %ctlz7 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 7), align 1
  store i8 %ctlz8 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 8), align 1
  store i8 %ctlz9 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 9), align 1
  store i8 %ctlz10, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 10), align 1
  store i8 %ctlz11, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 11), align 1
  store i8 %ctlz12, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 12), align 1
  store i8 %ctlz13, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 13), align 1
  store i8 %ctlz14, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 14), align 1
  store i8 %ctlz15, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 15), align 1
  store i8 %ctlz16, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 16), align 1
  store i8 %ctlz17, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 17), align 1
  store i8 %ctlz18, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 18), align 1
  store i8 %ctlz19, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 19), align 1
  store i8 %ctlz20, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 20), align 1
  store i8 %ctlz21, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 21), align 1
  store i8 %ctlz22, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 22), align 1
  store i8 %ctlz23, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 23), align 1
  store i8 %ctlz24, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 24), align 1
  store i8 %ctlz25, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 25), align 1
  store i8 %ctlz26, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 26), align 1
  store i8 %ctlz27, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 27), align 1
  store i8 %ctlz28, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 28), align 1
  store i8 %ctlz29, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 29), align 1
  store i8 %ctlz30, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 30), align 1
  store i8 %ctlz31, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 31), align 1
  ret void
}

;
; CTLZ_ZERO_UNDEF
;

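; The _undef variants pass is_zero_poison=true ("i1 -1" in the scalar IR,
; printed as "i1 true" in the checks), i.e. ctlz of a zero input is poison.
; The vectorization decisions below mirror the CTLZ section; the flag is
; simply propagated into the vector intrinsics.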
define void @ctlz_undef_2i64() #0 {
; SSE-LABEL: @ctlz_undef_2i64(
; SSE-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 8
; SSE-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
; SSE-NEXT:    [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 true)
; SSE-NEXT:    [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 true)
; SSE-NEXT:    store i64 [[CTLZ0]], ptr @dst64, align 8
; SSE-NEXT:    store i64 [[CTLZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
; SSE-NEXT:    ret void
;
; AVX1-LABEL: @ctlz_undef_2i64(
; AVX1-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 8
; AVX1-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
; AVX1-NEXT:    [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 true)
; AVX1-NEXT:    [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 true)
; AVX1-NEXT:    store i64 [[CTLZ0]], ptr @dst64, align 8
; AVX1-NEXT:    store i64 [[CTLZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
; AVX1-NEXT:    ret void
;
; AVX2-LABEL: @ctlz_undef_2i64(
; AVX2-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 8
; AVX2-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
; AVX2-NEXT:    [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 true)
; AVX2-NEXT:    [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 true)
; AVX2-NEXT:    store i64 [[CTLZ0]], ptr @dst64, align 8
; AVX2-NEXT:    store i64 [[CTLZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
; AVX2-NEXT:    ret void
;
; AVX512-LABEL: @ctlz_undef_2i64(
; AVX512-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 8
; AVX512-NEXT:    [[TMP2:%.*]] = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> [[TMP1]], i1 true)
; AVX512-NEXT:    store <2 x i64> [[TMP2]], ptr @dst64, align 8
; AVX512-NEXT:    ret void
;
  %ld0 = load i64, ptr @src64, align 8
  %ld1 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
  %ctlz0 = call i64 @llvm.ctlz.i64(i64 %ld0, i1 -1)
  %ctlz1 = call i64 @llvm.ctlz.i64(i64 %ld1, i1 -1)
  store i64 %ctlz0, ptr @dst64, align 8
  store i64 %ctlz1, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
  ret void
}

define void @ctlz_undef_4i64() #0 {
; SSE-LABEL: @ctlz_undef_4i64(
; SSE-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 4
; SSE-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 1), align 4
; SSE-NEXT:    [[LD2:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
; SSE-NEXT:    [[LD3:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 3), align 4
; SSE-NEXT:    [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 true)
; SSE-NEXT:    [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 true)
; SSE-NEXT:    [[CTLZ2:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD2]], i1 true)
; SSE-NEXT:    [[CTLZ3:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD3]], i1 true)
; SSE-NEXT:    store i64 [[CTLZ0]], ptr @dst64, align 4
; SSE-NEXT:    store i64 [[CTLZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 1), align 4
; SSE-NEXT:    store i64 [[CTLZ2]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
; SSE-NEXT:    store i64 [[CTLZ3]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 3), align 4
; SSE-NEXT:    ret void
;
; AVX1-LABEL: @ctlz_undef_4i64(
; AVX1-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 4
; AVX1-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 1), align 4
; AVX1-NEXT:    [[LD2:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
; AVX1-NEXT:    [[LD3:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 3), align 4
; AVX1-NEXT:    [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 true)
; AVX1-NEXT:    [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 true)
; AVX1-NEXT:    [[CTLZ2:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD2]], i1 true)
; AVX1-NEXT:    [[CTLZ3:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD3]], i1 true)
; AVX1-NEXT:    store i64 [[CTLZ0]], ptr @dst64, align 4
; AVX1-NEXT:    store i64 [[CTLZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 1), align 4
; AVX1-NEXT:    store i64 [[CTLZ2]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
; AVX1-NEXT:    store i64 [[CTLZ3]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 3), align 4
; AVX1-NEXT:    ret void
;
; AVX2-LABEL: @ctlz_undef_4i64(
; AVX2-NEXT:    [[LD0:%.*]] = load i64, ptr @src64, align 4
; AVX2-NEXT:    [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 1), align 4
; AVX2-NEXT:    [[LD2:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
; AVX2-NEXT:    [[LD3:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 3), align 4
; AVX2-NEXT:    [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 true)
; AVX2-NEXT:    [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 true)
; AVX2-NEXT:    [[CTLZ2:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD2]], i1 true)
; AVX2-NEXT:    [[CTLZ3:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD3]], i1 true)
; AVX2-NEXT:    store i64 [[CTLZ0]], ptr @dst64, align 4
; AVX2-NEXT:    store i64 [[CTLZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 1), align 4
; AVX2-NEXT:    store i64 [[CTLZ2]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
; AVX2-NEXT:    store i64 [[CTLZ3]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 3), align 4
; AVX2-NEXT:    ret void
;
; AVX512-LABEL: @ctlz_undef_4i64(
; AVX512-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 4
; AVX512-NEXT:    [[TMP2:%.*]] = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> [[TMP1]], i1 true)
; AVX512-NEXT:    store <4 x i64> [[TMP2]], ptr @dst64, align 4
; AVX512-NEXT:    ret void
;
  %ld0 = load i64, ptr @src64, align 4
  %ld1 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 1), align 4
  %ld2 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
  %ld3 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 3), align 4
  %ctlz0 = call i64 @llvm.ctlz.i64(i64 %ld0, i1 -1)
  %ctlz1 = call i64 @llvm.ctlz.i64(i64 %ld1, i1 -1)
  %ctlz2 = call i64 @llvm.ctlz.i64(i64 %ld2, i1 -1)
  %ctlz3 = call i64 @llvm.ctlz.i64(i64 %ld3, i1 -1)
  store i64 %ctlz0, ptr @dst64, align 4
  store i64 %ctlz1, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 1), align 4
  store i64 %ctlz2, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
  store i64 %ctlz3, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 3), align 4
  ret void
}

define void @ctlz_undef_4i32() #0 {
; SSE-LABEL: @ctlz_undef_4i32(
; SSE-NEXT:    [[LD0:%.*]] = load i32, ptr @src32, align 4
; SSE-NEXT:    [[LD1:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 4
; SSE-NEXT:    [[LD2:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 4
; SSE-NEXT:    [[LD3:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 4
; SSE-NEXT:    [[CTLZ0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD0]], i1 true)
; SSE-NEXT:    [[CTLZ1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD1]], i1 true)
; SSE-NEXT:    [[CTLZ2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD2]], i1 true)
; SSE-NEXT:    [[CTLZ3:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD3]], i1 true)
; SSE-NEXT:    store i32 [[CTLZ0]], ptr @dst32, align 4
; SSE-NEXT:    store i32 [[CTLZ1]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 4
; SSE-NEXT:    store i32 [[CTLZ2]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 4
; SSE-NEXT:    store i32 [[CTLZ3]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 4
; SSE-NEXT:    ret void
;
; AVX1-LABEL: @ctlz_undef_4i32(
; AVX1-NEXT:    [[LD0:%.*]] = load i32, ptr @src32, align 4
; AVX1-NEXT:    [[LD1:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 4
; AVX1-NEXT:    [[LD2:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 4
; AVX1-NEXT:    [[LD3:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 4
; AVX1-NEXT:    [[CTLZ0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD0]], i1 true)
; AVX1-NEXT:    [[CTLZ1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD1]], i1 true)
; AVX1-NEXT:    [[CTLZ2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD2]], i1 true)
; AVX1-NEXT:    [[CTLZ3:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD3]], i1 true)
; AVX1-NEXT:    store i32 [[CTLZ0]], ptr @dst32, align 4
; AVX1-NEXT:    store i32 [[CTLZ1]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 4
; AVX1-NEXT:    store i32 [[CTLZ2]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 4
; AVX1-NEXT:    store i32 [[CTLZ3]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 4
; AVX1-NEXT:    ret void
;
; AVX2-LABEL: @ctlz_undef_4i32(
; AVX2-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 4
; AVX2-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[TMP1]], i1 true)
; AVX2-NEXT:    store <4 x i32> [[TMP2]], ptr @dst32, align 4
; AVX2-NEXT:    ret void
;
; AVX512-LABEL: @ctlz_undef_4i32(
; AVX512-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 4
; AVX512-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[TMP1]], i1 true)
; AVX512-NEXT:    store <4 x i32> [[TMP2]], ptr @dst32, align 4
; AVX512-NEXT:    ret void
;
  %ld0 = load i32, ptr @src32, align 4
  %ld1 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 4
  %ld2 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 4
  %ld3 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 4
  %ctlz0 = call i32 @llvm.ctlz.i32(i32 %ld0, i1 -1)
  %ctlz1 = call i32 @llvm.ctlz.i32(i32 %ld1, i1 -1)
  %ctlz2 = call i32 @llvm.ctlz.i32(i32 %ld2, i1 -1)
  %ctlz3 = call i32 @llvm.ctlz.i32(i32 %ld3, i1 -1)
  store i32 %ctlz0, ptr @dst32, align 4
  store i32 %ctlz1, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 4
  store i32 %ctlz2, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 4
  store i32 %ctlz3, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 4
  ret void
}

define void @ctlz_undef_8i32() #0 {
; SSE-LABEL: @ctlz_undef_8i32(
; SSE-NEXT:    [[LD0:%.*]] = load i32, ptr @src32, align 2
; SSE-NEXT:    [[LD1:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 2
; SSE-NEXT:    [[LD2:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 2
; SSE-NEXT:    [[LD3:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 2
; SSE-NEXT:    [[LD4:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 4), align 2
; SSE-NEXT:    [[LD5:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 5), align 2
; SSE-NEXT:    [[LD6:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 6), align 2
; SSE-NEXT:    [[LD7:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 7), align 2
; SSE-NEXT:    [[CTLZ0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD0]], i1 true)
; SSE-NEXT:    [[CTLZ1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD1]], i1 true)
; SSE-NEXT:    [[CTLZ2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD2]], i1 true)
; SSE-NEXT:    [[CTLZ3:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD3]], i1 true)
; SSE-NEXT:    [[CTLZ4:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD4]], i1 true)
; SSE-NEXT:    [[CTLZ5:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD5]], i1 true)
; SSE-NEXT:    [[CTLZ6:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD6]], i1 true)
; SSE-NEXT:    [[CTLZ7:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD7]], i1 true)
; SSE-NEXT:    store i32 [[CTLZ0]], ptr @dst32, align 2
; SSE-NEXT:    store i32 [[CTLZ1]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 2
; SSE-NEXT:    store i32 [[CTLZ2]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 2
; SSE-NEXT:    store i32 [[CTLZ3]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 2
; SSE-NEXT:    store i32 [[CTLZ4]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 4), align 2
; SSE-NEXT:    store i32 [[CTLZ5]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 5), align 2
; SSE-NEXT:    store i32 [[CTLZ6]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 6), align 2
; SSE-NEXT:    store i32 [[CTLZ7]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 7), align 2
; SSE-NEXT:    ret void
;
; AVX1-LABEL: @ctlz_undef_8i32(
; AVX1-NEXT:    [[LD0:%.*]] = load i32, ptr @src32, align 2
; AVX1-NEXT:    [[LD1:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 2
; AVX1-NEXT:    [[LD2:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 2
; AVX1-NEXT:    [[LD3:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 2
; AVX1-NEXT:    [[LD4:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 4), align 2
; AVX1-NEXT:    [[LD5:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 5), align 2
; AVX1-NEXT:    [[LD6:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 6), align 2
; AVX1-NEXT:    [[LD7:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 7), align 2
; AVX1-NEXT:    [[CTLZ0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD0]], i1 true)
; AVX1-NEXT:    [[CTLZ1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD1]], i1 true)
; AVX1-NEXT:    [[CTLZ2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD2]], i1 true)
; AVX1-NEXT:    [[CTLZ3:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD3]], i1 true)
; AVX1-NEXT:    [[CTLZ4:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD4]], i1 true)
; AVX1-NEXT:    [[CTLZ5:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD5]], i1 true)
; AVX1-NEXT:    [[CTLZ6:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD6]], i1 true)
; AVX1-NEXT:    [[CTLZ7:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD7]], i1 true)
; AVX1-NEXT:    store i32 [[CTLZ0]], ptr @dst32, align 2
; AVX1-NEXT:    store i32 [[CTLZ1]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 2
; AVX1-NEXT:    store i32 [[CTLZ2]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 2
; AVX1-NEXT:    store i32 [[CTLZ3]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 2
; AVX1-NEXT:    store i32 [[CTLZ4]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 4), align 2
; AVX1-NEXT:    store i32 [[CTLZ5]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 5), align 2
; AVX1-NEXT:    store i32 [[CTLZ6]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 6), align 2
; AVX1-NEXT:    store i32 [[CTLZ7]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 7), align 2
; AVX1-NEXT:    ret void
;
; AVX2-LABEL: @ctlz_undef_8i32(
; AVX2-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @src32, align 2
; AVX2-NEXT:    [[TMP2:%.*]] = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> [[TMP1]], i1 true)
; AVX2-NEXT:    store <8 x i32> [[TMP2]], ptr @dst32, align 2
; AVX2-NEXT:    ret void
;
; AVX512-LABEL: @ctlz_undef_8i32(
; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @src32, align 2
; AVX512-NEXT:    [[TMP2:%.*]] = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> [[TMP1]], i1 true)
; AVX512-NEXT:    store <8 x i32> [[TMP2]], ptr @dst32, align 2
; AVX512-NEXT:    ret void
;
  %ld0 = load i32, ptr @src32, align 2
  %ld1 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 2
  %ld2 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 2
  %ld3 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 2
  %ld4 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 4), align 2
  %ld5 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 5), align 2
  %ld6 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 6), align 2
  %ld7 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 7), align 2
  %ctlz0 = call i32 @llvm.ctlz.i32(i32 %ld0, i1 -1)
  %ctlz1 = call i32 @llvm.ctlz.i32(i32 %ld1, i1 -1)
  %ctlz2 = call i32 @llvm.ctlz.i32(i32 %ld2, i1 -1)
  %ctlz3 = call i32 @llvm.ctlz.i32(i32 %ld3, i1 -1)
  %ctlz4 = call i32 @llvm.ctlz.i32(i32 %ld4, i1 -1)
  %ctlz5 = call i32 @llvm.ctlz.i32(i32 %ld5, i1 -1)
  %ctlz6 = call i32 @llvm.ctlz.i32(i32 %ld6, i1 -1)
  %ctlz7 = call i32 @llvm.ctlz.i32(i32 %ld7, i1 -1)
  store i32 %ctlz0, ptr @dst32, align 2
  store i32 %ctlz1, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 2
  store i32 %ctlz2, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 2
  store i32 %ctlz3, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 2
  store i32 %ctlz4, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 4), align 2
  store i32 %ctlz5, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 5), align 2
  store i32 %ctlz6, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 6), align 2
  store i32 %ctlz7, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 7), align 2
  ret void
}

define void @ctlz_undef_8i16() #0 {
; CHECK-LABEL: @ctlz_undef_8i16(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 2
; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[TMP1]], i1 true)
; CHECK-NEXT:    store <8 x i16> [[TMP2]], ptr @dst16, align 2
; CHECK-NEXT:    ret void
;
  %ld0 = load i16, ptr @src16, align 2
  %ld1 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 1), align 2
  %ld2 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 2), align 2
  %ld3 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 3), align 2
  %ld4 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 4), align 2
  %ld5 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 5), align 2
  %ld6 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 6), align 2
  %ld7 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 7), align 2
  %ctlz0 = call i16 @llvm.ctlz.i16(i16 %ld0, i1 -1)
  %ctlz1 = call i16 @llvm.ctlz.i16(i16 %ld1, i1 -1)
  %ctlz2 = call i16 @llvm.ctlz.i16(i16 %ld2, i1 -1)
  %ctlz3 = call i16 @llvm.ctlz.i16(i16 %ld3, i1 -1)
  %ctlz4 = call i16 @llvm.ctlz.i16(i16 %ld4, i1 -1)
  %ctlz5 = call i16 @llvm.ctlz.i16(i16 %ld5, i1 -1)
  %ctlz6 = call i16 @llvm.ctlz.i16(i16 %ld6, i1 -1)
  %ctlz7 = call i16 @llvm.ctlz.i16(i16 %ld7, i1 -1)
  store i16 %ctlz0, ptr @dst16, align 2
  store i16 %ctlz1, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 1), align 2
  store i16 %ctlz2, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 2), align 2
  store i16 %ctlz3, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 3), align 2
  store i16 %ctlz4, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 4), align 2
  store i16 %ctlz5, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 5), align 2
  store i16 %ctlz6, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 6), align 2
  store i16 %ctlz7, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 7), align 2
  ret void
}

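; 256-bit case: per the checks, SSE handles the sixteen i16 elements as two
; <8 x i16> halves, while the AVX targets use a single <16 x i16> operation.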
define void @ctlz_undef_16i16() #0 {
; SSE-LABEL: @ctlz_undef_16i16(
; SSE-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 2
; SSE-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[TMP1]], i1 true)
; SSE-NEXT:    store <8 x i16> [[TMP2]], ptr @dst16, align 2
; SSE-NEXT:    [[TMP3:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 8), align 2
; SSE-NEXT:    [[TMP4:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[TMP3]], i1 true)
; SSE-NEXT:    store <8 x i16> [[TMP4]], ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 8), align 2
; SSE-NEXT:    ret void
;
; AVX-LABEL: @ctlz_undef_16i16(
; AVX-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @src16, align 2
; AVX-NEXT:    [[TMP2:%.*]] = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> [[TMP1]], i1 true)
; AVX-NEXT:    store <16 x i16> [[TMP2]], ptr @dst16, align 2
; AVX-NEXT:    ret void
;
  %ld0 = load i16, ptr @src16, align 2
  %ld1 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 1), align 2
  %ld2 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 2), align 2
  %ld3 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 3), align 2
  %ld4 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 4), align 2
  %ld5 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 5), align 2
  %ld6 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 6), align 2
  %ld7 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 7), align 2
  %ld8 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 8), align 2
  %ld9 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 9), align 2
  %ld10 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 10), align 2
  %ld11 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 11), align 2
  %ld12 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 12), align 2
  %ld13 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 13), align 2
  %ld14 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 14), align 2
  %ld15 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 15), align 2
  %ctlz0 = call i16 @llvm.ctlz.i16(i16 %ld0, i1 -1)
  %ctlz1 = call i16 @llvm.ctlz.i16(i16 %ld1, i1 -1)
  %ctlz2 = call i16 @llvm.ctlz.i16(i16 %ld2, i1 -1)
  %ctlz3 = call i16 @llvm.ctlz.i16(i16 %ld3, i1 -1)
  %ctlz4 = call i16 @llvm.ctlz.i16(i16 %ld4, i1 -1)
  %ctlz5 = call i16 @llvm.ctlz.i16(i16 %ld5, i1 -1)
  %ctlz6 = call i16 @llvm.ctlz.i16(i16 %ld6, i1 -1)
  %ctlz7 = call i16 @llvm.ctlz.i16(i16 %ld7, i1 -1)
  %ctlz8 = call i16 @llvm.ctlz.i16(i16 %ld8, i1 -1)
  %ctlz9 = call i16 @llvm.ctlz.i16(i16 %ld9, i1 -1)
  %ctlz10 = call i16 @llvm.ctlz.i16(i16 %ld10, i1 -1)
  %ctlz11 = call i16 @llvm.ctlz.i16(i16 %ld11, i1 -1)
  %ctlz12 = call i16 @llvm.ctlz.i16(i16 %ld12, i1 -1)
  %ctlz13 = call i16 @llvm.ctlz.i16(i16 %ld13, i1 -1)
  %ctlz14 = call i16 @llvm.ctlz.i16(i16 %ld14, i1 -1)
  %ctlz15 = call i16 @llvm.ctlz.i16(i16 %ld15, i1 -1)
  store i16 %ctlz0 , ptr @dst16, align 2
  store i16 %ctlz1 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 1), align 2
  store i16 %ctlz2 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 2), align 2
  store i16 %ctlz3 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 3), align 2
  store i16 %ctlz4 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 4), align 2
  store i16 %ctlz5 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 5), align 2
  store i16 %ctlz6 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 6), align 2
  store i16 %ctlz7 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 7), align 2
  store i16 %ctlz8 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 8), align 2
  store i16 %ctlz9 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 9), align 2
  store i16 %ctlz10, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 10), align 2
  store i16 %ctlz11, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 11), align 2
  store i16 %ctlz12, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 12), align 2
  store i16 %ctlz13, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 13), align 2
  store i16 %ctlz14, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 14), align 2
  store i16 %ctlz15, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 15), align 2
  ret void
}
define void @ctlz_undef_16i8() #0 {
; CHECK-LABEL: @ctlz_undef_16i8(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @src8, align 1
; CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> [[TMP1]], i1 true)
; CHECK-NEXT:    store <16 x i8> [[TMP2]], ptr @dst8, align 1
; CHECK-NEXT:    ret void
;
  %ld0 = load i8, ptr @src8, align 1
  %ld1 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 1), align 1
  %ld2 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 2), align 1
  %ld3 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 3), align 1
  %ld4 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 4), align 1
  %ld5 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 5), align 1
  %ld6 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 6), align 1
  %ld7 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 7), align 1
  %ld8 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 8), align 1
  %ld9 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 9), align 1
  %ld10 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 10), align 1
  %ld11 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 11), align 1
  %ld12 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 12), align 1
  %ld13 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 13), align 1
  %ld14 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 14), align 1
  %ld15 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 15), align 1
  %ctlz0 = call i8 @llvm.ctlz.i8(i8 %ld0, i1 -1)
  %ctlz1 = call i8 @llvm.ctlz.i8(i8 %ld1, i1 -1)
  %ctlz2 = call i8 @llvm.ctlz.i8(i8 %ld2, i1 -1)
  %ctlz3 = call i8 @llvm.ctlz.i8(i8 %ld3, i1 -1)
  %ctlz4 = call i8 @llvm.ctlz.i8(i8 %ld4, i1 -1)
  %ctlz5 = call i8 @llvm.ctlz.i8(i8 %ld5, i1 -1)
  %ctlz6 = call i8 @llvm.ctlz.i8(i8 %ld6, i1 -1)
  %ctlz7 = call i8 @llvm.ctlz.i8(i8 %ld7, i1 -1)
  %ctlz8 = call i8 @llvm.ctlz.i8(i8 %ld8, i1 -1)
  %ctlz9 = call i8 @llvm.ctlz.i8(i8 %ld9, i1 -1)
  %ctlz10 = call i8 @llvm.ctlz.i8(i8 %ld10, i1 -1)
  %ctlz11 = call i8 @llvm.ctlz.i8(i8 %ld11, i1 -1)
  %ctlz12 = call i8 @llvm.ctlz.i8(i8 %ld12, i1 -1)
  %ctlz13 = call i8 @llvm.ctlz.i8(i8 %ld13, i1 -1)
  %ctlz14 = call i8 @llvm.ctlz.i8(i8 %ld14, i1 -1)
  %ctlz15 = call i8 @llvm.ctlz.i8(i8 %ld15, i1 -1)
  store i8 %ctlz0 , ptr @dst8, align 1
  store i8 %ctlz1 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 1), align 1
  store i8 %ctlz2 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 2), align 1
  store i8 %ctlz3 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 3), align 1
  store i8 %ctlz4 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 4), align 1
  store i8 %ctlz5 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 5), align 1
  store i8 %ctlz6 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 6), align 1
  store i8 %ctlz7 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 7), align 1
  store i8 %ctlz8 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 8), align 1
  store i8 %ctlz9 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 9), align 1
  store i8 %ctlz10, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 10), align 1
  store i8 %ctlz11, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 11), align 1
  store i8 %ctlz12, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 12), align 1
  store i8 %ctlz13, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 13), align 1
  store i8 %ctlz14, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 14), align 1
  store i8 %ctlz15, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 15), align 1
  ret void
}
define void @ctlz_undef_32i8() #0 {
; SSE-LABEL: @ctlz_undef_32i8(
; SSE-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @src8, align 1
; SSE-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> [[TMP1]], i1 true)
; SSE-NEXT:    store <16 x i8> [[TMP2]], ptr @dst8, align 1
; SSE-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 16), align 1
; SSE-NEXT:    [[TMP4:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> [[TMP3]], i1 true)
; SSE-NEXT:    store <16 x i8> [[TMP4]], ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 16), align 1
; SSE-NEXT:    ret void
;
; AVX-LABEL: @ctlz_undef_32i8(
; AVX-NEXT:    [[TMP1:%.*]] = load <32 x i8>, ptr @src8, align 1
; AVX-NEXT:    [[TMP2:%.*]] = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> [[TMP1]], i1 true)
; AVX-NEXT:    store <32 x i8> [[TMP2]], ptr @dst8, align 1
; AVX-NEXT:    ret void
;
  %ld0 = load i8, ptr @src8, align 1
  %ld1 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 1), align 1
  %ld2 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 2), align 1
  %ld3 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 3), align 1
  %ld4 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 4), align 1
  %ld5 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 5), align 1
  %ld6 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 6), align 1
  %ld7 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 7), align 1
  %ld8 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 8), align 1
  %ld9 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 9), align 1
  %ld10 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 10), align 1
  %ld11 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 11), align 1
  %ld12 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 12), align 1
  %ld13 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 13), align 1
  %ld14 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 14), align 1
  %ld15 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 15), align 1
  %ld16 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 16), align 1
  %ld17 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 17), align 1
  %ld18 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 18), align 1
  %ld19 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 19), align 1
  %ld20 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 20), align 1
  %ld21 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 21), align 1
  %ld22 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 22), align 1
  %ld23 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 23), align 1
  %ld24 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 24), align 1
  %ld25 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 25), align 1
  %ld26 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 26), align 1
  %ld27 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 27), align 1
  %ld28 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 28), align 1
  %ld29 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 29), align 1
  %ld30 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 30), align 1
  %ld31 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 31), align 1
  %ctlz0 = call i8 @llvm.ctlz.i8(i8 %ld0, i1 -1)
  %ctlz1 = call i8 @llvm.ctlz.i8(i8 %ld1, i1 -1)
  %ctlz2 = call i8 @llvm.ctlz.i8(i8 %ld2, i1 -1)
  %ctlz3 = call i8 @llvm.ctlz.i8(i8 %ld3, i1 -1)
  %ctlz4 = call i8 @llvm.ctlz.i8(i8 %ld4, i1 -1)
  %ctlz5 = call i8 @llvm.ctlz.i8(i8 %ld5, i1 -1)
  %ctlz6 = call i8 @llvm.ctlz.i8(i8 %ld6, i1 -1)
  %ctlz7 = call i8 @llvm.ctlz.i8(i8 %ld7, i1 -1)
  %ctlz8 = call i8 @llvm.ctlz.i8(i8 %ld8, i1 -1)
  %ctlz9 = call i8 @llvm.ctlz.i8(i8 %ld9, i1 -1)
  %ctlz10 = call i8 @llvm.ctlz.i8(i8 %ld10, i1 -1)
  %ctlz11 = call i8 @llvm.ctlz.i8(i8 %ld11, i1 -1)
  %ctlz12 = call i8 @llvm.ctlz.i8(i8 %ld12, i1 -1)
  %ctlz13 = call i8 @llvm.ctlz.i8(i8 %ld13, i1 -1)
  %ctlz14 = call i8 @llvm.ctlz.i8(i8 %ld14, i1 -1)
  %ctlz15 = call i8 @llvm.ctlz.i8(i8 %ld15, i1 -1)
  %ctlz16 = call i8 @llvm.ctlz.i8(i8 %ld16, i1 -1)
  %ctlz17 = call i8 @llvm.ctlz.i8(i8 %ld17, i1 -1)
  %ctlz18 = call i8 @llvm.ctlz.i8(i8 %ld18, i1 -1)
  %ctlz19 = call i8 @llvm.ctlz.i8(i8 %ld19, i1 -1)
  %ctlz20 = call i8 @llvm.ctlz.i8(i8 %ld20, i1 -1)
  %ctlz21 = call i8 @llvm.ctlz.i8(i8 %ld21, i1 -1)
  %ctlz22 = call i8 @llvm.ctlz.i8(i8 %ld22, i1 -1)
  %ctlz23 = call i8 @llvm.ctlz.i8(i8 %ld23, i1 -1)
  %ctlz24 = call i8 @llvm.ctlz.i8(i8 %ld24, i1 -1)
  %ctlz25 = call i8 @llvm.ctlz.i8(i8 %ld25, i1 -1)
  %ctlz26 = call i8 @llvm.ctlz.i8(i8 %ld26, i1 -1)
  %ctlz27 = call i8 @llvm.ctlz.i8(i8 %ld27, i1 -1)
  %ctlz28 = call i8 @llvm.ctlz.i8(i8 %ld28, i1 -1)
  %ctlz29 = call i8 @llvm.ctlz.i8(i8 %ld29, i1 -1)
  %ctlz30 = call i8 @llvm.ctlz.i8(i8 %ld30, i1 -1)
  %ctlz31 = call i8 @llvm.ctlz.i8(i8 %ld31, i1 -1)
  store i8 %ctlz0 , ptr @dst8, align 1
  store i8 %ctlz1 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 1), align 1
  store i8 %ctlz2 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 2), align 1
  store i8 %ctlz3 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 3), align 1
  store i8 %ctlz4 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 4), align 1
  store i8 %ctlz5 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 5), align 1
  store i8 %ctlz6 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 6), align 1
  store i8 %ctlz7 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 7), align 1
  store i8 %ctlz8 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 8), align 1
  store i8 %ctlz9 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 9), align 1
  store i8 %ctlz10, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 10), align 1
  store i8 %ctlz11, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 11), align 1
  store i8 %ctlz12, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 12), align 1
  store i8 %ctlz13, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 13), align 1
  store i8 %ctlz14, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 14), align 1
  store i8 %ctlz15, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 15), align 1
  store i8 %ctlz16, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 16), align 1
  store i8 %ctlz17, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 17), align 1
  store i8 %ctlz18, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 18), align 1
  store i8 %ctlz19, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 19), align 1
  store i8 %ctlz20, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 20), align 1
  store i8 %ctlz21, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 21), align 1
  store i8 %ctlz22, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 22), align 1
  store i8 %ctlz23, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 23), align 1
  store i8 %ctlz24, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 24), align 1
  store i8 %ctlz25, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 25), align 1
  store i8 %ctlz26, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 26), align 1
  store i8 %ctlz27, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 27), align 1
  store i8 %ctlz28, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 28), align 1
  store i8 %ctlz29, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 29), align 1
  store i8 %ctlz30, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 30), align 1
  store i8 %ctlz31, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 31), align 1
  ret void
}
attributes #0 = { nounwind }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; SSE2: {{.*}}
; SSE4: {{.*}}