; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X64
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=X64
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=X64
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X64
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512vl,+avx512bw,+avx512dq | FileCheck %s --check-prefixes=X64
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X86

declare i1 @llvm.vector.reduce.and.v2i1(<2 x i1>)
declare i1 @llvm.vector.reduce.and.v4i1(<4 x i1>)
declare i1 @llvm.vector.reduce.and.v8i1(<8 x i1>)

; All four versions are semantically equivalent and should produce the same asm as the scalar version.

define i1 @intrinsic_v2i8(ptr align 1 %arg, ptr align 1 %arg1) {
; X64-LABEL: intrinsic_v2i8:
; X64:       # %bb.0: # %bb
; X64-NEXT:    movzwl (%rsi), %eax
; X64-NEXT:    cmpw (%rdi), %ax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
;
; X86-LABEL: intrinsic_v2i8:
; X86:       # %bb.0: # %bb
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movzwl (%ecx), %ecx
; X86-NEXT:    cmpw (%eax), %cx
; X86-NEXT:    sete %al
; X86-NEXT:    retl
bb:
  %lhs = load <2 x i8>, ptr %arg1, align 1
  %rhs = load <2 x i8>, ptr %arg, align 1
  %cmp = icmp eq <2 x i8> %lhs, %rhs
  %all_eq = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %cmp)
  ret i1 %all_eq
}

define i1 @intrinsic_v4i8(ptr align 1 %arg, ptr align 1 %arg1) {
; X64-LABEL: intrinsic_v4i8:
; X64:       # %bb.0: # %bb
; X64-NEXT:    movl (%rsi), %eax
; X64-NEXT:    cmpl (%rdi), %eax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
;
; X86-LABEL: intrinsic_v4i8:
; X86:       # %bb.0: # %bb
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl (%ecx), %ecx
; X86-NEXT:    cmpl (%eax), %ecx
; X86-NEXT:    sete %al
; X86-NEXT:    retl
bb:
  %lhs = load <4 x i8>, ptr %arg1, align 1
  %rhs = load <4 x i8>, ptr %arg, align 1
  %cmp = icmp eq <4 x i8> %lhs, %rhs
  %all_eq = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %cmp)
  ret i1 %all_eq
}

define i1 @intrinsic_v8i8(ptr align 1 %arg, ptr align 1 %arg1) {
; X64-LABEL: intrinsic_v8i8:
; X64:       # %bb.0: # %bb
; X64-NEXT:    movq (%rsi), %rax
; X64-NEXT:    cmpq (%rdi), %rax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
;
; X86-LABEL: intrinsic_v8i8:
; X86:       # %bb.0: # %bb
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl (%ecx), %edx
; X86-NEXT:    movl 4(%ecx), %ecx
; X86-NEXT:    xorl 4(%eax), %ecx
; X86-NEXT:    xorl (%eax), %edx
; X86-NEXT:    orl %ecx, %edx
; X86-NEXT:    sete %al
; X86-NEXT:    retl
bb:
  %lhs = load <8 x i8>, ptr %arg1, align 1
  %rhs = load <8 x i8>, ptr %arg, align 1
  %cmp = icmp eq <8 x i8> %lhs, %rhs
  %all_eq = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %cmp)
  ret i1 %all_eq
}

define i1 @vector_version_v2i8(ptr align 1 %arg, ptr align 1 %arg1) {
; X64-LABEL: vector_version_v2i8:
; X64:       # %bb.0: # %bb
; X64-NEXT:    movzwl (%rsi), %eax
; X64-NEXT:    cmpw (%rdi), %ax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
;
; X86-LABEL: vector_version_v2i8:
; X86:       # %bb.0: # %bb
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movzwl (%ecx), %ecx
; X86-NEXT:    cmpw (%eax), %cx
; X86-NEXT:    sete %al
; X86-NEXT:    retl
bb:
  %lhs = load <2 x i8>, ptr %arg1, align 1
  %rhs = load <2 x i8>, ptr %arg, align 1
  %any_ne = icmp ne <2 x i8> %lhs, %rhs
  %any_ne_scalar = bitcast <2 x i1> %any_ne to i2
  %all_eq = icmp eq i2 %any_ne_scalar, 0
  ret i1 %all_eq
}

define i1 @vector_version_v4i8(ptr align 1 %arg, ptr align 1 %arg1) {
; X64-LABEL: vector_version_v4i8:
; X64:       # %bb.0: # %bb
; X64-NEXT:    movl (%rsi), %eax
; X64-NEXT:    cmpl (%rdi), %eax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
;
; X86-LABEL: vector_version_v4i8:
; X86:       # %bb.0: # %bb
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl (%ecx), %ecx
; X86-NEXT:    cmpl (%eax), %ecx
; X86-NEXT:    sete %al
; X86-NEXT:    retl
bb:
  %lhs = load <4 x i8>, ptr %arg1, align 1
  %rhs = load <4 x i8>, ptr %arg, align 1
  %any_ne = icmp ne <4 x i8> %lhs, %rhs
  %any_ne_scalar = bitcast <4 x i1> %any_ne to i4
  %all_eq = icmp eq i4 %any_ne_scalar, 0
  ret i1 %all_eq
}

define i1 @vector_version_v8i8(ptr align 1 %arg, ptr align 1 %arg1) {
; X64-LABEL: vector_version_v8i8:
; X64:       # %bb.0: # %bb
; X64-NEXT:    movq (%rsi), %rax
; X64-NEXT:    cmpq (%rdi), %rax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
;
; X86-LABEL: vector_version_v8i8:
; X86:       # %bb.0: # %bb
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl (%ecx), %edx
; X86-NEXT:    movl 4(%ecx), %ecx
; X86-NEXT:    xorl 4(%eax), %ecx
; X86-NEXT:    xorl (%eax), %edx
; X86-NEXT:    orl %ecx, %edx
; X86-NEXT:    sete %al
; X86-NEXT:    retl
bb:
  %lhs = load <8 x i8>, ptr %arg1, align 1
  %rhs = load <8 x i8>, ptr %arg, align 1
  %any_ne = icmp ne <8 x i8> %lhs, %rhs
  %any_ne_scalar = bitcast <8 x i1> %any_ne to i8
  %all_eq = icmp eq i8 %any_ne_scalar, 0
  ret i1 %all_eq
}

define i1 @mixed_version_v2i8(ptr align 1 %arg, ptr align 1 %arg1) {
; X64-LABEL: mixed_version_v2i8:
; X64:       # %bb.0: # %bb
; X64-NEXT:    movzwl (%rsi), %eax
; X64-NEXT:    cmpw (%rdi), %ax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
;
; X86-LABEL: mixed_version_v2i8:
; X86:       # %bb.0: # %bb
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movzwl (%ecx), %ecx
; X86-NEXT:    cmpw (%eax), %cx
; X86-NEXT:    sete %al
; X86-NEXT:    retl
bb:
  %lhs = load <2 x i8>, ptr %arg1, align 1
  %rhs = load <2 x i8>, ptr %arg, align 1
  %lhs_s = bitcast <2 x i8> %lhs to i16
  %rhs_s = bitcast <2 x i8> %rhs to i16
  %all_eq = icmp eq i16 %lhs_s, %rhs_s
  ret i1 %all_eq
}

define i1 @mixed_version_v4i8(ptr align 1 %arg, ptr align 1 %arg1) {
; X64-LABEL: mixed_version_v4i8:
; X64:       # %bb.0: # %bb
; X64-NEXT:    movl (%rsi), %eax
; X64-NEXT:    cmpl (%rdi), %eax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
;
; X86-LABEL: mixed_version_v4i8:
; X86:       # %bb.0: # %bb
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl (%ecx), %ecx
; X86-NEXT:    cmpl (%eax), %ecx
; X86-NEXT:    sete %al
; X86-NEXT:    retl
bb:
  %lhs = load <4 x i8>, ptr %arg1, align 1
  %rhs = load <4 x i8>, ptr %arg, align 1
  %lhs_s = bitcast <4 x i8> %lhs to i32
  %rhs_s = bitcast <4 x i8> %rhs to i32
  %all_eq = icmp eq i32 %lhs_s, %rhs_s
  ret i1 %all_eq
}

define i1 @mixed_version_v8i8(ptr align 1 %arg, ptr align 1 %arg1) {
; X64-LABEL: mixed_version_v8i8:
; X64:       # %bb.0: # %bb
; X64-NEXT:    movq (%rsi), %rax
; X64-NEXT:    cmpq (%rdi), %rax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
;
; X86-LABEL: mixed_version_v8i8:
; X86:       # %bb.0: # %bb
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl (%ecx), %edx
; X86-NEXT:    movl 4(%ecx), %ecx
; X86-NEXT:    xorl 4(%eax), %ecx
; X86-NEXT:    xorl (%eax), %edx
; X86-NEXT:    orl %ecx, %edx
; X86-NEXT:    sete %al
; X86-NEXT:    retl
bb:
  %lhs = load <8 x i8>, ptr %arg1, align 1
  %rhs = load <8 x i8>, ptr %arg, align 1
  %lhs_s = bitcast <8 x i8> %lhs to i64
  %rhs_s = bitcast <8 x i8> %rhs to i64
  %all_eq = icmp eq i64 %lhs_s, %rhs_s
  ret i1 %all_eq
}

define i1 @scalar_version_i16(ptr align 1 %arg, ptr align 1 %arg1) {
; X64-LABEL: scalar_version_i16:
; X64:       # %bb.0: # %bb
; X64-NEXT:    movzwl (%rsi), %eax
; X64-NEXT:    cmpw (%rdi), %ax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
;
; X86-LABEL: scalar_version_i16:
; X86:       # %bb.0: # %bb
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movzwl (%ecx), %ecx
; X86-NEXT:    cmpw (%eax), %cx
; X86-NEXT:    sete %al
; X86-NEXT:    retl
bb:
  %lhs = load i16, ptr %arg1, align 1
  %rhs = load i16, ptr %arg, align 1
  %all_eq = icmp eq i16 %lhs, %rhs
  ret i1 %all_eq
}

define i1 @scalar_version_i32(ptr align 1 %arg, ptr align 1 %arg1) {
; X64-LABEL: scalar_version_i32:
; X64:       # %bb.0: # %bb
; X64-NEXT:    movl (%rsi), %eax
; X64-NEXT:    cmpl (%rdi), %eax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
;
; X86-LABEL: scalar_version_i32:
; X86:       # %bb.0: # %bb
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl (%ecx), %ecx
; X86-NEXT:    cmpl (%eax), %ecx
; X86-NEXT:    sete %al
; X86-NEXT:    retl
bb:
  %lhs = load i32, ptr %arg1, align 1
  %rhs = load i32, ptr %arg, align 1
  %all_eq = icmp eq i32 %lhs, %rhs
  ret i1 %all_eq
}

define i1 @scalar_version_i64(ptr align 1 %arg, ptr align 1 %arg1) {
; X64-LABEL: scalar_version_i64:
; X64:       # %bb.0: # %bb
; X64-NEXT:    movq (%rsi), %rax
; X64-NEXT:    cmpq (%rdi), %rax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
;
; X86-LABEL: scalar_version_i64:
; X86:       # %bb.0: # %bb
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl (%ecx), %edx
; X86-NEXT:    movl 4(%ecx), %ecx
; X86-NEXT:    xorl 4(%eax), %ecx
; X86-NEXT:    xorl (%eax), %edx
; X86-NEXT:    orl %ecx, %edx
; X86-NEXT:    sete %al
; X86-NEXT:    retl
bb:
  %lhs = load i64, ptr %arg1, align 1
  %rhs = load i64, ptr %arg, align 1
  %all_eq = icmp eq i64 %lhs, %rhs
  ret i1 %all_eq
}