; xref: /llvm-project/llvm/test/CodeGen/RISCV/rvv/vfirst-byte-compare-index.ll (revision 9122c5235ec85ce0c0ad337e862b006e7b349d84)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=riscv64 -mattr=+v < %s | FileCheck %s

; Testing VFIRST patterns related to llvm/test/Transforms/LoopIdiom/RISCV/byte-compare-index.ll

; compare_bytes_simple(a, b, len, n): starting at index len + 1, find the first
; index at which the bytes of %a and %b differ, returning %n when the scan
; reaches %n without a mismatch.
;
; The IR has three phases:
;   * mismatch_min_it_check / mismatch_mem_check: guards that pick the
;     vector loop only when (len + 1) <= n (unsigned) and, for each pointer,
;     the addresses at offsets (len + 1) and n agree after a right shift by 12
;     (presumably a same-4KiB-page test so the vector loads cannot fault).
;   * mismatch_vec_loop: VP-predicated loop that loads both buffers, compares
;     them with vp.icmp "ne", and uses vp.cttz.elts to locate the first set
;     mask bit; a result different from the active vector length means a
;     mismatch was found.
;   * mismatch_loop: scalar byte-at-a-time fallback.
;
; The assertions below expect vp.cttz.elts to be lowered to vfirst.m, followed
; by a bltz/mv sequence that substitutes the vector length when vfirst.m
; yields a negative value.
define i32 @compare_bytes_simple(ptr %a, ptr %b, i32 signext %len, i32 signext %n) {
; CHECK-LABEL: compare_bytes_simple:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    addiw a5, a2, 1
; CHECK-NEXT:    bltu a3, a5, .LBB0_7
; CHECK-NEXT:  # %bb.1: # %mismatch_mem_check
; CHECK-NEXT:    slli a2, a5, 32
; CHECK-NEXT:    slli a4, a3, 32
; CHECK-NEXT:    srli a2, a2, 32
; CHECK-NEXT:    srli a4, a4, 32
; CHECK-NEXT:    add a6, a0, a2
; CHECK-NEXT:    add a7, a0, a4
; CHECK-NEXT:    srli a6, a6, 12
; CHECK-NEXT:    srli a7, a7, 12
; CHECK-NEXT:    bne a6, a7, .LBB0_7
; CHECK-NEXT:  # %bb.2: # %mismatch_mem_check
; CHECK-NEXT:    add a6, a1, a2
; CHECK-NEXT:    add a7, a1, a4
; CHECK-NEXT:    srli a6, a6, 12
; CHECK-NEXT:    srli a7, a7, 12
; CHECK-NEXT:    bne a6, a7, .LBB0_7
; CHECK-NEXT:  .LBB0_3: # %mismatch_vec_loop
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    sub a5, a4, a2
; CHECK-NEXT:    add a6, a0, a2
; CHECK-NEXT:    add a7, a1, a2
; CHECK-NEXT:    vsetvli a5, a5, e8, m2, ta, ma
; CHECK-NEXT:    vle8.v v8, (a6)
; CHECK-NEXT:    vle8.v v10, (a7)
; CHECK-NEXT:    vmsne.vv v12, v8, v10
; CHECK-NEXT:    vfirst.m a7, v12
; CHECK-NEXT:    mv a6, a5
; CHECK-NEXT:    bltz a7, .LBB0_5
; CHECK-NEXT:  # %bb.4: # %mismatch_vec_loop
; CHECK-NEXT:    # in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT:    mv a6, a7
; CHECK-NEXT:  .LBB0_5: # %mismatch_vec_loop
; CHECK-NEXT:    # in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT:    sext.w a7, a6
; CHECK-NEXT:    bne a7, a5, .LBB0_11
; CHECK-NEXT:  # %bb.6: # %mismatch_vec_loop_inc
; CHECK-NEXT:    # in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT:    add a2, a2, a5
; CHECK-NEXT:    bne a2, a4, .LBB0_3
; CHECK-NEXT:    j .LBB0_9
; CHECK-NEXT:  .LBB0_7: # %mismatch_loop
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    slli a2, a5, 32
; CHECK-NEXT:    srli a2, a2, 32
; CHECK-NEXT:    add a4, a0, a2
; CHECK-NEXT:    add a2, a1, a2
; CHECK-NEXT:    lbu a4, 0(a4)
; CHECK-NEXT:    lbu a2, 0(a2)
; CHECK-NEXT:    bne a4, a2, .LBB0_10
; CHECK-NEXT:  # %bb.8: # %mismatch_loop_inc
; CHECK-NEXT:    # in Loop: Header=BB0_7 Depth=1
; CHECK-NEXT:    addiw a5, a5, 1
; CHECK-NEXT:    bne a3, a5, .LBB0_7
; CHECK-NEXT:  .LBB0_9: # %while.end
; CHECK-NEXT:    mv a0, a3
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB0_10:
; CHECK-NEXT:    mv a0, a5
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB0_11: # %mismatch_vec_loop_found
; CHECK-NEXT:    slli a6, a6, 32
; CHECK-NEXT:    srli a3, a6, 32
; CHECK-NEXT:    add a0, a2, a3
; CHECK-NEXT:    ret
entry:
  ; %0 is the first index to compare (len + 1, wrapping in i32).
  %0 = add i32 %len, 1
  br label %mismatch_min_it_check

mismatch_min_it_check:                            ; preds = %entry
  ; Use the vector path only when the start index does not exceed %n
  ; (unsigned compare); otherwise fall back to the scalar loop.
  %1 = zext i32 %0 to i64
  %2 = zext i32 %n to i64
  %3 = icmp ule i32 %0, %n
  br i1 %3, label %mismatch_mem_check, label %mismatch_loop_pre

mismatch_mem_check:                               ; preds = %mismatch_min_it_check
  ; For each of %a and %b, compare (address >> 12) at the start offset and at
  ; offset %n. If either pair differs, take the scalar loop instead.
  %4 = getelementptr i8, ptr %a, i64 %1
  %5 = getelementptr i8, ptr %b, i64 %1
  %6 = ptrtoint ptr %5 to i64
  %7 = ptrtoint ptr %4 to i64
  %8 = getelementptr i8, ptr %a, i64 %2
  %9 = getelementptr i8, ptr %b, i64 %2
  %10 = ptrtoint ptr %8 to i64
  %11 = ptrtoint ptr %9 to i64
  %12 = lshr i64 %7, 12
  %13 = lshr i64 %10, 12
  %14 = lshr i64 %6, 12
  %15 = lshr i64 %11, 12
  %16 = icmp ne i64 %12, %13
  %17 = icmp ne i64 %14, %15
  %18 = or i1 %16, %17
  br i1 %18, label %mismatch_loop_pre, label %mismatch_vec_loop_preheader

mismatch_vec_loop_preheader:                      ; preds = %mismatch_mem_check
  br label %mismatch_vec_loop

mismatch_vec_loop:                                ; preds = %mismatch_vec_loop_inc, %mismatch_vec_loop_preheader
  ; %avl is the number of bytes still to compare; %19 is the vector length
  ; granted for this iteration and is passed as the EVL of every VP call below.
  %mismatch_vector_index = phi i64 [ %1, %mismatch_vec_loop_preheader ], [ %25, %mismatch_vec_loop_inc ]
  %avl = sub nuw nsw i64 %2, %mismatch_vector_index
  %19 = call i32 @llvm.experimental.get.vector.length.i64(i64 %avl, i32 16, i1 true)
  %20 = getelementptr inbounds i8, ptr %a, i64 %mismatch_vector_index
  %lhs.load = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr %20, <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 %19)
  %21 = getelementptr inbounds i8, ptr %b, i64 %mismatch_vector_index
  %rhs.load = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr %21, <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 %19)
  %mismatch.cmp = call <vscale x 16 x i1> @llvm.vp.icmp.nxv16i8(<vscale x 16 x i8> %lhs.load, <vscale x 16 x i8> %rhs.load, metadata !"ne", <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 %19)
  ; Index of the first differing lane; this is the call the test expects to
  ; become vfirst.m. A result other than %19 means a mismatch was found.
  %22 = call i32 @llvm.vp.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> %mismatch.cmp, i1 false, <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 %19)
  %23 = icmp ne i32 %22, %19
  br i1 %23, label %mismatch_vec_loop_found, label %mismatch_vec_loop_inc

mismatch_vec_loop_inc:                            ; preds = %mismatch_vec_loop
  ; Advance by the number of lanes just processed; stop once the index hits %n.
  %24 = zext i32 %19 to i64
  %25 = add nuw nsw i64 %mismatch_vector_index, %24
  %26 = icmp ne i64 %25, %2
  br i1 %26, label %mismatch_vec_loop, label %mismatch_end

mismatch_vec_loop_found:                          ; preds = %mismatch_vec_loop
  ; Mismatch position = chunk base index + lane offset, truncated to i32.
  %ctz = phi i32 [ %22, %mismatch_vec_loop ]
  %mismatch_vector_index1 = phi i64 [ %mismatch_vector_index, %mismatch_vec_loop ]
  %27 = zext i32 %ctz to i64
  %28 = add nuw nsw i64 %mismatch_vector_index1, %27
  %29 = trunc i64 %28 to i32
  br label %mismatch_end

mismatch_loop_pre:                                ; preds = %mismatch_mem_check, %mismatch_min_it_check
  br label %mismatch_loop

mismatch_loop:                                    ; preds = %mismatch_loop_inc, %mismatch_loop_pre
  ; Scalar fallback: compare one byte per iteration starting at %0.
  %mismatch_index = phi i32 [ %0, %mismatch_loop_pre ], [ %36, %mismatch_loop_inc ]
  %30 = zext i32 %mismatch_index to i64
  %31 = getelementptr inbounds i8, ptr %a, i64 %30
  %32 = load i8, ptr %31, align 1
  %33 = getelementptr inbounds i8, ptr %b, i64 %30
  %34 = load i8, ptr %33, align 1
  %35 = icmp eq i8 %32, %34
  br i1 %35, label %mismatch_loop_inc, label %mismatch_end

mismatch_loop_inc:                                ; preds = %mismatch_loop
  %36 = add i32 %mismatch_index, 1
  %37 = icmp eq i32 %36, %n
  br i1 %37, label %mismatch_end, label %mismatch_loop

mismatch_end:                                     ; preds = %mismatch_loop_inc, %mismatch_loop, %mismatch_vec_loop_found, %mismatch_vec_loop_inc
  ; Merge the result of whichever path ran. The branch condition is the
  ; constant true, so %while.cond and %while.body below are never entered.
  %mismatch_result = phi i32 [ %n, %mismatch_loop_inc ], [ %mismatch_index, %mismatch_loop ], [ %n, %mismatch_vec_loop_inc ], [ %29, %mismatch_vec_loop_found ]
  br i1 true, label %byte.compare, label %while.cond

while.cond:                                       ; preds = %mismatch_end, %while.body
  %len.addr = phi i32 [ %len, %mismatch_end ], [ %mismatch_result, %while.body ]
  %inc = add i32 %len.addr, 1
  %cmp.not = icmp eq i32 %mismatch_result, %n
  br i1 %cmp.not, label %while.end, label %while.body

while.body:                                       ; preds = %while.cond
  %idxprom = zext i32 %mismatch_result to i64
  %arrayidx = getelementptr inbounds i8, ptr %a, i64 %idxprom
  %38 = load i8, ptr %arrayidx, align 1
  %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %idxprom
  %39 = load i8, ptr %arrayidx2, align 1
  %cmp.not2 = icmp eq i8 %38, %39
  br i1 %cmp.not2, label %while.cond, label %while.end

byte.compare:                                     ; preds = %mismatch_end
  br label %while.end

while.end:                                        ; preds = %byte.compare, %while.body, %while.cond
  ; Every incoming edge carries %mismatch_result, which is the return value.
  %inc.lcssa = phi i32 [ %mismatch_result, %while.body ], [ %mismatch_result, %while.cond ], [ %mismatch_result, %byte.compare ]
  ret i32 %inc.lcssa
}
1778b55d342SMin-Yih Hsu
178