; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=riscv64 -mattr=+v < %s | FileCheck %s

; Testing VFIRST patterns related to llvm/test/Transforms/LoopIdiom/RISCV/byte-compare-index.ll
;
; @compare_bytes_simple scans %a and %b starting at index %len + 1 and returns
; the first index at which the two bytes differ, or %n once the index reaches
; %n without a difference. The IR has two search loops: a vector-predicated
; loop (vp.load / vp.icmp ne / vp.cttz.elts) and a scalar byte-by-byte
; fallback. The assertions below expect the vector loop's mask search to be
; emitted as vmsne.vv followed by vfirst.m, with a negative vfirst.m result
; replaced by the vector length (the mv / bltz / mv sequence).

define i32 @compare_bytes_simple(ptr %a, ptr %b, i32 signext %len, i32 signext %n) {
; CHECK-LABEL: compare_bytes_simple:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    addiw a5, a2, 1
; CHECK-NEXT:    bltu a3, a5, .LBB0_7
; CHECK-NEXT:  # %bb.1: # %mismatch_mem_check
; CHECK-NEXT:    slli a2, a5, 32
; CHECK-NEXT:    slli a4, a3, 32
; CHECK-NEXT:    srli a2, a2, 32
; CHECK-NEXT:    srli a4, a4, 32
; CHECK-NEXT:    add a6, a0, a2
; CHECK-NEXT:    add a7, a0, a4
; CHECK-NEXT:    srli a6, a6, 12
; CHECK-NEXT:    srli a7, a7, 12
; CHECK-NEXT:    bne a6, a7, .LBB0_7
; CHECK-NEXT:  # %bb.2: # %mismatch_mem_check
; CHECK-NEXT:    add a6, a1, a2
; CHECK-NEXT:    add a7, a1, a4
; CHECK-NEXT:    srli a6, a6, 12
; CHECK-NEXT:    srli a7, a7, 12
; CHECK-NEXT:    bne a6, a7, .LBB0_7
; CHECK-NEXT:  .LBB0_3: # %mismatch_vec_loop
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    sub a5, a4, a2
; CHECK-NEXT:    add a6, a0, a2
; CHECK-NEXT:    add a7, a1, a2
; CHECK-NEXT:    vsetvli a5, a5, e8, m2, ta, ma
; CHECK-NEXT:    vle8.v v8, (a6)
; CHECK-NEXT:    vle8.v v10, (a7)
; CHECK-NEXT:    vmsne.vv v12, v8, v10
; CHECK-NEXT:    vfirst.m a7, v12
; CHECK-NEXT:    mv a6, a5
; CHECK-NEXT:    bltz a7, .LBB0_5
; CHECK-NEXT:  # %bb.4: # %mismatch_vec_loop
; CHECK-NEXT:    # in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT:    mv a6, a7
; CHECK-NEXT:  .LBB0_5: # %mismatch_vec_loop
; CHECK-NEXT:    # in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT:    sext.w a7, a6
; CHECK-NEXT:    bne a7, a5, .LBB0_11
; CHECK-NEXT:  # %bb.6: # %mismatch_vec_loop_inc
; CHECK-NEXT:    # in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT:    add a2, a2, a5
; CHECK-NEXT:    bne a2, a4, .LBB0_3
; CHECK-NEXT:    j .LBB0_9
; CHECK-NEXT:  .LBB0_7: # %mismatch_loop
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    slli a2, a5, 32
; CHECK-NEXT:    srli a2, a2, 32
; CHECK-NEXT:    add a4, a0, a2
; CHECK-NEXT:    add a2, a1, a2
; CHECK-NEXT:    lbu a4, 0(a4)
; CHECK-NEXT:    lbu a2, 0(a2)
; CHECK-NEXT:    bne a4, a2, .LBB0_10
; CHECK-NEXT:  # %bb.8: # %mismatch_loop_inc
; CHECK-NEXT:    # in Loop: Header=BB0_7 Depth=1
; CHECK-NEXT:    addiw a5, a5, 1
; CHECK-NEXT:    bne a3, a5, .LBB0_7
; CHECK-NEXT:  .LBB0_9: # %while.end
; CHECK-NEXT:    mv a0, a3
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB0_10:
; CHECK-NEXT:    mv a0, a5
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB0_11: # %mismatch_vec_loop_found
; CHECK-NEXT:    slli a6, a6, 32
; CHECK-NEXT:    srli a3, a6, 32
; CHECK-NEXT:    add a0, a2, a3
; CHECK-NEXT:    ret
entry:
  ; %0 is the first index to compare: %len + 1 (32-bit add, may wrap).
  %0 = add i32 %len, 1
  br label %mismatch_min_it_check

; Take the vector path only when the start index does not exceed %n
; (unsigned compare); otherwise fall back to the scalar loop.
mismatch_min_it_check:                            ; preds = %entry
  %1 = zext i32 %0 to i64
  %2 = zext i32 %n to i64
  %3 = icmp ule i32 %0, %n
  br i1 %3, label %mismatch_mem_check, label %mismatch_loop_pre

; For each buffer, compare (address >> 12) of the first byte (%a/%b + %1) with
; that of the end position (%a/%b + %2). If either pair differs, i.e. the range
; is not contained in one 4096-byte-aligned region, use the scalar loop.
; NOTE(review): presumably this keeps the vector loads from touching a
; different page than the scalar loop would; confirm against the LoopIdiom
; pass that produces this shape.
mismatch_mem_check:                               ; preds = %mismatch_min_it_check
  %4 = getelementptr i8, ptr %a, i64 %1
  %5 = getelementptr i8, ptr %b, i64 %1
  %6 = ptrtoint ptr %5 to i64
  %7 = ptrtoint ptr %4 to i64
  %8 = getelementptr i8, ptr %a, i64 %2
  %9 = getelementptr i8, ptr %b, i64 %2
  %10 = ptrtoint ptr %8 to i64
  %11 = ptrtoint ptr %9 to i64
  %12 = lshr i64 %7, 12
  %13 = lshr i64 %10, 12
  %14 = lshr i64 %6, 12
  %15 = lshr i64 %11, 12
  %16 = icmp ne i64 %12, %13
  %17 = icmp ne i64 %14, %15
  %18 = or i1 %16, %17
  br i1 %18, label %mismatch_loop_pre, label %mismatch_vec_loop_preheader

mismatch_vec_loop_preheader:                      ; preds = %mismatch_mem_check
  br label %mismatch_vec_loop

; Vector search loop. Each iteration:
;   * computes the remaining element count %avl = %2 - index,
;   * requests a vector length %19 for it (get.vector.length, scalable, VF 16),
;   * loads %19 bytes from both buffers with an all-true mask,
;   * builds the "bytes differ" mask and counts its trailing zero elements.
; The branch treats a count different from %19 as "a differing byte was found
; in this chunk".
mismatch_vec_loop:                                ; preds = %mismatch_vec_loop_inc, %mismatch_vec_loop_preheader
  %mismatch_vector_index = phi i64 [ %1, %mismatch_vec_loop_preheader ], [ %25, %mismatch_vec_loop_inc ]
  %avl = sub nuw nsw i64 %2, %mismatch_vector_index
  %19 = call i32 @llvm.experimental.get.vector.length.i64(i64 %avl, i32 16, i1 true)
  %20 = getelementptr inbounds i8, ptr %a, i64 %mismatch_vector_index
  %lhs.load = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr %20, <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 %19)
  %21 = getelementptr inbounds i8, ptr %b, i64 %mismatch_vector_index
  %rhs.load = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr %21, <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 %19)
  %mismatch.cmp = call <vscale x 16 x i1> @llvm.vp.icmp.nxv16i8(<vscale x 16 x i8> %lhs.load, <vscale x 16 x i8> %rhs.load, metadata !"ne", <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 %19)
  %22 = call i32 @llvm.vp.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> %mismatch.cmp, i1 false, <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 %19)
  %23 = icmp ne i32 %22, %19
  br i1 %23, label %mismatch_vec_loop_found, label %mismatch_vec_loop_inc

; No difference in this chunk: advance the index by the processed length and
; leave the loop (with result %n) once the index equals %2.
mismatch_vec_loop_inc:                            ; preds = %mismatch_vec_loop
  %24 = zext i32 %19 to i64
  %25 = add nuw nsw i64 %mismatch_vector_index, %24
  %26 = icmp ne i64 %25, %2
  br i1 %26, label %mismatch_vec_loop, label %mismatch_end

; Difference found: result = chunk start index + lane offset, truncated to i32.
mismatch_vec_loop_found:                          ; preds = %mismatch_vec_loop
  %ctz = phi i32 [ %22, %mismatch_vec_loop ]
  %mismatch_vector_index1 = phi i64 [ %mismatch_vector_index, %mismatch_vec_loop ]
  %27 = zext i32 %ctz to i64
  %28 = add nuw nsw i64 %mismatch_vector_index1, %27
  %29 = trunc i64 %28 to i32
  br label %mismatch_end

mismatch_loop_pre:                                ; preds = %mismatch_mem_check, %mismatch_min_it_check
  br label %mismatch_loop

; Scalar fallback: compare one byte per iteration starting at %0. Exits with
; the current index on the first differing byte, or with %n when the
; incremented index equals %n.
mismatch_loop:                                    ; preds = %mismatch_loop_inc, %mismatch_loop_pre
  %mismatch_index = phi i32 [ %0, %mismatch_loop_pre ], [ %36, %mismatch_loop_inc ]
  %30 = zext i32 %mismatch_index to i64
  %31 = getelementptr inbounds i8, ptr %a, i64 %30
  %32 = load i8, ptr %31, align 1
  %33 = getelementptr inbounds i8, ptr %b, i64 %30
  %34 = load i8, ptr %33, align 1
  %35 = icmp eq i8 %32, %34
  br i1 %35, label %mismatch_loop_inc, label %mismatch_end

mismatch_loop_inc:                                ; preds = %mismatch_loop
  %36 = add i32 %mismatch_index, 1
  %37 = icmp eq i32 %36, %n
  br i1 %37, label %mismatch_end, label %mismatch_loop

; Merge point for all four loop exits. The branch condition is the constant
; true, so control always continues to %byte.compare; %while.cond and
; %while.body below have no other entry and are therefore never executed.
mismatch_end:                                     ; preds = %mismatch_loop_inc, %mismatch_loop, %mismatch_vec_loop_found, %mismatch_vec_loop_inc
  %mismatch_result = phi i32 [ %n, %mismatch_loop_inc ], [ %mismatch_index, %mismatch_loop ], [ %n, %mismatch_vec_loop_inc ], [ %29, %mismatch_vec_loop_found ]
  br i1 true, label %byte.compare, label %while.cond

while.cond:                                       ; preds = %mismatch_end, %while.body
  %len.addr = phi i32 [ %len, %mismatch_end ], [ %mismatch_result, %while.body ]
  %inc = add i32 %len.addr, 1
  %cmp.not = icmp eq i32 %mismatch_result, %n
  br i1 %cmp.not, label %while.end, label %while.body

while.body:                                       ; preds = %while.cond
  %idxprom = zext i32 %mismatch_result to i64
  %arrayidx = getelementptr inbounds i8, ptr %a, i64 %idxprom
  %38 = load i8, ptr %arrayidx, align 1
  %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %idxprom
  %39 = load i8, ptr %arrayidx2, align 1
  %cmp.not2 = icmp eq i8 %38, %39
  br i1 %cmp.not2, label %while.cond, label %while.end

byte.compare:                                     ; preds = %mismatch_end
  br label %while.end

; Every incoming edge carries %mismatch_result, which is the function result.
while.end:                                        ; preds = %byte.compare, %while.body, %while.cond
  %inc.lcssa = phi i32 [ %mismatch_result, %while.body ], [ %mismatch_result, %while.cond ], [ %mismatch_result, %byte.compare ]
  ret i32 %inc.lcssa
}