; RUN: llc -mtriple=hexagon < %s

; This test checks for a compiler assert, so it only needs to compile
; (llc exiting successfully) in order to pass.
;
; NOTE(review): the shufflevector mask constants were missing from the text
; under review. The masks below were reconstructed from the operand/result
; types and the surrounding address arithmetic (even/odd deinterleave of two
; 64-byte loads, concatenation to 128 bytes, and low/high 32-lane halves of the
; 64 x i16 product). Confirm them against the upstream copy of this test.

target triple = "hexagon-unknown--elf"

%struct.buffer_t = type { i64, ptr, [4 x i32], [4 x i32], [4 x i32], i32, i8, i8, [6 x i8] }

; Function Attrs: norecurse nounwind
define i32 @__testOne(ptr noalias nocapture readonly %inputOne.buffer, ptr noalias nocapture readonly %inputTwo.buffer, ptr noalias nocapture readonly %testOne.buffer) #0 {
entry:
  ; Read field 1 (a pointer) and element 0 of field 4 from each input buffer,
  ; and additionally element 0 of field 2 from the output buffer.
  %buf_host = getelementptr inbounds %struct.buffer_t, ptr %inputOne.buffer, i32 0, i32 1
  %inputOne.host = load ptr, ptr %buf_host, align 4
  %buf_min = getelementptr inbounds %struct.buffer_t, ptr %inputOne.buffer, i32 0, i32 4, i32 0
  %inputOne.min.0 = load i32, ptr %buf_min, align 4
  %buf_host10 = getelementptr inbounds %struct.buffer_t, ptr %inputTwo.buffer, i32 0, i32 1
  %inputTwo.host = load ptr, ptr %buf_host10, align 4
  %buf_min22 = getelementptr inbounds %struct.buffer_t, ptr %inputTwo.buffer, i32 0, i32 4, i32 0
  %inputTwo.min.0 = load i32, ptr %buf_min22, align 4
  %buf_host27 = getelementptr inbounds %struct.buffer_t, ptr %testOne.buffer, i32 0, i32 1
  %testOne.host = load ptr, ptr %buf_host27, align 4
  %buf_extent31 = getelementptr inbounds %struct.buffer_t, ptr %testOne.buffer, i32 0, i32 2, i32 0
  %testOne.extent.0 = load i32, ptr %buf_extent31, align 4
  %buf_min39 = getelementptr inbounds %struct.buffer_t, ptr %testOne.buffer, i32 0, i32 4, i32 0
  %testOne.min.0 = load i32, ptr %buf_min39, align 4
  ; %0 = extent >> 6 is the trip count of the main loop; skip it when <= 0.
  %0 = ashr i32 %testOne.extent.0, 6
  %1 = icmp sgt i32 %0, 0
  br i1 %1, label %"for testOne.s0.x.x.preheader", label %"end for testOne.s0.x.x"

"for testOne.s0.x.x.preheader":                   ; preds = %entry
  %2 = bitcast ptr %testOne.host to ptr
  br label %"for testOne.s0.x.x"

"for testOne.s0.x.x":                             ; preds = %"for testOne.s0.x.x", %"for testOne.s0.x.x.preheader"
  ; %.phi is the running output pointer (advanced by 64 i16 per iteration),
  ; %testOne.s0.x.x is the iteration counter.
  %.phi = phi ptr [ %2, %"for testOne.s0.x.x.preheader" ], [ %.inc, %"for testOne.s0.x.x" ]
  %testOne.s0.x.x = phi i32 [ 0, %"for testOne.s0.x.x.preheader" ], [ %38, %"for testOne.s0.x.x" ]
  %3 = shl nsw i32 %testOne.s0.x.x, 6
  %4 = add nsw i32 %3, %testOne.min.0
  %5 = shl nsw i32 %4, 1
  ; Two consecutive 64-byte loads from inputOne, then deinterleave + concat.
  %6 = sub nsw i32 %5, %inputOne.min.0
  %7 = getelementptr inbounds i8, ptr %inputOne.host, i32 %6
  %8 = bitcast ptr %7 to ptr
  %9 = load <64 x i8>, ptr %8, align 1, !tbaa !5
  %10 = add nsw i32 %6, 64
  %11 = getelementptr inbounds i8, ptr %inputOne.host, i32 %10
  %12 = bitcast ptr %11 to ptr
  %13 = load <64 x i8>, ptr %12, align 1, !tbaa !5
  %14 = shufflevector <64 x i8> %9, <64 x i8> %13, <64 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 32, i32 34, i32 36, i32 38, i32 40, i32 42, i32 44, i32 46, i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32 62, i32 64, i32 66, i32 68, i32 70, i32 72, i32 74, i32 76, i32 78, i32 80, i32 82, i32 84, i32 86, i32 88, i32 90, i32 92, i32 94, i32 96, i32 98, i32 100, i32 102, i32 104, i32 106, i32 108, i32 110, i32 112, i32 114, i32 116, i32 118, i32 120, i32 122, i32 124, i32 126>
  %15 = shufflevector <64 x i8> %9, <64 x i8> %13, <64 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31, i32 33, i32 35, i32 37, i32 39, i32 41, i32 43, i32 45, i32 47, i32 49, i32 51, i32 53, i32 55, i32 57, i32 59, i32 61, i32 63, i32 65, i32 67, i32 69, i32 71, i32 73, i32 75, i32 77, i32 79, i32 81, i32 83, i32 85, i32 87, i32 89, i32 91, i32 93, i32 95, i32 97, i32 99, i32 101, i32 103, i32 105, i32 107, i32 109, i32 111, i32 113, i32 115, i32 117, i32 119, i32 121, i32 123, i32 125, i32 127>
  %16 = shufflevector <64 x i8> %14, <64 x i8> %15, <128 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 64, i32 65, i32 66, i32 67, i32 68, i32 69, i32 70, i32 71, i32 72, i32 73, i32 74, i32 75, i32 76, i32 77, i32 78, i32 79, i32 80, i32 81, i32 82, i32 83, i32 84, i32 85, i32 86, i32 87, i32 88, i32 89, i32 90, i32 91, i32 92, i32 93, i32 94, i32 95, i32 96, i32 97, i32 98, i32 99, i32 100, i32 101, i32 102, i32 103, i32 104, i32 105, i32 106, i32 107, i32 108, i32 109, i32 110, i32 111, i32 112, i32 113, i32 114, i32 115, i32 116, i32 117, i32 118, i32 119, i32 120, i32 121, i32 122, i32 123, i32 124, i32 125, i32 126, i32 127>
  ; Same load/deinterleave/concat sequence for inputTwo.
  %17 = sub nsw i32 %5, %inputTwo.min.0
  %18 = getelementptr inbounds i8, ptr %inputTwo.host, i32 %17
  %19 = bitcast ptr %18 to ptr
  %20 = load <64 x i8>, ptr %19, align 1, !tbaa !8
  %21 = add nsw i32 %17, 64
  %22 = getelementptr inbounds i8, ptr %inputTwo.host, i32 %21
  %23 = bitcast ptr %22 to ptr
  %24 = load <64 x i8>, ptr %23, align 1, !tbaa !8
  %25 = shufflevector <64 x i8> %20, <64 x i8> %24, <64 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 32, i32 34, i32 36, i32 38, i32 40, i32 42, i32 44, i32 46, i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32 62, i32 64, i32 66, i32 68, i32 70, i32 72, i32 74, i32 76, i32 78, i32 80, i32 82, i32 84, i32 86, i32 88, i32 90, i32 92, i32 94, i32 96, i32 98, i32 100, i32 102, i32 104, i32 106, i32 108, i32 110, i32 112, i32 114, i32 116, i32 118, i32 120, i32 122, i32 124, i32 126>
  %26 = shufflevector <64 x i8> %20, <64 x i8> %24, <64 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31, i32 33, i32 35, i32 37, i32 39, i32 41, i32 43, i32 45, i32 47, i32 49, i32 51, i32 53, i32 55, i32 57, i32 59, i32 61, i32 63, i32 65, i32 67, i32 69, i32 71, i32 73, i32 75, i32 77, i32 79, i32 81, i32 83, i32 85, i32 87, i32 89, i32 91, i32 93, i32 95, i32 97, i32 99, i32 101, i32 103, i32 105, i32 107, i32 109, i32 111, i32 113, i32 115, i32 117, i32 119, i32 121, i32 123, i32 125, i32 127>
  %27 = shufflevector <64 x i8> %25, <64 x i8> %26, <128 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 64, i32 65, i32 66, i32 67, i32 68, i32 69, i32 70, i32 71, i32 72, i32 73, i32 74, i32 75, i32 76, i32 77, i32 78, i32 79, i32 80, i32 81, i32 82, i32 83, i32 84, i32 85, i32 86, i32 87, i32 88, i32 89, i32 90, i32 91, i32 92, i32 93, i32 94, i32 95, i32 96, i32 97, i32 98, i32 99, i32 100, i32 101, i32 102, i32 103, i32 104, i32 105, i32 106, i32 107, i32 108, i32 109, i32 110, i32 111, i32 112, i32 113, i32 114, i32 115, i32 116, i32 117, i32 118, i32 119, i32 120, i32 121, i32 122, i32 123, i32 124, i32 125, i32 126, i32 127>
  ; Feed both 128-byte vectors to the vmpabuuv intrinsic as <32 x i32>.
  %28 = bitcast <128 x i8> %16 to <32 x i32>
  %29 = bitcast <128 x i8> %27 to <32 x i32>
  %30 = tail call <32 x i32> @llvm.hexagon.V6.vmpabuuv(<32 x i32> %28, <32 x i32> %29)
  %31 = bitcast <32 x i32> %30 to <64 x i16>
  ; Store the result as two 32 x i16 halves: one through %.phi, one at
  ; element index (%3 | 32) from the output base.
  %32 = shufflevector <64 x i16> %31, <64 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %33 = bitcast ptr %.phi to ptr
  store <32 x i16> %32, ptr %33, align 2, !tbaa !10
  %34 = shufflevector <64 x i16> %31, <64 x i16> undef, <32 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
  %35 = or i32 %3, 32
  %36 = getelementptr inbounds i16, ptr %2, i32 %35
  %37 = bitcast ptr %36 to ptr
  store <32 x i16> %34, ptr %37, align 2, !tbaa !10
  %38 = add nuw nsw i32 %testOne.s0.x.x, 1
  %39 = icmp eq i32 %38, %0
  %.inc = getelementptr i16, ptr %.phi, i32 64
  br i1 %39, label %"end for testOne.s0.x.x", label %"for testOne.s0.x.x"

"end for testOne.s0.x.x":                         ; preds = %"for testOne.s0.x.x", %entry
  ; Run the tail block once when (extent + 63) >> 6 exceeds extent >> 6.
  %40 = add nsw i32 %testOne.extent.0, 63
  %41 = ashr i32 %40, 6
  %42 = icmp sgt i32 %41, %0
  br i1 %42, label %"for testOne.s0.x.x44.preheader", label %destructor_block

"for testOne.s0.x.x44.preheader":                 ; preds = %"end for testOne.s0.x.x"
  ; Same computation as the loop body, addressed backwards from
  ; min + extent (input offsets -128/-64 bytes, output offsets -64/-32 i16).
  %43 = add nsw i32 %testOne.min.0, %testOne.extent.0
  %44 = shl nsw i32 %43, 1
  %45 = sub nsw i32 %44, %inputOne.min.0
  %46 = add nsw i32 %45, -128
  %47 = getelementptr inbounds i8, ptr %inputOne.host, i32 %46
  %48 = bitcast ptr %47 to ptr
  %49 = load <64 x i8>, ptr %48, align 1
  %50 = add nsw i32 %45, -64
  %51 = getelementptr inbounds i8, ptr %inputOne.host, i32 %50
  %52 = bitcast ptr %51 to ptr
  %53 = load <64 x i8>, ptr %52, align 1
  %54 = shufflevector <64 x i8> %49, <64 x i8> %53, <64 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 32, i32 34, i32 36, i32 38, i32 40, i32 42, i32 44, i32 46, i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32 62, i32 64, i32 66, i32 68, i32 70, i32 72, i32 74, i32 76, i32 78, i32 80, i32 82, i32 84, i32 86, i32 88, i32 90, i32 92, i32 94, i32 96, i32 98, i32 100, i32 102, i32 104, i32 106, i32 108, i32 110, i32 112, i32 114, i32 116, i32 118, i32 120, i32 122, i32 124, i32 126>
  %55 = shufflevector <64 x i8> %49, <64 x i8> %53, <64 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31, i32 33, i32 35, i32 37, i32 39, i32 41, i32 43, i32 45, i32 47, i32 49, i32 51, i32 53, i32 55, i32 57, i32 59, i32 61, i32 63, i32 65, i32 67, i32 69, i32 71, i32 73, i32 75, i32 77, i32 79, i32 81, i32 83, i32 85, i32 87, i32 89, i32 91, i32 93, i32 95, i32 97, i32 99, i32 101, i32 103, i32 105, i32 107, i32 109, i32 111, i32 113, i32 115, i32 117, i32 119, i32 121, i32 123, i32 125, i32 127>
  %56 = shufflevector <64 x i8> %54, <64 x i8> %55, <128 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 64, i32 65, i32 66, i32 67, i32 68, i32 69, i32 70, i32 71, i32 72, i32 73, i32 74, i32 75, i32 76, i32 77, i32 78, i32 79, i32 80, i32 81, i32 82, i32 83, i32 84, i32 85, i32 86, i32 87, i32 88, i32 89, i32 90, i32 91, i32 92, i32 93, i32 94, i32 95, i32 96, i32 97, i32 98, i32 99, i32 100, i32 101, i32 102, i32 103, i32 104, i32 105, i32 106, i32 107, i32 108, i32 109, i32 110, i32 111, i32 112, i32 113, i32 114, i32 115, i32 116, i32 117, i32 118, i32 119, i32 120, i32 121, i32 122, i32 123, i32 124, i32 125, i32 126, i32 127>
  %57 = sub nsw i32 %44, %inputTwo.min.0
  %58 = add nsw i32 %57, -128
  %59 = getelementptr inbounds i8, ptr %inputTwo.host, i32 %58
  %60 = bitcast ptr %59 to ptr
  %61 = load <64 x i8>, ptr %60, align 1
  %62 = add nsw i32 %57, -64
  %63 = getelementptr inbounds i8, ptr %inputTwo.host, i32 %62
  %64 = bitcast ptr %63 to ptr
  %65 = load <64 x i8>, ptr %64, align 1
  %66 = shufflevector <64 x i8> %61, <64 x i8> %65, <64 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 32, i32 34, i32 36, i32 38, i32 40, i32 42, i32 44, i32 46, i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32 62, i32 64, i32 66, i32 68, i32 70, i32 72, i32 74, i32 76, i32 78, i32 80, i32 82, i32 84, i32 86, i32 88, i32 90, i32 92, i32 94, i32 96, i32 98, i32 100, i32 102, i32 104, i32 106, i32 108, i32 110, i32 112, i32 114, i32 116, i32 118, i32 120, i32 122, i32 124, i32 126>
  %67 = shufflevector <64 x i8> %61, <64 x i8> %65, <64 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31, i32 33, i32 35, i32 37, i32 39, i32 41, i32 43, i32 45, i32 47, i32 49, i32 51, i32 53, i32 55, i32 57, i32 59, i32 61, i32 63, i32 65, i32 67, i32 69, i32 71, i32 73, i32 75, i32 77, i32 79, i32 81, i32 83, i32 85, i32 87, i32 89, i32 91, i32 93, i32 95, i32 97, i32 99, i32 101, i32 103, i32 105, i32 107, i32 109, i32 111, i32 113, i32 115, i32 117, i32 119, i32 121, i32 123, i32 125, i32 127>
  %68 = shufflevector <64 x i8> %66, <64 x i8> %67, <128 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 64, i32 65, i32 66, i32 67, i32 68, i32 69, i32 70, i32 71, i32 72, i32 73, i32 74, i32 75, i32 76, i32 77, i32 78, i32 79, i32 80, i32 81, i32 82, i32 83, i32 84, i32 85, i32 86, i32 87, i32 88, i32 89, i32 90, i32 91, i32 92, i32 93, i32 94, i32 95, i32 96, i32 97, i32 98, i32 99, i32 100, i32 101, i32 102, i32 103, i32 104, i32 105, i32 106, i32 107, i32 108, i32 109, i32 110, i32 111, i32 112, i32 113, i32 114, i32 115, i32 116, i32 117, i32 118, i32 119, i32 120, i32 121, i32 122, i32 123, i32 124, i32 125, i32 126, i32 127>
  %69 = bitcast <128 x i8> %56 to <32 x i32>
  %70 = bitcast <128 x i8> %68 to <32 x i32>
  %71 = tail call <32 x i32> @llvm.hexagon.V6.vmpabuuv(<32 x i32> %69, <32 x i32> %70)
  %72 = bitcast <32 x i32> %71 to <64 x i16>
  %73 = add nsw i32 %testOne.extent.0, -64
  %74 = bitcast ptr %testOne.host to ptr
  %75 = getelementptr inbounds i16, ptr %74, i32 %73
  %76 = bitcast ptr %75 to ptr
  %77 = add nsw i32 %testOne.extent.0, -32
  %78 = getelementptr inbounds i16, ptr %74, i32 %77
  %79 = shufflevector <64 x i16> %72, <64 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %80 = shufflevector <64 x i16> %72, <64 x i16> undef, <32 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
  %81 = bitcast ptr %78 to ptr
  store <32 x i16> %79, ptr %76, align 2, !tbaa !10
  store <32 x i16> %80, ptr %81, align 2, !tbaa !10
  br label %destructor_block

destructor_block:                                 ; preds = %"for testOne.s0.x.x44.preheader", %"end for testOne.s0.x.x"
  ret i32 0
}

; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.vmpabuuv(<32 x i32>, <32 x i32>) #1

attributes #0 = { norecurse nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" }
attributes #1 = { nounwind readnone "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" }

!5 = !{!6, !6, i64 0}
!6 = !{!"inputOne", !7}
!7 = !{!"Halide buffer"}
!8 = !{!9, !9, i64 0}
!9 = !{!"inputTwo", !7}
!10 = !{!11, !11, i64 0}
!11 = !{!"testOne", !7}