xref: /openbsd-src/gnu/usr.bin/perl/cpan/Text-Tabs/lib/Text/Wrap.pm (revision 3d61058aa5c692477b6d18acfbbdb653a9930ff9)
1eac174f2Safresh1use strict; use warnings;
2eac174f2Safresh1
3b39c5158Smillertpackage Text::Wrap;
4b39c5158Smillert
5b39c5158Smillertuse warnings::register;
6b39c5158Smillert
# Borrow Exporter's import() directly instead of inheriting from Exporter.
BEGIN {
	require Exporter;
	*import = \&Exporter::import;
}

# wrap() and fill() are exported by default; the tunables only on request.
our @EXPORT = ( 'wrap', 'fill' );
our @EXPORT_OK = ( '$columns', '$break', '$huge' );

our $VERSION = '2024.001';
our $SUBVERSION = 'modern'; # back-compat vestige

# Compile-time constant: true when the regexp engine matches against the raw
# bytes of a string built from code point 0x80 (i.e. it sees the 0xC2 lead
# byte of the encoded form) rather than against characters.
BEGIN {
	my $sees_bytes = scalar( pack('U*', 0x80) =~ /\xc2/ );
	eval sprintf 'sub REGEXPS_USE_BYTES () { %d }', $sees_bytes;
}
16*3d61058aSafresh1
# Character class for breakable whitespace.  Where \s would also match the
# no-break spaces U+00A0 and U+202F, use a class that excludes them so that
# lines are never broken at a no-break space.
my $brkspc = '\s';
$brkspc = '[^\x{a0}\x{202f}\S]' if "\x{a0}\x{202f}" =~ /\s/;

# Output width; lines are filled to at most $columns - 1 characters.
our $columns = 76;  # <= screen width

# Pattern for a break point: a newline, a CRLF pair, or one breakable
# whitespace character together with any combining marks following it.
our $break = join( '', '(?>\n|\r\n|', $brkspc, '\pM*)' );

# Policy for words wider than a line.
our $huge = 'wrap'; # alternatively: 'die' or 'overflow'

# Convert runs of spaces back into tabs in the output when true.
our $unexpand = 1;

# Tab width handed to Text::Tabs while wrapping.
our $tabstop = 8;

# String placed between output lines.
our $separator = "\n";

# When defined, used instead of $separator for breaks that wrap() inserts,
# while newlines already present in the input are kept as "\n".
our $separator2;
26b39c5158Smillert
# _xlen($string)
#
# Display width of $string in columns: every non-mark character counts as
# one column and the combining marks attached to it count as zero.  A mark
# at the very start of the string has no base character to attach to, so
# that leading run of marks counts as one column.
sub _xlen {
	my ($text) = @_;
	my $orphan_mark = ( $text =~ /^\pM/ ) ? 1 : 0;
	my $base_chars = () = $text =~ /\PM/g;
	return $orphan_mark + $base_chars;
}
2891f110e0Safresh1
29b39c5158Smillertuse Text::Tabs qw(expand unexpand);
30b39c5158Smillert
# wrap($initial_tab, $subsequent_tab, @text)
#
# Formats @text as a single paragraph and returns it as one string.
#
# The text pieces are joined (a space is inserted after every piece but the
# last unless the piece already ends in whitespace), tabs are expanded, and
# the result is cut into lines at matches of $break.  The first line is
# prefixed with $initial_tab and every later line with $subsequent_tab;
# each line, prefix included, is at most $columns - 1 columns wide as
# measured by _xlen().  Undefined arguments are treated as ''.
#
# Package variables consulted: $columns, $break, $huge, $unexpand, $tabstop,
# $separator and $separator2.
#
# Side effects: $columns is raised (with a warning in the 'Text::Wrap'
# category) when it is too small to hold $subsequent_tab plus one character,
# or when it is below 2 and a word has to be split.
#
# Dies when $huge is 'die' and a word does not fit on a line.
sub wrap
{
	my ($ip, $xp, @t) = map +( defined $_ ? $_ : '' ), @_;

	local($Text::Tabs::tabstop) = $tabstop;
	my $r = "";
	# With no text arguments there is no final piece; use '' rather than
	# letting an undef reach join().
	my $tail = @t ? pop(@t) : '';
	my $t = expand(join("", (map { /\s+\z/ ? ( $_ ) : ($_, ' ') } @t), $tail));
	my $lead = $ip;

	# Room left for text on continuation lines.
	my $xp_width = _xlen(expand($xp));
	my $nll = $columns - $xp_width - 1;
	if ($nll <= 0 && $xp ne '') {
		my $nc = $xp_width + 2;
		warnings::warnif "Increasing \$Text::Wrap::columns from $columns to $nc to accommodate length of subsequent tab";
		$columns = $nc;
		$nll = 1;
	}

	# Room left for text on the first line.
	my $ll = $columns - _xlen(expand($ip)) - 1;
	$ll = 0 if $ll < 0;
	my $nl = "";
	my $remainder = "";

	use re 'taint';

	pos($t) = 0;
	# Keep going until only break characters remain.  Every match uses /gc
	# so that pos($t) survives a failed attempt and the next alternative
	# resumes from the same place via \G.
	while ($t !~ /\G(?:$break)*\Z/gc) {
		my $line;
		if ($t =~ /\G((?>(?!\n)\PM\pM*|(?<![^\n])\pM+){0,$ll})($break|\n+|\z)/xmgc) {
			# Up to $ll columns of text followed by a break: the normal case.
			($line, $remainder) = ($1, $2);
		} elsif ($huge eq 'wrap' && $t =~ /\G((?>(?!\n)\PM\pM*|(?<![^\n])\pM+){$ll})/gc) {
			# Word wider than the line: cut it after exactly $ll columns.
			$line = $1;
			$remainder = defined($separator2) ? $separator2 : $separator;
		} elsif ($huge eq 'overflow' && $t =~ /\G([^\n]*?)(?!(?<![^\n])\pM)($break|\n+|\z)/xmgc) {
			# Word wider than the line: let it run on to the next break.
			($line, $remainder) = ($1, $2);
		} elsif ($huge eq 'die') {
			die "couldn't wrap '$t'";
		} elsif ($columns < 2) {
			warnings::warnif "Increasing \$Text::Wrap::columns from $columns to 2";
			$columns = 2;
			# Start over with the widened setting so the caller still gets
			# the wrapped text as a single string.  Returning the raw
			# argument list here would give scalar-context callers such as
			# fill() the argument count instead of text.
			return wrap(@_);
		} else {
			die "This shouldn't happen";
		}

		$r .= $unexpand
			? unexpand($nl . $lead . $line)
			: $nl . $lead . $line;

		# Every line after the first uses the continuation prefix and width.
		$lead = $xp;
		$ll = $nll;
		$nl = defined($separator2)
			? ($remainder eq "\n"
				? "\n"
				: $separator2)
			: $separator;
	}
	$r .= $remainder;

	# Carry over anything the loop left unconsumed.
	$r .= $lead . substr($t, pos($t))
		if pos($t) != length($t);

	# the 5.6 regexp engine ignores the UTF8 flag, so using capture buffers acts as an implicit _utf8_off
	# that means on 5.6 we now have to manually set UTF8=on on the output if the input had it, for which
	# we extract just the UTF8 flag from the input and check if it forces chr(0x80) to become multibyte
	return REGEXPS_USE_BYTES && (substr($t,0,0)."\x80") =~ /\xc2/ ? pack('U0a*', $r) : $r;
}
99b39c5158Smillert
# fill($initial_tab, $subsequent_tab, @text)
#
# Multi-paragraph front end to wrap().  The text pieces are joined with
# newlines and split into paragraphs wherever a newline is followed by
# whitespace.  Inside each paragraph every run of whitespace is squeezed to
# a single space before the paragraph is handed to wrap().  The wrapped
# paragraphs are returned joined into one string.  Undefined arguments are
# treated as ''.
sub fill
{
	my ($ip, $xp, @raw) = map +( defined $_ ? $_ : '' ), @_;

	my @wrapped = map {
		( my $flat = $_ ) =~ s/\s+/ /g;
		scalar wrap($ip, $xp, $flat);
	} split(/\n\s+/, join("\n", @raw));

	# When the first-line indent equals the continuation indent nothing
	# marks where a paragraph starts, so put a blank line between paragraphs.
	my $gap = ($ip eq $xp) ? "\n\n" : "\n";
	return join($gap, @wrapped);
}
118b39c5158Smillert
119b39c5158Smillert1;
120eac174f2Safresh1
121b39c5158Smillert__END__
122b39c5158Smillert
123b39c5158Smillert=head1 NAME
124b39c5158Smillert
125b39c5158SmillertText::Wrap - line wrapping to form simple paragraphs
126b39c5158Smillert
127b39c5158Smillert=head1 SYNOPSIS
128b39c5158Smillert
129b39c5158SmillertB<Example 1>
130b39c5158Smillert
131b39c5158Smillert	use Text::Wrap;
132b39c5158Smillert
133b39c5158Smillert	$initial_tab = "\t";	# Tab before first line
134b39c5158Smillert	$subsequent_tab = "";	# All other lines flush left
135b39c5158Smillert
136b39c5158Smillert	print wrap($initial_tab, $subsequent_tab, @text);
137b39c5158Smillert	print fill($initial_tab, $subsequent_tab, @text);
138b39c5158Smillert
139b39c5158Smillert	$lines = wrap($initial_tab, $subsequent_tab, @text);
140b39c5158Smillert
141b39c5158Smillert	@paragraphs = fill($initial_tab, $subsequent_tab, @text);
142b39c5158Smillert
143b39c5158SmillertB<Example 2>
144b39c5158Smillert
145b39c5158Smillert	use Text::Wrap qw(wrap $columns $huge);
146b39c5158Smillert
147b39c5158Smillert	$columns = 132;		# Wrap at 132 characters
148b39c5158Smillert	$huge = 'die';
149b39c5158Smillert	$huge = 'wrap';
150b39c5158Smillert	$huge = 'overflow';
151b39c5158Smillert
152b39c5158SmillertB<Example 3>
153b39c5158Smillert
154b39c5158Smillert	use Text::Wrap;
155b39c5158Smillert
156b39c5158Smillert	$Text::Wrap::columns = 72;
157b39c5158Smillert	print wrap('', '', @text);
158b39c5158Smillert
159b39c5158Smillert=head1 DESCRIPTION
160b39c5158Smillert
161b39c5158SmillertC<Text::Wrap::wrap()> is a very simple paragraph formatter.  It formats a
162b39c5158Smillertsingle paragraph at a time by breaking lines at word boundaries.
163b39c5158SmillertIndentation is controlled for the first line (C<$initial_tab>) and
164b39c5158Smillertall subsequent lines (C<$subsequent_tab>) independently.  Please note:
165b39c5158SmillertC<$initial_tab> and C<$subsequent_tab> are the literal strings that will
166b39c5158Smillertbe used: it is unlikely you would want to pass in a number.
167b39c5158Smillert
16891f110e0Safresh1C<Text::Wrap::fill()> is a simple multi-paragraph formatter.  It formats
169b39c5158Smillerteach paragraph separately and then joins them together when it's done.  It
170b39c5158Smillertwill destroy any whitespace in the original text.  It breaks text into
17191f110e0Safresh1paragraphs by looking for whitespace after a newline.  In other respects,
172b39c5158Smillertit acts like wrap().
173b39c5158Smillert
17491f110e0Safresh1C<wrap()> compresses trailing whitespace into one newline, and C<fill()>
17591f110e0Safresh1deletes all trailing whitespace.
17691f110e0Safresh1
177b39c5158SmillertBoth C<wrap()> and C<fill()> return a single string.
178b39c5158Smillert
17991f110e0Safresh1Unlike the old Unix fmt(1) utility, this module correctly accounts for
18091f110e0Safresh1any Unicode combining characters (such as diacriticals) that may occur
18191f110e0Safresh1in each line for both expansion and unexpansion.  These are overstrike
18291f110e0Safresh1characters that do not increment the logical position.  Make sure
18391f110e0Safresh1you have the appropriate Unicode settings enabled.
18491f110e0Safresh1
185b39c5158Smillert=head1 OVERRIDES
186b39c5158Smillert
187b39c5158SmillertC<Text::Wrap::wrap()> has a number of variables that control its behavior.
188b39c5158SmillertBecause other modules might be using C<Text::Wrap::wrap()> it is suggested
189b39c5158Smillertthat you leave these variables alone!  If you can't do that, then
190b39c5158Smillertuse C<local($Text::Wrap::VARIABLE) = YOURVALUE> when you change the
191b39c5158Smillertvalues so that the original value is restored.  This C<local()> trick
192b39c5158Smillertwill not work if you import the variable into your own namespace.
193b39c5158Smillert
194b39c5158SmillertLines are wrapped at C<$Text::Wrap::columns> columns (default value: 76).
195b39c5158SmillertC<$Text::Wrap::columns> should be set to the full width of your output
device.  In fact, every resulting line will have a length of no more than
197b39c5158SmillertC<$columns - 1>.
198b39c5158Smillert
199b39c5158SmillertIt is possible to control which characters terminate words by
200b39c5158Smillertmodifying C<$Text::Wrap::break>. Set this to a string such as
201b39c5158SmillertC<'[\s:]'> (to break before spaces or colons) or a pre-compiled regexp
such as C<qr/[\s']/> (to break before spaces or apostrophes). The
default matches a newline or a single whitespace character other than a
no-break space, together with any combining marks that follow it; that
is, words are terminated by spaces.
(This means, among other things, that trailing punctuation such as
205b39c5158Smillertfull stops or commas stay with the word they are "attached" to.)
206b39c5158SmillertSetting C<$Text::Wrap::break> to a regular expression that doesn't
207b39c5158Smillerteat any characters (perhaps just a forward look-ahead assertion) will
208b39c5158Smillertcause warnings.
209b39c5158Smillert
Beginner note: In example 2 above, C<$columns> is imported into
211b39c5158Smillertthe local namespace, and set locally.  In example 3,
212b39c5158SmillertC<$Text::Wrap::columns> is set in its own namespace without importing it.
213b39c5158Smillert
214b39c5158SmillertC<Text::Wrap::wrap()> starts its work by expanding all the tabs in its
input into spaces.  The last thing it does is to turn spaces back
216b39c5158Smillertinto tabs.  If you do not want tabs in your results, set
217b39c5158SmillertC<$Text::Wrap::unexpand> to a false value.  Likewise if you do not
218b39c5158Smillertwant to use 8-character tabstops, set C<$Text::Wrap::tabstop> to
219b39c5158Smillertthe number of characters you do want for your tabstops.
220b39c5158Smillert
221b39c5158SmillertIf you want to separate your lines with something other than C<\n>
222b39c5158Smillertthen set C<$Text::Wrap::separator> to your preference.  This replaces
223b39c5158Smillertall newlines with C<$Text::Wrap::separator>.  If you just want to
224b39c5158Smillertpreserve existing newlines but add new breaks with something else, set
225b39c5158SmillertC<$Text::Wrap::separator2> instead.
226b39c5158Smillert
227b39c5158SmillertWhen words that are longer than C<$columns> are encountered, they
228b39c5158Smillertare broken up.  C<wrap()> adds a C<"\n"> at column C<$columns>.
229b39c5158SmillertThis behavior can be overridden by setting C<$huge> to
230b39c5158Smillert'die' or to 'overflow'.  When set to 'die', large words will cause
231b39c5158SmillertC<die()> to be called.  When set to 'overflow', large words will be
232b39c5158Smillertleft intact.
233b39c5158Smillert
234b39c5158SmillertHistorical notes: 'die' used to be the default value of
235b39c5158SmillertC<$huge>.  Now, 'wrap' is the default value.
236b39c5158Smillert
237b39c5158Smillert=head1 EXAMPLES
238b39c5158Smillert
239b39c5158SmillertCode:
240b39c5158Smillert
241b39c5158Smillert  print wrap("\t","",<<END);
242b39c5158Smillert  This is a bit of text that forms
243b39c5158Smillert  a normal book-style indented paragraph
244b39c5158Smillert  END
245b39c5158Smillert
246b39c5158SmillertResult:
247b39c5158Smillert
248b39c5158Smillert  "	This is a bit of text that forms
249b39c5158Smillert  a normal book-style indented paragraph
250b39c5158Smillert  "
251b39c5158Smillert
252b39c5158SmillertCode:
253b39c5158Smillert
254b39c5158Smillert  $Text::Wrap::columns=20;
255b39c5158Smillert  $Text::Wrap::separator="|";
256b39c5158Smillert  print wrap("","","This is a bit of text that forms a normal book-style paragraph");
257b39c5158Smillert
258b39c5158SmillertResult:
259b39c5158Smillert
260b39c5158Smillert  "This is a bit of|text that forms a|normal book-style|paragraph"
261b39c5158Smillert
262b39c5158Smillert=head1 SEE ALSO
263b39c5158Smillert
26491f110e0Safresh1For correct handling of East Asian half- and full-width characters,
26591f110e0Safresh1see L<Text::WrapI18N>.  For more detailed controls: L<Text::Format>.
26691f110e0Safresh1
26791f110e0Safresh1=head1 AUTHOR
26891f110e0Safresh1
26991f110e0Safresh1David Muir Sharnoff <cpan@dave.sharnoff.org> with help from Tim Pierce and
27091f110e0Safresh1many many others.
271b39c5158Smillert
272b39c5158Smillert=head1 LICENSE
273b39c5158Smillert
27491f110e0Safresh1Copyright (C) 1996-2009 David Muir Sharnoff.
275e9ce3842Safresh1Copyright (C) 2012-2013 Google, Inc.
27691f110e0Safresh1This module may be modified, used, copied, and redistributed at your own risk.
277e9ce3842Safresh1Although allowed by the preceding license, please do not publicly
278e9ce3842Safresh1redistribute modified versions of this code with the name "Text::Wrap"
279e9ce3842Safresh1unless it passes the unmodified Text::Wrap test suite.
280