use strict; use warnings;

package Text::Wrap;

use warnings::register;

# Borrow Exporter's import() without inheriting from Exporter.
BEGIN { require Exporter; *import = \&Exporter::import }

our @EXPORT = qw( wrap fill );
our @EXPORT_OK = qw( $columns $break $huge );

our $VERSION = '2024.001';
our $SUBVERSION = 'modern'; # back-compat vestige

# REGEXPS_USE_BYTES is a compile-time constant: true when a regexp applied to
# pack('U*', 0x80) can see the byte \xc2, i.e. when the regexp engine matches
# against the encoded bytes rather than the characters (see the note at the
# end of wrap()).
BEGIN { eval sprintf 'sub REGEXPS_USE_BYTES () { %d }', scalar( pack('U*', 0x80) =~ /\xc2/ ) }

# Character class for whitespace at which a line may be broken.  When this
# perl's \s matches the no-break spaces U+00A0 / U+202F, they are excluded
# from the class so that they never become break points.
my $brkspc = "\x{a0}\x{202f}" =~ /\s/ ? '[^\x{a0}\x{202f}\S]' : '\s';

our $columns = 76;  # <= screen width
our $break = '(?>\n|\r\n|'.$brkspc.'\pM*)';
our $huge = 'wrap'; # alternatively: 'die' or 'overflow'
our $unexpand = 1;
our $tabstop = 8;
our $separator = "\n";
our $separator2 = undef;

# _xlen($string)
#
# Returns the number of non-mark characters (\PM) in $string, plus one if
# the string begins with a mark character (\pM); wrap() uses this as the
# width of an indent string.
sub _xlen { $_[0] =~ /^\pM/ + ( () = $_[0] =~ /\PM/g ) }

use Text::Tabs qw(expand unexpand);

# wrap($initial_tab, $subsequent_tab, @text)
#
# Re-flows @text into lines and returns the result as one string.
#
#   $initial_tab     literal string placed before the first output line
#   $subsequent_tab  literal string placed before every later output line
#   @text            pieces of text; they are concatenated, with a single
#                    space inserted after every piece except the last unless
#                    that piece already ends in whitespace
#
# Undefined arguments are treated as empty strings.  Tabs are expanded
# (using $tabstop) before wrapping; each emitted line is passed through
# unexpand() again when $unexpand is true.  Lines are joined with
# $separator, or, when $separator2 is defined, with $separator2 except
# where the text was broken at exactly "\n", which is kept as "\n".
#
# Side effects: may assign a larger value to the package variable $columns
# (with a warning in the Text::Wrap category) when the indent strings leave
# no room for text.  Dies when $huge is 'die' and a word does not fit.
sub wrap
{
    my ($ip, $xp, @t) = map +( defined $_ ? $_ : '' ), @_;

    local($Text::Tabs::tabstop) = $tabstop;
    my $r = "";

    # With no text at all there is nothing to pop; fall back to an empty
    # string instead of joining an undefined value (which would warn).
    my $tail = @t ? pop(@t) : '';
    my $t = expand(join("", (map { /\s+\z/ ? ( $_ ) : ($_, ' ') } @t), $tail));
    my $lead = $ip;

    # $nll: room for text on continuation lines; $ll: room on the current line.
    my $nll = $columns - _xlen(expand($xp)) - 1;
    if ($nll <= 0 && $xp ne '') {
        my $nc = _xlen(expand($xp)) + 2;
        warnings::warnif "Increasing \$Text::Wrap::columns from $columns to $nc to accommodate length of subsequent tab";
        $columns = $nc;
        $nll = 1;
    }
    my $ll = $columns - _xlen(expand($ip)) - 1;
    $ll = 0 if $ll < 0;
    my $nl = "";
    my $remainder = "";

    use re 'taint';

    pos($t) = 0;

    # Consume $t from left to right with \G until only break characters
    # (or nothing) remain.
    while ($t !~ /\G(?:$break)*\Z/gc) {
        if ($t =~ /\G((?>(?!\n)\PM\pM*|(?<![^\n])\pM+){0,$ll})($break|\n+|\z)/xmgc) {
            # Normal case: at most $ll columns of text followed by a break,
            # newline(s) or the end of the string.
            $r .= $unexpand
                ? unexpand($nl . $lead . $1)
                : $nl . $lead . $1;
            $remainder = $2;
        } elsif ($huge eq 'wrap' && $t =~ /\G((?>(?!\n)\PM\pM*|(?<![^\n])\pM+){$ll})/gc) {
            # Over-long word: cut it after exactly $ll columns.
            $r .= $unexpand
                ? unexpand($nl . $lead . $1)
                : $nl . $lead . $1;
            $remainder = defined($separator2) ? $separator2 : $separator;
        } elsif ($huge eq 'overflow' && $t =~ /\G([^\n]*?)(?!(?<![^\n])\pM)($break|\n+|\z)/xmgc) {
            # Over-long word: keep it intact and let the line overflow.
            $r .= $unexpand
                ? unexpand($nl . $lead . $1)
                : $nl . $lead . $1;
            $remainder = $2;
        } elsif ($huge eq 'die') {
            die "couldn't wrap '$t'";
        } elsif ($columns < 2) {
            # Nothing can be wrapped into fewer than two columns: widen
            # $columns and hand the arguments back as they were passed in.
            warnings::warnif "Increasing \$Text::Wrap::columns from $columns to 2";
            $columns = 2;
            return @_;
        } else {
            die "This shouldn't happen";
        }

        # Every line after the first uses the subsequent indent and width.
        $lead = $xp;
        $ll = $nll;
        $nl = defined($separator2)
            ? ($remainder eq "\n"
                ? "\n"
                : $separator2)
            : $separator;
    }
    $r .= $remainder;

    # Whatever the loop's terminating match left unconsumed is appended
    # after the current indent.
    $r .= $lead . substr($t, pos($t))
        if pos($t) != length($t);

    # the 5.6 regexp engine ignores the UTF8 flag, so using capture buffers acts as an implicit _utf8_off
    # that means on 5.6 we now have to manually set UTF8=on on the output if the input had it, for which
    # we extract just the UTF8 flag from the input and check if it forces chr(0x80) to become multibyte
    return REGEXPS_USE_BYTES && (substr($t,0,0)."\x80") =~ /\xc2/ ? pack('U0a*', $r) : $r;
}

# fill($initial_tab, $subsequent_tab, @text)
#
# Multi-paragraph front end to wrap().  The pieces of @text are joined with
# newlines and split into paragraphs wherever a newline is followed by
# whitespace; inside each paragraph every run of whitespace is collapsed to
# a single space before the paragraph is passed to wrap().  Undefined
# arguments are treated as empty strings.  Returns the wrapped paragraphs
# joined into one string.
sub fill
{
    my ($ip, $xp, @raw) = map +( defined $_ ? $_ : '' ), @_;
    my @para;

    for my $pp (split(/\n\s+/, join("\n",@raw))) {
        $pp =~ s/\s+/ /g;
        push(@para, scalar wrap($ip, $xp, $pp));
    }

    # if paragraph_indent is the same as line_indent,
    # separate paragraphs with blank lines

    my $ps = ($ip eq $xp) ? "\n\n" : "\n";
    return join ($ps, @para);
}

1;

__END__

=head1 NAME

Text::Wrap - line wrapping to form simple paragraphs

=head1 SYNOPSIS

B<Example 1>

    use Text::Wrap;

    $initial_tab = "\t";    # Tab before first line
    $subsequent_tab = "";   # All other lines flush left

    print wrap($initial_tab, $subsequent_tab, @text);
    print fill($initial_tab, $subsequent_tab, @text);

    $lines = wrap($initial_tab, $subsequent_tab, @text);

    @paragraphs = fill($initial_tab, $subsequent_tab, @text);

B<Example 2>

    use Text::Wrap qw(wrap $columns $huge);

    $columns = 132;         # Wrap at 132 characters
    $huge = 'die';
    $huge = 'wrap';
    $huge = 'overflow';

B<Example 3>

    use Text::Wrap;

    $Text::Wrap::columns = 72;
    print wrap('', '', @text);

=head1 DESCRIPTION

C<Text::Wrap::wrap()> is a very simple paragraph formatter.  It formats a
single paragraph at a time by breaking lines at word boundaries.
Indentation is controlled for the first line (C<$initial_tab>) and
all subsequent lines (C<$subsequent_tab>) independently.  Please note:
C<$initial_tab> and C<$subsequent_tab> are the literal strings that will
be used: it is unlikely you would want to pass in a number.

C<Text::Wrap::fill()> is a simple multi-paragraph formatter.  It formats
each paragraph separately and then joins them together when it's done.  It
will destroy any whitespace in the original text.  It breaks text into
paragraphs by looking for whitespace after a newline.  In other respects,
it acts like wrap().

C<wrap()> compresses trailing whitespace into one newline, and C<fill()>
deletes all trailing whitespace.

Both C<wrap()> and C<fill()> return a single string.

Unlike the old Unix fmt(1) utility, this module correctly accounts for
any Unicode combining characters (such as diacriticals) that may occur
in each line for both expansion and unexpansion.  These are overstrike
characters that do not increment the logical position.  Make sure
you have the appropriate Unicode settings enabled.

=head1 OVERRIDES

C<Text::Wrap::wrap()> has a number of variables that control its behavior.
Because other modules might be using C<Text::Wrap::wrap()> it is suggested
that you leave these variables alone!  If you can't do that, then
use C<local($Text::Wrap::VARIABLE) = YOURVALUE> when you change the
values so that the original value is restored.  This C<local()> trick
will not work if you import the variable into your own namespace.

Lines are wrapped at C<$Text::Wrap::columns> columns (default value: 76).
C<$Text::Wrap::columns> should be set to the full width of your output
device.  In fact, every resulting line will have length of no more than
C<$columns - 1>.

It is possible to control which characters terminate words by
modifying C<$Text::Wrap::break>.  Set this to a string such as
C<'[\s:]'> (to break before spaces or colons) or a pre-compiled regexp
such as C<qr/[\s']/> (to break before spaces or apostrophes).  The
default is essentially C<'\s'> (excluding no-break spaces, and taking
along any combining marks that follow the whitespace character); that
is, words are terminated by spaces.
(This means, among other things, that trailing punctuation such as
full stops or commas stay with the word they are "attached" to.)
Setting C<$Text::Wrap::break> to a regular expression that doesn't
eat any characters (perhaps just a forward look-ahead assertion) will
cause warnings.

Beginner note: In example 2 above, C<$columns> is imported into
the local namespace, and set locally.  In example 3,
C<$Text::Wrap::columns> is set in its own namespace without importing it.

C<Text::Wrap::wrap()> starts its work by expanding all the tabs in its
input into spaces.  The last thing it does is to turn spaces back
into tabs.  If you do not want tabs in your results, set
C<$Text::Wrap::unexpand> to a false value.  Likewise if you do not
want to use 8-character tabstops, set C<$Text::Wrap::tabstop> to
the number of characters you do want for your tabstops.

If you want to separate your lines with something other than C<\n>
then set C<$Text::Wrap::separator> to your preference.  This replaces
all newlines with C<$Text::Wrap::separator>.  If you just want to
preserve existing newlines but add new breaks with something else, set
C<$Text::Wrap::separator2> instead.

When words that are longer than C<$columns> are encountered, they
are broken up.  C<wrap()> adds a C<"\n"> at column C<$columns>.
This behavior can be overridden by setting C<$huge> to
'die' or to 'overflow'.  When set to 'die', large words will cause
C<die()> to be called.  When set to 'overflow', large words will be
left intact.

Historical notes: 'die' used to be the default value of
C<$huge>.  Now, 'wrap' is the default value.

=head1 EXAMPLES

Code:

  print wrap("\t","",<<END);
  This is a bit of text that forms
  a normal book-style indented paragraph
  END

Result:

  "	This is a bit of text that forms
  a normal book-style indented paragraph
  "

Code:

  $Text::Wrap::columns=20;
  $Text::Wrap::separator="|";
  print wrap("","","This is a bit of text that forms a normal book-style paragraph");

Result:

  "This is a bit of|text that forms a|normal book-style|paragraph"

=head1 SEE ALSO

For correct handling of East Asian half- and full-width characters,
see L<Text::WrapI18N>.  For more detailed controls: L<Text::Format>.

=head1 AUTHOR

David Muir Sharnoff <cpan@dave.sharnoff.org> with help from Tim Pierce and
many many others.

=head1 LICENSE

Copyright (C) 1996-2009 David Muir Sharnoff.
Copyright (C) 2012-2013 Google, Inc.
This module may be modified, used, copied, and redistributed at your own risk.
Although allowed by the preceding license, please do not publicly
redistribute modified versions of this code with the name "Text::Wrap"
unless it passes the unmodified Text::Wrap test suite.