#!/usr/bin/perl -w

use strict;

=pod

=head1 NAME

B<fmt.pl> - plain text paragraph and column formatter


=head1 SYNOPSIS

B<fmt.pl> [ B<-s> ] [ B<-u> ] [ B<-w> I<chars> | B<-W> I<words> ] [ I<file ...> ]


=head1 DESCRIPTION

C<fmt.pl> is a plain text document formatter similar to the standard C<fmt>
program but at the same time simpler and more versatile.  It does without the
fancier features of C<fmt> but is more predictable and configurable in its
operation.

Lines are broken at white space, and paragraphs are separated by empty lines
containing only white space.  Leading empty lines and indentation are always
removed.  Overlong lines without white space remain unbroken.

If no input file is given, standard input is read.

Command-line options:

=over

=item B<-s>

Split lines only, never concatenate.  In effect, this starts a new paragraph
after every line (but does not insert new empty lines).

=item B<-u>

Make inter-word space uniform: one space between words, two after sentences,
and sequences of empty lines are replaced by one.  With B<-W>, two tabs are
placed between words / columns.

=item B<-w> I<width>

Set maximum line length to I<width> characters.  If a word is longer than
I<width> characters, the line length will still be exceeded.  A I<width> of 0
sets an unlimited line length, causing concatenation of whole paragraphs and no
line splitting.  The default is 75.

=item B<-W> I<columns>

Enables formatting into columns and sets the number of columns.  The last
option on the command line that is either B<-w> or B<-W> takes precedence.

=back


=head1 COPYRIGHT

Copyright 2015 Volker Schatz.  May be copied and/or modified according the Gnu
General Public Licence version 3 or later (see
L<http://www.gnu.org/licenses/gpl.html>).

=cut


sub usageexit
{
    print <<EOF;
usage: fmt.pl [ -s ] [ -u ] [ -w <chars> | -W <words> ]
This is a simpler but more predictable and more versatile variant of the
standard fmt program.  It formats text into paragraphs or columns of words.  -s
prevents joining of lines, -u makes white space uniform (one space between
words, two between sentences; two tabs between columns with -W; one empty line
between paragraphs and none at the top), -w or -W set the maximum line width in
characters or words.  -w 0 sets infinite line width; -w 75 is the default.
Leading white space in input lines is always stripped.  Overlong lines without
white space remain unbroken.
EOF
    exit 1;
}


my %opts= ( width => 75 );


while( @ARGV && $ARGV[0] =~ /^-/ ) {
    my $opt= shift @ARGV;
    if( $opt =~ /^--?h(?:elp)?$/i ) {
        usageexit();
    }
    elsif( $opt =~ /^-(\d+)$/ ) {
        $opts{width}= $1;
    }
    elsif( $opt eq "-u" ) {
        $opts{uniform}= 1;
    }
    elsif( $opt eq "-s" ) {
        $opts{splitonly}= 1;
    }
    elsif( $opt eq "-W" ) {
        if( ! @ARGV || $ARGV[0] !~ /^\d+$/ ) {
            print STDERR "The -W option needs a non-negative integer argument.\n";
            usageexit();
        }
        $opts{words}= shift @ARGV;
        $opts{wordsm1}= $opts{words} - 1;
        $opts{width}= undef;
    }
    elsif( $opt eq "-w" ) {
        if( ! @ARGV || $ARGV[0] !~ /^\d+$/ ) {
            print STDERR "The -w option needs a non-negative integer argument.\n";
            usageexit();
        }
        $opts{width}= shift @ARGV;
        $opts{words}= undef;
    }
    else {
        print STDERR "Unknown option \`$opt;.\n";
        exit 1;
    }
}


my $currline= "";
my @currwords;
my $nwords= 0;

while( <> ) {
    chomp;
    s/^\s+//;
    if( !length($_) ) {
        if( defined $currline ) {
            if( length $currline ) {
                $currline =~ s/\s+$//;
                print "$currline\n";
            }
            print "\n";
            $currline= undef;
        }
        else {
            print "\n" unless $opts{uniform};
        }
        next;
    }
    if( defined $opts{words} ) {
        if( $opts{uniform} ) {
            s/\s*(?:\s|$)/\t\t/g;
        }
        else {
            s/(\S)$/$1\t/;
        }
        $currline .= $_;
        next unless $opts{words};
        print "$1\n" while $currline =~ s/^((?:\S+\s+){$opts{wordsm1}}\S+)\s*//;
    }
    else {
        if( $opts{uniform} ) {
            s/\s*(?:\s|$)/ /g;
            s/([.!?][]})'"]*) /$1  /g;
        }
        else {
            s/(\S)$/$1 /;
        }
        $currline .= $_;
        next unless $opts{width};
        while( length($currline) > $opts{width} ) {
            my $thisline= substr($currline, 0, $opts{width} + 1, "");
            if( $thisline =~ s/\s+(\S*)$// ) {
                $currline= "$1$currline";
                $currline =~ s/^\s+//;
                print "$thisline\n";
            }
            else {
                $currline =~ s/^(\S*)\s+//;
                print "$thisline$1\n";
            }
        }
    }
}
continue {
    if( $opts{splitonly} ) {
        $currline =~ s/\s+$//;
        print "$currline\n";
        $currline= "";
    }
}

if( defined $currline && length $currline ) {
    $currline =~ s/\s+$//;
    print "$currline\n";
}

