{ Contributor: MATT BOUSEK }

(*
Here is TALLY.PAS, a program that Matt Bousek  wrote
to do a word frequency analysis on a text file.  It uses an AVL tree.  It
should compile under TP 6.0 or BP 7.0
*)
program word_freq(input,output);

type
    { A word as stored in the tree: a Pascal string of at most 32 characters. }
    short_str = string[32];

{************AVLtree routines*********}
type
    { Which subtree of a node is the taller one, if either. }
    balance_set = (left_tilt,neutral,right_tilt);

    memptr = ^memrec;

    { One tree node: a distinct word plus the number of times it was seen. }
    memrec = record
        balance : balance_set;  { height relation between the two subtrees }
        left    : memptr;       { subtree of keys that sort before key }
        right   : memptr;       { subtree of keys that sort after key }
        count   : longint;      { occurrences of key recorded so far }
        key     : short_str;    { the word itself }
    end;

    {**************************************}
    { Restructure the subtree at root around its right child.
      If the right child itself tilts right, the child is promoted to the
      subtree root (single rotation).  Otherwise the right child's left
      child is promoted and the old root and child become its left and
      right children (double rotation).  In both cases the new subtree
      root ends up marked neutral, and root is updated to point at it.
      NOTE(review): root^.right (and, for the double case, its left child)
      must be non-nil; the code does not check. }
    procedure rotate_right(var root:memptr);
    var child,grand : memptr;
    begin
        child:=root^.right;
        if child^.balance<>right_tilt then begin
            { double rotation: lift the grandchild above both nodes }
            grand:=child^.left;
            child^.left:=grand^.right;
            grand^.right:=child;
            root^.right:=grand^.left;
            grand^.left:=root;
            { the grandchild's old tilt decides how its two new children lean }
            case grand^.balance of
                left_tilt: begin
                    child^.balance:=right_tilt;
                    root^.balance:=neutral;
                end;
                right_tilt: begin
                    child^.balance:=neutral;
                    root^.balance:=left_tilt;
                end;
                neutral: begin
                    child^.balance:=neutral;
                    root^.balance:=neutral;
                end;
            end;
            root:=grand;
        end else begin
            { single rotation: the right child takes the root's place }
            root^.right:=child^.left;
            child^.left:=root;
            root^.balance:=neutral;
            root:=child;
        end;
        { whichever node was promoted is now balanced }
        root^.balance:=neutral;
    end;

    {*************************************}
    { Restructure the subtree at root around its left child.
      If the left child itself tilts left, the child is promoted to the
      subtree root (single rotation).  Otherwise the left child's right
      child is promoted and the old child and root become its left and
      right children (double rotation).  In both cases the new subtree
      root ends up marked neutral, and root is updated to point at it.
      NOTE(review): root^.left (and, for the double case, its right child)
      must be non-nil; the code does not check. }
    procedure rotate_left(var root:memptr);
    var child,grand : memptr;
    begin
        child:=root^.left;
        if child^.balance<>left_tilt then begin
            { double rotation: lift the grandchild above both nodes }
            grand:=child^.right;
            child^.right:=grand^.left;
            grand^.left:=child;
            root^.left:=grand^.right;
            grand^.right:=root;
            { the grandchild's old tilt decides how its two new children lean }
            case grand^.balance of
                right_tilt: begin
                    child^.balance:=left_tilt;
                    root^.balance:=neutral;
                end;
                left_tilt: begin
                    child^.balance:=neutral;
                    root^.balance:=right_tilt;
                end;
                neutral: begin
                    child^.balance:=neutral;
                    root^.balance:=neutral;
                end;
            end;
            root:=grand;
        end else begin
            { single rotation: the left child takes the root's place }
            root^.left:=child^.right;
            child^.right:=root;
            root^.balance:=neutral;
            root:=child;
        end;
        { whichever node was promoted is now balanced }
        root^.balance:=neutral;
    end;

    {*****************************************************************}
    { NOTE(review): the text of this section was damaged in transit --
      everything between a '<' and a later '>' had been stripped, leaving
      "if xnil then begin".  The tail of insert_mem, the whole of
      insert_memtree and the header of dump_mem below are a reconstruction
      of the standard AVL insertion built on rotate_left/rotate_right;
      compare against an intact copy of TALLY.PAS if one is available. }

    { Insert word x into the AVL subtree at root.
      If x is already present its count is incremented and the tree shape
      is unchanged.  Otherwise a new node with count 1 is added and the
      subtree is rebalanced on the way back up the recursion.
      ok is an output flag: true means the subtree at root became one
      level taller, so the caller must re-examine its own balance; false
      means no height change reached this level. }
    procedure insert_mem(var root:memptr; x:short_str; var ok:boolean);
    begin
        if root=nil then begin
            { empty spot found: hang a fresh leaf here }
            new(root);
            with root^ do begin
                key:=x;
                left:=nil;
                right:=nil;
                balance:=neutral;
                count:=1;
            end;
            ok:=true;
        end else if x=root^.key then begin
            { word seen before: just count it }
            ok:=false;
            inc(root^.count);
        end else if x<root^.key then begin
            insert_mem(root^.left,x,ok);
            if ok then
                { left subtree grew by one level }
                case root^.balance of
                    left_tilt: begin
                        { was already left-heavy: now two levels out, rotate }
                        rotate_left(root);
                        ok:=false;
                    end;
                    neutral:
                        { still within AVL limits, but this subtree is taller }
                        root^.balance:=left_tilt;
                    right_tilt: begin
                        { growth evened the node out; height is unchanged }
                        root^.balance:=neutral;
                        ok:=false;
                    end;
                end;
        end else begin
            insert_mem(root^.right,x,ok);
            if ok then
                { right subtree grew by one level }
                case root^.balance of
                    left_tilt: begin
                        { growth evened the node out; height is unchanged }
                        root^.balance:=neutral;
                        ok:=false;
                    end;
                    neutral:
                        { still within AVL limits, but this subtree is taller }
                        root^.balance:=right_tilt;
                    right_tilt: begin
                        { was already right-heavy: now two levels out, rotate }
                        rotate_right(root);
                        ok:=false;
                    end;
                end;
        end;
    end;

    {*****************************************************}
    { Record one occurrence of word x in the tree at root.
      Wrapper around insert_mem for callers that do not care whether
      the tree grew in height. }
    procedure insert_memtree(var root:memptr; x:short_str);
    var grew : boolean;
    begin
        grew:=false;
        insert_mem(root,x,grew);
    end;

    {*********************************}
    { Write every word in the subtree at root to standard output, one per
      line, as the count right-justified in 5 columns, a space, then the
      word.  The in-order walk yields the words in ascending key order. }
    procedure dump_mem(root:memptr);
    begin
        if root<>nil then begin
            dump_mem(root^.left);
            writeln(root^.count:5,' ',root^.key);
            dump_mem(root^.right);
        end;
    end;


{MAIN***************************************************************}
{*** This program was written by Matt Bousek sometime in 1992.   ***}
{*** The act of this posting over Internet makes the code public ***}
{*** domain, but it would be nice to keep this header.           ***}
{*** The basic AVL routines came from a book called "Turbo Algo- ***}
{*** rithms".  Sorry, I don't have the book here and I can't     ***}
{*** remember the authors or publisher.  Enjoy.  And remember,   ***}
{*** there is no free lunch...                                   ***}

{ Main program.
  Reads the text file named by the first command-line parameter (or
  'tally.pas' when none is given), splits each line into words made of
  letters and apostrophes, folds upper case to lower case, trims a leading
  apostrophe, a trailing apostrophe and a trailing 's, and tallies every
  resulting word of three or more characters in the AVL tree.  Finally it
  prints the tally in key order followed by the line and word totals.
  Lines are read into a standard Pascal string, so anything past the
  string's capacity on a line is not examined. }
const
    { characters that may be part of a word (after lower-casing) }
    wchars:set of char=['''','a'..'z'];

var
    i,j         : word;
    aword       : short_str;
    subject     : text;
    wstart,wend : word;
    inword      : boolean;
    linecount   : longint;
    wordcount   : longint;
    buffer      : array[1..10240] of char;
    line        : string;
    filename    : string;
    tree        : memptr;

BEGIN
    tree:=nil;

    filename:=paramstr(1);
    if filename='' then filename:='tally.pas';
    assign(subject,filename);
    settextbuf(subject,buffer);
    { open with I/O checking off so a missing file gives a readable
      message instead of a bare runtime error }
    {$I-} reset(subject); {$I+}
    if ioresult<>0 then begin
        writeln('Cannot open ',filename);
        halt(1);
    end;

    wordcount:=0;
    linecount:=0;
    while not eof(subject) do begin
        inc(linecount);
        readln(subject,line);
        { wstart=0 means "not currently inside a word" }
        wstart:=0; wend:=0;
        for i:=1 to length(line) do begin
            { fold ASCII upper case to lower case in place }
            if line[i] in ['A'..'Z'] then line[i]:=chr(ord(line[i])+32);
            inword:=(line[i] in wchars);
            if inword and (wstart=0) then wstart:=i;
            if inword and (wstart>0) then wend:=i;
            { a word ends at the first non-word character or at end of line }
            if not(inword) or (i=length(line)) then begin
                { wend>wstart: only runs of two or more characters }
                if wend>wstart then begin
                    aword:=copy(line,wstart,wend+1-wstart);
                    j:=length(aword);
                    if (aword[j]='''') and (j>2) then begin {lose trailing '}
                        aword:=copy(aword,1,j-1);
                        dec(j);
                    end;
                    if (aword[1]='''') and (j>2) then begin {lose leading '}
                        aword:=copy(aword,2,j-1);
                        dec(j);
                    end;
                    if (j>2) and (aword[j-1]='''') and (aword[j]='s') then
                    begin {lose trailing 's}
                        aword:=copy(aword,1,j-2);
                        dec(j,2);
                    end;
                    { only words of three or more characters are tallied }
                    if (j>2) then begin
                        inc(wordcount);
                        insert_memtree(tree,aword);
                    end;
                end; { **if wend>wstart** }
                wstart:=0; wend:=0;
            end; { **if not(inword)** }
        end; { **for length(line)** }
    end; { **while not eof** }

    close(subject);

    dump_mem(tree);
    writeln(linecount,' lines, ',wordcount,' words.');
END.