\documentclass{ictlab}
%\documentclass[solutions]{ictlab}

\RCS $Revision: 1.0 $

\usepackage{verbatim,key,alltt,amstext,answer2}
\usepackage[hang,bf,nooneline]{caption2}
\usepackage[pdfpagemode=None,pdfauthor={Nick Urbanik}]{hyperref}

\newcommand*{\labTitle}{Summary of Perl}

\renewcommand*{\floatpagefraction}{0.75} % default is .5, to increase
% density.
\renewcommand*{\bottomfraction}{0.6} % default is 0.3
\renewcommand*{\topfraction}{0.85} % default is 0.7
\renewcommand*{\textfraction}{0.1} % default is 0.2

\renewcommand*{\extrarowheight}{1pt}
\begin{document}
\Large

\section{Main Topics}
\label{sec:main-topics}

\begin{description}
\item[Shebang] Each Perl program begins with a ``shebang'':
  \begin{alltt}
#! /usr/bin/perl -w
  \end{alltt}
  It tells the operating system which interpreter to use to execute
  the program.

  You can add options to this, such as the \texttt{-w} above, which
  switches on additional warnings.  I strongly recommend always using
  this while developing the program.
\end{description}

\subsection{Variables, Operators}
\label{sec:variables}

\begin{description}
\item[Scalars and non-scalars] There are two categories of variables:
  \emph{scalars} and \emph{non-scalars}.
  \begin{itemize}
  \item scalars have a single value, such as \texttt{"a string"}, and
  \item non-scalars have a list of values, such as \texttt{( 1,
      2, "a string")}
  \end{itemize}
  
\item[Non-scalars] There are two types of non-scalars: \emph{arrays}
  and \emph{hashes}.
  \begin{itemize}
  \item \emph{arrays} are much like arrays in Java or C (though much
    more versatile).
  \item \emph{hash}es are like arrays that are indexed by strings, a
    bit like \texttt{java.util.Hashtable}, but simpler and more
    flexible.
  \end{itemize}
  
\item[\texttt{\$}, \texttt{@} and \texttt{\%}] Scalar variable
  values always start with a \texttt{\$}, such as \texttt{\$var =
    1;}
    
  Array variable values always start with a \texttt{@}, such as
  \texttt{@array = (2, 4, 6);}
    
  Hash variable values always start with a \texttt{\%}, such as
  \texttt{\%hash = ( 'NL' => 'Netherlands', BE => 'Belgium' );}

  Note that the prefix character indicates the type of the
  \emph{value}, not of the variable.  For example, in \texttt{@array},
  there is a scalar value \texttt{\$array[0]}, and in \texttt{\%hash},
  there is a scalar value \texttt{\$hash\{"BE"\}}.

\item[Variable Interpolation] A variable can be put right into a
  string like this: \texttt{"The value of \bs\$var is \$var.\bs{}n"}

  If you print that string, the value of \texttt{\$var} will be
  printed in the string, instead of the four characters
  \texttt{\$var}.  Notice that, just as in C, the backslash hides the
  special meaning of special characters such as \texttt{\$}.

\item[Operators:] Perl has all the operators of C, with the same
  precedence as in C\@.

  Note Perl also has special operators for comparing strings:

  \begin{tabular}[t]{@{}lcc@{}}
    \toprule%
    \emph{Comparison} & \emph{Numeric} & \emph{String} \\
    \midrule%
    equal & \texttt{==} & \texttt{eq} \\
    not equal & \texttt{!=} & \texttt{ne} \\
    greater than & \texttt{>}  & \texttt{gt} \\
    less than & \texttt{<} & \texttt{lt} \\
    greater than or equal to & \texttt{>=} & \texttt{ge} \\
    less than or equal to & \texttt{<=} & \texttt{le} \\
    \bottomrule
  \end{tabular}

\item[\texttt{use strict;}] Turns on compile-time checks for lots of
  possible error conditions, such as undeclared variables, and other
  possible typing errors.  I strongly recommend using this in all your
  programs that are longer than half a page.

\item[\texttt{my} and \texttt{our}:] are used to declare local
  (lexical) variables and global (package) variables, respectively.
  Necessary if you
  put\\
  \texttt{use strict;}\\
  in your program.
\end{description}

\subsection{Statements}
\label{sec:statements}

\begin{description}
\item[\texttt{if}, \texttt{while}, \texttt{for} need braces:] You must
  use braces in a normal \texttt{if} statement, unlike in C or Java\@.
  
\item[\texttt{if} statement:] The \texttt{if} statement is similar to C
  or Java, except that there is a keyword ``\texttt{elsif}'':
\begin{verbatim}
if ( $age > $max ) {
    print "Too old\n";
} elsif ( $age < $min ) {
    print "Too young\n";
} else {
    print "Just right\n";
}
\end{verbatim}

\item[\texttt{for} loops:] There are two types of \texttt{for} loop,
  one as in C and Java, the other is more useful in Perl:
\begin{verbatim}
for ( $i = 0; $i < $max; ++$i ) {
   $sum += $array[ $i ];
}
\end{verbatim}
But the second type of \texttt{for} loop is much more useful.  Here is
an example that adds 1 to each element of an array:
\begin{verbatim}
foreach $a ( @array ) {
    ++$a;
}
\end{verbatim}
Notice that \texttt{\$a} here is made an alias for each element of
the array in turn, so changing \texttt{\$a} actually changes the array
element.  You can write ``\texttt{for}'' or ``\texttt{foreach}'', Perl
won't mind.

\item[Special variable: \texttt{\$\_}:] this special variable appears
  as the default argument of many built-in functions, including
  \texttt{print}, so this \texttt{foreach} loop prints all elements of
  \texttt{@array}:
\begin{verbatim}
foreach ( @array ) {
    print;
}
\end{verbatim}

\item[\texttt{while} loops:] are rather like in C or Java\@.
\begin{verbatim}
while ( $i < $max ) {
    ++$i;
}
\end{verbatim}%$
  
\item[Reading each line from input files:] We often use a
  \texttt{while} loop to read each line from each of the files listed
  on the command line:
\begin{verbatim}
while ( <> ) {
    print $_;
}
\end{verbatim}%$
What this does is:
\begin{itemize}
\item If there are command line parameters to this script, then it
  assumes that they are file names, and opens each in turn, and loops
  once for each line in the file, setting \texttt{\$\_} to that line
\item Otherwise, it reads standard input, setting \texttt{\$\_} to
  each line.
\end{itemize}
Note that you could achieve the same result as above with:
\begin{verbatim}
print <>;
\end{verbatim}

\item[Reading from standard input only:] is very similar to using
  \texttt{<>}.  This example prints each line of standard input:
\begin{verbatim}
while ( <STDIN> ) {
    print $_;
}
\end{verbatim}%$

\item[\texttt{next} and \texttt{last}] \texttt{next} is like
  \texttt{continue} in C; \texttt{last} is like \texttt{break} in C\@.

\end{description}


\subsection{Array operations}
\label{sec:array-operations}

\begin{description}
\item[\texttt{push}] add a value at the end of an array

\item[\texttt{pop}] remove and return value from end of an array

\item[\texttt{shift}] remove and return value from the beginning of an
  array
  
\item[\texttt{unshift}] add value to the beginning of an array
\end{description}

\subsection{\texttt{split} and \texttt{join}}
\label{sec:split-and-join}

\begin{description}
\item[\texttt{split}] splits a string into an array:
\begin{verbatim}
my $pwline = "nicku:x:500:500:Nick Urbanik:/home/nicku:/bin/bash";
my ( $userid, $pw, $userid_number, $group_id_number,
     $name, $home_dir, $shell ) = split /:/, $pwline;
\end{verbatim}%$
  
\item[\texttt{join}] is the opposite of \texttt{split} and joins an
  array into a string:
\begin{verbatim}
my $pwline = join ':', @pwfields;
\end{verbatim}
\end{description}

\subsection{Executing External Programs}
\label{sec:external-programs}

Perl provides many ways of doing this, but we just used the
\texttt{system} built-in function.  In the laboratory on creating user
accounts, I have written solutions that pass an array to
\texttt{system}:
\begin{description}
\item[\texttt{system}:] 
\begin{verbatim}
        my @cmd = (
                   'useradd',
                   '-c', $name,
                   '-p', $hashed_passwd,
                   $id
                  );
        print "@cmd\n";
        system @cmd;
\end{verbatim}%$
This also works:
\begin{verbatim}
       system "useradd -c \"$name\" -p \"$hashed_passwd\" $id";
\end{verbatim}%$
The difference is that the second form is usually passed to a command
shell (such as \texttt{/bin/sh} or \texttt{CMD.EXE}) to execute,
whereas the first form is executed directly.

\item[Was the command successful?] You can tell if the command was
  successful by checking that the return value was zero:
\begin{verbatim}
if ( system( "useradd -c \"$name\" -p \"$hashed_passwd\" $id" ) != 0 ) {
    print "useradd failed";
    exit;
}
\end{verbatim}%$
This is usually written in Perl more simply using the built-in function
\texttt{die}, and the \texttt{or} operator:
\begin{verbatim}
system( "useradd -c \"$name\" -p \"$hashed_passwd\" $id" ) == 0
   or die "useradd failed";
\end{verbatim}%$
\end{description}

\section{Regular Expressions}
\label{sec:regular-expressions}

We spent most time in the laboratory and in the lectures studying and
using regular expressions.  Regular expressions are an important part
of Perl\@.  Regular expressions have just been incorporated into Java
1.4, and are based directly on Perl regular expressions.  Regular
expressions are also used in many other programming languages, text
editors, programs\ldots even Microsoft Word\@.  They will be an
important part of the exam.

You should be familiar with character classes, matching the beginning
and end of a line, and selecting part of a match.  At an absolute
minimum, you \emph{must} be familiar with the application of:

\begin{tabularx}{\linewidth}{@{}>{\ttfamily}lY@{}}
  \bs &   Quote the next metacharacter \\
  \textasciicircum &   Match the beginning of the line\\
  . &  Match any character (except newline)\\
  \$ &  Match the end of the line (or before newline at the end)\\
  \textbar &  Alternation\\
  () & Grouping\\
  {[]} & Character class \\
  *   &   Match 0 or more times \\
  +   &   Match 1 or more times \\
\end{tabularx}

\section{Perl Regular Expression Symbols: extracted from
  \texttt{perlre} manual page}
\label{sec:perlre}

[Note: this table will be provided in the exam].

\begin{tabularx}{\linewidth}{@{}>{\ttfamily}lY@{}}
  \bs &   Quote the next metacharacter \\
  \textasciicircum &   Match the beginning of the line\\
  . &  Match any character (except newline)\\
  \$ &  Match the end of the line (or before newline at the end)\\
  \textbar &  Alternation\\
  () & Grouping\\
  {[]} & Character class \\
  *   &   Match 0 or more times \\
  +   &   Match 1 or more times \\
  ?   &   Match 1 or 0 times \\
  \{$n$\} &   Match exactly $n$ times \\
  \{$n$,\} &  Match at least $n$ times \\
  \{$n$,$m$\} & Match at least $n$ but not more than $m$ times \\
  \bs w & Match a ``word'' character (alphanumeric plus ``\texttt{\_}'') \\
  \bs W & Match a non-``word'' character \\
  \bs s & Match a whitespace character \\
  \bs S & Match a non-whitespace character \\
  \bs d & Match a digit character \\
  \bs D & Match a non-digit character \\
  (?:pattern) & This is for clustering, not capturing; it groups
  subexpressions like ``\texttt{()}'', but doesn't make
  backreferences as ``\texttt{()}'' does.
 \end{tabularx}

\vspace{3ex}

\subsection*{Regular Expression Modifiers}
\label{regular-expression-modifiers}

\renewcommand{\extrarowheight}{8pt}
\begin{tabularx}{\linewidth}{@{}>{\ttfamily}lY@{}}
  i &  Do case-insensitive pattern matching.\\
  x & Extend your pattern's legibility by permitting whitespace and
  comments.
\end{tabularx}

\end{document}