#! /bin/sh

# Copyright (C) 2004  Nick Urbanik <nicku(at)vtc.edu.hk>

# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

# Input data file: defaults to artificial-student-data.txt in the current
# directory; the first command-line argument, when given, overrides it.
file=artificial-student-data.txt
[ "$#" -gt 0 ] && file=$1
# "$file" must be quoted: with an empty argument the unquoted test collapses
# to `[ -r ]`, which is always true, and a name containing spaces makes the
# test itself fail with a syntax error instead of checking the file.
# The diagnostic goes to stderr so it is not mixed into the exercise output.
[ -r "$file" ] || { echo "Need to be able to read $file" >&2; exit 1; }

# 10. Download a copy of the bogus student registration data from
#     http://ictlab.tyict.vtc.edu.hk/snm/lab/regular-
#     expressions/artificial-student-data.txt. Use this for the
#     following exercises, together with the grep program:
#     (a) Search for all students with the name "CHAN"
#
# The pattern skips the first six characters of each line and then requires
# "CHAN" followed by one character that is neither a letter nor a digit, so
# a longer word that merely starts with CHAN is not matched.
# `grep -E` replaces the obsolescent `egrep`; "$file" is quoted (and placed
# after `--`) so that any file name is passed as exactly one operand.
echo "================================================================="
echo "Part (a):"
echo "================================================================="
grep -E -- '^.{6}CHAN[^A-Za-z0-9]' "$file"
echo
echo "Show Word Count:"
# wc without options reports the line, word and byte counts of the matches.
grep -E -- '^.{6}CHAN[^A-Za-z0-9]' "$file" | wc

echo "================================================================="
echo "================================================================="
echo

#     (b) Search for all students whose student number begins and ends
#         with 9, and with any other digits in between.
#
# The pattern matches a 9, exactly seven further digits, then a 9.
# NOTE(review): the pattern is not anchored to a column, so it matches such
# a run anywhere on the line -- confirm that only the student number can
# contain nine or more consecutive digits.
# `grep -E` replaces the obsolescent `egrep`; "$file" is quoted (and placed
# after `--`) so that any file name is passed as exactly one operand.

echo "================================================================="
echo "Part (b):"
echo "================================================================="
grep -E -- '9[0-9]{7}9' "$file"
echo
echo "Show Word Count:"
# wc without options reports the line, word and byte counts of the matches.
grep -E -- '9[0-9]{7}9' "$file" | wc

echo "================================================================="
echo "================================================================="
echo

#     (c) Search for all student records where the Hong Kong ID has a
#         letter, not a number, in the parentheses.
#
# The pattern matches one letter, six digits, and then a single letter
# enclosed in literal parentheses (the backslashes stop -E from treating
# the parentheses as a group).
# `grep -E` replaces the obsolescent `egrep`; "$file" is quoted (and placed
# after `--`) so that any file name is passed as exactly one operand.

echo "================================================================="
echo "Part (c):"
echo "================================================================="
grep -E -- '[A-Za-z][0-9]{6}\([A-Za-z]\)' "$file"
echo
echo "Show Word Count:"
# wc without options reports the line, word and byte counts of the matches.
grep -E -- '[A-Za-z][0-9]{6}\([A-Za-z]\)' "$file" | wc

echo "================================================================="
echo "================================================================="
echo

#     (d) Do the same exercises, but display only the students' names,
#         or student number. You will need a program such as awk (or even
#         cut) to select the appropriate columns from the output of grep.
#
# Pipeline:
#   1. cut keeps bytes 7-28 of every line (presumably the name column --
#      TODO confirm against the data layout);
#   2. grep -i keeps the fields containing "chan", in any letter case,
#      followed by a character that is not a letter;
#   3. grep -o -E prints only the matched run of words separated by a
#      space (optionally preceded by a comma), dropping the padding.
# The pipeline reads "$file" instead of a hard-coded file name, so a data
# file named on the command line is used here as in the other parts.

echo "================================================================="
echo "Part (d)(a):"
echo "================================================================="
cut -b7-28 -- "$file" \
    | grep -i 'chan[^a-z]' \
    | grep -o -E '([A-Za-z]+,? )+[A-Za-z]+'
echo
echo "Show Word Count:"
# Same pipeline again, summarised by wc (line, word and byte counts).
cut -b7-28 -- "$file" \
    | grep -i 'chan[^a-z]' \
    | grep -o -E '([A-Za-z]+,? )+[A-Za-z]+' \
    | wc

echo "================================================================="
echo

# Alternative for (d)(a) using grep only.
# The first grep -o prints, from the start of each matching line, the six
# leading characters, "CHAN", one non-letter and the next twenty characters.
# The second grep -o extracts from that the run of words in which every
# word but the last has at least two letters and is followed by a space
# (optionally preceded by a comma).
# The pipeline reads "$file" instead of a hard-coded file name, so a data
# file named on the command line is used here as in the other parts;
# `grep -E` replaces the obsolescent `egrep`.
echo "================================================================="
echo "Part (d)(a) Alternative solution:"
echo "================================================================="
grep -o -E -- '^.{6}CHAN[^A-Za-z].{20}' "$file" \
    | grep -o -E '([A-Za-z]{2,},? )+[A-Za-z]+'
echo
echo "Show Word Count:"
# Same pipeline again, summarised by wc (line, word and byte counts).
grep -o -E -- '^.{6}CHAN[^A-Za-z].{20}' "$file" \
    | grep -o -E '([A-Za-z]{2,},? )+[A-Za-z]+' \
    | wc

echo "================================================================="
echo

# Second alternative for (d)(a): cut keeps bytes 7-28 of every line, grep -i
# keeps the fields containing "chan" (any letter case) followed by a
# non-letter, and sed removes the trailing spaces left by the fixed width.
# The pipeline reads "$file" instead of a hard-coded file name, so a data
# file named on the command line is used here as in the other parts.
# `s/ *$//` uses no extended-regex syntax, so the GNU-only `-r` flag is
# dropped; plain sed gives the same result on any implementation.
echo "================================================================="
echo "Part (d)(a) Second alternative solution:"
echo "================================================================="
cut -b7-28 -- "$file" \
    | grep -i 'chan[^a-z]' \
    | sed 's/ *$//'
echo
echo "Show Word Count:"
# Same pipeline again, summarised by wc (line, word and byte counts).
cut -b7-28 -- "$file" \
    | grep -i 'chan[^a-z]' \
    | sed 's/ *$//' \
    | wc

echo "================================================================="
echo "================================================================="
echo

# Part (d)(b): as part (b), but -o prints only the matched text (a 9, seven
# digits, a 9) instead of the whole record.
# `grep -E` replaces the obsolescent `egrep`; "$file" is quoted (and placed
# after `--`) so that any file name is passed as exactly one operand.
echo "================================================================="
echo "Part(d)(b)"
echo "================================================================="
grep -o -E -- '9[0-9]{7}9' "$file"
echo
echo "Show Word Count:"
# wc without options reports the line, word and byte counts of the matches.
grep -o -E -- '9[0-9]{7}9' "$file" | wc

echo "================================================================="
echo "================================================================="
echo

# Part (d)(c): as part (c), but -o prints only the matched text (a letter,
# six digits and a parenthesised letter) instead of the whole record.
# `grep -E` replaces the obsolescent `egrep`; "$file" is quoted (and placed
# after `--`) so that any file name is passed as exactly one operand.
echo "================================================================="
echo "Part(d)(c)"
echo "================================================================="
grep -o -E -- '[A-Za-z][0-9]{6}\([A-Za-z]\)' "$file"
echo
echo "Show Word Count:"
# wc without options reports the line, word and byte counts of the matches.
grep -o -E -- '[A-Za-z][0-9]{6}\([A-Za-z]\)' "$file" | wc

