#
# This script uses the following global variables
# VARY
# used when a function has to return a string and not only a number
# DECODED_LINE
# The current state of a header being currently decoded
# STATUS
# Has three possible values:
# - "none" at the beginning of a new header
# - "decoded-word" after a correctly decoded MIME part
# - "normal" after any other string
#
VARY=""
DECODED_LINE=""
STATUS="none"
#
# Function: usage
#
usage()
{
printf "Usage: %s [OPTION...] [FILE...]\n" $0
printf "Decode headers from given files for RFC 2047 MIME encodings.\n"
printf "\n"
printf "With no FILE, or when FILE is -, read standard input.\n"
printf "\n"
printf " -h, --help Give this help list\n"
exit 1
}
#
# Function: decode_word
#
# Description:
# Check that the parameter is an encoded word, then decode it and
# convert it to UTF-8.
#
# Parameter:
# A single (possibly MIME-encoded) word.
#
# Return value:
# If decoding is ok, set the result into VARY and return 0
# Otherwise return 1
#
decode_word()
{
word="$*"
###################################################################
# An encoded word contains only ASCII characters in range from
# '!' (Ascii value 0x21) to '~' (Ascii value 0x7e). This excludes
# in particular the SPACE (Ascii 0x20) and the TAB (Ascii 0x09).
#
# More specifically, it consists of five parts separated by
# question marks '?'
# 1. A character "="
# 2. The charset, e.g. "UTF-8" or "ISO-8859-1"
# 3. The encoding, B or Q in upper or lower case
# 4. The encoded text
# 5. A character "="
###################################################################
# Check that:
# - there is no character outside range from '!' to '~'
# - the 1st part is a "="
# - the 5th part is a "=" and it is the end of the string
if [ $(LANG=C expr "_$word" : '_[!-~]*$') = 0 ]; then return 1; fi
part1=$(printf "$word" | cut -f 1 -d '?')
part5=$(printf "$word" | cut -f 5- -d '?')
if [ "$part1" != "=" -o "$part5" != "=" ]; then return 1; fi
case $encoding in
B | b)
decoded=$(printf "$encoded" | decode_b64 2>/dev/null)
if [ $? != 0 ]; then return 1; fi
;;
Q | q)
decoded=$(printf "$encoded" | decode_qp 2>/dev/null)
if [ $? != 0 ]; then return 1; fi
;;
*)
return 1
;;
esac
#
# Function: add_word
#
# Description:
# Try to decode a new word, and update DECODED_LINE and STATUS
# depending on the result and the previous STATUS.
#
# Parameter:
# A single (possibly MIME-encoded) word.
#
# Return value:
# None
#
# Side effects:
# Change DECODED_LINE and STATUS
#
add_word()
{
# Manage possible initial and final parentheses
# $p1 = prefix, $p2 = word, $p3 = suffix
p123="$*"
p1=$(printf "%s" "${p123}" | sed -e 's/^\([()]*\).*/\1/')
p23="${p123:${#p1}}"
p2=$(printf "%s" "${p23}" | sed -e 's/[()]*$//')
p3="${p23:${#p2}}"
if decode_word "$p2"; then
word="${p1}${VARY}${p3}"
if [ "$STATUS" = "normal" ]; then
DECODED_LINE="${DECODED_LINE} "
elif [ "$STATUS" != "none" -a -n "${p1}" ]; then
DECODED_LINE="${DECODED_LINE} "
fi
DECODED_LINE="${DECODED_LINE}${word}"
if [ -n "${p3}" ]; then
STATUS="normal"
else
STATUS="decoded-word"
fi
else
word="${p123}"
if [ "$STATUS" != "none" ]; then
DECODED_LINE="${DECODED_LINE} "
fi
DECODED_LINE="${DECODED_LINE}${word}"
STATUS="normal"
fi
}
#
# Function: flush_line
#
# Description:
# Before beginning to manage a new header, or just before ending
# the script, print the pending DECODED_LINE if any.
#
# Parameter:
# None
#
# Return value:
# None
#
# Side effects:
# Print things to stdout
# Change DECODED_LINE and STATUS
#
flush_line()
{
if [ -n "${DECODED_LINE}" ]; then
printf "%s\n" "${DECODED_LINE}"
DECODED_LINE=""
STATUS="none"
fi
}
#
# Function: manage_line
#
# Description:
# Manage a new line, which can be either the beginning or the
# continuation of a mail/news header.
# This function prints the previous line if this is a new one,
# then it adds successive parts to the new DECODED_LINE, while
# updating STATUS as needed.
#
# Parameter:
# An input line.
#
# Return value:
# None
#
# Side effects:
# Print things to stdout
# Change DECODED_LINE and STATUS
#
manage_line()
{
line="$*"
# Is it a continuation line?
if [ $(LANG=C expr "_$line" : "_[ \t]") = 0 ]; then
# No: new header
flush_line
fi
for word in $line; do
add_word "$word"
done
}
#
# Function: manage_file
#
# Description:
# Call manage_line for each line in a given file
#
# Parameter:
# A file name, or "-" for stdin
#
# Return value:
# None
#
# Side effects:
# Same as manage_line
#
manage_file()
{
file=${1--} # POSIX-compliant; ${1:--} can be used either.
while IFS= read -r line; do
manage_line "$line"
done < <(cat -- "$file")
}
#
# Parse arguments for -h or --help
#
for i in "$@"; do
case $i in
-h | --help)
usage
;;
-)
;;
-*)
printf "Unknown argument '%s'\n" "$i"
usage
;;
esac
done
#
# Main loop.
# Call manage_file for each filename in parameters,
# then print the last pending DECODED_LINE if any.
#
if [ "$*" = "" ]; then
manage_file "-"
else
for file in "$@"; do
manage_file "$file"
done
fi
flush_line