Script numéro 6

21 réponses

Olivier Miakinen

20/10/2019 à 20:37

Je n'ose plus espérer que ce sera le dernier...

#!/bin/bash
############################################################################
# Name:
# decode_headers
#
# Description:
# Script used for decoding RFC 2047 MIME encoded headers.
#
# Example:
# decode_headers <<POTIRON
# Message-ID: <c5a9-1@part.org>
# From: arvo@part.org (=?ASCII?Q?Arvo?= =?L1?Q?=20?= =?UTF-8?Q?P=C3=A4rt?=)
# To: =?Latin1?Q?Fr=E9d=E9ric_Chopin?= <fred@chopin.org>,
# =?Latin2?Q?Anton=EDn_Dvo=F8=E1k?= <anton@dvorak.org>
# Cc: Arvo =?UTF-8?Q?P=C3=A4rt?= <arvo@part.org>
# References: <A65R-4d@chopin.org> <c5a7-3@part.org>
# <A72Q-5a@chopin.org>
# In-Reply-To: <A72Q-5a@chopin.org>
# Subject: Re: Going to =?Shift-JIS?B?k4yLngo=?= (Tokyo) =?UTF-8?B?zpEK?=
# =?UTF-8?B?zrjOrs69zrEK?= (Athens) and =?ISO-8859-5?Q?=BC=DE=E1=DA?=
# =?ISO-8859-5?Q?=D2=D0?= (Moscow)
# POTIRON
# ->
# Message-ID: <c5a9-1@part.org>
# From: arvo@part.org (Arvo Pärt)
# To: Frédéric Chopin <fred@chopin.org>, Antonín Dvořák <anton@dvorak.org>
# Cc: Arvo Pärt <arvo@part.org>
# References: <A65R-4d@chopin.org> <c5a7-3@part.org> <A72Q-5a@chopin.org>
# In-Reply-To: <A72Q-5a@chopin.org>
# Subject: Re: Going to 東京 (Tokyo) Αθήνα (Athens) and Москва (Moscow)
############################################################################

#
# Global state shared by the functions of this script
#   DECODED_LINE
#       The header currently being decoded. add_word appends to it;
#       flush_line prints it and resets it to the empty string.
#   STATUS
#       Describes what was last appended to DECODED_LINE.
#       Has three possible values:
#       - "none"          at the beginning of a new header
#       - "decoded-word"  after a decoded MIME word that was not followed
#                         by a closing parenthesis
#       - "normal"        after any other string
#
DECODED_LINE=""
STATUS="none"

#
# Function: usage
#
# Description:
#   Print the command-line help on stdout, then terminate the script.
#
# Parameter:
#   None
#
# Return value:
#   Never returns: the script exits with status 1.
#
usage()
{
    # "$0" is quoted so that a script path containing spaces or glob
    # characters is printed as one unmodified argument.
    printf "Usage: %s [OPTION...] [FILE...]\n" "$0"
    printf "Decode headers from given files for RFC 2047 MIME encodings.\n"
    printf "\n"
    printf "With no FILE, or when FILE is -, read standard input.\n"
    printf "\n"
    printf " -h, --help Give this help list\n"
    exit 1
}

#
# Function: decode_qp
#
# Description:
#   Decode a 'Q' (quoted-printable like) encoded text read from stdin.
#   "_" becomes a space, "=XX" becomes the byte with hexadecimal value XX,
#   and every other character is copied unchanged.
#
# Parameter:
#   None. The third part of an encoded-word of type 'Q' is read from stdin.
#
# Return value:
#   If decoding is ok, print the result and return 0
#   Otherwise, may print a partial result, but return 1
#
decode_qp()
{
    local char hex
    local -r hex_pair='^[[:xdigit:]]{2}$'

    # -r is required: without it, read treats a backslash as an escape
    # character and a literal "\" of the input would be silently dropped.
    while IFS= read -r -n 1 char; do
        case "$char" in
            "_")
                printf " "
                ;;
            "=")
                # A truncated escape ("=4" at end of input) leaves fewer
                # than two characters in $hex and is rejected below.
                IFS= read -r -n 2 hex
                if [[ ! "$hex" =~ $hex_pair ]]; then
                    return 1
                fi
                printf "%b" "\\x${hex}"
                ;;
            *)
                printf "%s" "$char"
                ;;
        esac
    done

    return 0
}

#
# Function: b64index
#
# Description:
#   Return the index corresponding to an encoding character in base64.
#
# Parameter:
#   A character, normally in [A-Z] or [a-z] or [0-9] or + or /
#   It may also be the special end character =
#
# Return value:
#   If OK, print a number from 0 to 63 corresponding to the character,
#   or 64 for the special end character, then return 0.
#   Otherwise (including for an empty parameter) return 1.
#
b64index()
{
    # A(65)..Z(90)  :  0..25  i.e. ascii - 65
    # a(97)..z(122) : 26..51  i.e. ascii - 71
    # 0(48)..9(57)  : 52..61  i.e. ascii + 4
    # +(43)         : 62
    # /(47)         : 63
    # =(61)         : 64
    local code

    # A leading quote makes printf yield the numeric code of the character
    # that follows it (0 when nothing follows). printf -v avoids a subshell.
    printf -v code '%d' "'$1"

    if (( code >= 65 && code <= 90 )); then
        printf '%d\n' $(( code - 65 ))
    elif (( code >= 97 && code <= 122 )); then
        printf '%d\n' $(( code - 71 ))
    elif (( code >= 48 && code <= 57 )); then
        printf '%d\n' $(( code + 4 ))
    else
        case "$code" in
            43) printf '62\n' ;;
            47) printf '63\n' ;;
            61) printf '64\n' ;;
            *)  return 1 ;;
        esac
    fi

    return 0
}

#
# Function: decode_b64
#
# Description:
#   Decode a base64 encoded text read from stdin, four characters at a
#   time. Each character is converted with b64index. Decoding stops
#   successfully at the first padded group ("xx==" or "xxx="); anything
#   that follows such a group is not read.
#
# Parameter:
#   None. The third part of an encoded-word of type 'B' is read from stdin.
#
# Return value:
#   If decoding is ok, print the result and return 0
#   Otherwise, may print a partial result, but return 1. This covers
#   characters outside the base64 alphabet, "=" in one of the first two
#   positions of a group, "x=y" padding, and a final group of fewer than
#   four characters.
#
decode_b64()
{
    local chunk c1 c2 c3 c4 i1 i2 i3 i4 n24 octal

    # "|| [[ -n $chunk ]]" keeps a short final group (read hits end of
    # input before four characters) inside the loop, where it is rejected
    # instead of being silently dropped.
    while IFS= read -r -n 4 chunk || [[ -n "$chunk" ]]; do
        c1="${chunk:0:1}"
        c2="${chunk:1:1}"
        c3="${chunk:2:1}"
        c4="${chunk:3:1}"

        i1=$(b64index "$c1") || return 1
        i2=$(b64index "$c2") || return 1
        # b64index reports 64 for "=", which is only legal as padding in
        # the third and fourth positions.
        if (( i1 > 63 || i2 > 63 )); then
            return 1
        fi
        n24=$(( (i1 << 18) | (i2 << 12) ))
        printf -v octal '%03o' $(( n24 >> 16 ))
        printf "\\${octal}"

        if [[ "$c3" == "=" ]]; then
            # "xx=" must be completed by a second "="
            [[ "$c4" == "=" ]] || return 1
            return 0
        fi
        i3=$(b64index "$c3") || return 1
        n24=$(( n24 | (i3 << 6) ))
        printf -v octal '%03o' $(( (n24 >> 8) & 255 ))
        printf "\\${octal}"

        if [[ "$c4" == "=" ]]; then
            return 0
        fi
        i4=$(b64index "$c4") || return 1
        n24=$(( n24 | i4 ))
        printf -v octal '%03o' $(( n24 & 255 ))
        printf "\\${octal}"
    done

    return 0
}

#
# Function: decode_word
#
# Description:
#   Check that the parameter is an encoded word, then decode it with
#   decode_b64 or decode_qp and convert the result to UTF-8 with iconv.
#
# Parameter:
#   A single (possibly MIME-encoded) word.
#
# Return value:
#   If decoding is ok, print the result and return 0
#   Otherwise return a non-zero status (1, or the status of iconv when
#   the charset conversion fails)
#
decode_word()
{
    local word="$*"
    local charset encoding encoded decoded
    ###################################################################
    # An encoded word contains only ASCII characters in range from
    # '!' (Ascii value 0x21) to '~' (Ascii value 0x7e). This excludes
    # in particular the SPACE (Ascii 0x20) and the TAB (Ascii 0x09).
    #
    # More specifically, it consists of five parts separated by
    # question marks '?'
    # 1. A character "="
    # 2. The charset, e.g. "UTF-8" or "ISO-8859-1"
    # 3. The encoding, B or Q in upper or lower case
    # 4. The encoded text
    # 5. A character "="
    ###################################################################
    local -r shape='^=\?([^?]*)\?([^?]*)\?([^?]*)\?=$'

    # No character outside range from '!' to '~'. LC_ALL (not LANG) is
    # forced because LC_ALL, when set by the user, overrides LANG and
    # would change the meaning of the range. expr exits with status 1
    # when the match length is 0.
    if ! LC_ALL=C expr "_$word" : '_[!-~]*$' >/dev/null; then
        return 1
    fi

    # Exactly four '?', a leading "=" and a trailing "=".
    if [[ ! "$word" =~ $shape ]]; then
        return 1
    fi
    charset=${BASH_REMATCH[1]}
    encoding=${BASH_REMATCH[2]}
    encoded=${BASH_REMATCH[3]}

    # RFC 2231 allows a language tag after the charset ("UTF-8*en");
    # iconv only understands the charset itself.
    charset=${charset%%\**}

    # The data is always passed through '%s': used as a printf FORMAT,
    # any '%' or '\' it contains would be interpreted and corrupt it.
    case "$encoding" in
        B | b)
            decoded=$(printf '%s' "$encoded" | decode_b64 2>/dev/null) \
                || return 1
            ;;
        Q | q)
            decoded=$(printf '%s' "$encoded" | decode_qp 2>/dev/null) \
                || return 1
            ;;
        *)
            return 1
            ;;
    esac

    # The status of iconv (last command of the pipeline) is the status
    # of the function.
    printf '%s' "$decoded" | iconv -f "$charset" -t UTF-8 2>/dev/null
}

#
# Function: add_word
#
# Description:
#   Try to decode a new word with decode_word, and update DECODED_LINE
#   and STATUS depending on the result and the previous STATUS.
#   Parentheses surrounding the word are set aside before decoding and
#   put back afterwards.
#
#   A space is inserted before the word unless it is the first word of
#   the header, or unless it is a decoded word that directly follows
#   another decoded word with no parenthesis in between.
#
# Parameter:
#   A single (possibly MIME-encoded) word.
#
# Return value:
#   None
#
# Side effects:
#   Change DECODED_LINE and STATUS
#
add_word()
{
    # $p123 = prefix + word + suffix
    # $p1 = prefix, $p2 = word, $p3 = suffix
    local p123="$*"
    local p1 p23 p2 p3 word

    # Prefix: remove the longest tail starting at the first character
    # that is not a parenthesis; what remains is the leading run of
    # parentheses (possibly empty, possibly the whole string).
    p1=${p123%%[!()]*}
    p23=${p123:${#p1}}
    # Suffix: remove the longest head ending at the last character that
    # is not a parenthesis; what remains is the trailing run.
    p3=${p23##*[!()]}
    p2=${p23%"$p3"}

    if word=$(decode_word "$p2"); then
        if [[ "$STATUS" == "normal" ]]; then
            DECODED_LINE="${DECODED_LINE} "
        elif [[ "$STATUS" != "none" && -n "$p1" ]]; then
            # Previous word was decoded too, but an opening parenthesis
            # separates the two.
            DECODED_LINE="${DECODED_LINE} "
        fi
        DECODED_LINE="${DECODED_LINE}${p1}${word}${p3}"
        if [[ -n "$p3" ]]; then
            # A closing parenthesis ends the run of decoded words
            STATUS="normal"
        else
            STATUS="decoded-word"
        fi
    else
        if [[ "$STATUS" != "none" ]]; then
            DECODED_LINE="${DECODED_LINE} "
        fi
        DECODED_LINE="${DECODED_LINE}${p123}"
        STATUS="normal"
    fi
}

#
# Function: flush_line
#
# Description:
#   Emit the header accumulated in DECODED_LINE, if there is one, and
#   get ready for the next header. Called when a new header starts and
#   once more just before the script ends.
#
# Parameter:
#   None
#
# Return value:
#   None
#
# Side effects:
#   Print things to stdout
#   Change DECODED_LINE and STATUS
#
flush_line()
{
    # Nothing pending: leave both globals untouched
    [[ -z "${DECODED_LINE}" ]] && return 0

    printf "%s\n" "${DECODED_LINE}"
    DECODED_LINE=""
    STATUS="none"
}

#
# Function: manage_line
#
# Description:
#   Manage a new line, which can be either the beginning or the
#   continuation of a mail/news header.
#   A line starting with a space or a tab is a continuation; any other
#   line (including an empty one) starts a new header, in which case
#   the previous header is printed with flush_line first.
#   The line is then split on whitespace and each word is passed to
#   add_word.
#
# Parameter:
#   An input line.
#
# Return value:
#   None
#
# Side effects:
#   Print things to stdout
#   Change DECODED_LINE and STATUS
#
manage_line()
{
    local line="$*"
    local word
    local -a words

    # Is it a continuation line? The tab is written $'\t': inside a
    # bracket expression "\t" would mean a backslash or the letter t.
    case "$line" in
        ' '* | $'\t'*)
            ;;
        *)
            # No: new header
            flush_line
            ;;
    esac

    # read -a splits on whitespace without pathname expansion, so a "*"
    # or "?" in a header is kept as is instead of being replaced by
    # file names.
    IFS=$' \t\n' read -r -a words <<< "$line"
    for word in "${words[@]}"; do
        add_word "$word"
    done
}

#
# Function: manage_file
#
# Description:
#   Call manage_line for each line in a given file, including a last
#   line that is not terminated by a newline.
#
# Parameter:
#   A file name, or "-" for stdin. Defaults to "-" when omitted.
#
# Return value:
#   None
#
# Side effects:
#   Same as manage_line
#
manage_file()
{
    local file=${1--}   # POSIX-compliant; ${1:--} can be used as well.
    local line

    # read returns non-zero on a final unterminated line but still fills
    # $line; the second test keeps that line from being dropped.
    # The loop is fed by process substitution rather than by a pipe so
    # that it runs in the current shell and the changes manage_line
    # makes to the global variables are kept.
    while IFS= read -r line || [[ -n "$line" ]]; do
        manage_line "$line"
    done < <(cat -- "$file")
}

#
# Scan all the arguments before reading any file:
#  - "-h" or "--help" prints the help and stops,
#  - "-" alone means stdin and is accepted here,
#  - any other argument starting with "-" is reported, then the help
#    is printed and the script stops.
# File names are left for the main loop.
#
for arg in "$@"; do
    if [[ "$arg" == "-h" || "$arg" == "--help" ]]; then
        usage
    elif [[ "$arg" == "-" ]]; then
        :
    elif [[ "$arg" == -* ]]; then
        printf "Unknown argument '%s'\n" "$arg"
        usage
    fi
done

#
# Main loop.
# Call manage_file for each filename in parameters (or for stdin when
# there is no parameter at all), then print the last pending
# DECODED_LINE if any.
#
# The number of parameters is tested rather than their concatenation,
# so that a single empty-string parameter is handled as a (bad) file
# name instead of silently switching to stdin.
#
if [ "$#" -eq 0 ]; then
    manage_file "-"
else
    for file in "$@"; do
        manage_file "$file"
    done
fi
flush_line

exit 0

--
Olivier Miakinen

1 réponse

1 2 3

M.V.

24/10/2019 à 10:24

Le 24 octobre 2019 à 00:29, Olivier Miakinen a pris le temps d'écrire :

Tiens, voilà un petit souci dans Mac Café, dû aux lignes d'attribution
à rallonge et à l'absence d'espace insécable.

Oui et je n'ai pas été suffisamment attentif car j'aurais dû m'en
apercevoir.

Il a fallu que je modifie mon script car il était perturbé par
"retour chariot suivi de =?"

Sur GNU/Linux, comme sur Unix, il n'y a jamais de "retour chariot" (CR),
ce sont des "sauts de ligne" (LF). Si je me rappelle bien, ça avait
déjà été un problème quand tu avais voulu tester l'une des premières
versions de mon script.

Je me suis mal exprimé : c'est bien d'un saut de ligne et non d'un
retour chariot dont je parlais et c'est bien "saut de ligne suivi de
=?" qui perturbait mon script.
Bonne journée.
--
Michel VAUQUOIS - http://michelvauquois.fr

1 2 3

Script numéro 6

1 réponse

Veuillez sélectionner un problème