From d79c57846950e31964ffadcd8034c44667c0fbaf Mon Sep 17 00:00:00 2001 From: Simon Ser <contact@emersion.fr> Date: Tue, 15 Dec 2020 10:37:09 +0000 Subject: [PATCH] dim: fix handling of 8-bit non-UTF-8 messages Python's open() function will return a file object that decodes input bytes to a UTF-8 string. Python assumes all files are UTF-8 by default (unless an explicit encoding param is passed). This works fine with 7-bit and UTF-8 messages. However, when a message uses an 8-bit Content-Transfer-Encoding and a non-UTF-8 charset (such as iso-8859-1), Python will error out. To avoid this, open the file in binary mode so that Python does not do any charset conversion under the hood. Signed-off-by: Simon Ser <contact@emersion.fr> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> --- dim | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/dim b/dim index ac53ade..f4366ea 100755 --- a/dim +++ b/dim @@ -443,9 +443,11 @@ function check_dim_config message_get_id () { $dim_python <<EOF -from email.parser import Parser -headers = Parser().parse(open('$1', 'r')) -message_id = headers['message-id'] +import email + +f = open('$1', 'rb') +msg = email.message_from_binary_file(f) +message_id = msg['message-id'] if message_id is not None: print(message_id.strip('<> \n')) EOF @@ -457,12 +459,12 @@ message_print_body () import email def print_msg(file): - msg = email.message_from_file(file) + msg = email.message_from_binary_file(file) for part in msg.walk(): if part.get_content_type() == 'text/plain': print(part.get_payload(decode=True).decode(part.get_content_charset(failobj='us-ascii'), 'replace')) -print_msg(open('$1', 'r')) +print_msg(open('$1', 'rb')) EOF } -- GitLab