Commit 1c5522df authored by Arkadiusz Hiler's avatar Arkadiusz Hiler
Browse files

parsemail: Handle malformed headers better

Some servers have a tendency to send non-RFC-compliant headers, which
causes patchwork to hiccup. Their admins are reluctant to act on it, and
we cannot just force a whole bunch of users to switch providers.

Let's do our bit of the robustness principle and be more liberal about
what we take.

There are actually two solutions in place, one for PY2 and one for PY3.

PY2 is fine with reading the malformed file, but fails at header
encoding, so we just need to catch the exception and discard the header.

PY3 is a bit more complex. Since there were a lot of changes in how
files and strings are handled, we have to read the mail explicitly as
binary otherwise we get decode errors, hence the sys.stdin.buffer.

The changes in the email module and the addition of
message_from_binary_file() take care of the issue for us.
Signed-off-by: Arkadiusz Hiler <>
parent dc183460
Pipeline #38279 passed with stage
in 1 minute and 47 seconds
......@@ -21,10 +21,16 @@
from __future__ import absolute_import
import six
import argparse
import codecs
import datetime
from email import message_from_file
if six.PY3:
from email import message_from_binary_file
from email import message_from_file
from email.header import Header, decode_header
from email.parser import HeaderParser
from email.utils import parsedate_tz, mktime_tz
......@@ -208,11 +214,23 @@ def mail_date(mail):
return datetime.datetime.utcfromtimestamp(mktime_tz(t))
def __format_header(header_name, value):
    """Render a single mail header as a ``Name: value`` line.

    On Python 3 the value is first passed through its own ``encode()``
    method; on Python 2 it is used unchanged.  The result is wrapped in
    an ``email.header.Header`` (continuation whitespace set to a single
    space) and the header's encoded form is placed after the name.

    Returns the formatted line, terminated by a newline.
    """
    raw_value = value.encode() if six.PY3 else value
    encoded_value = Header(raw_value, continuation_ws=' ').encode()
    return '%s: %s\n' % (header_name, encoded_value)
def mail_headers(mail):
return reduce(operator.__concat__,
['%s: %s\n' % (k, Header(v, header_name=k,
continuation_ws=' ').encode())
for (k, v) in list(mail.items())])
headers = []
for (k, v) in list(mail.items()):
headers += [__format_header(k, v)]
except UnicodeDecodeError:
return reduce(operator.__concat__, headers)
def find_pull_request(content):
......@@ -914,7 +932,10 @@ def main(args):
mail = message_from_file(sys.stdin)
if six.PY3:
mail = message_from_binary_file(sys.stdin.buffer)
mail = message_from_file(sys.stdin)
parse_lock = lock()
return parse_mail(mail)
From: Fake Email <>
Received: (from user@host) by with id
12345678901234; Mon, 13 May 2019 11:56:47 -0700
To: <>
Date: Mon, 13 May 2019 18:56:39 +0000
Message-ID: <>
X-Mailer: git-send-email 2.10.1
Content-Type: text/plain; charset="UTF-8"
Content-Transfer-Encoding: 8bit
Subject: y, hello thar
Some mail content here.
......@@ -17,15 +17,17 @@
# along with Patchwork; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import six
from email import message_from_string
from email.mime.text import MIMEText
from email.utils import make_msgid
from email.parser import HeaderParser
from django.test import TestCase, TransactionTestCase
from patchwork.bin.parsemail import (find_content, find_author, find_project,
parse_mail, split_prefixes, clean_subject,
mail_headers, parse_mail, split_prefixes,
clean_subject, parse_series_marker)
from patchwork.models import (Project, Person, Patch, Comment, State, EventLog,
Event, SeriesRevision,
......@@ -737,6 +739,30 @@ class NoNewlineAtEndOfFilePatchTest(MBoxPatchTest):
patch.content.count('\\ No newline at end of file'))
class NonAsciiHeaders(MBoxPatchTest):
    """Check mail_headers() on a mail carrying a non-RFC-compliant header.

    The message comes from the '0012-non-ascii-headers.mbox' fixture.
    NOTE(review): ``self.mail`` is presumably populated from ``mail_file``
    by MBoxPatchTest -- confirm against the base class.
    """

    mail_file = '0012-non-ascii-headers.mbox'

    def testShouldKeepAllTheOtherCompliantHeaders(self):
        """Compliant headers are kept; 'Received' is kept on PY3 only."""
        headers = mail_headers(self.mail)
        headers = HeaderParser().parsestr(headers)

        # the RFC-abiding headers, all should be present
        for name in ("From", "To", "Date", "Message-ID", "X-Mailer",
                     "Content-Type", "Content-Transfer-Encoding"):
            self.assertIn(name, headers)

        # The two expectations below contradict each other, so they have
        # to live in separate branches: exactly one applies per interpreter.
        if six.PY3:
            # offending header, should be re-encoded
            self.assertIn("Received", headers)
        else:
            # offending header, should be discarded
            self.assertNotIn("Received", headers)
class DelegateRequestTest(TestCase):
fixtures = ['default_states', 'default_events']
patch_filename = '0001-add-line.patch'
......@@ -18,7 +18,12 @@
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import codecs
from email import message_from_file
import six
if six.PY3:
from email import message_from_binary_file
from email import message_from_file
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.utils import make_msgid
......@@ -148,7 +153,12 @@ def read_patch(filename, encoding=None):
def read_mail(filename, project=None):
file_path = os.path.join(_test_mail_dir, filename)
mail = message_from_file(open(file_path))
if six.PY3:
mail = message_from_binary_file(open(file_path, "rb"))
mail = message_from_file(open(file_path))
if 'Message-Id' not in mail:
mail['Message-Id'] = make_msgid()
if project is not None:
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment