Commit 189b72f2 authored by Arkadiusz Hiler

Don't mangle \n and \r combinations in the generated mboxes

Generated mboxes weren't representing newline (aka LF or \n) and
carriage return (aka CR or \r) found in the original patch faithfully.

This is mostly due to how email.Message is implemented - it tries to
normalize newlines in plain text emails. This doesn't work for us as we
want to provide the patches in an unchanged form.

To do that we have to override the as_string() and as_bytes() methods on
our Message class so that they use a modified Generator that doesn't mangle
newlines.

The difference in the checksums is due to slightly different header
wrapping caused by the changes in generator.

Additionally tests are provided to make sure that we ingest emails
correctly (no mangling there) and that the generated mbox is also
a-okay.

Fixes: #33
Signed-off-by: Arkadiusz Hiler <arkadiusz.hiler@intel.com>
parent fdb77357
Pipeline #172701 passed with stage
in 1 minute and 24 seconds
This diff is collapsed.
...@@ -406,3 +406,17 @@ class MboxEncoding(TestCase): ...@@ -406,3 +406,17 @@ class MboxEncoding(TestCase):
response = self.client.get('/patch/%d/mbox/' % patch.id) response = self.client.get('/patch/%d/mbox/' % patch.id)
self.assertContains(response, self.assertContains(response,
'Content-Transfer-Encoding: 8bit') 'Content-Transfer-Encoding: 8bit')
def testCarriageReturn(self):
    """Check that the generated mbox keeps CR/LF combinations intact.

    The patch content mixes LF+CR, CR+LF, a bare CR and a bare LF. The
    mbox served for that patch is parsed back with the email module and
    its decoded payload must still contain the exact same sequence.
    """
    sequence = 'nr\n\r rn\r\n r\r n\n'
    patch = self.create_patch("a", sequence)
    response = self.client.get('/patch/%d/mbox/' % patch.id)
    m = email.message_from_string(response.content.decode())
    # assertIn reports both operands on failure, unlike assertTrue(x in y).
    self.assertIn(sequence.encode(), m.get_payload(decode=True))
def testCarriageReturnBase64(self):
    """Check CR/LF preservation for a patch with a very long first line.

    Same check as testCarriageReturn, but the newline sequence is
    preceded by 250 'y' characters.
    NOTE(review): going by the test name, the long line presumably makes
    the mbox use the base64 transfer encoding -- confirm against
    PatchMbox.
    """
    sequence = 'nr\n\r rn\r\n r\r n\n'
    patch = self.create_patch("a", ("y" * 250) + sequence)
    response = self.client.get('/patch/%d/mbox/' % patch.id)
    m = email.message_from_string(response.content.decode())
    # assertIn reports both operands on failure, unlike assertTrue(x in y).
    self.assertIn(sequence.encode(), m.get_payload(decode=True))
...@@ -341,6 +341,22 @@ class MultiPartEmailHandling(TestCase): ...@@ -341,6 +341,22 @@ class MultiPartEmailHandling(TestCase):
self.p1.delete() self.p1.delete()
class CharacterTest(TestCase):
    """Check that ingesting an email does not mangle carriage returns."""

    fixtures = ['default_states', 'default_events']

    def setUp(self):
        """Create and save the project the test mail is read for."""
        self.p1 = Project(linkname='test-project-1', name='Project 1',
                          listid='1.example.com', listemail='1@example.com')
        self.p1.save()

    def testCarriageReturn(self):
        """Parse the fixture mail and look for a CR+LF in the stored patch."""
        mail = read_mail('0014-all-ascii.mbox',
                         project=self.p1)
        parse_mail(mail)
        # get() without arguments requires exactly one Patch to exist.
        patch = Patch.objects.get()
        # assertIn reports both operands on failure, unlike assertTrue(x in y).
        self.assertIn('13:\r\n', patch.content)
class MultipleProjectPatchTest(TestCase): class MultipleProjectPatchTest(TestCase):
""" Test that patches sent to multiple patchwork projects are """ Test that patches sent to multiple patchwork projects are
handled correctly """ handled correctly """
......
...@@ -251,13 +251,13 @@ class APITest(APITestBase): ...@@ -251,13 +251,13 @@ class APITest(APITestBase):
def testSeriesMbox(self): def testSeriesMbox(self):
self.check_mbox("/series/%s/revisions/1/mbox/" % self.series.pk, self.check_mbox("/series/%s/revisions/1/mbox/" % self.series.pk,
'for_each_-intel_-crtc-v2.mbox', 'for_each_-intel_-crtc-v2.mbox',
'6d59b59dbf751064408249e18401275f') '42e2b2c9eeccf912c998be41683f50d7')
def testMboxWithCoverLetter(self): def testMboxWithCoverLetter(self):
self.check_mbox("/series/%s/revisions/1/mbox_with_cover/" % self.check_mbox("/series/%s/revisions/1/mbox_with_cover/" %
self.series.pk, self.series.pk,
'for_each_-intel_-crtc-v2.mbox', 'for_each_-intel_-crtc-v2.mbox',
'826f03a224083df013a2b373459527a3') '3921b55a5ac3fb28eea0c4e198406cca')
def testCoverLetterMbox(self): def testCoverLetterMbox(self):
self.check_mbox("/series/%s/revisions/1/cover/" % self.check_mbox("/series/%s/revisions/1/cover/" %
......
...@@ -21,10 +21,12 @@ from __future__ import absolute_import ...@@ -21,10 +21,12 @@ from __future__ import absolute_import
import datetime import datetime
import time import time
from io import BytesIO
from email.encoders import encode_7or8bit, encode_base64 from email.encoders import encode_7or8bit, encode_base64
from email.header import Header from email.header import Header
from email.mime.nonmultipart import MIMENonMultipart from email.mime.nonmultipart import MIMENonMultipart
from email.parser import HeaderParser from email.parser import HeaderParser
from email.generator import BytesGenerator
import email.utils import email.utils
import re import re
...@@ -177,6 +179,23 @@ def process_multiplepatch_form(form, user, action, patches, context): ...@@ -177,6 +179,23 @@ def process_multiplepatch_form(form, user, action, patches, context):
return errors return errors
class NewlineNeutralGenerator(BytesGenerator):
    """BytesGenerator variant that leaves newlines exactly as they are.

    PatchMbox uses it so that every CR and LF combination found in the
    original email is reproduced unchanged in the generated mbox.

    XXX: _write_lines() is a private hook of the standard library
    generator, so overriding it may break when Python is upgraded to a
    new release.
    """

    def _write_lines(self, lines):
        """Write *lines* verbatim; do nothing when *lines* is empty."""
        # No splitting or re-joining on line endings: the text is handed
        # to write() in a single piece.
        if lines:
            self.write(lines)
class PatchMbox(MIMENonMultipart): class PatchMbox(MIMENonMultipart):
patch_charset = 'utf-8' patch_charset = 'utf-8'
...@@ -190,6 +209,16 @@ class PatchMbox(MIMENonMultipart): ...@@ -190,6 +209,16 @@ class PatchMbox(MIMENonMultipart):
else: else:
encode_7or8bit(self) encode_7or8bit(self)
def as_string(self, unixfrom=False, maxheaderlen=0, policy=None):
    """Return the flattened message as text.

    The message is serialized by as_bytes() and the result is decoded
    with the default codec of bytes.decode().

    maxheaderlen is accepted but is not forwarded to as_bytes(), so it
    has no effect on the output.
    """
    raw = self.as_bytes(unixfrom=unixfrom, policy=policy)
    return raw.decode()
def as_bytes(self, unixfrom=False, policy=None):
    """Return the flattened message as bytes, newlines left untouched.

    The message is flattened by NewlineNeutralGenerator into an
    in-memory buffer. When policy is None the message's own policy is
    used. unixfrom is passed straight through to flatten().
    """
    if policy is None:
        policy = self.policy
    buf = BytesIO()
    generator = NewlineNeutralGenerator(buf, mangle_from_=False,
                                        policy=policy)
    generator.flatten(self, unixfrom=unixfrom)
    return buf.getvalue()
def get_from(patch, charset): def get_from(patch, charset):
if patch.headers: if patch.headers:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment