1# Copyright 2013-2015 The Distro Tracker Developers
2# See the COPYRIGHT file at the top-level directory of this distribution and
3# at https://deb.li/DTAuthors
4#
5# This file is part of Distro Tracker. It is subject to the license terms
6# in the LICENSE file found in the top-level directory of this
7# distribution and at https://deb.li/DTLicense. No part of Distro Tracker,
8# including this file, may be copied, modified, propagated, or distributed
9# except according to the terms contained in the LICENSE file.
11"""
12Module including some utility functions and classes for manipulating email.
13"""
14import copy
15import email
16import io
17import re
18import types
19from email.mime.base import MIMEBase
21from django.core.mail import EmailMessage
22from django.utils.encoding import force_bytes
def extract_email_address_from_header(header):
    """
    Return only the address part of an address-style email header.

    The header is converted with ``str()`` before parsing, so any object
    with a meaningful string representation is accepted. The display name,
    if there is one, is discarded.

    >>> str(extract_email_address_from_header('Real Name <foo@domain.com>'))
    'foo@domain.com'
    >>> str(extract_email_address_from_header('foo@domain.com'))
    'foo@domain.com'

    :param header: the header value to parse
    :returns: the email address found in the header
    """
    from email.utils import parseaddr

    # parseaddr() yields a (display name, address) pair; keep the address.
    return parseaddr(str(header))[1]
def name_and_address_from_string(content):
    """
    Parse a single address in almost-RFC822 format.

    Unlike :func:`email.utils.parseaddr`, unquoted commas are tolerated in
    the real name (in violation of RFC822). This is achieved by swapping
    every comma for a placeholder before parsing and restoring the commas
    in the parsed result.

    :param content: the string holding one address
    :returns: a dict ``{'name': real_name, 'email': email_address}``, or
        ``None`` when no email address could be extracted.
    """
    from email.utils import parseaddr

    placeholder = "WEWANTNOCOMMAS"
    parsed_name, parsed_mail = parseaddr(content.replace(",", placeholder))
    if not parsed_mail:
        return None
    return {
        'name': parsed_name.replace(placeholder, ","),
        'email': parsed_mail.replace(placeholder, ","),
    }
def names_and_addresses_from_string(content):
    """
    Parse a string holding several addresses in RFC822 format.

    The string is split only on commas that directly follow a closing
    ``>``, which keeps unquoted commas inside real names intact. Each chunk
    is then parsed with :func:`name_and_address_from_string`; chunks that
    yield no address are left out of the result.

    :param content: the string holding the addresses
    :returns: a list of dicts ``{'name': real_name, 'email': email_address}``
    """
    chunks = re.split(r'(?<=>)\s*,\s*', content)
    parsed = map(name_and_address_from_string, chunks)
    return [entry for entry in parsed if entry is not None]
def get_decoded_message_payload(message, default_charset='utf-8'):
    """
    Return the payload of an ``email.message.Message`` as text.

    The payload is first decoded according to its
    Content-Transfer-Encoding and then according to its charset.

    :param message: the message whose payload should be extracted
    :param default_charset: the charset to assume when the message does not
        declare one
    :returns: the decoded payload, or ``None`` when the message is multipart
        or has no payload to decode.
    """
    # A multipart container has no payload of its own to decode.
    if message.is_multipart():
        return None

    # Undo the transfer encoding; this gives bytes (or None).
    raw = message.get_payload(decode=True)
    if raw is None:
        return None

    # Use the declared charset, or default_charset when none is declared.
    charset = message.get_content_charset(default_charset)
    try:
        text = raw.decode(charset)
    except (UnicodeDecodeError, LookupError):
        # The charset was wrong or unknown: assume latin1 so that decoding
        # cannot fail any further.
        text = raw.decode('latin1', 'replace')
    return text
def patch_message_for_django_compat(message):
    """
    Live patch the given :py:class:`email.message.Message` object so that:

    * its ``as_string()`` method returns the flattened message as bytes,
      with the aim of reproducing the bytes it was parsed from (to preserve
      as much as possible of the original message)
    * it also has an ``as_bytes()`` method, which is the very same bound
      method (Django's SMTP backend expects it)

    :param message: the message object to patch in place
    :returns: the same ``message`` object, for convenience
    """
    # Django expects patched versions of as_string/as_bytes, see
    # django/core/mail/message.py
    def as_string(self, unixfrom=False, maxheaderlen=0, linesep='\n'):
        """
        Flattens the message with a bytes generator and returns the bytes.
        """
        from email.generator import BytesGenerator

        buffer = io.BytesIO()
        generator = BytesGenerator(
            buffer, mangle_from_=False, maxheaderlen=maxheaderlen)
        generator.flatten(self, unixfrom=unixfrom, linesep=linesep)
        return force_bytes(buffer.getvalue(), 'utf-8')

    # Both attributes point at the same bound method object.
    message.as_string = message.as_bytes = types.MethodType(
        as_string, message)
    return message
def message_from_bytes(message_bytes):
    """
    Parse the given bytes into a live-patched :class:`email.Message` object.

    The message is parsed with the standard library and then handed to
    :func:`patch_message_for_django_compat`. The intent of the patching is
    that parsing bytes with this function and serializing the result with
    its ``as_string`` method gives the original bytes back.

    An ``as_bytes`` method is also set, since Django's SMTP backend relies
    on it (it is usually brought by Django's own
    :class:`django.core.mail.SafeMIMEText` object, which is not used by
    :class:`CustomEmailMessage`).

    :param message_bytes: the raw bytes of the message
    :returns: the patched message object
    """
    # Inside this function the name ``message_from_bytes`` is this very
    # function, so the stdlib parser is reached through the ``email`` module.
    parsed = email.message_from_bytes(message_bytes)
    return patch_message_for_django_compat(parsed)
def get_message_body(msg):
    """
    Returns the message body, joining together all parts into one string.

    Every non-multipart part found while walking the message is decoded
    with :func:`get_decoded_message_payload` and the results are joined
    with newlines. Parts for which no payload could be decoded (the helper
    returned ``None``) are skipped, so that they do not make the join fail
    with a :class:`TypeError`.

    :param msg: The original received package message
    :type msg: :py:class:`email.message.Message`
    :returns: the text of all decodable leaf parts, separated by newlines
    :rtype: str
    """
    decoded_parts = (
        get_decoded_message_payload(part)
        for part in msg.walk()
        if not part.is_multipart()
    )
    # str.join() only accepts strings, hence the filtering of None values.
    return '\n'.join(text for text in decoded_parts if text is not None)
class CustomEmailMessage(EmailMessage):
    """
    A subclass of :class:`django.core.mail.EmailMessage` which can be fed
    an :class:`email.message.Message` instance to define the body of the
    message.

    When :attr:`msg` is set, the
    :attr:`body <django.core.mail.EmailMessage.body>` attribute is ignored.

    Additional parts can be added with the :meth:`attach` method, but the
    caller must make sure that the given ``msg`` instance is a multipart
    message before doing so.

    Effectively, this is also a wrapper which allows sending instances of
    :class:`email.message.Message` via Django email backends.
    """

    def __init__(self, msg=None, *args, **kwargs):
        """
        Use the keyword argument ``msg`` to set the
        :class:`email.message.Message` instance which should be used to
        define the body of the message. The object is stored as given; a
        copy is made only when attachments have to be added to it (see
        :meth:`_attach_all`).

        All other arguments are passed on to
        :class:`django.core.mail.EmailMessage`. Without ``msg``, the
        object behaves exactly like that class.
        """
        super().__init__(*args, **kwargs)
        self.msg = msg

    def message(self):
        """
        Returns the underlying :class:`email.message.Message` object, with
        any attachments added.

        When :attr:`msg` is not set, the parent :meth:`EmailMessage.message
        <django.core.mail.EmailMessage.message>` method is used instead.
        """
        # NOTE(review): this is a truthiness test, not an ``is None`` test.
        # email.message.Message defines __len__ as its number of headers, so
        # a message without any header is handled like a missing one.
        if not self.msg:
            return EmailMessage.message(self)
        return self._attach_all()

    def _attach_all(self):
        """
        Returns :attr:`msg` with all entries of ``self.attachments``
        attached to it.

        Without attachments, :attr:`msg` itself is returned. Otherwise the
        attachments are added to a deep copy, which leaves the original
        object untouched; in that case :attr:`msg` must be multipart.
        """
        if not self.attachments:
            return self.msg

        assert self.msg.is_multipart()
        combined = copy.deepcopy(self.msg)
        for attachment in self.attachments:
            if isinstance(attachment, MIMEBase):
                part = attachment
            else:
                # Anything else is unpacked as arguments for
                # _create_attachment().
                part = self._create_attachment(*attachment)
            combined.attach(part)
        return combined
def decode_header(header, default_encoding='utf-8'):
    """
    Decodes an email message header and returns it coded as a unicode
    string.

    This is necessary since it is possible that a header is made of multiple
    differently encoded parts which makes :func:`email.header.decode_header`
    insufficient.

    Parts which cannot be decoded with their declared charset, either
    because the bytes are invalid for it or because the charset is unknown
    to Python, are decoded as iso-8859-1 instead of raising an error.

    :param header: the header value to decode; ``None`` is accepted
    :param default_encoding: the encoding used for byte parts which do not
        declare any charset
    :returns: the decoded header, or ``None`` if ``header`` is ``None``
    """
    if header is None:
        return None

    # Collect the decoded parts and join them once at the end.
    fragments = []
    for part, encoding in email.header.decode_header(header):
        if not isinstance(part, bytes):
            # Already text, nothing to decode.
            fragments.append(part)
            continue
        if encoding == 'unknown-8bit':
            # Python 3 returns unknown-8bit instead of None when you have
            # 8bit characters without any encoding information
            encoding = 'iso-8859-1'
        try:
            fragments.append(part.decode(encoding or default_encoding))
        except (UnicodeDecodeError, LookupError):
            # LookupError covers charsets that Python does not know about;
            # iso-8859-1 can decode any byte sequence.
            fragments.append(part.decode('iso-8859-1', 'replace'))
    return ''.join(fragments)
def unfold_header(header):
    """
    Unfolding is the process to remove the line wrapping added by mail agents.
    A header is a single logical line and they are not allowed to be multi-line
    values.

    We need to unfold their values in particular when we want to reuse the
    values to compose a reply message as Python's email API chokes on those
    newline characters.

    If header is None, the return value is None as well.

    :param header: the header value to unfold
    :type header: str
    :returns: the unfolded version of the header.
    :rtype: str
    """
    if header is None:
        return None
    # Drop each line break (LF or CRLF) that is followed by whitespace and
    # keep that whitespace character. No count/flags are passed: passing
    # them positionally is deprecated, and re.MULTILINE has no effect on a
    # pattern without ^ or $ anchors.
    return re.sub(r'\r?\n(\s)', r'\1', str(header))