1# Copyright 2013-2015 The Distro Tracker Developers
2# See the COPYRIGHT file at the top-level directory of this distribution and
3# at https://deb.li/DTAuthors
4#
5# This file is part of Distro Tracker. It is subject to the license terms
6# in the LICENSE file found in the top-level directory of this
7# distribution and at https://deb.li/DTLicense. No part of Distro Tracker,
8# including this file, may be copied, modified, propagated, or distributed
9# except according to the terms contained in the LICENSE file.
11"""
12Module including some utility functions and classes for manipulating email.
13"""
14import copy
15import email
16import io
17import re
18import types
19from email.mime.base import MIMEBase
20from email.utils import parseaddr
22from django.core.mail import EmailMessage
23from django.utils.encoding import force_bytes
def extract_email_address_from_header(header):
    """
    Return only the address part of an email address header value.

    The header is converted with ``str()`` before being parsed, so header
    objects are accepted as well as plain strings.

    >>> str(extract_email_address_from_header('Real Name <foo@domain.com>'))
    'foo@domain.com'
    >>> str(extract_email_address_from_header('foo@domain.com'))
    'foo@domain.com'
    """
    # parseaddr() yields a (real name, address) pair; only the address is
    # of interest here.
    return parseaddr(str(header))[1]
def names_and_addresses_from_string(content):
    """
    Parse a string of RFC822 addresses into a list of dicts of the form
    ``{'name': real_name, 'email': email_address}``.

    Unquoted commas inside the real name part are tolerated: every comma
    that does not directly follow a closing ``>`` is temporarily swapped
    for a placeholder so that only the commas separating two addresses are
    used to split the string. Entries for which no email address could be
    parsed are left out of the result.
    """
    # Hide the commas which are not address separators behind a placeholder.
    protected = re.sub(r'(?<!>)(\s*),', r'\1WEWANTNOCOMMAS', content)
    # Only a comma located right after a '>' separates two addresses.
    parsed = (
        parseaddr(chunk) for chunk in re.split(r'(?<=>)\s*,\s*', protected)
    )
    return [
        {
            # Put the real commas back in the name.
            'name': name.replace('WEWANTNOCOMMAS', ','),
            'email': addr,
        }
        for name, addr in parsed
        if addr
    ]
def get_decoded_message_payload(message, default_charset='utf-8'):
    """
    Return the payload of the given ``email.message.Message`` as text.

    The Content-Transfer-Encoding is undone first, then the resulting bytes
    are decoded with the charset declared by the message, falling back to
    ``default_charset`` when the message does not declare one.

    ``None`` is returned for multipart messages (there is nothing to decode)
    and when the message has no payload.
    """
    # get_payload(decode=True) undoes the transfer encoding and yields bytes.
    raw = None if message.is_multipart() else message.get_payload(decode=True)
    if raw is None:
        return None

    charset = message.get_content_charset(default_charset)
    try:
        text = raw.decode(charset)
    except (UnicodeDecodeError, LookupError):
        # The declared charset is wrong or unknown: assume latin1 so that
        # decoding cannot fail any further.
        text = raw.decode('latin1', 'replace')
    return text
def patch_message_for_django_compat(message):
    """
    Live patch the :py:class:`email.message.Message` object passed as
    parameter and return it. After the call:

    * ``as_string()`` flattens the message to bytes without mangling
      ``From`` lines, with the intent of giving back the bytes the message
      was parsed from (to preserve as much as possible the original message)
    * ``as_bytes()`` is the very same bound method (this method is expected
      by Django's SMTP backend)
    """
    from email.generator import BytesGenerator

    # Django expects patched versions of as_string/as_bytes, see
    # django/core/mail/message.py
    def as_string(self, unixfrom=False, maxheaderlen=0, linesep='\n'):
        """
        Flattens the message and returns the result as bytes.
        """
        buffer = io.BytesIO()
        BytesGenerator(
            buffer, mangle_from_=False, maxheaderlen=maxheaderlen,
        ).flatten(self, unixfrom=unixfrom, linesep=linesep)
        return force_bytes(buffer.getvalue(), 'utf-8')

    # The function is bound to this instance only; the class is untouched.
    bound_as_string = types.MethodType(as_string, message)
    message.as_string = bound_as_string
    message.as_bytes = bound_as_string
    return message
def message_from_bytes(message_bytes):
    """
    Parses the given bytes into an :class:`email.Message` object and returns
    it after live-patching it with :func:`patch_message_for_django_compat`.

    The changes ensure that parsing the message's bytes with this method
    and then returning them by using the returned object's as_string
    method is an idempotent operation.

    An as_bytes method is also created since Django's SMTP backend relies
    on this method (which is usually brought by its own
    :class:`django.core.mail.SafeMIMEText` object but that we don't use
    in our :class:`CustomEmailMessage`).
    """
    # The stdlib parser shares this function's name, hence the access
    # through the ``email`` package.
    parsed = email.message_from_bytes(message_bytes)
    return patch_message_for_django_compat(parsed)
def get_message_body(msg):
    """
    Returns the message body, joining together all parts into one string.

    Every non-multipart part found while walking the message is decoded with
    :func:`get_decoded_message_payload` and the results are joined with a
    newline. Parts for which no payload could be decoded are skipped.

    :param msg: The original received package message
    :type msg: :py:class:`email.message.Message`
    :returns: the concatenated text of all the decodable parts
    :rtype: str
    """
    decoded_parts = (
        get_decoded_message_payload(part)
        for part in msg.walk()
        if not part.is_multipart()
    )
    # get_decoded_message_payload() returns None for a part without payload
    # and str.join() raises TypeError on None, so those parts are dropped.
    return '\n'.join(text for text in decoded_parts if text is not None)
class CustomEmailMessage(EmailMessage):
    """
    A subclass of :class:`django.core.mail.EmailMessage` which can be fed
    an :class:`email.message.Message` instance to define the body of the
    message.

    If :attr:`msg` is set, the :attr:`body <django.core.mail.EmailMessage.body>`
    attribute is ignored.

    If the user wants to attach additional parts to the message, the
    :meth:`attach` method can be used but the user must ensure that the given
    ``msg`` instance is a multipart message before doing so.

    Effectively, this is also a wrapper which allows sending instances of
    :class:`email.message.Message` via Django email backends.
    """
    def __init__(self, msg=None, *args, **kwargs):
        """
        Use the keyword argument ``msg`` to set the
        :class:`email.message.Message` instance which should be used to define
        the body of the message. The object is stored as is; it is only
        copied by :meth:`message` when attachments have to be added to it.

        If no ``msg`` is set, the object's behaviour is identical to
        :class:`django.core.mail.EmailMessage`
        """
        super().__init__(*args, **kwargs)
        self.msg = msg

    def message(self):
        """
        Returns the underlying :class:`email.message.Message` object.
        In case the user did not set a :attr:`msg` attribute for this instance
        the parent :meth:`EmailMessage.message
        <django.core.mail.EmailMessage.message>` method is used.
        """
        # Compare against None explicitly: the truth value of a Message is
        # derived from its number of headers, so a message without any
        # header would be falsy and wrongly ignored.
        if self.msg is None:
            return super().message()
        return self._attach_all()

    def _attach_all(self):
        """
        Returns :attr:`msg` with all the existing attachments added to it.

        Without attachments the stored message itself is returned. Otherwise
        the attachments are added to a deep copy, so that the stored message
        is left untouched.
        """
        if not self.attachments:
            return self.msg

        # Parts can only be attached to a multipart message.
        assert self.msg.is_multipart()
        msg = copy.deepcopy(self.msg)
        for attachment in self.attachments:
            if isinstance(attachment, MIMEBase):
                msg.attach(attachment)
            else:
                msg.attach(self._create_attachment(*attachment))
        return msg
def decode_header(header, default_encoding='utf-8'):
    """
    Decodes an email message header and returns it coded as a unicode
    string.

    This is necessary since it is possible that a header is made of multiple
    differently encoded parts which makes :func:`email.header.decode_header`
    insufficient.

    If header is None, the return value is None as well.

    :param header: the raw header value to decode
    :param default_encoding: the encoding used for the parts which do not
        declare any
    :type default_encoding: str
    :returns: the decoded header
    :rtype: str
    """
    # Imported here under another name: this function shadows the stdlib
    # one, and a bare ``import email`` does not load ``email.header``.
    from email.header import decode_header as split_encoded_words

    if header is None:
        return None

    # Collect the different parts of the header and join them at the end
    pieces = []
    for part, encoding in split_encoded_words(header):
        if not isinstance(part, bytes):
            # Already text, nothing to decode
            pieces.append(part)
            continue
        if encoding == 'unknown-8bit':
            # Python 3 returns unknown-8bit instead of None when you have 8bit
            # characters without any encoding information
            encoding = 'iso-8859-1'
        try:
            pieces.append(part.decode(encoding or default_encoding))
        except (UnicodeDecodeError, LookupError):
            # Wrong or unknown charset: iso-8859-1 maps every byte to a
            # character, so this fallback cannot fail.
            pieces.append(part.decode('iso-8859-1', 'replace'))
    return ''.join(pieces)
def unfold_header(header):
    """
    Unfolding is the process to remove the line wrapping added by mail agents.
    A header is a single logical line and they are not allowed to be multi-line
    values.

    We need to unfold their values in particular when we want to reuse the
    values to compose a reply message as Python's email API chokes on those
    newline characters.

    Only the line breaks which are followed by a whitespace character are
    removed; that whitespace character itself is kept.

    If header is None, the return value is None as well.

    :param header: the header value to unfold
    :type header: str
    :returns: the unfolded version of the header.
    :rtype: str
    """
    if header is None:
        return None
    # No count/flags arguments: passing them positionally is deprecated and
    # the pattern has no anchors for re.MULTILINE to act upon.
    return re.sub(r'\r?\n(\s)', r'\1', str(header))