1# Copyright 2013-2015 The Distro Tracker Developers 

2# See the COPYRIGHT file at the top-level directory of this distribution and 

3# at https://deb.li/DTAuthors 

4# 

5# This file is part of Distro Tracker. It is subject to the license terms 

6# in the LICENSE file found in the top-level directory of this 

7# distribution and at https://deb.li/DTLicense. No part of Distro Tracker, 

8# including this file, may be copied, modified, propagated, or distributed 

9# except according to the terms contained in the LICENSE file. 

10 

11""" 

12Module including some utility functions and classes for manipulating email. 

13""" 

14import copy 

15import email 

16import io 

17import re 

18import types 

19from email.mime.base import MIMEBase 

20from email.utils import parseaddr 

21 

22from django.core.mail import EmailMessage 

23from django.utils.encoding import force_bytes 

24 

25 

def extract_email_address_from_header(header):
    """
    Returns only the address part of an email header such as ``From``.

    The header is converted with ``str()`` before being parsed, so any
    object whose string form is an address header can be given.

    >>> str(extract_email_address_from_header('Real Name <foo@domain.com>'))
    'foo@domain.com'
    >>> str(extract_email_address_from_header('foo@domain.com'))
    'foo@domain.com'
    """
    # parseaddr() yields a (real name, address) pair; the name is not needed.
    return parseaddr(str(header))[1]

38 

39 

def names_and_addresses_from_string(content):
    """
    Takes a string with addresses in RFC822 format and returns a list of dicts
    {'name': real_name, 'email': email_address}

    It tries to be forgiving about unquoted commas in the real name part of
    the address: only a comma following the closing ``>`` of an address
    (optionally separated from it by whitespace) is treated as a separator
    between two addresses.

    Entries for which no email address can be extracted are skipped.

    :param content: the address list to parse
    :type content: str
    :returns: one dict per address found, in order of appearance
    :rtype: list of dicts
    """
    placeholder = 'WEWANTNOCOMMAS'
    result = []
    # Split first, mask afterwards: masking the whole string up front would
    # also hide a separator written as "> ," (whitespace before the comma),
    # and every address after it would then be lost.
    for part in re.split(r'(?<=>)\s*,\s*', content):
        # Any comma still present belongs to an unquoted real name; hide it
        # from parseaddr(), which would otherwise treat it as a separator.
        name, addr = parseaddr(part.replace(',', placeholder))
        if addr:
            result.append({
                'name': name.replace(placeholder, ','),
                'email': addr,
            })
    return result

57 

58 

def get_decoded_message_payload(message, default_charset='utf-8'):
    """
    Returns the payload of the given ``email.message.Message`` as text,
    after undoing its Content-Transfer-Encoding and decoding it with its
    charset.

    ``None`` is returned for multipart messages and for messages without
    any payload.

    :param message: the message (or message part) to decode
    :param default_charset: charset used when the message declares none
    """
    # A multipart container has no payload of its own to decode.
    raw = None if message.is_multipart() else message.get_payload(decode=True)
    if raw is None:
        return None

    # Use the declared charset, or ``default_charset`` when none is declared.
    charset = message.get_content_charset(default_charset)
    try:
        text = raw.decode(charset)
    except (UnicodeDecodeError, LookupError):
        # Wrong or unknown charset: latin1 maps every byte, so this
        # fallback cannot fail.
        text = raw.decode('latin1', 'replace')
    return text

81 

82 

def patch_message_for_django_compat(message):
    """
    Live patches the given :py:class:`email.message.Message` object and
    returns it.

    After the call:

    * ``as_string()`` flattens the message with a bytes generator (no
      ``From`` mangling, no header re-wrapping by default) so that the
      output stays as close as possible to the bytes the message was
      parsed from;
    * ``as_bytes()`` is an alias of that same method, as this is the method
      Django's SMTP backend expects to find.

    Both methods therefore return bytes.
    """
    from email.generator import BytesGenerator

    # Django expects patched versions of as_string/as_bytes, see
    # django/core/mail/message.py
    def as_string(self, unixfrom=False, maxheaderlen=0, linesep='\n'):
        """
        Returns the whole message flattened to bytes.
        """
        buffer = io.BytesIO()
        generator = BytesGenerator(
            buffer, mangle_from_=False, maxheaderlen=maxheaderlen)
        generator.flatten(self, unixfrom=unixfrom, linesep=linesep)
        return force_bytes(buffer.getvalue(), 'utf-8')

    # Bind once and expose the very same bound method under both names.
    bound = types.MethodType(as_string, message)
    message.as_string = bound
    message.as_bytes = bound
    return message

108 

109 

def message_from_bytes(message_bytes):
    """
    Parses the given bytes into an :class:`email.Message` object and
    returns it live-patched by :func:`patch_message_for_django_compat`.

    Thanks to the patch, parsing bytes with this function and getting them
    back with the returned object's ``as_string`` method is meant to be an
    idempotent operation.

    The object also gains an ``as_bytes`` method since Django's SMTP
    backend relies on it (it is usually brought by Django's own
    :class:`django.core.mail.SafeMIMEText` object but that one is not used
    in our :class:`CustomEmailMessage`).
    """
    # This function shadows the standard library name, hence the explicit
    # access through the ``email`` package.
    parsed = email.message_from_bytes(message_bytes)
    return patch_message_for_django_compat(parsed)

128 

129 

def get_message_body(msg):
    """
    Returns the message body, joining together all parts into one string.

    Every non-multipart part found while walking the message is decoded
    and the results are joined with newlines. Parts for which no payload
    can be decoded are skipped.

    :param msg: The original received package message
    :type msg: :py:class:`email.message.Message`
    :returns: the decoded text of all parts
    :rtype: str
    """
    decoded_parts = (
        get_decoded_message_payload(part)
        for part in msg.walk()
        if not part.is_multipart()
    )
    # get_decoded_message_payload() returns None for a part without any
    # payload; str.join() would raise TypeError on such a value.
    return '\n'.join(text for text in decoded_parts if text is not None)

139 

140 

class CustomEmailMessage(EmailMessage):
    """
    A subclass of :class:`django.core.mail.EmailMessage` which can be fed
    an :class:`email.message.Message` instance to define the body of the
    message.

    If :attr:`msg` is set, the :attr:`body <django.core.mail.EmailMessage.body>`
    attribute is ignored.

    If the user wants to attach additional parts to the message, the
    :meth:`attach` method can be used but the user must ensure that the given
    ``msg`` instance is a multipart message before doing so.

    Effectively, this is also a wrapper which allows sending instances of
    :class:`email.message.Message` via Django email backends.
    """
    def __init__(self, msg=None, *args, **kwargs):
        """
        Use the keyword argument ``msg`` to set the
        :class:`email.message.Message` instance which should be used to define
        the body of the message. The object is stored as given; it is only
        copied when attachments have to be added to it.

        All other arguments are passed on to
        :class:`django.core.mail.EmailMessage`.

        If no ``msg`` is set, the object's behaviour is identical to
        :class:`django.core.mail.EmailMessage`
        """
        super(CustomEmailMessage, self).__init__(*args, **kwargs)
        self.msg = msg

    def message(self):
        """
        Returns the underlying :class:`email.message.Message` object, with
        the attachments (if any) added to it.

        In case the user did not set a :attr:`msg` attribute for this instance
        the parent :meth:`EmailMessage.message
        <django.core.mail.EmailMessage.message>` method is used.
        """
        # Compare against None explicitly: the truth value of a Message is
        # derived from its number of headers, so a message without headers
        # would otherwise be treated as if no message had been set.
        if self.msg is not None:
            return self._attach_all()
        return EmailMessage.message(self)

    def _attach_all(self):
        """
        Returns :attr:`msg` with all the attachments added.

        Without attachments, :attr:`msg` itself is returned. Otherwise the
        attachments are added to a deep copy so that :attr:`msg` is left
        untouched; in that case :attr:`msg` must be a multipart message.
        """
        if not self.attachments:
            return self.msg

        assert self.msg.is_multipart()
        msg = copy.deepcopy(self.msg)
        for attachment in self.attachments:
            if isinstance(attachment, MIMEBase):
                msg.attach(attachment)
            else:
                # Anything else is unpacked into the arguments of
                # _create_attachment(), which the parent class provides.
                msg.attach(self._create_attachment(*attachment))
        return msg

196 

197 

def decode_header(header, default_encoding='utf-8'):
    """
    Decodes an email message header and returns it coded as a unicode
    string.

    This is necessary since it is possible that a header is made of multiple
    differently encoded parts which makes :func:`email.header.decode_header`
    insufficient.

    Parts which cannot be decoded with their declared charset (invalid
    bytes or unknown charset name) are decoded as iso-8859-1 instead.

    :param header: the raw header value; ``None`` is returned as is
    :param default_encoding: charset used for the bytes parts which do not
        declare any
    :returns: the decoded header, or ``None`` when ``header`` is ``None``
    """
    if header is None:
        return None

    chunks = []
    for part, encoding in email.header.decode_header(header):
        if not isinstance(part, bytes):
            # Already text, nothing to decode.
            chunks.append(part)
            continue
        if encoding == 'unknown-8bit':
            # Python 3 returns unknown-8bit instead of None when you have 8bit
            # characters without any encoding information
            encoding = 'iso-8859-1'
        try:
            chunks.append(part.decode(encoding or default_encoding))
        except (UnicodeDecodeError, LookupError):
            # LookupError: the charset name comes straight from the header
            # and may not be known to Python.
            chunks.append(part.decode('iso-8859-1', 'replace'))
    # Join all the different parts of the header into a single unicode string
    return ''.join(chunks)

226 

227 

def unfold_header(header):
    """
    Unfolding is the process to remove the line wrapping added by mail agents.
    A header is a single logical line and they are not allowed to be multi-line
    values.

    We need to unfold their values in particular when we want to reuse the
    values to compose a reply message as Python's email API chokes on those
    newline characters.

    Only the line breaks which are followed by a whitespace character are
    removed; that whitespace character is kept.

    If header is None, the return value is None as well.

    :param header: the header value to unfold
    :type header: str
    :returns: the unfolded version of the header.
    :rtype: str
    """
    if header is None:
        return None
    # No count/flags arguments: all occurrences are replaced by default, the
    # pattern has no anchor that re.MULTILINE would affect, and passing them
    # positionally is deprecated since Python 3.13.
    return re.sub(r'\r?\n(\s)', r'\1', str(header))