# Copyright 2013-2015 The Distro Tracker Developers
# See the COPYRIGHT file at the top-level directory of this distribution and
# at https://deb.li/DTAuthors
#
# This file is part of Distro Tracker. It is subject to the license terms
# in the LICENSE file found in the top-level directory of this
# distribution and at https://deb.li/DTLicense. No part of Distro Tracker,
# including this file, may be copied, modified, propagated, or distributed
# except according to the terms contained in the LICENSE file.

10 

11""" 

12Module including some utility functions and classes for manipulating email. 

13""" 

14import copy 

15import email 

16import io 

17import re 

18import types 

19from email.mime.base import MIMEBase 

20 

21from django.core.mail import EmailMessage 

22from django.utils.encoding import force_bytes 

23 

24 

def extract_email_address_from_header(header):
    """
    Return only the address part of an address-style header value.

    The header is converted to ``str`` and handed to
    :func:`email.utils.parseaddr`; the display name is discarded.

    >>> str(extract_email_address_from_header('Real Name <foo@domain.com>'))
    'foo@domain.com'
    >>> str(extract_email_address_from_header('foo@domain.com'))
    'foo@domain.com'

    :param header: the header value (anything convertible with ``str``)
    :returns: the email address found in the header
    :rtype: str
    """
    from email.utils import parseaddr
    # parseaddr() yields a (real name, address) pair; only the second
    # element is of interest here.
    return parseaddr(str(header))[1]

37 

38 

def name_and_address_from_string(content):
    """
    Parse a single address in almost-RFC822 format.

    Unlike :func:`email.utils.parseaddr`, unquoted commas are tolerated in
    the real name (in violation of RFC822). This is achieved by swapping
    every comma for a placeholder token before parsing and restoring the
    commas in the parsed result.

    :param content: the address string to parse
    :type content: str
    :returns: ``{'name': real_name, 'email': email_address}``, or ``None``
        when no email address could be extracted
    :rtype: dict or None
    """
    from email.utils import parseaddr
    placeholder = "WEWANTNOCOMMAS"
    # Hide the commas from parseaddr(), which would otherwise treat them as
    # address separators.
    name, mail = parseaddr(content.replace(",", placeholder))
    if not mail:
        return None
    return {
        'name': name.replace(placeholder, ","),
        'email': mail.replace(placeholder, ","),
    }

58 

59 

def names_and_addresses_from_string(content):
    """
    Parse a string holding several addresses in RFC822 format.

    The string is only split on commas that directly follow a closing
    ``>``, which is what makes the parsing forgiving about unquoted commas
    inside the addresses themselves. Every chunk is then parsed with
    :func:`name_and_address_from_string`; chunks yielding no address are
    dropped.

    :param content: the string with the addresses
    :type content: str
    :returns: a list of ``{'name': real_name, 'email': email_address}``
        dicts
    :rtype: list
    """
    parsed = []
    for chunk in re.split(r'(?<=>)\s*,\s*', content):
        entry = name_and_address_from_string(chunk)
        if entry is not None:
            parsed.append(entry)
    return parsed

75 

76 

def get_decoded_message_payload(message, default_charset='utf-8'):
    """
    Return the text payload of an ``email.message.Message``.

    The payload is first decoded according to its
    Content-Transfer-Encoding and the resulting bytes are then decoded with
    the charset declared by the message.

    :param message: the message whose payload should be extracted
    :param default_charset: charset used when the message does not declare
        one
    :returns: the decoded payload, or ``None`` when the message is multipart
        or has no payload
    """
    # A multipart message has no payload of its own to decode
    if message.is_multipart():
        return None

    # Undo the transfer encoding; this gives us raw bytes
    raw = message.get_payload(decode=True)
    if raw is None:
        return None

    # Fall back to ``default_charset`` when the message declares no charset
    encoding = message.get_content_charset(default_charset)
    try:
        text = raw.decode(encoding)
    except (UnicodeDecodeError, LookupError):
        # The charset was wrong or is unknown to Python: assume latin1,
        # which is able to decode any sequence of bytes, so that we do not
        # fail further
        text = raw.decode('latin1', 'replace')
    return text

99 

100 

def patch_message_for_django_compat(message):
    """
    Live patch the :py:class:`email.message.Message` object passed as
    parameter and return it.

    After the call:

    * ``as_string()`` flattens the message with a bytes generator, without
      mangling ``From`` lines and (by default) without any limit on the
      header length, the intent being to preserve the original message as
      much as possible
    * ``as_bytes()`` exists too and is the very same method (it is expected
      by Django's SMTP backend)

    :param message: the message to patch in place
    :returns: the same ``message`` object
    """
    # Django expects patched versions of as_string/as_bytes, see
    # django/core/mail/message.py
    def as_string(self, unixfrom=False, maxheaderlen=0, linesep='\n'):
        """
        Returns the whole flattened message encoded as bytes.
        """
        from email.generator import BytesGenerator
        buffer = io.BytesIO()
        generator = BytesGenerator(
            buffer, mangle_from_=False, maxheaderlen=maxheaderlen)
        generator.flatten(self, unixfrom=unixfrom, linesep=linesep)
        return force_bytes(buffer.getvalue(), 'utf-8')

    # Bind the function to this instance only and expose it under both names
    bound_serializer = types.MethodType(as_string, message)
    message.as_string = bound_serializer
    message.as_bytes = bound_serializer
    return message

126 

127 

def message_from_bytes(message_bytes):
    """
    Parses the given bytes into an :class:`email.Message` object and returns
    it live-patched by :func:`patch_message_for_django_compat`.

    The patching is meant to ensure that parsing a message's bytes with
    this function and then serializing it again with the returned object's
    ``as_string`` method is an idempotent operation.

    The returned object also has an ``as_bytes`` method since Django's SMTP
    backend relies on it (it is usually brought by Django's own
    :class:`django.core.mail.SafeMIMEText` object but that one is not used
    in our :class:`CustomEmailMessage`).

    :param message_bytes: the raw message
    :type message_bytes: bytes
    :returns: the parsed and patched message
    """
    # Aliased so that the stdlib parser is not confused with this function
    from email import message_from_bytes as parse_message_bytes
    return patch_message_for_django_compat(
        parse_message_bytes(message_bytes))

146 

147 

class CustomEmailMessage(EmailMessage):
    """
    A subclass of :class:`django.core.mail.EmailMessage` which can be fed
    an :class:`email.message.Message` instance to define the body of the
    message.

    If :attr:`msg` is set, the :attr:`body <django.core.mail.EmailMessage.body>`
    attribute is ignored.

    If the user wants to attach additional parts to the message, the
    :meth:`attach` method can be used but the user must ensure that the given
    ``msg`` instance is a multipart message before doing so.

    Effectively, this is also a wrapper which allows sending instances of
    :class:`email.message.Message` via Django email backends.
    """
    def __init__(self, msg=None, *args, **kwargs):
        """
        Use the keyword argument ``msg`` to set the
        :class:`email.message.Message` instance which should be used to define
        the body of the message. The object is stored as-is; it is only
        copied by :meth:`message` when attachments have to be added to it.

        All other arguments are forwarded to
        :class:`django.core.mail.EmailMessage`. If no ``msg`` is set, the
        object's behaviour is identical to
        :class:`django.core.mail.EmailMessage`
        """
        super().__init__(*args, **kwargs)
        self.msg = msg

    def message(self):
        """
        Returns the underlying :class:`email.message.Message` object, with
        any attachments added to it.

        In case the user did not set a :attr:`msg` attribute for this instance
        the parent :meth:`EmailMessage.message
        <django.core.mail.EmailMessage.message>` method is used.
        """
        # Compare against None instead of relying on truthiness:
        # ``email.message.Message`` defines ``__len__`` as its number of
        # headers, so a message without any header would evaluate to False
        # and be silently replaced by the Django-generated message.
        if self.msg is not None:
            return self._attach_all()
        return super().message()

    def _attach_all(self):
        """
        Returns :attr:`msg` with all the attachments of this instance
        attached to it.

        When there is nothing to attach, :attr:`msg` itself is returned.
        Otherwise the attachments are added to a deep copy so that
        :attr:`msg` is left untouched; in that case :attr:`msg` must be a
        multipart message.
        """
        msg = self.msg
        if self.attachments:
            assert self.msg.is_multipart(), \
                "attachments require a multipart msg"
            # Work on a copy so that calling message() multiple times does
            # not attach the same parts to the original message repeatedly
            msg = copy.deepcopy(self.msg)
            for attachment in self.attachments:
                if isinstance(attachment, MIMEBase):
                    msg.attach(attachment)
                else:
                    # Not a MIME object yet: unpack it as the arguments of
                    # EmailMessage._create_attachment
                    msg.attach(self._create_attachment(*attachment))
        return msg

203 

204 

def decode_header(header, default_encoding='utf-8'):
    """
    Decodes an email message header and returns it coded as a unicode
    string.

    This is necessary since it is possible that a header is made of multiple
    differently encoded parts which makes :func:`email.header.decode_header`
    insufficient.

    Parts which cannot be decoded with their announced charset (invalid
    bytes or a charset unknown to Python) are decoded as iso-8859-1 so that
    this function never fails because of a badly encoded header.

    :param header: the raw header value
    :param default_encoding: the encoding used for parts which do not
        specify any charset
    :type default_encoding: str
    :returns: the decoded header, or None if ``header`` is None
    :rtype: str
    """
    if header is None:
        return None
    # Imported here, under an alias, so that the stdlib function is not
    # confused with this one and so that the submodule is known to be loaded
    from email.header import decode_header as split_encoded_words

    decoded_parts = []
    for part, encoding in split_encoded_words(header):
        if not isinstance(part, bytes):
            # Already a unicode string, nothing to decode
            decoded_parts.append(part)
            continue
        if encoding == 'unknown-8bit':
            # Python 3 returns unknown-8bit instead of None when you have 8bit
            # characters without any encoding information
            encoding = 'iso-8859-1'
        try:
            decoded_parts.append(part.decode(encoding or default_encoding))
        except (UnicodeDecodeError, LookupError):
            # Either the bytes do not match the announced charset or the
            # charset is unknown to Python; iso-8859-1 can decode anything
            decoded_parts.append(part.decode('iso-8859-1', 'replace'))
    # Join all the different parts of the header into a single unicode string
    return ''.join(decoded_parts)

233 

234 

def unfold_header(header):
    """
    Unfolding is the process to remove the line wrapping added by mail agents.
    A header is a single logical line and they are not allowed to be multi-line
    values.

    We need to unfold their values in particular when we want to reuse the
    values to compose a reply message as Python's email API chokes on those
    newline characters.

    Only line breaks followed by a whitespace character are removed; that
    whitespace character is kept.

    If header is None, the return value is None as well.

    :param header: the header value to unfold
    :type header: str
    :returns: the unfolded version of the header.
    :rtype: str
    """
    if header is None:
        return None
    # count and flags are passed by keyword: passing them positionally is
    # deprecated since Python 3.13
    return re.sub(
        r'\r?\n(\s)', r'\1', str(header), count=0, flags=re.MULTILINE)