1# Copyright 2013-2015 The Distro Tracker Developers 

2# See the COPYRIGHT file at the top-level directory of this distribution and 

3# at https://deb.li/DTAuthors 

4# 

5# This file is part of Distro Tracker. It is subject to the license terms 

6# in the LICENSE file found in the top-level directory of this 

7# distribution and at https://deb.li/DTLicense. No part of Distro Tracker, 

8# including this file, may be copied, modified, propagated, or distributed 

9# except according to the terms contained in the LICENSE file. 

10 

11""" 

12Module including some utility functions and classes for manipulating email. 

13""" 

14import copy 

15import email 

16import io 

17import re 

18import types 

19from email.mime.base import MIMEBase 

20 

21from django.core.mail import EmailMessage 

22from django.utils.encoding import force_bytes 

23 

24 

def extract_email_address_from_header(header):
    """
    Returns only the address part of an address-style header value
    (typically the ``From`` header), dropping any display name.

    >>> str(extract_email_address_from_header('Real Name <foo@domain.com>'))
    'foo@domain.com'
    >>> str(extract_email_address_from_header('foo@domain.com'))
    'foo@domain.com'

    :param header: the header value; it is converted with ``str()`` before
        being parsed.
    :returns: the email address found in the header, or an empty string
        when the value cannot be parsed.
    """
    from email.utils import parseaddr
    # parseaddr() yields a (real name, address) pair; only the address is
    # of interest here.
    return parseaddr(str(header))[1]

37 

38 

def name_and_address_from_string(content):
    """
    Parses a single address written in almost-RFC822 format.

    Unlike :func:`email.utils.parseaddr`, unquoted commas are tolerated in
    the real name (in violation of RFC822).

    :param content: the address to parse, e.g. ``Doe, John <jd@example.com>``
    :type content: str
    :returns: a dict ``{'name': real_name, 'email': email_address}`` or
        ``None`` when no email address could be extracted.
    """
    from email.utils import parseaddr
    # Commas are hidden behind a placeholder while parseaddr() runs, and put
    # back in both fields afterwards.
    placeholder = "WEWANTNOCOMMAS"
    parsed_name, parsed_mail = parseaddr(content.replace(",", placeholder))
    if not parsed_mail:
        return None
    return {
        'name': parsed_name.replace(placeholder, ","),
        'email': parsed_mail.replace(placeholder, ","),
    }

58 

59 

def names_and_addresses_from_string(content):
    """
    Parses a string holding several addresses in RFC822 format.

    The string is only split on commas that directly follow a closing ``>``,
    which is how unquoted commas inside real names are tolerated.

    :param content: the comma separated list of addresses
    :type content: str
    :returns: a list of dicts ``{'name': real_name, 'email': email_address}``;
        chunks from which no address could be extracted are left out.
    """
    chunks = re.split(r'(?<=>)\s*,\s*', content)
    parsed = (name_and_address_from_string(chunk) for chunk in chunks)
    return [entry for entry in parsed if entry is not None]

75 

76 

def get_decoded_message_payload(message, default_charset='utf-8'):
    """
    Returns the payload of the given ``email.message.Message`` as text,
    after undoing the Content-Transfer-Encoding and decoding the resulting
    bytes with the charset of the message.

    :param message: the message (or message part) to extract the payload from
    :param default_charset: charset used when the message declares none
    :returns: the decoded payload, or ``None`` when the message is multipart
        or has no payload to decode.
    """
    # A multipart container has nothing to decode itself; otherwise the
    # transfer encoding is undone, which gives bytes (or None).
    raw = None if message.is_multipart() else message.get_payload(decode=True)
    if raw is None:
        return None

    # Falls back on default_charset when the message does not declare one
    encoding = message.get_content_charset(default_charset)
    try:
        text = raw.decode(encoding)
    except (UnicodeDecodeError, LookupError):
        # The declared charset is wrong or unknown: assume latin1, which
        # accepts any byte sequence, so that we never fail here.
        text = raw.decode('latin1', 'replace')
    return text

99 

100 

def patch_message_for_django_compat(message):
    """
    Live patch the :py:class:`email.message.Message` object passed as
    parameter so that:

    * the ``as_string()`` method returns the same set of bytes it has been
      parsed from (to preserve as much as possible the original message)
    * the ``as_bytes()`` method is added too (this method is expected by
      Django's SMTP backend)

    :param message: the message object to patch in place
    :type message: :py:class:`email.message.Message`
    :returns: the very same ``message`` object, once patched.
    """
    from email.generator import BytesGenerator

    # Django expects patched versions of as_string/as_bytes, see
    # django/core/mail/message.py
    def as_string(self, unixfrom=False, maxheaderlen=0, linesep='\n'):
        """
        Returns the whole message (headers and body) flattened to bytes.
        """
        fp = io.BytesIO()
        # maxheaderlen defaults to 0 so that headers are not re-wrapped
        generator = BytesGenerator(
            fp, mangle_from_=False, maxheaderlen=maxheaderlen)
        generator.flatten(self, unixfrom=unixfrom, linesep=linesep)
        # BytesIO.getvalue() already returns bytes, no conversion is needed
        return fp.getvalue()

    # Both names are bound to the same method on this instance only
    message.as_string = types.MethodType(as_string, message)
    message.as_bytes = message.as_string
    return message

126 

127 

def message_from_bytes(message_bytes):
    """
    Parses the given bytes into an :class:`email.Message` object and returns
    it live-patched by :func:`patch_message_for_django_compat`.

    The patch is meant to make parsing some bytes with this function and
    then serializing the result with its ``as_string`` method an idempotent
    operation.

    The returned object also gets an ``as_bytes`` method since Django's SMTP
    backend relies on it (it is usually brought by Django's own
    :class:`django.core.mail.SafeMIMEText` object but that class is not used
    in our :class:`CustomEmailMessage`).

    :param message_bytes: the raw content of the message
    :type message_bytes: bytes
    :returns: the parsed and patched message.
    """
    return patch_message_for_django_compat(
        email.message_from_bytes(message_bytes))

146 

147 

def get_message_body(msg):
    """
    Returns the message body, joining together all parts into one string.

    Only the non-multipart parts found while walking the message are used,
    each decoded with :func:`get_decoded_message_payload` and separated from
    the next one by a newline. Parts for which no payload can be decoded are
    skipped.

    :param msg: The original received package message
    :type msg: :py:class:`email.message.Message`
    :returns: the text of all the parts joined together.
    :rtype: str
    """
    decoded_parts = (
        get_decoded_message_payload(part)
        for part in msg.walk()
        if not part.is_multipart()
    )
    # get_decoded_message_payload() returns None for a part without any
    # payload and str.join() would raise a TypeError on such a value.
    return '\n'.join(text for text in decoded_parts if text is not None)

157 

158 

class CustomEmailMessage(EmailMessage):
    """
    A subclass of :class:`django.core.mail.EmailMessage` which can be fed
    an :class:`email.message.Message` instance to define the body of the
    message.

    If :attr:`msg` is set, the :attr:`body <django.core.mail.EmailMessage.body>`
    attribute is ignored.

    If the user wants to attach additional parts to the message, the
    :meth:`attach` method can be used but the user must ensure that the given
    ``msg`` instance is a multipart message before doing so.

    Effectively, this is also a wrapper which allows sending instances of
    :class:`email.message.Message` via Django email backends.
    """
    def __init__(self, msg=None, *args, **kwargs):
        """
        Use the ``msg`` argument to set the :class:`email.message.Message`
        instance which should be used to define the body of the message.
        The object is stored as is; a copy is only made by :meth:`message`
        when attachments have to be added to it.

        All the other arguments are passed on to
        :class:`django.core.mail.EmailMessage`.

        If no ``msg`` is set, the object's behaviour is identical to
        :class:`django.core.mail.EmailMessage`
        """
        super().__init__(*args, **kwargs)
        self.msg = msg

    def message(self):
        """
        Returns the underlying :class:`email.message.Message` object.
        In case the user did not set a :attr:`msg` attribute for this instance
        the parent :meth:`EmailMessage.message
        <django.core.mail.EmailMessage.message>` method is used.
        """
        # Do not rely on the truthiness of the message: Message defines
        # __len__ as its number of headers, so a message without any header
        # is falsy and would wrongly be treated as missing.
        if self.msg is not None:
            return self._attach_all()
        return super().message()

    def _attach_all(self):
        """
        Returns the message ``msg`` with all existing attachments attached.

        Without attachments, ``msg`` itself is returned. Otherwise the
        attachments are added to a deep copy of ``msg`` so that the original
        object is left untouched.
        """
        msg = self.msg
        if self.attachments:
            assert self.msg.is_multipart(), \
                'msg must be multipart to receive attachments'
            msg = copy.deepcopy(self.msg)
            for attachment in self.attachments:
                if isinstance(attachment, MIMEBase):
                    msg.attach(attachment)
                else:
                    # Anything else is unpacked into the arguments of
                    # _create_attachment(), inherited from EmailMessage
                    msg.attach(self._create_attachment(*attachment))
        return msg

214 

215 

def decode_header(header, default_encoding='utf-8'):
    """
    Decodes an email message header and returns it coded as a unicode
    string.

    This is necessary since it is possible that a header is made of multiple
    differently encoded parts which makes :func:`email.header.decode_header`
    insufficient.

    :param header: the header value to decode
    :param default_encoding: encoding used for the parts returned as bytes
        without any encoding information
    :returns: the decoded header, or ``None`` when ``header`` is ``None``.
    :rtype: str
    """
    if header is None:
        return None
    # Collect all the different parts of the header and join them into a
    # single unicode string at the end
    fragments = []
    for part, encoding in email.header.decode_header(header):
        if not isinstance(part, bytes):
            # Already a string, nothing to decode
            fragments.append(part)
            continue
        if encoding == 'unknown-8bit':
            # Python 3 returns unknown-8bit instead of None when you have 8bit
            # characters without any encoding information
            encoding = 'iso-8859-1'
        try:
            fragments.append(part.decode(encoding or default_encoding))
        except (UnicodeDecodeError, LookupError):
            # Either the bytes do not match the announced encoding, or the
            # encoding name is not known to Python at all: iso-8859-1 maps
            # every byte to a character and thus never fails.
            fragments.append(part.decode('iso-8859-1', 'replace'))
    return ''.join(fragments)

244 

245 

def unfold_header(header):
    """
    Unfolding is the process to remove the line wrapping added by mail agents.
    A header is a single logical line and they are not allowed to be multi-line
    values.

    We need to unfold their values in particular when we want to reuse the
    values to compose a reply message as Python's email API chokes on those
    newline characters.

    If header is None, the return value is None as well.

    :param header: the header value to unfold
    :type header: str
    :returns: the unfolded version of the header.
    :rtype: str
    """
    if header is None:
        return None
    # A line break is only dropped when a whitespace character follows it,
    # and that whitespace character is kept. No flag is needed since the
    # pattern has no ^ or $ anchor, and passing count/flags positionally to
    # re.sub() is deprecated.
    return re.sub(r'\r?\n(\s)', r'\1', str(header))