Skip to content

Commit 875048b

Browse files
committed
#665194: support roundtripping RFC2822 date stamps in the email.utils module
1 parent 3e44612 commit 875048b

File tree

5 files changed

+133
-8
lines changed

5 files changed

+133
-8
lines changed

Doc/library/email.util.rst

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,20 @@ There are several useful utilities provided in the :mod:`email.utils` module:
8181
indexes 6, 7, and 8 of the result tuple are not usable.
8282

8383

84+
.. function:: parsedate_to_datetime(date)
85+
86+
The inverse of :func:`format_datetime`. Performs the same function as
87+
:func:`parsedate`, but on success returns a :class:`~datetime.datetime`. If
88+
the input date has a timezone of ``-0000``, the ``datetime`` will be a naive
89+
``datetime``, and if the date is conforming to the RFCs it will represent a
90+
time in UTC but with no indication of the actual source timezone of the
91+
message the date comes from. If the input date has any other valid timezone
92+
offset, the ``datetime`` will be an aware ``datetime`` with the
93+
corresponding :class:`~datetime.timezone` :class:`~datetime.tzinfo`.
94+
95+
.. versionadded:: 3.3
96+
97+
8498
.. function:: mktime_tz(tuple)
8599

86100
Turn a 10-tuple as returned by :func:`parsedate_tz` into a UTC timestamp. It
@@ -112,6 +126,20 @@ There are several useful utilities provided in the :mod:`email.utils` module:
112126
``False``. The default is ``False``.
113127

114128

129+
.. function:: format_datetime(dt, usegmt=False)
130+
131+
Like ``formatdate``, but the input is a :class:`~datetime.datetime` instance. If it is
132+
a naive datetime, it is assumed to be "UTC with no information about the
133+
source timezone", and the conventional ``-0000`` is used for the timezone.
134+
If it is an aware ``datetime``, then the numeric timezone offset is used.
135+
If it is an aware ``datetime`` with offset zero, then *usegmt* may be set to
136+
``True``, in which case the string ``GMT`` is used instead of the numeric
137+
timezone offset. This provides a way to generate standards conformant HTTP
138+
date headers.
139+
140+
.. versionadded:: 3.3
141+
142+
115143
.. function:: make_msgid(idstring=None, domain=None)
116144

117145
Returns a string suitable for an :rfc:`2822`\ -compliant

Lib/email/_parseaddr.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,21 @@ def parsedate_tz(data):
4646
"""Convert a date string to a time tuple.
4747
4848
Accounts for military timezones.
49+
"""
50+
res = _parsedate_tz(data)
51+
if res[9] is None:
52+
res[9] = 0
53+
return tuple(res)
54+
55+
def _parsedate_tz(data):
56+
"""Convert date to extended time tuple.
57+
58+
The last (additional) element is the time zone offset in seconds, except if
59+
the timezone was specified as -0000. In that case the last element is
60+
None. This indicates a UTC timestamp that explicitly disclaims knowledge of
61+
the source timezone, as opposed to a +0000 timestamp that indicates the
62+
source timezone really was UTC.
63+
4964
"""
5065
data = data.split()
5166
# The FWS after the comma after the day-of-week is optional, so search and
@@ -138,6 +153,8 @@ def parsedate_tz(data):
138153
tzoffset = int(tz)
139154
except ValueError:
140155
pass
156+
if tzoffset==0 and tz.startswith('-'):
157+
tzoffset = None
141158
# Convert a timezone offset into seconds ; -0500 -> -18000
142159
if tzoffset:
143160
if tzoffset < 0:
@@ -147,7 +164,7 @@ def parsedate_tz(data):
147164
tzsign = 1
148165
tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60)
149166
# Daylight Saving Time flag is set to -1, since DST is unknown.
150-
return yy, mm, dd, thh, tmm, tss, 0, 1, -1, tzoffset
167+
return [yy, mm, dd, thh, tmm, tss, 0, 1, -1, tzoffset]
151168

152169

153170
def parsedate(data):

Lib/email/utils.py

Lines changed: 39 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,14 @@
1111
'encode_rfc2231',
1212
'formataddr',
1313
'formatdate',
14+
'format_datetime',
1415
'getaddresses',
1516
'make_msgid',
1617
'mktime_tz',
1718
'parseaddr',
1819
'parsedate',
1920
'parsedate_tz',
21+
'parsedate_to_datetime',
2022
'unquote',
2123
]
2224

@@ -26,6 +28,7 @@
2628
import base64
2729
import random
2830
import socket
31+
import datetime
2932
import urllib.parse
3033
import warnings
3134
from io import StringIO
@@ -37,6 +40,7 @@
3740
# We need workarounds for bugs in these methods in older Pythons (see below)
3841
from email._parseaddr import parsedate as _parsedate
3942
from email._parseaddr import parsedate_tz as _parsedate_tz
43+
from email._parseaddr import _parsedate_tz as __parsedate_tz
4044

4145
from quopri import decodestring as _qdecode
4246

@@ -110,6 +114,14 @@ def getaddresses(fieldvalues):
110114
''', re.VERBOSE | re.IGNORECASE)
111115

112116

117+
def _format_timetuple_and_zone(timetuple, zone):
118+
return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
119+
['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'][timetuple[6]],
120+
timetuple[2],
121+
['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
122+
'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][timetuple[1] - 1],
123+
timetuple[0], timetuple[3], timetuple[4], timetuple[5],
124+
zone)
113125

114126
def formatdate(timeval=None, localtime=False, usegmt=False):
115127
"""Returns a date string as specified by RFC 2822, e.g.:
@@ -154,14 +166,25 @@ def formatdate(timeval=None, localtime=False, usegmt=False):
154166
zone = 'GMT'
155167
else:
156168
zone = '-0000'
157-
return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
158-
['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'][now[6]],
159-
now[2],
160-
['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
161-
'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][now[1] - 1],
162-
now[0], now[3], now[4], now[5],
163-
zone)
169+
return _format_timetuple_and_zone(now, zone)
164170

171+
def format_datetime(dt, usegmt=False):
172+
"""Turn a datetime into a date string as specified in RFC 2822.
173+
174+
If usegmt is True, dt must be an aware datetime with an offset of zero. In
175+
this case 'GMT' will be rendered instead of the normal +0000 required by
176+
RFC2822. This is to support HTTP headers involving date stamps.
177+
"""
178+
now = dt.timetuple()
179+
if usegmt:
180+
if dt.tzinfo is None or dt.tzinfo != datetime.timezone.utc:
181+
raise ValueError("usegmt option requires a UTC datetime")
182+
zone = 'GMT'
183+
elif dt.tzinfo is None:
184+
zone = '-0000'
185+
else:
186+
zone = dt.strftime("%z")
187+
return _format_timetuple_and_zone(now, zone)
165188

166189

167190
def make_msgid(idstring=None, domain=None):
@@ -203,6 +226,15 @@ def parsedate_tz(data):
203226
return None
204227
return _parsedate_tz(data)
205228

229+
def parsedate_to_datetime(data):
230+
if not data:
231+
return None
232+
*dtuple, tz = __parsedate_tz(data)
233+
if tz is None:
234+
return datetime.datetime(*dtuple[:6])
235+
return datetime.datetime(*dtuple[:6],
236+
tzinfo=datetime.timezone(datetime.timedelta(seconds=tz)))
237+
206238

207239
def parseaddr(addr):
208240
addrs = _AddressList(addr).addresslist

Lib/test/test_email/test_utils.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
import datetime
2+
from email import utils
3+
import unittest
4+
5+
class DateTimeTests(unittest.TestCase):
6+
7+
datestring = 'Sun, 23 Sep 2001 20:10:55'
8+
dateargs = (2001, 9, 23, 20, 10, 55)
9+
offsetstring = ' -0700'
10+
utcoffset = datetime.timedelta(hours=-7)
11+
tz = datetime.timezone(utcoffset)
12+
naive_dt = datetime.datetime(*dateargs)
13+
aware_dt = datetime.datetime(*dateargs, tzinfo=tz)
14+
15+
def test_naive_datetime(self):
16+
self.assertEqual(utils.format_datetime(self.naive_dt),
17+
self.datestring + ' -0000')
18+
19+
def test_aware_datetime(self):
20+
self.assertEqual(utils.format_datetime(self.aware_dt),
21+
self.datestring + self.offsetstring)
22+
23+
def test_usegmt(self):
24+
utc_dt = datetime.datetime(*self.dateargs,
25+
tzinfo=datetime.timezone.utc)
26+
self.assertEqual(utils.format_datetime(utc_dt, usegmt=True),
27+
self.datestring + ' GMT')
28+
29+
def test_usegmt_with_naive_datetime_raises(self):
30+
with self.assertRaises(ValueError):
31+
utils.format_datetime(self.naive_dt, usegmt=True)
32+
33+
def test_usegmt_with_non_utc_datetime_raises(self):
34+
with self.assertRaises(ValueError):
35+
utils.format_datetime(self.aware_dt, usegmt=True)
36+
37+
def test_parsedate_to_datetime(self):
38+
self.assertEqual(
39+
utils.parsedate_to_datetime(self.datestring + self.offsetstring),
40+
self.aware_dt)
41+
42+
def test_parsedate_to_datetime_naive(self):
43+
self.assertEqual(
44+
utils.parsedate_to_datetime(self.datestring + ' -0000'),
45+
self.naive_dt)

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,9 @@ Core and Builtins
234234
Library
235235
-------
236236

237+
- Issue #665194: email.utils now has format_datetime and parsedate_to_datetime
238+
functions, allowing for round tripping of RFC2822 format dates.
239+
237240
- Issue #12571: Add a plat-linux3 directory mirroring the plat-linux2
238241
directory, so that "import DLFCN" and other similar imports work on
239242
Linux 3.0.

0 commit comments

Comments
 (0)