14
14
import re
15
15
import sys
16
16
17
- from unittest import mock
17
+ from html . parser import HTMLParser
18
18
from typing import Any , IO
19
+ from unittest import mock
19
20
20
21
import pytest
21
22
@@ -94,6 +95,12 @@ def get_html_index_content(self) -> str:
94
95
)
95
96
return index
96
97
98
def get_html_report_text_lines(self, module: str) -> list[str]:
    """Parse the HTML report, and return a list of strings, the text rendered.

    `module` is passed through to `get_html_report_content` to pick the
    report page to read.  The result has one string per line collected by
    `HtmlReportParser`.
    """
    parser = HtmlReportParser()
    parser.feed(self.get_html_report_content(module))
    # feed() may keep trailing text buffered until more input arrives;
    # close() forces it to be processed before we read the result.
    parser.close()
    return parser.text()
103
+
97
104
def assert_correct_timestamp (self , html : str ) -> None :
98
105
"""Extract the time stamp from `html`, and assert it is recent."""
99
106
timestamp_pat = r"created at (\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2})"
@@ -133,6 +140,43 @@ def assert_valid_hrefs(self, directory: str = "htmlcov") -> None:
133
140
)
134
141
135
142
143
+ class HtmlReportParser (HTMLParser ): # pylint: disable=abstract-method
144
+ """An HTML parser for our HTML reports.
145
+
146
+ Assertions are made about the structure we expect.
147
+ """
148
+ def __init__ (self ) -> None :
149
+ super ().__init__ ()
150
+ self .lines : list [list [str ]] = []
151
+ self .in_source = False
152
+
153
+ def handle_starttag (self , tag : str , attrs : list [tuple [str , str | None ]]) -> None :
154
+ if tag == "main" :
155
+ assert attrs == [("id" , "source" )]
156
+ self .in_source = True
157
+ elif self .in_source and tag == "a" :
158
+ dattrs = dict (attrs )
159
+ assert "id" in dattrs
160
+ ida = dattrs ["id" ]
161
+ assert ida is not None
162
+ assert ida [0 ] == "t"
163
+ line_no = int (ida [1 :])
164
+ self .lines .append ([])
165
+ assert line_no == len (self .lines )
166
+
167
+ def handle_endtag (self , tag : str ) -> None :
168
+ if tag == "main" :
169
+ self .in_source = False
170
+
171
+ def handle_data (self , data : str ) -> None :
172
+ if self .in_source and self .lines :
173
+ self .lines [- 1 ].append (data )
174
+
175
+ def text (self ) -> list [str ]:
176
+ """Get the rendered text as a list of strings, one per line."""
177
+ return ["" .join (l ).rstrip () for l in self .lines ]
178
+
179
+
136
180
class FileWriteTracker :
137
181
"""A fake object to track how `open` is used to write files."""
138
182
def __init__ (self , written : set [str ]) -> None :
@@ -1141,10 +1185,10 @@ def test_bug_1828(self) -> None:
1141
1185
1142
1186
cov = coverage .Coverage ()
1143
1187
backslashes = self .start_import_stop (cov , "backslashes" )
1144
- cov .html_report (backslashes , directory = "out" )
1188
+ cov .html_report (backslashes )
1145
1189
1146
1190
contains (
1147
- "out /backslashes_py.html" ,
1191
+ "htmlcov /backslashes_py.html" ,
1148
1192
# line 2 is `"bbb \`
1149
1193
r'<a id="t2" href="#t2">2</a></span>'
1150
1194
+ r'<span class="t"> <span class="str">"bbb \</span>' ,
@@ -1153,6 +1197,12 @@ def test_bug_1828(self) -> None:
1153
1197
+ r'<span class="t"><span class="str"> ccc"</span><span class="op">]</span>' ,
1154
1198
)
1155
1199
1200
+ assert self .get_html_report_text_lines ("backslashes.py" ) == [
1201
+ '1a = ["aaa",\\ ' ,
1202
+ '2 "bbb \\ ' ,
1203
+ '3 ccc"]' ,
1204
+ ]
1205
+
1156
1206
def test_unicode (self ) -> None :
1157
1207
surrogate = "\U000e0100 "
1158
1208
0 commit comments