Open
Description
from html5lib._tokenizer import HTMLTokenizer
from io import StringIO
class T:
    """Tokenize an HTML string and print the stream's ``chunkOffset`` per token.

    Minimal reproduction harness: constructing an instance prints the input,
    feeds it to an ``HTMLTokenizer`` through an in-memory ``StringIO`` and
    prints one offset line for every token the tokenizer yields.
    """

    def __init__(self, data):
        """Run the whole tokenize-and-report cycle for ``data``.

        Args:
            data: HTML source text. Must be a ``str``: it is concatenated
                onto the banner line and written into a ``StringIO``.
        """
        print("Object from string: " + data)
        self.src = StringIO()
        # The tokenizer is attached to the still-empty buffer; the text is
        # written afterwards and the cursor moved back to where it was so
        # that iteration starts at the beginning of ``data``.
        self.tokenizer = HTMLTokenizer(self.src)
        pos = self.src.tell()
        self.src.write(data)
        self.src.seek(pos)
        try:
            self.handle_tokens()
        finally:
            # Release the buffer even when tokenizing raises.
            self.src.close()

    def handle_tokens(self):
        """Print ``self.tokenizer.stream.chunkOffset`` once per yielded token."""
        for _token in self.tokenizer:
            # NOTE(review): ``chunkOffset`` lives on the tokenizer's input
            # stream, reached here through the private ``_tokenizer`` module.
            # A 9-character input without tags was observed to report 0, so
            # it is presumably not an absolute document offset - confirm
            # against html5lib before using it to locate tags.
            print(self.tokenizer.stream.chunkOffset)
# Exercise the harness on plain text first, then on the same text followed by a tag.
for sample in ("klas katt", "klas katt<br>"):
    T(sample)
->
Object from string: klas katt
0
Object from string: klas katt<br>
9
13
I expected the first number printed to be 9.
Apologies if this is an internal variable that I should not be using.
I'm trying to deduce tag offsets (start, stop) in the HTML document.
Metadata
Metadata
Assignees
Labels
No labels