Skip to content

Commit c05ed72

Browse files
committed
Merge branch 'main' of github.com:ocrmypdf/OCRmyPDF
2 parents c29f58a + eb303fe commit c05ed72

File tree

1 file changed

+13
-18
lines changed

1 file changed

+13
-18
lines changed

src/ocrmypdf/pdfinfo/layout.py

+13-18
Original file line numberDiff line numberDiff line change
@@ -338,13 +338,15 @@ def __init__(self, infile: Path, pscript5_mode: bool) -> None:
338338
self.infile = infile
339339
self.rman = pdfminer.pdfinterp.PDFResourceManager(caching=True)
340340
self.disable_boxes_flow = None
341+
self.page_iter = None
341342
self.page_cache: list[PDFPage] = []
342343
self.pscript5_mode = pscript5_mode
343344
self.file = None
344345

345346
def __enter__(self):
    """Open the input file and set up page iteration.

    Opens ``self.infile`` for binary reading and keeps the handle on
    ``self.file``. The object returned by ``PDFPage.get_pages`` for that
    handle is stored on ``self.page_iter``; ``get_page_analysis`` pulls
    pages from it with ``next()`` as they are requested.

    Returns:
        This object, so it can be bound by the ``with`` statement.
    """
    handle = Path(self.infile).open('rb')
    self.file = handle
    # Only the iterator is created here; pages are fetched on demand.
    self.page_iter = PDFPage.get_pages(handle)
    return self
349351

350352
def __exit__(self, exc_type, exc_value, traceback):
@@ -353,27 +355,20 @@ def __exit__(self, exc_type, exc_value, traceback):
353355
self.file.close()
354356
return True
355357

356-
def _load_page_cache(self):
357-
"""Load the page cache."""
358-
try:
359-
self.page_cache = list(PDFPage.get_pages(self.file))
360-
if not self.page_cache:
361-
raise InputFileError(
362-
"pdfminer did not find any pages in the input file."
363-
)
364-
for n, page in enumerate(self.page_cache):
365-
if page is None:
366-
raise InputFileError(
367-
f"pdfminer could not process page {n} (counting from 0)."
368-
)
369-
except PDFTextExtractionNotAllowed as e:
370-
raise EncryptedPdfError() from e
371-
372358
def get_page_analysis(self, pageno: int):
373359
"""Get the page analysis for a given page."""
374-
if not self.page_cache:
375-
self._load_page_cache()
360+
while len(self.page_cache) <= pageno:
361+
try:
362+
self.page_cache.append(next(self.page_iter))
363+
except StopIteration:
364+
raise InputFileError(
365+
f"pdfminer did not find page {pageno} in the input file."
366+
)
376367
page = self.page_cache[pageno]
368+
if not page:
369+
raise InputFileError(
370+
f"pdfminer could not process page {pageno} (counting from 0)."
371+
)
377372
dev = TextPositionTracker(
378373
self.rman,
379374
laparams=LAParams(

0 commit comments

Comments
 (0)