@@ -338,13 +338,15 @@ def __init__(self, infile: Path, pscript5_mode: bool) -> None:
338
338
self .infile = infile
339
339
self .rman = pdfminer .pdfinterp .PDFResourceManager (caching = True )
340
340
self .disable_boxes_flow = None
341
+ self .page_iter = None
341
342
self .page_cache : list [PDFPage ] = []
342
343
self .pscript5_mode = pscript5_mode
343
344
self .file = None
344
345
345
346
def __enter__(self):
    """Open the input file and set up page iteration.

    The file named by ``self.infile`` is opened in binary read mode and
    kept on ``self.file``. The result of ``PDFPage.get_pages`` for that
    file is kept on ``self.page_iter``; ``get_page_analysis`` pulls pages
    from it with ``next()`` as they are requested.

    Returns:
        This object, so it can be bound by the ``with`` statement.
    """
    handle = Path(self.infile).open('rb')
    self.file = handle
    self.page_iter = PDFPage.get_pages(handle)
    return self
349
351
350
352
def __exit__ (self , exc_type , exc_value , traceback ):
@@ -353,27 +355,20 @@ def __exit__(self, exc_type, exc_value, traceback):
353
355
self .file .close ()
354
356
return True
355
357
356
def _load_page_cache(self):
    """Read all pages of the open input file into ``self.page_cache``.

    Raises:
        InputFileError: If no pages were found, or if any entry in the
            resulting page list is ``None``.
        EncryptedPdfError: If ``PDFTextExtractionNotAllowed`` is raised
            while the pages are being read or checked.
    """
    try:
        pages = list(PDFPage.get_pages(self.file))
        # The cache is stored before validation, so it stays populated
        # even when one of the checks below raises.
        self.page_cache = pages
        if not pages:
            raise InputFileError(
                "pdfminer did not find any pages in the input file."
            )
        # Index of the first page entry that is None, if there is one.
        n = next((idx for idx, pg in enumerate(pages) if pg is None), None)
        if n is not None:
            raise InputFileError(
                f"pdfminer could not process page { n } (counting from 0)."
            )
    except PDFTextExtractionNotAllowed as denied:
        raise EncryptedPdfError() from denied
371
-
372
358
def get_page_analysis (self , pageno : int ):
373
359
"""Get the page analysis for a given page."""
374
- if not self .page_cache :
375
- self ._load_page_cache ()
360
+ while len (self .page_cache ) <= pageno :
361
+ try :
362
+ self .page_cache .append (next (self .page_iter ))
363
+ except StopIteration :
364
+ raise InputFileError (
365
+ f"pdfminer did not find page { pageno } in the input file."
366
+ )
376
367
page = self .page_cache [pageno ]
368
+ if not page :
369
+ raise InputFileError (
370
+ f"pdfminer could not process page { pageno } (counting from 0)."
371
+ )
377
372
dev = TextPositionTracker (
378
373
self .rman ,
379
374
laparams = LAParams (
0 commit comments