@@ -44,6 +44,7 @@ def load_file(
4444def load (
4545 pdf_file : IO ,
4646 pdf_file_path : Optional [str ] = None ,
47+ password : Optional [str ] = None ,
4748 la_params : Optional [Dict ] = None ,
4849 ** kwargs : Any ,
4950) -> PDFDocument :
@@ -52,13 +53,15 @@ def load(
5253
5354 Args:
5455 pdf_file (io): The PDF file.
56+ pdf_file_path (str, optional): Passed to `PDFDocument`. See the documentation
57+ for `PDFDocument`.
58+ password (str, optional): Password for the encrypted PDF. Required if the
59+ PDF is encrypted.
5560 la_params (dict, optional): The layout parameters passed to PDFMiner for analysis. See
5661 the PDFMiner documentation here:
5762 https://pdfminersix.readthedocs.io/en/latest/reference/composable.html#laparams.
5863 Note that py_pdf_parser will re-order the elements it receives from PDFMiner
5964 so options relating to element ordering will have no effect.
60- pdf_file_path (str, optional): Passed to `PDFDocument`. See the documentation
61- for `PDFDocument`.
6265 kwargs: Passed to `PDFDocument`. See the documentation for `PDFDocument`.
6366
6467 Returns:
@@ -69,7 +72,9 @@ def load(
6972 la_params = {** DEFAULT_LA_PARAMS , ** la_params }
7073
7174 pages : Dict [int , Page ] = {}
72- for page in extract_pages (pdf_file , laparams = LAParams (** la_params )):
75+ for page in extract_pages (
76+ pdf_file , laparams = LAParams (** la_params ), password = password
77+ ):
7378 elements = [element for element in page if isinstance (element , LTTextBox )]
7479
7580 # If all_texts=True then we may get some text from inside figures
0 commit comments