U
    NfZ                     @   s  d dl Z d dlZd dlmZ d dlmZ d dlmZmZmZm	Z	m
Z
mZmZmZmZmZ ddlmZ ddlmZmZ ddlmZ dd	lmZmZmZmZ dd
lmZ ddlm Z! ddlm"Z"m#Z#m$Z$ ddlm%Z& ddlm'Z( ddlm)Z* ddlm+Z, ddlm-Z. ddl/m0Z0 ddl1m2Z2m3Z3m4Z4m5Z5m6Z6m7Z7m8Z8m9Z9m:Z:m;Z;m<Z<m=Z=m>Z>m?Z?m@Z@mAZAmBZBmCZC ddlDmEZEmFZF ddlGmHZH eIeJeeJeedf f dddZKG dd de6ZLG dd dZMG dd deeef ZNdS )     N)abstractmethod)datetime)
AnyDictIterableIteratorListMappingOptionalTupleUnioncast   )
Encryption)
PageObject_VirtualList)index2label)b_deprecate_with_replacementlogger_warningparse_iso8824_date)CatalogAttributes)CatalogDictionary)CheckboxRadioButtonAttributesGoToActionArgumentsUserAccessPermissions)Core)DocumentInformationAttributes)FieldDictionaryAttributes)PageAttributes)PagesAttributes)PdfReadError)ArrayObjectBooleanObjectByteStringObjectDestinationDictionaryObjectEncodedStreamObjectFieldFitFloatObjectIndirectObject
NameObject
NullObjectNumberObject	PdfObjectTextStringObject
TreeObjectViewerPreferencescreate_string_object)OutlineTypePagemodeType)XmpInformation.)dsizereturnc                 C   s4   |dkrt dd|  } | dd  } td| d S )N   zinvalid size in convert_to_ints           iz>qr   )r!   structunpack)r7   r8    r=   5/tmp/pip-unpacked-wheel-zevpxvmc/pypdf/_doc_common.pyconvert_to_int`   s
    r?   c                   @   sf  e Zd ZdZddddZeee dddZeee dd	d
Z	eee dddZ
eee dddZeee dddZeee dddZeee dddZeee dddZeee dddZeee dddZeee dddZeee dddZeee ddd Zeee dd!d"Zeee dd#d$ZdS )%DocumentInformationa  
    A class representing the basic document metadata provided in a PDF File.
    This class is accessible through
    :py:class:`PdfReader.metadata<pypdf.PdfReader.metadata>`.

    All text properties of the document metadata have
    *two* properties, eg. author and author_raw. The non-raw property will
    always return a ``TextStringObject``, making it ideal for a case where
    the metadata is being displayed. The raw property can sometimes return
    a ``ByteStringObject``, if pypdf was unable to decode the string's
    text encoding; this requires additional safety in the caller and
    therefore is not as commonly accessed.
    Nr9   c                 C   s   t |  d S N)r&   __init__selfr=   r=   r>   rC   w   s    zDocumentInformation.__init__keyr9   c                 C   s   |  |d }t|tr|S d S rB   )get
isinstancer0   )rE   rG   retvalr=   r=   r>   	_get_textz   s    
zDocumentInformation._get_textc                 C   s,   |  tjr(| tjp*|  tj S dS )z
        Read-only property accessing the document's title.

        Returns a ``TextStringObject`` or ``None`` if the title is not
        specified.
        N)rH   DITITLErK   
get_objectrD   r=   r=   r>   title   s    

zDocumentInformation.titlec                 C   s   |  tjS )z>The "raw" version of title; can return a ``ByteStringObject``.)rH   rL   rM   rD   r=   r=   r>   	title_raw   s    zDocumentInformation.title_rawc                 C   s   |  tjS )z
        Read-only property accessing the document's author.

        Returns a ``TextStringObject`` or ``None`` if the author is not
        specified.
        )rK   rL   AUTHORrD   r=   r=   r>   author   s    zDocumentInformation.authorc                 C   s   |  tjS )z?The "raw" version of author; can return a ``ByteStringObject``.)rH   rL   rQ   rD   r=   r=   r>   
author_raw   s    zDocumentInformation.author_rawc                 C   s   |  tjS )z
        Read-only property accessing the document's subject.

        Returns a ``TextStringObject`` or ``None`` if the subject is not
        specified.
        )rK   rL   SUBJECTrD   r=   r=   r>   subject   s    zDocumentInformation.subjectc                 C   s   |  tjS )z@The "raw" version of subject; can return a ``ByteStringObject``.)rH   rL   rT   rD   r=   r=   r>   subject_raw   s    zDocumentInformation.subject_rawc                 C   s   |  tjS )ac  
        Read-only property accessing the document's creator.

        If the document was converted to PDF from another format, this is the
        name of the application (e.g. OpenOffice) that created the original
        document from which it was converted. Returns a ``TextStringObject`` or
        ``None`` if the creator is not specified.
        )rK   rL   CREATORrD   r=   r=   r>   creator   s    
zDocumentInformation.creatorc                 C   s   |  tjS )z@The "raw" version of creator; can return a ``ByteStringObject``.)rH   rL   rW   rD   r=   r=   r>   creator_raw   s    zDocumentInformation.creator_rawc                 C   s   |  tjS )aI  
        Read-only property accessing the document's producer.

        If the document was converted to PDF from another format, this is the
        name of the application (for example, macOS Quartz) that converted it to
        PDF. Returns a ``TextStringObject`` or ``None`` if the producer is not
        specified.
        )rK   rL   PRODUCERrD   r=   r=   r>   producer   s    
zDocumentInformation.producerc                 C   s   |  tjS )zAThe "raw" version of producer; can return a ``ByteStringObject``.)rH   rL   rZ   rD   r=   r=   r>   producer_raw   s    z DocumentInformation.producer_rawc                 C   s   t | tjS )z:Read-only property accessing the document's creation date.)r   rK   rL   CREATION_DATErD   r=   r=   r>   creation_date   s    z!DocumentInformation.creation_datec                 C   s   |  tjS )z
        The "raw" version of creation date; can return a ``ByteStringObject``.

        Typically in the format ``D:YYYYMMDDhhmmss[+Z-]hh'mm`` where the suffix
        is the offset from UTC.
        )rH   rL   r]   rD   r=   r=   r>   creation_date_raw   s    z%DocumentInformation.creation_date_rawc                 C   s   t | tjS )z
        Read-only property accessing the document's modification date.

        The date and time the document was most recently modified.
        )r   rK   rL   MOD_DATErD   r=   r=   r>   modification_date   s    z%DocumentInformation.modification_datec                 C   s   |  tjS )z
        The "raw" version of modification date; can return a
        ``ByteStringObject``.

        Typically in the format ``D:YYYYMMDDhhmmss[+Z-]hh'mm`` where the suffix
        is the offset from UTC.
        )rH   rL   r`   rD   r=   r=   r>   modification_date_raw   s    	z)DocumentInformation.modification_date_raw)__name__
__module____qualname____doc__rC   strr
   rK   propertyrO   rP   rR   rS   rU   rV   rX   rY   r[   r\   r   r^   r_   ra   rb   r=   r=   r=   r>   r@   h   s>   			r@   c                	   @   s  e Zd ZU dZdZeed< dZee	 ed< e
eedddZe
eedd	d
Zeeeef ee dddZeeeedddZe
eee dddZe
ee dddZe
ee dddZedzedee f edee f eeef dddZe
ee dddZ dZ!ee"e#  ed< edddZ$ee#dd d!Z%e
eeef dd"d#Z&e'dd$d%Z(d{ee)df ee eeef d&d'd(Z*d|ee) eeeef  ee eeeef  d)d*d+Z+eed,d-d.Z,ee)ef eeef eedd/d0d1Z-ee)ef eedd)d2d3Z.eeedd4d5d6Z/d}eeeef d7d8d9Z0ee1eef e"e# d:d;d<Z2e
ede3e4e5f dd=d>Z6e6j7edee3e#f dd?d@d>Z6e
e8ddAdBZ9d~ee ee e8dCdDdEZ:e
ee' ddFdGZ;eedee<ef ee ddHdIZ=e#ee dJdKdLZ>e3ee dMdNdOZ?eee"ee@ede<ef   e3dPdQdRZAeee3 dSdTdUZBe
e"e# ddVdWZCe
e"e ddXdYZDe
ee ddZd[ZEe
eeF dd\d]ZGdedee#f eeeef  ee dd^d_d`ZHdeee#ef eddadbdcZIeeee dddedfZJeeeef dgdhdiZKe
eeL ddjdkZMe
eeddldmZNe
eeeef  ddndoZOe
ePee"eQ f ddpdqZRe"e ddrdsZSee"eQ dtdudvZTdee eeeeQe"eQ f f dwdxdyZUdS )PdfDocCommonzm
    Common functions from PdfWriter and PdfReader objects.

    This root class is strongly abstracted.
    FstrictN_encryptionrA   c                 C   s   d S rB   r=   rD   r=   r=   r>   root_object  s    zPdfDocCommon.root_objectc                 C   s   d S rB   r=   rD   r=   r=   r>   
pdf_header  s    zPdfDocCommon.pdf_header)indirect_referencer9   c                 C   s   d S rB   r=   rE   rn   r=   r=   r>   rN     s    zPdfDocCommon.get_object)indirectobjr9   c                 C   s   d S rB   r=   )rE   rp   rq   r=   r=   r>   _replace_object  s    zPdfDocCommon._replace_objectc                 C   s   d S rB   r=   rD   r=   r=   r>   _info  s    zPdfDocCommon._infoc                 C   s$   t  }| jdkrdS || j |S )a  
        Retrieve the PDF file's document information dictionary, if it exists.

        Note that some PDF files use metadata streams instead of document
        information dictionaries, and these metadata streams will not be
        accessed by this function.
        N)r@   rs   update)rE   rJ   r=   r=   r>   metadata  s
    	
zPdfDocCommon.metadatac                 C   s   d S rB   r=   rD   r=   r=   r>   xmp_metadata*  s    zPdfDocCommon.xmp_metadata)includeexcluder9   c                 C   s   dS )z
        Integration into Jupyter Notebooks.

        This method returns a dictionary that maps a mime-type to it's
        representation.

        See https://ipython.readthedocs.io/en/stable/config/integrating.html
        Nr=   )rE   rw   rx   r=   r=   r>   _repr_mimebundle_.  s    zPdfDocCommon._repr_mimebundle_c                 C   sd   | j tjd}|dkrdS | }t|ts`t|}t|drP| |j	| n|| j t
tj< |S )zCReturns the existing ViewerPreferences as an overloaded dictionary.Nrn   )rl   rH   CDZVIEWER_PREFERENCESrN   rI   r2   hasattrrr   rn   r,   )rE   or=   r=   r>   viewer_preferences>  s    

zPdfDocCommon.viewer_preferencesflattened_pagesc                 C   sB   | j r| jd d S | jdkr&|   | jdk	s4tt| jS dS )a   
        Calculate the number of pages in this PDF file.

        Returns:
            The number of pages of the parsed PDF file

        Raises:
            PdfReadError: if file is encrypted and restrictions prevent
                this action.
        /Pages/CountN)is_encryptedrl   r~   _flattenAssertionErrorlenrD   r=   r=   r>   get_num_pagesO  s    
zPdfDocCommon.get_num_pages)page_numberr9   c                 C   s.   | j dkr|   | j dk	s$td| j | S )a@  
        Retrieve a page by number from this PDF file.
        Most of the time ```.pages[page_number]``` is preferred.

        Args:
            page_number: The page number to retrieve
                (pages begin at zero)

        Returns:
            A :class:`PageObject<pypdf._page.PageObject>` instance.
        Nzhint for mypy)r~   r   r   )rE   r   r=   r=   r>   get_pagee  s    
zPdfDocCommon.get_pagec                 C   s   |   S )zu
        A read-only dictionary which maps names to
        :class:`Destinations<pypdf.generic.Destination>`
        )_get_named_destinationsrD   r=   r=   r>   named_destinationsv  s    zPdfDocCommon.named_destinationsc                 C   s2  t  }tj| jkrt| jtj trtt| jtj }|j}tj|krt|tj trtt|tj }|j}tj|krtt |tj }qt  }||t	tj< n6t
| drt }| |}||t	tj< ||t	tj< nXt
| dr.t }| |}|| jt	tj< t }| |}||t	tj< ||t	tj< |S )N_add_object)r"   CANAMESrl   rI   r&   r   rn   DESTSr,   r{   r   )rE   Z
named_destnamesZ	names_refZdestsZ	dests_refr=   r=   r>   get_named_dest_root~  s8    
 




z PdfDocCommon.get_named_dest_root)treerJ   r9   c                 C   s  |dkrbi }| j }tj|kr.tt|tj }n4tj|krbtt|tj }tj|krbtt|tj }|dkrn|S tj|krtt	|tj D ]}| 
| | qn,tj|krltt|tj }d}|t|k rtt||  }|d7 }t|tsqz||  }W n tk
r"   Y qY nX |d7 }t|trLd|kr|d }nq| ||}	|	dk	r|	||< qnb| D ]X\}
}| }t|trd|krt|d  }nqt| |
|}	|	dk	rt|	||
< qt|S )z
        Retrieve the named destinations present in the document.

        Args:
            tree:
            retval:

        Returns:
            A dictionary which maps names to
            :class:`Destinations<pypdf.generic.Destination>`.
        Nr   r   /D)rl   r   r   r   r1   r   r&   PAKIDSr"   r   rN   r   rg   rI   
IndexError_build_destinationitems)rE   r   rJ   catalogr   kidirG   valuedestZk__Zv__valr=   r=   r>   r     sX    







z$PdfDocCommon._get_named_destinations)r   rJ   fileobjr9   c           
      C   s   t  }|t  |dkrLi }| j}tj|krHttt	 |tj }ndS |dkrX|S | 
||| |D ] }||krj| ||||  qqjd|krtt|d }|D ]}| }	| |	||| q|S )aX  
        Extract field data if this PDF contains interactive form fields.

        The *tree* and *retval* parameters are for recursive use.

        Args:
            tree:
            retval:
            fileobj: A file object (usually a text file) to write
                a report to on all interactive form fields found.

        Returns:
            A dictionary where each key is a field name, and each
            value is a :class:`Field<pypdf.generic.Field>` object. By
            default, the mapping name is used for keys.
            ``None`` if form data could not be located.
        Nz/Fields)FAZattributes_dictrt   r   rl   rz   Z	ACRO_FORMr   r
   r1   _check_kids_build_fieldr"   rN   )
rE   r   rJ   r   field_attributesr   attrfieldsffieldr=   r=   r>   
get_fields  s*    
zPdfDocCommon.get_fields)parentr9   c                 C   s^   d|krt t|d S d|krH| t t|d d t t|dd S t t|ddS d S )N/TM/Parent./T )r   rg   _get_qualified_field_namer&   rH   )rE   r   r=   r=   r>   r     s    z&PdfDocCommon._get_qualified_field_name)r   rJ   r   r   r9   c           
      C   sV  |  ||| ztt|d }W nj tk
r   z<d|krT| tt|d d }nd}|tt|d 7 }W n tk
r   Y Y d S X Y nX |r| ||| |d t|||< || j	
 }|tjddkr|ttj || td< |tjdd	kr^d
|kr^tt|d
 d  || td< d|| d krR|| td td n|tjdd	krR|tjdtjj@ dkrRg }t||| td< |tji D ]R}|
 }t|d
 d  D ]}	|	|kr||	 qt||| td< q|tjdtjj@ dkrRd|| d krR|| d || d d= d S )Nr   r   r   r   r   
/Chz	/_States_/Btnz/APz/Nz/Offr   )r   r   rg   KeyErrorr   r&   _write_fieldwriter(   rn   rN   rH   r   FTr,   ZOptr"   listkeysappendZFfZFfBitsZRadioKidsZNoToggleToOffindex)
rE   r   rJ   r   r   rG   rq   Zstatesksr=   r=   r>   r   *  sV    
0
zPdfDocCommon._build_fieldc                 C   s0   t j|kr,|t j D ]}| | || qd S rB   )r   r   r   rN   )rE   r   rJ   r   r   r=   r=   r>   r   `  s    
zPdfDocCommon._check_kids)r   r   r   r9   c           	   	   C   s  t  }|t  }|D ]}|t jt jfkr.q|| }z|t jkr|ddddd}|| |kr|| d|||   d nr|t jkrz|| t j }W n" t	k
r   || t j
 }Y nX || d| d n|| d||  d W q t	k
r   Y qX qd S )NZButtonTextZChoice	Signature)r   /Txr   z/Sig: r   )r   
attributesr   r   ZAAr   r   ZParentZTMr   T)	rE   r   r   r   Zfield_attributes_tupler   	attr_nametypesnamer=   r=   r>   r   h  s8    

 
zPdfDocCommon._write_field)full_qualified_namer9   c                 C   s   t tttf t ddd}|  }|dkr.i S i }| D ]H\}}|ddkr:|rd|d||< q:|d||tt |d |< q:|S )	a  
        Retrieve form fields from the document with textual data.

        Args:
            full_qualified_name: to get full name

        Returns:
            A dictionary. The key is the name of the form field,
            the value is the content of the field.

            If the document contains multiple form fields with the same name, the
            second and following will get the suffix .2, .3, ...
        )r   r   r9   c                    s6    |kr S  d t t fdd|D d  S d S )Nr   c                    s   g | ]}|  d  rdqS )r   r   )
startswith).0kkr   r=   r>   
<listcomp>  s      zJPdfDocCommon.get_form_text_fields.<locals>.indexed_key.<locals>.<listcomp>   )rg   sum)r   r   r=   r   r>   indexed_key  s    z6PdfDocCommon.get_form_text_fields.<locals>.indexed_keyN/FTr   z/Vr   )rg   r   r   r   r   rH   r   )rE   r   r   Z
formfieldsffr   r   r=   r=   r>   get_form_text_fields  s     z!PdfDocCommon.get_form_text_fields)r   r9   c              
      s.  t ttd fdd ztt j W n, tk
rV } ztd|W 5 d}~X Y nX  ddkrntdg }dd	d
krdkrd  g}nfddj	D }nldd}|D ]Z dd	d
krdkrdkr|d  g7 }q|fddj	D 7 }qfdd|D S )a  
        Provides list of pages where the field is called.

        Args:
            field: Field Object, PdfObject or IndirectObject referencing a Field

        Returns:
            List of pages:
                - Empty list:
                    The field has no widgets attached
                    (either hidden field or ancestor field).
                - Single page list:
                    Page where the widget is present
                    (most common).
                - Multi-page list:
                    Field with multiple kids widgets
                    (example: radio buttons, field repeated on multiple pages).
        )rq   rG   r9   c                    s8   || kr| | S d| kr0 t t| d  |S d S d S )Nr   )r   r&   rN   )rq   rG   )_get_inheritedr=   r>   r     s     z<PdfDocCommon.get_pages_showing_field.<locals>._get_inheritedzfield type is invalidNr   zfield is not validz/Subtyper   z/Widgetz/Pc                    s"   g | ]} j |d dkr|qS z/Annotsr   rn   rH   r   p)r   r=   r>   r     s   z8PdfDocCommon.get_pages_showing_field.<locals>.<listcomp>z/Kidsr=   r   c                    s"   g | ]} j |d dkr|qS r   r   r   r   r=   r>   r     s   c                    s,   g | ]$}t |tr|n j |j qS r=   )rI   r   pages_get_page_number_by_indirectrn   )r   xrD   r=   r>   r     s   )
r&   rg   r   r   rn   rN   	Exception
ValueErrorrH   r   )rE   r   excretZkidsr=   )r   r   r   rE   r>   get_pages_showing_field  s4    




z$PdfDocCommon.get_pages_showing_fieldc              
   C   s   d| j krdS | j d }t|tr*| }t|tr<t|S t|trz8|dd \}}|dd }t|t|}t	d||W S  t
k
r } zt
d| d| W 5 d}~X Y qX ndS dS )z
        Property to access the opening destination (``/OpenAction`` entry in
        the PDF catalog). It returns ``None`` if the entry does not exist is not
        set.

        Raises:
            Exception: If a destination is invalid.
        z/OpenActionNr   r   Z
OpenActionzInvalid Destination r   )rl   rI   bytesdecoderg   r3   r"   r)   tupler%   r   )rE   Zoapagetyparrayfitr   r=   r=   r>   open_destination  s     




(zPdfDocCommon.open_destination)r   r9   c                 C   s   t dd S )Nzno setter for open_destination)NotImplementedError)rE   r   r=   r=   r>   r     s    c                 C   s   |   S )z
        Read-only property for the outline present in the document.

        (i.e., a collection of 'outline items' which are also known as
        'bookmarks')
        )_get_outlinerD   r=   r=   r>   outline  s    zPdfDocCommon.outline)noder   r9   c                 C   s   |d krbg }| j }tj|krXtt|tj }t|tr:|S |d k	rXd|krXtt|d }|  | _|d krn|S | 	|}|r|
| d|krg }| tt|d | |r|
| d|krqtt|d }qn|S )Nz/Firstz/Next)rl   COZOUTLINESr   r&   rI   r-   r   _namedDests_build_outline_itemr   r   )rE   r   r   r   linesZoutline_objZsub_outliner=   r=   r>   r   %  s0    





zPdfDocCommon._get_outlinec                 C   s(   | j }tj|kr td|tj S dS dS )u   
        Read-only property for the list of threads.

        See §8.3.2 from PDF 1.7 spec.

        It's an array of dictionaries with "/F" and "/I" properties or
        None if there are no articles.
        r"   N)rl   r   ZTHREADSr   )rE   r   r=   r=   r>   threadsN  s    

zPdfDocCommon.threadsc                 C   s   d S rB   r=   ro   r=   r=   r>   r   ^  s    z)PdfDocCommon._get_page_number_by_indirect)r   r9   c                 C   s   |  |jS )a  
        Retrieve page number of a given PageObject.

        Args:
            page: The page to get page number. Should be
                an instance of :class:`PageObject<pypdf._page.PageObject>`

        Returns:
            The page number or None if page is not found
        )r   rn   )rE   r   r=   r=   r>   get_page_numberd  s    zPdfDocCommon.get_page_number)destinationr9   c                 C   s   |  |jS )z
        Retrieve page number of a given Destination object.

        Args:
            destination: The destination to get page number.

        Returns:
            The page number or None if page is not found
        )r   r   )rE   r   r=   r=   r>   get_destination_page_numberq  s    
z(PdfDocCommon.get_destination_page_number)rO   r   r9   c                 C   s   d\}}t |ttfs4t |tr,t|dks4|d krJt }t||t S |dd \}}|dd  }zt||t||dW S  tk
r   t	d| d| t
 | jr | jd j}|d krt n|}t||t  Y S X d S )N)NNr   r   )Zfit_typeZfit_argszUnknown destination:  )rI   r-   rg   r"   r   r%   r)   r   r!   r   rc   rj   r   rn   )rE   rO   r   r   r   tmprn   r=   r=   r>   r   }  s,    	
zPdfDocCommon._build_destination)r   r9   c                 C   s
  d\}}}zt d|d }W n, tk
rH   | jr@td|d}Y nX d|krt t|d }t t|tj }|dkr|tj }n*d|kr|d }t	|trd	|kr|d	 }t	|t
r| ||}nt	|trz| || j| j}W n" tk
r   | |d }Y nX nN|d kr(| ||}n6| jr@td
|ntd|dt | |d }|rd|krt
dd |d D |td< d|kr|d |td< d|kr|d |td< t|dddk|td< ||_z|j|_W n tk
r   Y nX |S )N)NNNrg   z/Titlez(Outline Entry Missing /Title attribute: r   z/Az/GoToz/Destr   zUnexpected destination zRemoved unexpected destination z from destinationz/Cc                 s   s   | ]}t |V  qd S rB   )r*   )r   cr=   r=   r>   	<genexpr>  s     z3PdfDocCommon._build_outline_item.<locals>.<genexpr>/Fr   r   z
/%is_open%)r   r   rj   r!   r&   r,   r   SDrI   r"   r   rg   r   Z
dest_arrayr   rc   r#   rH   r   rn   AttributeError)rE   r   r   rO   Zoutline_itemactionZaction_typer=   r=   r>   r     sf    


 





z PdfDocCommon._build_outline_itemc                 C   s   t | j| jS )aR  
        Property that emulates a list of :class:`PageObject<pypdf._page.PageObject>`.
        this property allows to get a page or a range of pages.

        For PdfWriter Only:
        It provides also capability to remove a page/range of page from the list
        (through del operator)
        Note: only the page entry is removed. As the objects beneath can be used
        somewhere else.
        A solution to completely remove them - if they are not used anywhere -
        is to write to a buffer/temporary file and to load it into a new PdfWriter
        object afterwards.
        )r   r   r   rD   r=   r=   r>   r     s    zPdfDocCommon.pagesc                    s    fddt t jD S )z
        A list of labels for the pages in this document.

        This property is read-only. The labels are in the order that the pages
        appear in the document.
        c                    s   g | ]}t  |qS r=   )page_index2page_label)r   r   rD   r=   r>   r     s     z,PdfDocCommon.page_labels.<locals>.<listcomp>)ranger   r   rD   r=   rD   r>   page_labels  s    zPdfDocCommon.page_labelsc                 C   s0   zt t| jtj W S  tk
r*   Y dS X dS )a  
        Get the page layout currently being used.

        .. list-table:: Valid ``layout`` values
           :widths: 50 200

           * - /NoLayout
             - Layout explicitly not specified
           * - /SinglePage
             - Show one page at a time
           * - /OneColumn
             - Show one column at a time
           * - /TwoColumnLeft
             - Show pages in two columns, odd-numbered pages on the left
           * - /TwoColumnRight
             - Show pages in two columns, odd-numbered pages on the right
           * - /TwoPageLeft
             - Show two pages at a time, odd-numbered pages on the left
           * - /TwoPageRight
             - Show two pages at a time, odd-numbered pages on the right
        N)r   r,   rl   rz   ZPAGE_LAYOUTr   rD   r=   r=   r>   page_layout  s    zPdfDocCommon.page_layoutc                 C   s(   z| j d W S  tk
r"   Y dS X dS )a2  
        Get the page mode currently being used.

        .. list-table:: Valid ``mode`` values
           :widths: 50 200

           * - /UseNone
             - Do not show outline or thumbnails panels
           * - /UseOutlines
             - Show outline (aka bookmarks) panel
           * - /UseThumbs
             - Show page thumbnails panel
           * - /FullScreen
             - Fullscreen view
           * - /UseOC
             - Show Optional Content Group (OCG) panel
           * - /UseAttachments
             - Show attachments panel
        z	/PageModeN)rl   r   rD   r=   r=   r>   	page_mode  s    zPdfDocCommon.page_mode)r   inheritrn   r9   c                 C   sV  t tjt tjt tjt tjf}|d kr0i }|d kr^| j}|d  }t|t	sXt
g | _tj|krztt|tj }ntj|krd}nd}|dkr|D ]}||kr|| ||< qtt|tj D ]6}i }	t|tr||	d< | }
|
r| j|
|f|	 qnT|dkrRt| D ]\}}||kr|||< qt| |}|| | j| d S )Nr   z/Pagern   )r,   PGZ	RESOURCESZMEDIABOXZCROPBOXZROTATErl   rN   rI   r&   r   r~   r   ZTYPEr   rg   r   r"   r+   r   r   r   r   rt   r   )rE   r   r   rn   Zinheritable_page_attributesr   tr   r   Zaddtrq   Zattr_inr   Zpage_objr=   r=   r>   r   9  sF    






zPdfDocCommon._flatten)r   cleanr9   c                 C   s   | j dkr|   | j dk	s tt|trN| }t|tsJtdt dS |}t|t	sz| j 
|}W n  tk
r   tdt Y dS X d|  krt| j k sn tdt dS | j| j}| j|= |r|dk	r| |t  dS )a  
        Remove page from pages list.

        Args:
            page: int / PageObject / IndirectObject
                PageObject : page to be removed. If the page appears many times
                only the first one will be removed

                IndirectObject: Reference to page to be removed

                int: Page number to be removed

            clean: replace PageObject with NullObject to prevent destination,
                annotation to reference a detached page
        Nz(IndirectObject is not referencing a pagezCannot find page in pagesr   zPage number is out of range)r~   r   r   rI   r+   rN   r   r   rc   intr   r   r   r   rn   rr   r-   )rE   r   r  r   indr=   r=   r>   remove_pageo  s,    






zPdfDocCommon.remove_page)numgenr9   c                 C   s   t |||  S )a/  
        Used to ease development.

        This is equivalent to generic.IndirectObject(num,gen,self).get_object()

        Args:
            num: The object number of the indirect object.
            gen: The generation number of the indirect object.

        Returns:
            A PdfObject
        )r+   rN   )rE   r  r  r=   r=   r>   _get_indirect_object  s    z!PdfDocCommon._get_indirect_object)permissions_coder9   c              	      sJ   t dddd tjtjtjtjtjtjtjtj	d} fdd|
 D S )z>Take the permissions as an integer, return the allowed access.decode_permissionsuser_access_permissionsz5.0.0)Zold_namenew_nameZ
removed_in)printmodifycopyannotationsZformsZaccessabilityZassembleZprint_high_qualityc                    s   i | ]\}}| |@ d kqS )r   r=   )r   rG   flagr
  r=   r>   
<dictcomp>  s    z3PdfDocCommon.decode_permissions.<locals>.<dictcomp>)r   r   ZPRINTZMODIFYZEXTRACTZADD_OR_MODIFYZFILL_FORM_FIELDSZEXTRACT_TEXT_AND_GRAPHICSZASSEMBLE_DOCZPRINT_TO_REPRESENTATIONr   )rE   r
  Zpermissions_mappingr=   r  r>   r    s"    
zPdfDocCommon.decode_permissionsc                 C   s   | j dkrdS t| j jS )zWGet the user access permissions for encrypted documents. Returns None if not encrypted.N)rk   r   PrD   r=   r=   r>   r    s    
z$PdfDocCommon.user_access_permissionsc                 C   s   dS )z
        Read-only boolean property showing whether this PDF file is encrypted.

        Note that this property, if true, will remain true even after the
        :meth:`decrypt()<pypdf.PdfReader.decrypt>` method is called.
        Nr=   rD   r=   r=   r>   r     s    	zPdfDocCommon.is_encryptedc           
      C   s   d }i }| j }d|ks|d s"d S tt|d }d|krtt|d }t|}|D ]H}|}t|}t|trRttt	 |
 }|rRtt|j}	|	||< qR|S )Nz	/AcroFormz/XFA)rl   r   r1   r"   iternextrI   r+   r
   r'   rN   zlib
decompressr   _data)
rE   r   rJ   r   r   r   r   tagr   esr=   r=   r>   xfa  s$    

zPdfDocCommon.xfac                    s   t  fdd  D S )Nc                    s   i | ]}| j |fqS r=   )_get_attachment_list)r   r   rD   r=   r>   r    s    z,PdfDocCommon.attachments.<locals>.<dictcomp>)LazyDict_list_attachmentsrD   r=   rD   r>   attachments  s
    
zPdfDocCommon.attachmentsc                 C   sX   | j }z&tttttt|d d d }W n tk
rD   g  Y S X dd |D }|S )zv
        Retrieves the list of filenames of file attachments.

        Returns:
            list of filenames
        /Names/EmbeddedFilesc                 S   s   g | ]}t |tr|qS r=   )rI   rg   )r   r   r=   r=   r>   r     s     
 z2PdfDocCommon._list_attachments.<locals>.<listcomp>)rl   r   r"   r&   r   )rE   r   	filenamesZattachments_namesr=   r=   r>   r     s    
zPdfDocCommon._list_attachments)r   r9   c                 C   s"   |  || }t|tr|S |gS rB   )_get_attachmentsrI   r   )rE   r   outr=   r=   r>   r    s    
z!PdfDocCommon._get_attachment_list)filenamer9   c           
      C   s   | j }z&tttttt|d d d }W n tk
rD   i  Y S X i }tt|D ]}|| }t|trV|dk	r~||kr~qV|}||d  	 }|d d 
 }	||krt|| ts|| g||< || |	 qV|	||< qV|S )a  
        Retrieves all or selected file attachments of the PDF as a dictionary of file names
        and the file data as a bytestring.

        Args:
            filename: If filename is None, then a dictionary of all attachments
                will be returned, where the key is the filename and the value
                is the content. Otherwise, a dictionary with just a single key
                - the filename - and its content will be returned.

        Returns:
            dictionary of filename -> Union[bytestring or List[ByteString]]
            If the filename exists multiple times a list of the different versions will be provided.
        r"  r#  Nr   z/EFr   )rl   r   r"   r&   r   r   r   rI   rg   rN   get_datar   r   )
rE   r'  r   r$  r!  r   r   r   Zf_dictZf_datar=   r=   r>   r%    s8    


zPdfDocCommon._get_attachments)NN)NN)NNN)F)NN)NNN)F)N)Vrc   rd   re   rf   rj   bool__annotations__rk   r
   r   rh   r   r&   rl   rg   rm   r   r  r+   r/   rN   rr   rs   r@   ru   r6   rv   r   r   r   ry   r2   r}   r~   r   r   r   r   r   r"   r   r1   r   r   r   r   r   r   r   r(   r   r%   r0   r$   r   setterr4   r   r   r   r-   r   r   r   r.   r   r   r   r   r   r5   r   r   r  r	  r  r   r  r   r  r	   r   r!  r   r  r%  r=   r=   r=   r>   ri      s"  

  
%  

K   2

7
  %'F
    ) K	   9 -
	 ri   c                   @   s\   e Zd ZeeddddZeedddZee dd	d
Ze	dddZ
edddZdS )r  N)argskwr9   c                 O   s   t ||| _d S rB   )dict	_raw_dict)rE   r,  r-  r=   r=   r>   rC   F  s    zLazyDict.__init__rF   c                 C   s   | j |\}}||S rB   )r/  __getitem__)rE   rG   funcargr=   r=   r>   r0  I  s    zLazyDict.__getitem__rA   c                 C   s
   t | jS rB   )r  r/  rD   r=   r=   r>   __iter__M  s    zLazyDict.__iter__c                 C   s
   t | jS rB   )r   r/  rD   r=   r=   r>   __len__P  s    zLazyDict.__len__c                 C   s   dt |   dS )NzLazyDict(keys=))r   r   rD   r=   r=   r>   __str__S  s    zLazyDict.__str__)rc   rd   re   r   rC   rg   r0  r   r3  r  r4  r6  r=   r=   r=   r>   r  E  s
   r  )Or;   r  abcr   r   typingr   r   r   r   r   r	   r
   r   r   r   rk   r   Z_pager   r   Z_page_labelsr   r   _utilsr   r   r   r   	constantsr   r   r   rz   r   r   r   r   r   r   rL   r   r   r   r  r    r   errorsr!   Zgenericr"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r   r4   r5   xmpr6   r   r  r?   r@   ri   r  r=   r=   r=   r>   <module>   sB   0P"         V