diff --git a/pypdf/annotations/__init__.py b/pypdf/annotations/__init__.py
index 44ed1dab5c..247f792c9d 100644
--- a/pypdf/annotations/__init__.py
+++ b/pypdf/annotations/__init__.py
@@ -13,6 +13,7 @@
 
 from ._base import NO_FLAGS, AnnotationDictionary
 from ._markup_annotations import (
+    AbstractPolyLine,
     Ellipse,
     FreeText,
     Highlight,
@@ -27,6 +28,7 @@
 
 __all__ = [
     "NO_FLAGS",
+    "AbstractPolyLine",
     "AnnotationDictionary",
     "Ellipse",
     "FreeText",
diff --git a/pypdf/annotations/_markup_annotations.py b/pypdf/annotations/_markup_annotations.py
index 6cfa38709b..3de7c6785c 100644
--- a/pypdf/annotations/_markup_annotations.py
+++ b/pypdf/annotations/_markup_annotations.py
@@ -52,6 +52,54 @@ def __init__(self, *, title_bar: Optional[str] = None) -> None:
             self[NameObject("/T")] = TextStringObject(title_bar)
 
 
+class AbstractPolyLine(MarkupAnnotation, ABC):
+    """
+    Base class for Polygon and PolyLine.
+
+    Args:
+        vertices: Coordinates of each vertex, given either as a list of
+            ``(x, y)`` pairs or as a flat ``ArrayObject`` of numbers.
+
+    """
+
+    def __init__(
+        self,
+        vertices: Union[list[Vertex], ArrayObject],
+        **kwargs: Any,
+    ) -> None:
+        super().__init__(**kwargs)
+        if len(vertices) == 0:
+            raise ValueError(f"A {type(self).__name__.lower()} needs at least 1 vertex with two coordinates")
+
+    @staticmethod
+    def _determine_vertices(
+        vertices: Union[list[Vertex], ArrayObject],
+    ) -> tuple[list[Vertex], ArrayObject]:
+        """
+        Return the vertices both as coordinate pairs and as a flat array.
+
+        Args:
+            vertices: List of ``(x, y)`` pairs or flat ``ArrayObject``.
+
+        Returns:
+            Tuple of the list of ``(x, y)`` pairs and the flat
+            ``ArrayObject`` holding the same coordinates.
+
+        """
+        if isinstance(vertices, ArrayObject):
+            # Pair up consecutive values ("grouper" recipe from
+            # https://docs.python.org/3.9/library/itertools.html#itertools-recipes).
+            # zip() stops at the shorter side, so a trailing unpaired value is dropped.
+            coordinates = iter(vertices)
+            return list(zip(coordinates, coordinates)), vertices
+
+        coord_list = ArrayObject()
+        for x, y in vertices:
+            coord_list.append(NumberObject(x))
+            coord_list.append(NumberObject(y))
+        return vertices, coord_list
+
+
 class Text(MarkupAnnotation):
     """
     A text annotation.
@@ -186,19 +234,15 @@ def __init__(
         )
 
 
-class PolyLine(MarkupAnnotation):
+class PolyLine(AbstractPolyLine):
     def __init__(
         self,
-        vertices: list[Vertex],
+        vertices: Union[list[Vertex], ArrayObject],
         **kwargs: Any,
     ) -> None:
-        super().__init__(**kwargs)
-        if len(vertices) == 0:
-            raise ValueError("A polyline needs at least 1 vertex with two coordinates")
-        coord_list = []
-        for x, y in vertices:
-            coord_list.append(NumberObject(x))
-            coord_list.append(NumberObject(y))
+        super().__init__(vertices=vertices, **kwargs)
+
+        vertices, coord_list = self._determine_vertices(vertices)
         self.update(
             {
                 NameObject("/Subtype"): NameObject("/PolyLine"),
@@ -280,20 +324,14 @@ def __init__(
         )
 
 
-class Polygon(MarkupAnnotation):
+class Polygon(AbstractPolyLine):
     def __init__(
         self,
-        vertices: list[tuple[float, float]],
+        vertices: Union[list[Vertex], ArrayObject],
         **kwargs: Any,
     ) -> None:
-        super().__init__(**kwargs)
-        if len(vertices) == 0:
-            raise ValueError("A polygon needs at least 1 vertex with two coordinates")
-
-        coord_list = []
-        for x, y in vertices:
-            coord_list.append(NumberObject(x))
-            coord_list.append(NumberObject(y))
+        super().__init__(vertices=vertices, **kwargs)
+        vertices, coord_list = self._determine_vertices(vertices)
         self.update(
             {
                 NameObject("/Type"): NameObject("/Annot"),
diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py
index d94635bf50..aa88105fc0 100644
--- a/pypdf/generic/_data_structures.py
+++ b/pypdf/generic/_data_structures.py
@@ -29,6 +29,7 @@
 __author__ = "Mathieu Fenniak"
 __author_email__ = "biziqe@mathieu.fenniak.net"
 
+import inspect
 import logging
 import re
 import sys
@@ -263,6 +264,51 @@ def read_from_stream(
 
 
 class DictionaryObject(dict[Any, Any], PdfObject):
+
+    # Maps DictionaryObject keys to ``__init__`` argument names; consulted by
+    # ``clone()`` when re-creating the object. ``None`` until the first mapping is set.
+    _init_alt_arg_names_: Optional[dict[str, str]] = None
+
+    def set_alt_arg_name(self, key: str, arg_name: str) -> None:
+        """
+        Map a DictionaryObject key to an ``__init__`` argument name.
+
+        The mapping is used when cloning. key -> arg_name.
+
+        Args:
+            key: string identifying DictionaryObject keyword
+            arg_name: string identifying argument name (value)
+
+        Raises:
+            TypeError: If ``key`` or ``arg_name`` is not a string.
+
+        """
+        # Validate before touching any state so a rejected call has no side effect.
+        if not isinstance(key, str) or not isinstance(arg_name, str):
+            raise TypeError("key and arg_name must both be strings")
+
+        if self._init_alt_arg_names_ is None:
+            # Assigning through ``self`` creates a per-instance dict and
+            # leaves the class-level ``None`` default untouched.
+            self._init_alt_arg_names_ = {}
+        self._init_alt_arg_names_[key] = arg_name
+
+    def get_alt_arg_name(self, key: str) -> Optional[str]:
+        """
+        Return the ``__init__`` argument name mapped to a key.
+
+        Args:
+            key: string identifying DictionaryObject keyword
+
+        Returns:
+            Returns None if not found, else the string representing the
+            argument name
+
+        """
+        if self._init_alt_arg_names_ is None or not isinstance(key, str):
+            return None
+        return self._init_alt_arg_names_.get(key)
+
     def replicate(
         self,
         pdf_dest: PdfWriterProtocol,
@@ -291,9 +337,27 @@ def clone(
             pass
 
         visited: set[tuple[int, int]] = set()  # (idnum, generation)
+
+        # Re-create the object through its own __init__: feed back every entry
+        # whose key maps to an __init__ parameter, either via an explicit
+        # alternative name or via the lower-cased key without the leading "/".
+        inspector = inspect.getfullargspec(self.__class__.__init__)
+        # Skip the first positional name (``self``) so that a key such as
+        # "/Self" cannot collide with the instance argument.
+        init_params = {*inspector.args[1:], *inspector.kwonlyargs}
+        kwargs = {}
+        for key, val in self.items():
+            alt_arg_name = self.get_alt_arg_name(key)
+            if alt_arg_name:
+                kwargs[alt_arg_name] = val
+                continue
+            key_stripped = key.removeprefix("/").lower()
+            if key_stripped in init_params:
+                kwargs[key_stripped] = val
+
         d__ = cast(
             "DictionaryObject",
-            self._reference_clone(self.__class__(), pdf_dest, force_duplicate),
+            self._reference_clone(self.__class__(**kwargs), pdf_dest, force_duplicate),
         )
         if ignore_fields is None:
             ignore_fields = []
diff --git a/tests/generic/test_files.py b/tests/generic/test_files.py
index 9d488e0681..8282a9def7 100644
--- a/tests/generic/test_files.py
+++ b/tests/generic/test_files.py
@@ -8,6 +8,7 @@
 import pytest
 
 from pypdf import PdfReader, PdfWriter
+from pypdf.annotations._markup_annotations import Polygon
 from pypdf.constants import AFRelationship
 from pypdf.errors import PdfReadError, PyPdfError
 from pypdf.generic import (
@@ -575,3 +576,27 @@ def test_embedded_file__order():
         "test.txt", attachment4.pdf_object.indirect_reference,
         "xyz.txt", attachment3.pdf_object.indirect_reference,
     ]
+
+
+def test_merge_page_with_annotation():
+    # added and adapted from issue #3467
+    writer = PdfWriter()
+    writer2 = PdfWriter()
+    writer.add_blank_page(100, 100)
+    writer2.add_blank_page(100, 100)
+
+    annotation = Polygon(
+        vertices=[(50, 550), (200, 650), (70, 750), (50, 700)],
+    )
+
+    writer.add_annotation(0, annotation)
+
+    page1 = writer.pages[0]
+    page2 = writer2.pages[0]
+    page2.merge_page(page1)
+
+    assert page2.annotations[0].get_object()["/Type"] == annotation["/Type"]
+    assert page2.annotations[0].get_object()["/Subtype"] == annotation["/Subtype"]
+    assert page2.annotations[0].get_object()["/Vertices"] == annotation["/Vertices"]
+    assert page2.annotations[0].get_object()["/IT"] == annotation["/IT"]
+    assert page2.annotations[0].get_object()["/Rect"] == annotation["/Rect"]
diff --git a/tests/test_generic.py b/tests/test_generic.py
index fde9ddd791..b06c8975df 100644
--- a/tests/test_generic.py
+++ b/tests/test_generic.py
@@ -54,6 +54,18 @@
 RESOURCE_ROOT = PROJECT_ROOT / "resources"
 
 
+class DummyDictObject(DictionaryObject):
+    def __init__(self, test_name: str) -> None:
+        super().__init__()
+        self.set_alt_arg_name("/TestName", "test_name")
+
+        self.update(
+            {
+                NameObject("/TestName"): test_name
+            }
+        )
+
+
 class ChildDummy(DictionaryObject):
     @property
     def indirect_reference(self):
@@ -1282,6 +1294,24 @@ def test_coverage_arrayobject():
         assert isinstance(k, int)
         assert isinstance(v, PdfObject)
 
+
+def test_alt_keyword_when_cloning():
+    obj = DummyDictObject(test_name="testval")
+    obj2: DummyDictObject = None
+
+    clone_obj = obj.clone(obj2)
+
+    assert clone_obj.get("/TestName") == "testval"
+
+
+def test_type_error_when_using_alt_arg_keys():
+    dict_obj = DictionaryObject()
+    with pytest.raises(TypeError):
+        dict_obj.set_alt_arg_name(1, "test_key")
+
+    with pytest.raises(TypeError):
+        dict_obj.set_alt_arg_name("test_key", 1)
+
 
 def test_coverage_streamobject():
     writer = PdfWriter()