Skip to content

Commit 7701789

Browse files
committed
Added GZIP input support for V1
1 parent a5fbe16 commit 7701789

File tree

3 files changed

+66
-2
lines changed

3 files changed

+66
-2
lines changed

javaobj/utils.py

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,10 @@
3030
from __future__ import absolute_import
3131

3232
# Standard library
33-
from typing import Tuple # noqa: F401
33+
from typing import IO, Tuple # noqa: F401
34+
import gzip
3435
import logging
36+
import os
3537
import struct
3638
import sys
3739

@@ -107,6 +109,36 @@ def read_string(data, length_fmt="H"):
107109
# ------------------------------------------------------------------------------
108110

109111

112+
def java_data_fd(original_df):
    # type: (IO[bytes]) -> IO[bytes]
    """
    Ensures that the input file descriptor gives access to Java serialized
    content. Automatically uncompresses GZipped data.

    The first two bytes of the stream are read to detect the format, then the
    stream is rewound to the position it had when this function was called.
    The stream must therefore support ``tell()`` and ``seek()``.

    :param original_df: Input file descriptor (binary mode, seekable)
    :return: Input file descriptor or a fake one to access uncompressed data
    :raise IOError: Error reading input file
    """
    # Peek at the magic number, then restore the caller's position
    start_idx = original_df.tell()
    # A bytearray behaves the same on Python 2 and 3 (indexing bytes yields
    # a str on Python 2), and the prefix test below is safe when the stream
    # holds fewer than two bytes
    magic_header = bytearray(original_df.read(2))
    original_df.seek(start_idx, os.SEEK_SET)

    if magic_header.startswith(b"\x1f\x8b"):
        # GZip magic number found: wrap the stream. GzipFile(fileobj=...)
        # accepts an already opened file on every Python version, whereas
        # gzip.open() expects a file name on Python 2
        return gzip.GzipFile(fileobj=original_df, mode="rb")

    # Either a raw serialized stream (first byte is 0xAC) or unknown/empty
    # content: return the stream untouched and let the parser raise the error
    return original_df
137+
138+
139+
# ------------------------------------------------------------------------------
140+
141+
110142
def hexdump(src, start_offset=0, length=16):
111143
# type: (str, int, int) -> str
112144
"""

javaobj/v1/core.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
from .marshaller import JavaObjectMarshaller
4848
from .unmarshaller import JavaObjectUnmarshaller
4949
from .transformers import DefaultObjectTransformer
50+
from ..utils import java_data_fd
5051

5152
# ------------------------------------------------------------------------------
5253

@@ -81,6 +82,9 @@ def load(file_object, *transformers, **kwargs):
8182
trailing bytes are remaining
8283
:return: The deserialized object
8384
"""
85+
# Check file format (uncompress if necessary)
86+
file_object = java_data_fd(file_object)
87+
8488
# Read keyword argument
8589
ignore_remaining_data = kwargs.get("ignore_remaining_data", False)
8690

tests/tests.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343

4444
# Local
4545
import javaobj.v1 as javaobj
46-
from javaobj.utils import hexdump
46+
from javaobj.utils import hexdump, java_data_fd
4747

4848
# ------------------------------------------------------------------------------
4949

@@ -141,6 +141,34 @@ def test_chars_rw(self):
141141
self.assertEqual(pobj, expected)
142142
self._try_marshalling(jobj, pobj)
143143

144+
def test_gzip_open(self):
    """
    Checks that a GZipped stream is transparently uncompressed: the raw
    sample and its GZipped twin must yield the same bytes
    """
    contents = []
    for sample in ("testChars.ser", "testChars.ser.gz"):
        # Read the whole content through the format-detecting wrapper
        with java_data_fd(self.read_file(sample, stream=True)) as fd:
            contents.append(fd.read())

    base, gzipped = contents
    self.assertEqual(
        base, gzipped, "Uncompressed content doesn't match the original"
    )
159+
160+
def test_chars_gzip(self):
    """
    Loads the GZipped sample testChars.ser.gz and checks the decoded value
    """
    serialized = self.read_file("testChars.ser.gz")
    parsed = javaobj.loads(serialized)
    _logger.debug("Read char objects: %s", parsed)

    # The reference value is the UTF-16 (big endian) form of the string
    utf16_bytes = "python-javaobj".encode("utf-16-be")
    self.assertEqual(parsed, utf16_bytes.decode("latin1"))
171+
144172
def test_double_rw(self):
145173
"""
146174
Reads testDouble.ser and checks the serialization process

0 commit comments

Comments
 (0)