|
30 | 30 | from __future__ import absolute_import |
31 | 31 |
|
32 | 32 | # Standard library |
33 | | -from typing import Tuple # noqa: F401 |
| 33 | +from typing import IO, Tuple # noqa: F401 |
| 34 | +import gzip |
34 | 35 | import logging |
| 36 | +import os |
35 | 37 | import struct |
36 | 38 | import sys |
37 | 39 |
|
@@ -107,6 +109,36 @@ def read_string(data, length_fmt="H"): |
107 | 109 | # ------------------------------------------------------------------------------ |
108 | 110 |
|
109 | 111 |
|
def java_data_fd(original_df):
    # type: (IO[bytes]) -> IO[bytes]
    """
    Returns a file object giving access to Java serialized content.

    The first two bytes of the stream are inspected, then the stream is
    rewound to where it was when this function was called:

    * if the GZip magic number (0x1F 0x8B) is found, a decompressing wrapper
      around the input is returned;
    * otherwise the input itself is returned, whether it starts with the
      first byte of the Java serialization magic (0xAC) or not. Validation
      of unknown content is left to the parser.

    :param original_df: Input file descriptor (binary mode, seekable)
    :return: Input file descriptor or a fake one to access uncompressed data
    :raise IOError: Error reading input file
    """
    # Peek at the first bytes, then restore the original position
    start_idx = original_df.tell()
    magic_header = original_df.read(2)
    original_df.seek(start_idx, os.SEEK_SET)

    # Slices are compared against bytes literals instead of indexing the
    # header: indexing yields an int on Python 3 but a 1-char str on
    # Python 2, and raises IndexError on an empty or truncated stream.
    if magic_header[:1] == b"\xac":
        # Consider we have a raw serialized stream: use it
        return original_df

    if magic_header[:2] == b"\x1f\x8b":
        # GZip content: wrap the already opened stream.
        # GzipFile(fileobj=...) is used rather than gzip.open(), as the
        # latter only accepts a file name on Python 2.
        return gzip.GzipFile(fileobj=original_df, mode="rb")

    # Unknown, empty or truncated content: let the parser raise the error
    return original_df
| 137 | + |
| 138 | + |
| 139 | +# ------------------------------------------------------------------------------ |
| 140 | + |
| 141 | + |
110 | 142 | def hexdump(src, start_offset=0, length=16): |
111 | 143 | # type: (str, int, int) -> str |
112 | 144 | """ |
|
0 commit comments