Source code for compoundfiles.streams

#!/usr/bin/env python
# vim: set et sw=4 sts=4 fileencoding=utf-8:
#
# A library for reading Microsoft's OLE Compound Document format
# Copyright (c) 2014 Dave Jones <dave@waveform.org.uk>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from __future__ import (
    unicode_literals,
    absolute_import,
    print_function,
    division,
    )
# Keep a reference to the interpreter's built-in ``str`` type before the name
# is rebound on the next line.
native_str = str
# With ``unicode_literals`` in effect (see the __future__ import above), ''
# is a text literal, so ``str`` now names the text type on both Python 2
# (``unicode``) and Python 3 (``str``).
str = type('')


import io
import warnings
from array import array
from abc import abstractmethod

from compoundfiles.errors import (
    CompoundFileNoMiniFatError,
    CompoundFileNormalLoopError,
    CompoundFileDirSizeWarning,
    CompoundFileTruncatedWarning,
    )
from compoundfiles.const import END_OF_CHAIN


class CompoundFileStream(io.RawIOBase):
    """
    Abstract base class for streams within an OLE Compound Document.

    Instances of :class:`CompoundFileStream` are not constructed directly, but
    are returned by the :meth:`CompoundFileReader.open` method. They support
    all common methods associated with read-only streams (:meth:`read`,
    :meth:`seek`, :meth:`tell`, and so forth).

    The methods of this class read ``self._sector_size`` and ``self._length``
    but never assign them; concrete subclasses must set both (and implement
    :meth:`_set_pos` and :meth:`read1`) before the stream is used.
    """

    def __init__(self):
        super(CompoundFileStream, self).__init__()
        # Sector numbers making up the stream, in chain order. The typecode
        # is passed through native_str, i.e. the built-in ``str`` captured
        # before this module rebinds the name ``str``.
        self._sectors = array(native_str('L'))
        # Current position, expressed as an index into self._sectors plus a
        # byte offset within that sector. Both stay None until _set_pos runs.
        self._sector_index = None
        self._sector_offset = None

    def _load_sectors(self, start, fat):
        """
        Append to ``self._sectors`` the chain of sectors beginning at *start*,
        following the links in *fat* until ``END_OF_CHAIN`` is reached.

        Raises :exc:`CompoundFileNormalLoopError` if the chain is cyclic.
        """
        # To guard against cyclic FAT chains we use the tortoise'n'hare
        # algorithm here. If hare is ever equal to tortoise after a step, then
        # the hare somehow got transported behind the tortoise (via a loop) so
        # we raise an error
        hare = start
        tortoise = start
        while tortoise != END_OF_CHAIN:
            self._sectors.append(tortoise)
            tortoise = fat[tortoise]
            # The hare advances two links for each one the tortoise takes,
            # and stops for good once it reaches the end of the chain
            if hare != END_OF_CHAIN:
                hare = fat[hare]
                if hare != END_OF_CHAIN:
                    hare = fat[hare]
                    if hare == tortoise:
                        raise CompoundFileNormalLoopError(
                            'cyclic FAT chain found starting at %d' % start)

    @abstractmethod
    def _set_pos(self, value):
        """
        Move the stream to absolute byte position *value*. Implemented by
        subclasses; :meth:`seek` calls this after validating the position.
        """
        raise NotImplementedError

    def readable(self):
        """
        Returns ``True``, indicating that the stream supports :meth:`read`.
        """
        return True

    def writable(self):
        """
        Returns ``False``, indicating that the stream doesn't support
        :meth:`write` or :meth:`truncate`.
        """
        return False

    def seekable(self):
        """
        Returns ``True``, indicating that the stream supports :meth:`seek`.
        """
        return True

    def tell(self):
        """
        Return the current stream position.
        """
        return (self._sector_index * self._sector_size) + self._sector_offset

    def seek(self, offset, whence=io.SEEK_SET):
        """
        Change the stream position to the given byte *offset*. *offset* is
        interpreted relative to the position indicated by *whence*. Values for
        *whence* are:

        * ``SEEK_SET`` or ``0`` - start of the stream (the default); *offset*
          should be zero or positive

        * ``SEEK_CUR`` or ``1`` - current stream position; *offset* may be
          negative

        * ``SEEK_END`` or ``2`` - end of the stream; *offset* is usually
          negative

        Return the new absolute position. Raises :exc:`ValueError` if
        *whence* is not one of the values above, or if the resulting position
        would be before the start of the stream.
        """
        if whence == io.SEEK_SET:
            pass
        elif whence == io.SEEK_CUR:
            offset = self.tell() + offset
        elif whence == io.SEEK_END:
            offset = self._length + offset
        else:
            # Previously an unknown whence was silently treated as SEEK_SET
            raise ValueError(
                'invalid whence (%r, should be 0, 1 or 2)' % (whence,))
        if offset < 0:
            raise ValueError(
                'New position is before the start of the stream')
        self._set_pos(offset)
        return offset

    @abstractmethod
    def read1(self, n=-1):
        """
        Read up to *n* bytes from the stream using only a single call to the
        underlying object.

        In the case of :class:`CompoundFileStream` this roughly corresponds to
        returning the content from the current position up to the end of the
        current sector.
        """
        raise NotImplementedError

    def read(self, n=-1):
        """
        Read up to *n* bytes from the stream and return them. If *n* is
        unspecified, ``None`` or negative, everything from the current
        position up to the end of the stream is read.

        Fewer than *n* bytes may be returned if there are fewer than *n* bytes
        from the current stream position to the end of the stream. If 0 bytes
        are returned, and *n* was not 0, this indicates end of the stream.

        If :meth:`read1` runs dry before the expected number of bytes has been
        gathered, a :exc:`CompoundFileTruncatedWarning` is issued; the value
        returned still has the full expected length, with the part that could
        not be read left as zero bytes.
        """
        remaining = max(0, self._length - self.tell())
        if n is None or n < 0:
            n = remaining
        else:
            n = min(n, remaining)
        # Pre-sized (zero-filled) buffer which is filled in sector by sector
        result = bytearray(n)
        i = 0
        while i < n:
            buf = self.read1(n - i)
            if not buf:
                warnings.warn(
                    CompoundFileTruncatedWarning(
                        'compound document appears to be truncated'))
                break
            result[i:i + len(buf)] = buf
            i += len(buf)
        return bytes(result)
class CompoundFileNormalStream(CompoundFileStream):
    """
    Stream whose sectors are located through the parent's normal FAT and
    whose content is sliced directly out of the parent's ``_mmap`` buffer.
    """

    def __init__(self, parent, start, length=None):
        """
        Build the stream whose sector chain begins at sector *start*.

        *parent* supplies ``_normal_fat``, ``_normal_sector_size``,
        ``_header_size`` and ``_mmap``. *length* is the declared stream size
        in bytes; if it is ``None``, or does not fit the number of sectors in
        the chain, the full size of the chain is used instead (with a
        :exc:`CompoundFileDirSizeWarning` in the latter case).
        """
        super(CompoundFileNormalStream, self).__init__()
        self._load_sectors(start, parent._normal_fat)
        self._sector_size = parent._normal_sector_size
        self._header_size = parent._header_size
        self._mmap = parent._mmap
        # A chain of N sectors can hold a stream of between (N-1) and N
        # sectors' worth of bytes
        min_length = (len(self._sectors) - 1) * self._sector_size
        max_length = len(self._sectors) * self._sector_size
        if length is None:
            self._length = max_length
        elif not (min_length <= length <= max_length):
            warnings.warn(
                CompoundFileDirSizeWarning(
                    'length (%d) of stream at sector %d exceeds bounds '
                    '(%d-%d)' % (length, start, min_length, max_length)))
            self._length = max_length
        else:
            self._length = length
        self._set_pos(0)

    def close(self):
        """
        Drop the reference to the parent's buffer and mark the stream closed.
        Calling this more than once is harmless.
        """
        self._mmap = None
        # Chain to the base class so that ``closed`` reports True afterwards
        super(CompoundFileNormalStream, self).close()

    def _set_pos(self, value):
        # Position is tracked as (index into the sector chain, byte offset)
        self._sector_index = value // self._sector_size
        self._sector_offset = value % self._sector_size

    def read1(self, n=-1):
        """
        Return up to *n* bytes, never reading past the end of the current
        sector or the end of the stream. ``None`` or a negative *n* means
        "as much as possible".
        """
        remaining = max(0, self._length - self.tell())
        if n is None or n < 0:
            n = remaining
        else:
            n = min(n, remaining)
        n = min(n, self._sector_size - self._sector_offset)
        if n == 0:
            return b''
        # Sectors are laid out back to back after the file header
        offset = (
            self._header_size + (
                self._sectors[self._sector_index] * self._sector_size) +
            self._sector_offset)
        result = self._mmap[offset:offset + n]
        self._set_pos(self.tell() + n)
        return result


class CompoundFileMiniStream(CompoundFileStream):
    """
    Stream whose sectors are located through the parent's mini FAT and whose
    content is read from a :class:`CompoundFileNormalStream` opened over the
    root entry's sector chain.
    """

    def __init__(self, parent, start, length=None):
        """
        Build the stream whose mini-sector chain begins at *start*.

        Raises :exc:`CompoundFileNoMiniFatError` if *parent* has no mini FAT.
        *length* is the declared stream size in bytes; ``None`` means "the
        full size of the chain", and a value larger than the chain can hold
        is clamped (with a :exc:`CompoundFileDirSizeWarning`).
        """
        super(CompoundFileMiniStream, self).__init__()
        if not parent._mini_fat:
            raise CompoundFileNoMiniFatError(
                'no mini FAT in compound document')
        self._load_sectors(start, parent._mini_fat)
        self._sector_size = parent._mini_sector_size
        self._header_size = 0
        # Mini sectors live inside the stream belonging to the root entry
        self._file = CompoundFileNormalStream(
            parent, parent.root._start_sector, parent.root.size)
        max_length = len(self._sectors) * self._sector_size
        if length is None:
            self._length = max_length
        else:
            if length > max_length:
                warnings.warn(
                    CompoundFileDirSizeWarning(
                        'length (%d) of stream at sector %d exceeds '
                        'max (%d)' % (length, start, max_length)))
            # Test against None rather than truthiness so that a declared
            # length of 0 is honoured instead of being treated as "not given"
            self._length = min(max_length, length)
        self._set_pos(0)

    def close(self):
        """
        Close the backing stream and mark this stream closed. Calling this
        more than once (or after a failed construction) is harmless.
        """
        # _file is missing if __init__ raised early, and None after the
        # first close
        backing = getattr(self, '_file', None)
        self._file = None
        try:
            if backing is not None:
                backing.close()
        finally:
            super(CompoundFileMiniStream, self).close()

    def _set_pos(self, value):
        self._sector_index = value // self._sector_size
        self._sector_offset = value % self._sector_size
        # Positions at or beyond the end of the chain have no backing
        # location, so the backing stream is only moved for valid sectors
        if self._sector_index < len(self._sectors):
            self._file.seek(
                self._header_size +
                (self._sectors[self._sector_index] * self._sector_size) +
                self._sector_offset)

    def read1(self, n=-1):
        """
        Return up to *n* bytes, never reading past the end of the current
        mini sector or the end of the stream. ``None`` or a negative *n*
        means "as much as possible".
        """
        remaining = max(0, self._length - self.tell())
        if n is None or n < 0:
            n = remaining
        else:
            n = min(n, remaining)
        n = min(n, self._sector_size - self._sector_offset)
        if n == 0:
            return b''
        result = self._file.read1(n)
        # Only perform a seek to a different sector if we've crossed into one
        if self._sector_offset + n < self._sector_size:
            self._sector_offset += n
        else:
            self._set_pos(self.tell() + n)
        return result