Source code for compoundfiles.entities

#!/usr/bin/env python
# vim: set et sw=4 sts=4 fileencoding=utf-8:
#
# A library for reading Microsoft's OLE Compound Document format
# Copyright (c) 2014 Dave Jones <dave@waveform.org.uk>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from __future__ import (
    unicode_literals,
    absolute_import,
    print_function,
    division,
    )
# With ``unicode_literals`` in effect, ``type('')`` is the text type on both
# Python 2 (``unicode``) and Python 3 (``str``). Rebinding ``str`` lets the
# ``isinstance(..., str)`` checks in this module work on either version.
str = type('')


import warnings
import datetime as dt
from pprint import pformat

from compoundfiles.errors import (
    CompoundFileDirLoopError,
    CompoundFileDirEntryWarning,
    CompoundFileDirNameWarning,
    CompoundFileDirTypeWarning,
    CompoundFileDirIndexWarning,
    CompoundFileDirTimeWarning,
    CompoundFileDirSectorWarning,
    CompoundFileDirSizeWarning,
    )
from compoundfiles.const import (
    NO_STREAM,
    DIR_INVALID,
    DIR_STORAGE,
    DIR_STREAM,
    DIR_ROOT,
    DIR_HEADER,
    FILENAME_ENCODING,
    )


class CompoundFileEntity(object):
    """
    Represents an entity in an OLE Compound Document.

    An entity in an OLE Compound Document can be a "stream" (analogous to a
    file in a file-system) which has a :attr:`size` and can be opened by a call
    to the parent object's :meth:`~CompoundFileReader.open` method.
    Alternatively, it can be a "storage" (analogous to a directory in a
    file-system), which has no size but has :attr:`created` and
    :attr:`modified` time-stamps, and can contain other streams and storages.

    If the entity is a storage, it will act as an iterable read-only sequence,
    indexable by ordinal or by name, and compatible with the ``in`` operator
    and built-in :func:`len` function.

    .. attribute:: created

        For storage entities (where :attr:`isdir` is ``True``), this returns
        the creation date of the storage. Returns ``None`` for stream entities
        and for storages whose stored creation time-stamp is zero.

    .. attribute:: isdir

        Returns True if this is a storage entity which can contain other
        entities.

    .. attribute:: isfile

        Returns True if this is a stream entity which can be opened.

    .. attribute:: modified

        For storage entities (where :attr:`isdir` is True), this returns the
        last modification date of the storage. Returns ``None`` for stream
        entities and for storages whose stored modification time-stamp is
        zero.

    .. attribute:: name

        Returns the name of entity. This can be up to 31 characters long and
        may contain any character representable in UTF-16 except the NULL
        character. Names are considered case-insensitive for comparison
        purposes.

    .. attribute:: size

        For stream entities (where :attr:`isfile` is ``True``), this returns
        the number of bytes occupied by the stream. Returns 0 for storage
        entities.
    """

    def __init__(self, parent, stream, index):
        """
        Parse one directory entry from *stream*.

        Exactly ``DIR_HEADER.size`` bytes are read from *stream* and unpacked.
        Inconsistencies in the entry are reported through
        :func:`warnings.warn` and the offending field is reset to a safe
        value, rather than raising an exception.

        :param parent: the owning reader; only its ``_normal_sector_size``
            attribute is consulted here.
        :param stream: a file-like object positioned at the start of the
            directory entry.
        :param index: the position of this entry in the directory; the entry
            at index 0 is always treated as the root storage.
        """
        super(CompoundFileEntity, self).__init__()
        self._index = index
        self._children = None
        (
            name,
            name_len,
            self._entry_type,
            self._entry_color,
            self._left_index,
            self._right_index,
            self._child_index,
            self.uuid,
            user_flags,
            created,
            modified,
            self._start_sector,
            size_low,
            size_high,
        ) = DIR_HEADER.unpack(stream.read(DIR_HEADER.size))

        self.name = name.decode('utf-16le')
        try:
            self.name = self.name[:self.name.index('\0')]
        except ValueError:
            warnings.warn(
                CompoundFileDirNameWarning(
                    'missing NULL terminator in name'))
            # No terminator found: fall back on the declared byte length
            # (two bytes per character, less one for the terminator).
            self.name = self.name[:(name_len // 2) - 1]

        if index == 0:
            # The first directory entry is the root, whatever it claims.
            if self._entry_type != DIR_ROOT:
                warnings.warn(
                    CompoundFileDirTypeWarning('invalid type'))
            self._entry_type = DIR_ROOT
        elif self._entry_type not in (DIR_STREAM, DIR_STORAGE, DIR_INVALID):
            warnings.warn(
                CompoundFileDirTypeWarning('invalid type'))
            self._entry_type = DIR_INVALID

        if self._entry_type == DIR_INVALID:
            if self.name != '':
                warnings.warn(
                    CompoundFileDirNameWarning('non-empty name'))
            if name_len != 0:
                warnings.warn(
                    CompoundFileDirNameWarning('non-zero name length'))
            if user_flags != 0:
                warnings.warn(
                    CompoundFileDirEntryWarning('non-zero user flags'))
        else:
            # Name length is stored in bytes and includes the NULL
            # terminator, even though the name is UTF-16 encoded.
            if (len(self.name) + 1) * 2 != name_len:
                warnings.warn(
                    CompoundFileDirNameWarning(
                        'invalid name length (%d)' % name_len))

        if self._entry_type in (DIR_INVALID, DIR_ROOT):
            # Neither unused entries nor the root may have siblings.
            if self._left_index != NO_STREAM:
                warnings.warn(
                    CompoundFileDirIndexWarning('invalid left sibling'))
            if self._right_index != NO_STREAM:
                warnings.warn(
                    CompoundFileDirIndexWarning('invalid right sibling'))
            self._left_index = NO_STREAM
            self._right_index = NO_STREAM

        if self._entry_type in (DIR_INVALID, DIR_STREAM):
            # Children, UUID and time-stamps only apply to storages.
            if self._child_index != NO_STREAM:
                warnings.warn(
                    CompoundFileDirIndexWarning('invalid child index'))
            if self.uuid != b'\0' * 16:
                warnings.warn(
                    CompoundFileDirEntryWarning('non-zero UUID'))
            if created != 0:
                warnings.warn(
                    CompoundFileDirTimeWarning(
                        'non-zero creation timestamp'))
            if modified != 0:
                warnings.warn(
                    CompoundFileDirTimeWarning(
                        'non-zero modification timestamp'))
            self._child_index = NO_STREAM
            self.uuid = b'\0' * 16
            created = 0
            modified = 0

        if self._entry_type in (DIR_INVALID, DIR_STORAGE):
            # Start sector and size only apply to streams (and the root).
            if self._start_sector != 0:
                warnings.warn(
                    CompoundFileDirSectorWarning(
                        'non-zero start sector (%d)' % self._start_sector))
            if size_low != 0:
                warnings.warn(
                    CompoundFileDirSizeWarning(
                        'non-zero size low-bits (%d)' % size_low))
            if size_high != 0:
                warnings.warn(
                    CompoundFileDirSizeWarning(
                        'non-zero size high-bits (%d)' % size_high))
            self._start_sector = 0
            size_low = 0
            size_high = 0

        if parent._normal_sector_size == 512:
            # Surely this should be checking DLL version instead of sector
            # size?! But the spec does state sector size ...
            if size_high != 0:
                warnings.warn(
                    CompoundFileDirSizeWarning(
                        'invalid size in small sector file'))
                size_high = 0
            if size_low >= 1 << 31:
                warnings.warn(
                    CompoundFileDirSizeWarning(
                        'size too large for small sector file'))

        self.size = (size_high << 32) | size_low
        # Each attribute is derived from its *own* raw value; previously
        # ``modified`` was mistakenly computed from ``created``.
        self.created = self._filetime_to_datetime(created)
        self.modified = self._filetime_to_datetime(modified)

    @staticmethod
    def _filetime_to_datetime(value):
        """
        Convert a raw directory time-stamp to a :class:`datetime.datetime`.

        *value* counts 100-nanosecond intervals since 1601-01-01; a value of
        zero means "no time-stamp" and yields ``None``. The result is a naive
        datetime truncated to whole microseconds.
        """
        if value == 0:
            return None
        return dt.datetime(1601, 1, 1) + dt.timedelta(
            microseconds=value // 10)

    @property
    def isfile(self):
        """True if this entity is a stream."""
        return self._entry_type == DIR_STREAM

    @property
    def isdir(self):
        """True if this entity is a storage or the root storage."""
        return self._entry_type in (DIR_STORAGE, DIR_ROOT)

    def _build_tree(self, entries):
        """
        Populate this storage's children from the flat list *entries*.

        *entries* is indexed by directory index. Each entry that is visited
        is replaced by ``None`` in the list, so reaching the same index twice
        raises :exc:`CompoundFileDirLoopError`. Out-of-range sibling or child
        indexes produce a warning and are otherwise ignored. Does nothing for
        entities which are not storages.
        """

        def walk(index):
            # In-order traversal of the sibling tree: left subtree, the node
            # itself, then the right subtree.
            node = entries[index]
            entries[index] = None
            if node is None:
                raise CompoundFileDirLoopError(
                    'loop detected in directory hierarchy '
                    '(points to index %d)' % index)
            if node._left_index != NO_STREAM:
                try:
                    walk(node._left_index)
                except IndexError:
                    warnings.warn(
                        CompoundFileDirIndexWarning(
                            'invalid left index (%d) in entry at '
                            'index %d' % (node._left_index, index)))
            self._children.append(node)
            if node._right_index != NO_STREAM:
                try:
                    walk(node._right_index)
                except IndexError:
                    warnings.warn(
                        CompoundFileDirIndexWarning(
                            'invalid right index (%d) in entry at '
                            'index %d' % (node._right_index, index)))
            # Recurse so nested storages collect their own children.
            node._build_tree(entries)

        if self.isdir:
            self._children = []
            try:
                walk(self._child_index)
            except IndexError:
                # NO_STREAM simply means an empty storage; any other
                # out-of-range index is worth reporting.
                if self._child_index != NO_STREAM:
                    warnings.warn(
                        CompoundFileDirIndexWarning('invalid child index'))

    def __len__(self):
        """Return the number of entities directly inside this storage."""
        return len(self._children)

    def __iter__(self):
        """Iterate over the entities directly inside this storage."""
        return iter(self._children)

    def __contains__(self, name_or_obj):
        """
        Test membership by name (case-insensitive) or by entity object.

        Byte-string names are decoded with ``FILENAME_ENCODING`` first.
        """
        if isinstance(name_or_obj, bytes):
            name_or_obj = name_or_obj.decode(FILENAME_ENCODING)
        if isinstance(name_or_obj, str):
            try:
                self.__getitem__(name_or_obj)
            except KeyError:
                return False
            return True
        return name_or_obj in self._children

    def __getitem__(self, index_or_name):
        """
        Return a child by ordinal position or by case-insensitive name.

        Byte-string names are decoded with ``FILENAME_ENCODING`` first.

        :raises KeyError: if no child has the requested name.
        :raises IndexError: if an ordinal is out of range.
        """
        if isinstance(index_or_name, bytes):
            index_or_name = index_or_name.decode(FILENAME_ENCODING)
        if isinstance(index_or_name, str):
            name = index_or_name.lower()
            for item in self._children:
                if item.name.lower() == name:
                    return item
            raise KeyError(index_or_name)
        return self._children[index_or_name]

    def __repr__(self):
        """
        Return a debugging representation.

        Streams show their name; storages show a pretty-printed list of
        their immediate children (nested storages are shown by name only).
        """
        if self.isfile:
            return "<CompoundFileEntity name='%s'>" % self.name
        if self.isdir:
            return pformat([
                "<CompoundFileEntity dir='%s'>" % c.name
                if c.isdir else repr(c)
                for c in self._children
                ])
        return "<CompoundFileEntry ???>"