Пропустил ExtractMsg.py и OleFileIO_PL.py через 2to3-3.2, который подправил синтаксис. Сижу, смотрю код. И вот натыкаюсь на следующее:
class _OleDirectoryEntry:
    """
    OLE2 Directory Entry

    Parses one 128-byte record of the OLE directory stream and stores its
    fields (entry type, sibling/child SIDs, timestamps, start sector, size,
    CLSID and name) as attributes.
    """

    #[PL] parsing code moved from OleFileIO.loaddirectory

    # struct to parse directory entries:
    # <: little-endian byte order
    # 64s: string containing entry name in unicode (max 31 chars) + null char
    # H: uint16, number of bytes used in name buffer, including null = (len+1)*2
    # B: uint8, dir entry type (between 0 and 5)
    # B: uint8, color: 0=black, 1=red
    # I: uint32, index of left child node in the red-black tree, NOSTREAM if none
    # I: uint32, index of right child node in the red-black tree, NOSTREAM if none
    # I: uint32, index of child root node if it is a storage, else NOSTREAM
    # 16s: CLSID, unique identifier (only used if it is a storage)
    # I: uint32, user flags
    # 8s: uint64, creation timestamp or zero
    # 8s: uint64, modification timestamp or zero
    # I: uint32, SID of first sector if stream or ministream, SID of 1st sector
    #    of stream containing ministreams if root entry, 0 otherwise
    # I: uint32, total stream size in bytes if stream (low 32 bits), 0 otherwise
    # I: uint32, total stream size in bytes if stream (high 32 bits), 0 otherwise
    STRUCT_DIRENTRY = '<64sHBBIII16sI8s8sIII'
    # size of a directory entry: 128 bytes
    DIRENTRY_SIZE = 128
    assert struct.calcsize(STRUCT_DIRENTRY) == DIRENTRY_SIZE

    @staticmethod
    def _name_bytes(raw_name, namelength):
        """
        Return the part of the name buffer that precedes the null terminator.

        raw_name  : the 64-byte name buffer unpacked from the entry
        namelength: number of bytes used in the buffer, terminator included

        The terminator takes 2 bytes, so namelength-2 bytes are kept. The
        count is clamped at zero: with namelength < 2 (e.g. 0 in an unused
        entry) a negative slice index would keep almost the whole buffer
        instead of nothing.
        """
        return raw_name[:max(namelength - 2, 0)]

    def __init__(self, entry, sid, olefile):
        """
        Constructor for an _OleDirectoryEntry object.
        Parses a 128-bytes entry from the OLE Directory stream.

        entry  : bytes, exactly DIRENTRY_SIZE (128) bytes long. Under
                 Python 3 struct.unpack rejects a str with TypeError, and a
                 buffer of any other length with struct.error.
        sid    : index of this directory entry in the OLE file directory
        olefile: OleFileIO containing this directory entry

        Inconsistencies found in the entry are reported through
        olefile._raise_defect(); parsing continues whenever that call
        returns instead of raising.
        """
        self.sid = sid
        # ref to olefile is stored for future use
        self.olefile = olefile
        # kids is a list of children entries, if this entry is a storage:
        # (list of _OleDirectoryEntry objects)
        self.kids = []
        # kids_dict is a dictionary of children entries, indexed by their
        # name in lowercase: used to quickly find an entry, and to detect
        # duplicates
        self.kids_dict = {}
        # flag used to detect if the entry is referenced more than once in
        # directory:
        self.used = False
        # decode DirEntry
        (
            name,
            namelength,
            self.entry_type,
            self.color,
            self.sid_left,
            self.sid_right,
            self.sid_child,
            clsid,
            self.dwUserFlags,
            self.createTime,
            self.modifyTime,
            self.isectStart,
            sizeLow,
            sizeHigh
        ) = struct.unpack(_OleDirectoryEntry.STRUCT_DIRENTRY, entry)
        if self.entry_type not in (STGTY_ROOT, STGTY_STORAGE, STGTY_STREAM,
                                   STGTY_EMPTY):
            olefile._raise_defect(DEFECT_INCORRECT, 'unhandled OLE storage type')
        # only first directory entry can (and should) be root:
        if self.entry_type == STGTY_ROOT and sid != 0:
            olefile._raise_defect(DEFECT_INCORRECT, 'duplicate OLE root entry')
        if sid == 0 and self.entry_type != STGTY_ROOT:
            olefile._raise_defect(DEFECT_INCORRECT, 'incorrect OLE root entry')
        # name should be at most 31 unicode characters + null character,
        # so 64 bytes in total (31*2 + 2):
        if namelength > 64:
            olefile._raise_defect(DEFECT_INCORRECT, 'incorrect DirEntry name length')
            # if exception not raised, namelength is set to the maximum value:
            namelength = 64
        # only characters without ending null char are kept:
        name = self._name_bytes(name, namelength)
        # the raw name bytes are decoded by _unicode
        # NOTE(review): the original comment said "converted from unicode to
        # Latin-1" -- confirm what _unicode returns under Python 3.
        self.name = _unicode(name)

        debug('DirEntry SID=%d: %s' % (self.sid, repr(self.name)))
        debug(' - type: %d' % self.entry_type)
        debug(' - sect: %d' % self.isectStart)
        debug(' - SID left: %d, right: %d, child: %d'
              % (self.sid_left, self.sid_right, self.sid_child))

        # sizeHigh is only used for 4K sectors, it should be zero for 512 bytes
        # sectors, BUT apparently some implementations set it as 0xFFFFFFFF, 1
        # or some other value so it cannot be raised as a defect in general:
        if olefile.sectorsize == 512:
            if sizeHigh != 0 and sizeHigh != 0xFFFFFFFF:
                debug('sectorsize=%d, sizeLow=%d, sizeHigh=%d (%X)' %
                      (olefile.sectorsize, sizeLow, sizeHigh, sizeHigh))
                olefile._raise_defect(DEFECT_UNSURE, 'incorrect OLE stream size')
            self.size = sizeLow
        else:
            # struct already yields Python ints, no conversion needed
            self.size = sizeLow + (sizeHigh << 32)
        debug(' - size: %d (sizeLow=%d, sizeHigh=%d)' % (self.size, sizeLow, sizeHigh))

        self.clsid = _clsid(clsid)
        # a storage should have a null size, BUT some implementations such as
        # Word 8 for Mac seem to allow non-null values => Potential defect:
        if self.entry_type == STGTY_STORAGE and self.size != 0:
            olefile._raise_defect(DEFECT_POTENTIAL, 'OLE storage with size>0')
        # check if stream is not already referenced elsewhere:
        if self.entry_type in (STGTY_ROOT, STGTY_STREAM) and self.size > 0:
            # only streams can be in MiniFAT (ministream object); the root
            # entry and streams at or above the cutoff are checked in the FAT
            minifat = (self.size < olefile.minisectorcutoff
                       and self.entry_type == STGTY_STREAM)
            olefile._check_duplicate_stream(self.isectStart, minifat)
Traceback (most recent call last):
  File "ExtractMsg.py", line 492, in <module>
    msg = Message(filename)
  File "ExtractMsg.py", line 193, in __init__
    OleFile.OleFileIO.__init__(self, filename)
  File "/home/pete/downloads/msg-extractor-master/OleFileIO_PL.py", line 1010, in __init__
    self.open(filename)
  File "/home/pete/downloads/msg-extractor-master/OleFileIO_PL.py", line 1213, in open
    self.loaddirectory(self.sectDirStart)#i32(header, 48))
  File "/home/pete/downloads/msg-extractor-master/OleFileIO_PL.py", line 1522, in loaddirectory
    root_entry = self._load_direntry(0)
  File "/home/pete/downloads/msg-extractor-master/OleFileIO_PL.py", line 1549, in _load_direntry
    self.direntries[sid] = _OleDirectoryEntry(entry, sid, self)
  File "/home/pete/downloads/msg-extractor-master/OleFileIO_PL.py", line 818, in __init__
    ) = struct.unpack(_OleDirectoryEntry.STRUCT_DIRENTRY, entry)
TypeError: 'str' does not support the buffer interface
Traceback (most recent call last):
  File "ExtractMsg.py", line 492, in <module>
    msg = Message(filename)
  File "ExtractMsg.py", line 193, in __init__
    OleFile.OleFileIO.__init__(self, filename)
  File "/home/pete/downloads/msg-extractor-master/OleFileIO_PL.py", line 1010, in __init__
    self.open(filename)
  File "/home/pete/downloads/msg-extractor-master/OleFileIO_PL.py", line 1213, in open
    self.loaddirectory(self.sectDirStart)#i32(header, 48))
  File "/home/pete/downloads/msg-extractor-master/OleFileIO_PL.py", line 1522, in loaddirectory
    root_entry = self._load_direntry(0)
  File "/home/pete/downloads/msg-extractor-master/OleFileIO_PL.py", line 1549, in _load_direntry
    self.direntries[sid] = _OleDirectoryEntry(entry, sid, self)
  File "/home/pete/downloads/msg-extractor-master/OleFileIO_PL.py", line 818, in __init__
    ) = struct.unpack(_OleDirectoryEntry.STRUCT_DIRENTRY, entry)
struct.error: unpack requires a bytes object of length 128
Итак, вопрос: как надо правильно конвертировать строку в байты? На что мне еще надо обратить внимание в этом коде? Прошу прощения за длинный пост, надо было привести код.