# TARReader
#
# Read .tar.zip files. Interface mostly identical to ZIPReader.
#
# Why .tar.zip instead of .tar.bz2, .tar.gz, .tar.zst, or something normal?
# Godot supports loading files with GZip and Zstandard compression, but only
# files that it has saved itself (with a header/footer), so it can't load
# normal .tar.gz or .tar.zst files. It can load zips, though.
#
# DO NOT USE THIS ON UNTRUSTED DATA.

extends RefCounted
class_name KiriTARReader

#region Internal data

# One entry of the tar archive, as described by its 512-byte header.
class TarFileRecord:
	extends RefCounted

	# Path of the entry inside the archive.
	var filename : String

	# Byte offset of the entry's data inside the decompressed tar.
	var offset : int

	# Size of the entry's data in bytes (not padded to 512).
	var file_size : int

	# Unix file permissions.
	#
	# Technically this is an int, but we're just going to leave it as an octal
	# string because that's what we can feed right into chmod.
	var mode : String

	# Symlinks.
	var is_link : bool
	var link_destination : String

	var is_directory : bool

	# Raw type flag character from the header ("" when the byte is NUL).
	var type_indicator : String

# Tar headers and data are laid out in blocks of this many bytes.
const _TAR_BLOCK_SIZE : int = 512

var _internal_file_list = []
var _reader : ZIPReader = null
var _tar_file_cache : PackedByteArray = []

# Return the data bytes of a record, loading the tar into memory if needed.
func _load_record(record : TarFileRecord) -> PackedByteArray:
	load_cache()
	return _tar_file_cache.slice(record.offset, record.offset + record.file_size)

# Find the record whose filename matches `path`, or null if there is none.
func _find_record(path : String, case_sensitive : bool) -> TarFileRecord:
	for record : TarFileRecord in _internal_file_list:
		if case_sensitive:
			if record.filename == path:
				return record
		elif record.filename.nocasecmp_to(path) == 0:
			return record
	return null

# Parse the body of a pax extended header into a key -> value Dictionary.
#
# Each record looks like "<length> <key>=<value>\n". Only the first space and
# the first "=" are separators; the value itself may contain both (think of
# paths with spaces). Malformed records are skipped.
#
# NOTE: Records are split on newlines rather than by their length prefix, so a
# value that itself contains a newline is not handled.
func _parse_pax_records(data : PackedByteArray) -> Dictionary:
	var fields : Dictionary = {}
	for line in data.get_string_from_utf8().split("\n", false):
		var length_and_the_rest : PackedStringArray = line.split(" ", true, 1)
		if length_and_the_rest.size() < 2:
			continue
		var key_and_value : PackedStringArray = length_and_the_rest[1].split("=", true, 1)
		if key_and_value.size() < 2:
			continue
		fields[key_and_value[0]] = key_and_value[1]
	return fields

#endregion

#region Cache wrangling

# We have to load the entire .tar file into memory with the way the ZipReader
# API works, but we'll at least include an option to nuke the cache to free up
# memory if you want to just leave the file open.
#
# This lets us avoid re-opening and decompressing the entire .tar every time we
# need something out of it, while still letting us manually free the memory when
# we won't need it for a while.

# Drop the in-memory copy of the tar. It is reloaded on demand.
func clear_cache():
	_tar_file_cache = []

# Make sure the decompressed tar is in memory.
#
# Returns OK on success, or ERR_FILE_UNRECOGNIZED if the zip does not contain
# exactly one file.
func load_cache() -> Error:
	assert(_reader)

	if len(_tar_file_cache):
		# Cache already in-memory.
		return OK

	var zip_file_list = _reader.get_files()
	if len(zip_file_list) != 1:
		return ERR_FILE_UNRECOGNIZED

	_tar_file_cache = _reader.read_file(zip_file_list[0])
	return OK

#endregion

#region Number wrangling

# Convert an octal tar header field to an int.
#
# Tar numeric fields may be padded with spaces and are terminated by a space or
# NUL, so surrounding whitespace is ignored and parsing stops at the first
# character that is not an octal digit. An empty field yields 0.
func _octal_str_to_int(s : String) -> int:
	var ret : int = 0
	for c in s.strip_edges():
		var digit : int = c.unicode_at(0) - 48 # 48 == "0"
		if digit < 0 or digit > 7:
			break
		ret = ret * 8 + digit
	return ret

# Round x up to the next multiple of 512 (the tar block size).
func _pad_to_512(x : int) -> int:
	return (x + 511) & ~511

#endregion

#region Public API

# Close the archive and release everything associated with it. Safe to call
# even if nothing is open.
func close() -> Error:
	_internal_file_list = []
	if _reader:
		_reader.close()
	_reader = null
	clear_cache()
	return OK

# Return true if the archive has an entry with the given path.
func file_exists(path: String, case_sensitive: bool = true) -> bool:
	return _find_record(path, case_sensitive) != null

# Return the paths of all entries in the archive, in archive order.
func get_files() -> PackedStringArray:
	var ret : PackedStringArray = []
	for record : TarFileRecord in _internal_file_list:
		ret.append(record.filename)
	return ret

# Open a .tar.zip file and index the tar inside it.
#
# Returns OK on success. Returns the ZIPReader error if the zip can't be
# opened, or ERR_FILE_UNRECOGNIZED if the zip doesn't contain exactly one file.
# On failure the reader is left closed.
func open(path: String) -> Error:
	assert(not _reader)

	_reader = ZIPReader.new()
	var err : Error = _reader.open(path)
	if err != OK:
		_reader.close()
		_reader = null
		return err

	# Don't pretend an unreadable archive is just an empty one.
	err = load_cache()
	if err != OK:
		close()
		return err

	var tar_file_offset = 0
	var zero_filled_record_count = 0
	var zero_filled_record : PackedByteArray = []
	zero_filled_record.resize(_TAR_BLOCK_SIZE)
	zero_filled_record.fill(0)

	var paxheader_next_file = {}
	var paxheader_global = {}

	while tar_file_offset < len(_tar_file_cache):
		var chunk = _tar_file_cache.slice(tar_file_offset, tar_file_offset + _TAR_BLOCK_SIZE)

		if chunk.size() < _TAR_BLOCK_SIZE:
			# Truncated trailing header. Nothing sensible to parse here.
			push_warning("KiriTARReader: truncated header at offset ", tar_file_offset)
			break

		# Two consecutive zero-filled blocks mark the end of the archive.
		if chunk == zero_filled_record:
			zero_filled_record_count += 1
			if zero_filled_record_count >= 2:
				break
			tar_file_offset += _TAR_BLOCK_SIZE
			continue
		zero_filled_record_count = 0

		var tar_record : TarFileRecord = TarFileRecord.new()

		var tar_chunk_name = chunk.slice(0, 100)
		var tar_chunk_mode = chunk.slice(100, 100+8)
		var tar_chunk_size = chunk.slice(124, 124+12)
		var tar_chunk_link_indicator = chunk.slice(156, 156+1)
		var tar_chunk_link_file = chunk.slice(157, 157+100)

		# FIXME: Technically "ustar\0" but we'll skip the \0
		var tar_ustar_indicator = chunk.slice(257, 257+5)
		var tar_ustar_file_prefix = chunk.slice(345, 345+155)

		# Pluck out the relevant bits we need for the record.
		tar_record.filename = tar_chunk_name.get_string_from_utf8()
		tar_record.file_size = _octal_str_to_int(tar_chunk_size.get_string_from_utf8())
		tar_record.mode = tar_chunk_mode.get_string_from_utf8()
		tar_record.link_destination = tar_chunk_link_file.get_string_from_utf8()

		# The type flag is a single character; a NUL byte means "regular file"
		# in old archives and is stored as "".
		if tar_chunk_link_indicator[0] != 0:
			tar_record.type_indicator = tar_chunk_link_indicator.get_string_from_utf8()
		else:
			tar_record.type_indicator = ""
		tar_record.is_link = (tar_record.type_indicator == "2")
		tar_record.is_directory = (tar_record.type_indicator == "5")

		# Prepend the prefix if this is the "ustar" format. The full name is
		# prefix + "/" + name; the separator is not stored in either field.
		if tar_ustar_indicator.get_string_from_utf8() == "ustar":
			var prefix : String = tar_ustar_file_prefix.get_string_from_utf8()
			if not prefix.is_empty():
				tar_record.filename = prefix.path_join(tar_record.filename)

		# TODO: Things we skipped:
		# - owner id (108, 108+8)
		# - group id (116, 116+8)
		# - modification time (136, 136+12)
		# - checksum (148, 148+8)
		# - mostly related to USTAR format

		# Skip header.
		tar_file_offset += _TAR_BLOCK_SIZE

		# Record start offset.
		tar_record.offset = tar_file_offset

		# Skip file contents.
		tar_file_offset += _pad_to_512(tar_record.file_size)

		# Extended headers are identified by type flag "x" (next file only) or
		# "g" (all following files). The "@PaxHeader" name check is kept for
		# compatibility with how this reader has always detected them.
		var is_pax_header : bool = \
			tar_record.type_indicator == "x" or \
			tar_record.type_indicator == "g" or \
			tar_record.filename.get_file() == "@PaxHeader"

		if is_pax_header:

			# This is a special file entry that just has some extended data
			# about the next file or all the following files. It's not an actual
			# file.
			var paxheader_data : PackedByteArray = _tar_file_cache.slice(
				tar_record.offset, tar_record.offset + tar_record.file_size)
			var paxheader_fields : Dictionary = _parse_pax_records(paxheader_data)

			if tar_record.type_indicator == "x":
				paxheader_next_file.merge(paxheader_fields, true)
			elif tar_record.type_indicator == "g":
				paxheader_global.merge(paxheader_fields, true)

		else:

			# Apply paxheader. We're just using "path" for now.
			# See here for other available fields:
			# https://pubs.opengroup.org/onlinepubs/009695399/utilities/pax.html
			var merged_paxheader : Dictionary = paxheader_global.duplicate()
			merged_paxheader.merge(paxheader_next_file, true)
			paxheader_next_file = {}

			if merged_paxheader.has("path"):
				tar_record.filename = merged_paxheader["path"]
				print("fixing path for paxheader: ", tar_record.filename)

			# Add it to our record list.
			_internal_file_list.append(tar_record)

	return OK

# Extract a file into memory as a PackedByteArray. Returns an empty array if
# the archive has no such entry.
func read_file(path : String, case_sensitive : bool = true) -> PackedByteArray:
	var record : TarFileRecord = _find_record(path, case_sensitive)
	if record == null:
		return []
	return _load_record(record)

# Extract a file to a specific path. Sets permissions when possible, handles
# symlinks and directories. Will extract to the dest_path plus the internal
# relative path.
#
# Example:
#   dest_path: "foo/bar", filename: "butts/whatever/thingy.txt"
#   extracts to: "foo/bar/butts/whatever/thingy.txt"
func unpack_file(dest_path : String, filename : String, overwrite : bool = false):
	var full_dest_path : String = dest_path.path_join(filename)
	DirAccess.make_dir_recursive_absolute(full_dest_path.get_base_dir())

	for record : TarFileRecord in _internal_file_list:

		if record.filename.is_absolute_path():
			# hmmmmmmmmmmmmmm
			assert(false)
			continue

		if record.filename.simplify_path().begins_with(".."):
			assert(false)
			continue

		# FIXME: There are probably a million other ways to do directory
		# traversal attacks.

		if record.filename != filename:
			continue

		# FIXME: Somehow this is slower than just overwriting the file.
		# Awesome. /s
		if overwrite == false and FileAccess.file_exists(full_dest_path):
			continue

		if record.is_link:

			# Okay, look. I know that symbolic links technically exist on
			# Windows, but they're messy and hardly ever used. FIXME later
			# if for some reason you need to support that. -Kiri
			assert(OS.get_name() != "Windows")

			# Fire off a command to make a symbolic link on *normal* OSes.
			var err = OS.execute("ln", [
				"-s",
				record.link_destination,
				ProjectSettings.globalize_path(full_dest_path)
			])
			assert(err != -1)

		elif record.is_directory:

			# It's just a directory. Make it.
			DirAccess.make_dir_recursive_absolute(full_dest_path)

		else:

			# Okay this is an actual file. Extract it.
			var file_data : PackedByteArray = read_file(record.filename)
			var out_file = FileAccess.open(full_dest_path, FileAccess.WRITE)
			if out_file == null:
				# Couldn't open the destination (permissions, bad path, ...).
				# Report it instead of crashing on a null FileAccess.
				push_error(
					"KiriTARReader: can't write ", full_dest_path,
					" (error ", FileAccess.get_open_error(), ")")
				continue
			out_file.store_buffer(file_data)
			out_file.close()

		# Set permissions (on normal OSes, not Windows). I don't think this
		# applies to symlinks, though.
		if not record.is_link:
			if OS.get_name() != "Windows":
				var err = OS.execute("chmod", [
					record.mode,
					ProjectSettings.globalize_path(full_dest_path)
				])
				assert(err != -1)

#endregion