|
//
// Compressor/Decompressor for ZLIB data streams (RFC1950)
const std = @import("std");
const io = std.io;
const fs = std.fs;
const testing = std.testing;
const mem = std.mem;
const deflate = std.compress.deflate;
// Zlib header format as specified in RFC1950
/// Bit layout of the two-byte zlib stream header (CMF followed by FLG).
///
/// The header is read and written as a big-endian `u16` and converted with
/// `@bitCast`, so the struct is given an explicit `u16` backing integer: the
/// compiler then rejects any field change that would make it anything other
/// than exactly 16 bits wide.
///
/// Packed struct fields are laid out starting at the least-significant bit,
/// so `checksum` occupies the lowest five bits of the 16-bit word and
/// `compression_info` the highest four.
const ZLibHeader = packed struct(u16) {
    /// Check bits chosen so that the whole 16-bit header is a multiple of 31.
    checksum: u5,
    /// Non-zero when the stream expects a preset dictionary.
    preset_dict: u1,
    /// Informational compression level recorded by the compressor.
    compression_level: u2,
    /// Compression method; `DEFLATE` is the only value accepted in this file.
    compression_method: u4,
    /// Base-2 logarithm of the LZ77 window size, minus 8.
    compression_info: u4,

    /// `compression_method` value that selects DEFLATE.
    const DEFLATE = 8;
    /// Largest permitted `compression_info` value (a 32 KiB window).
    const WINDOW_32K = 7;
};
|
DecompressStream()
|
/// Returns a streaming zlib decompressor type that pulls its compressed input
/// from a reader of type `ReaderType`.
///
/// The resulting type validates the two-byte zlib header when it is created,
/// inflates the DEFLATE payload on demand, and compares the big-endian
/// Adler-32 trailer against the checksum of the bytes it produced once the
/// payload is exhausted.
pub fn DecompressStream(comptime ReaderType: type) type {
    return struct {
        const Self = @This();

        /// Errors that `read` can return: anything from the source reader or
        /// the deflate decompressor, `WrongChecksum` when the trailer does not
        /// match the decompressed data, and `Unsupported`.
        pub const Error = ReaderType.Error ||
            deflate.Decompressor(ReaderType).Error ||
            error{ WrongChecksum, Unsupported };
        pub const Reader = io.Reader(*Self, Error, read);

        /// Allocator handed to the deflate decompressor at construction time.
        allocator: mem.Allocator,
        inflater: deflate.Decompressor(ReaderType),
        /// The compressed source; also used directly to read the trailer.
        in_reader: ReaderType,
        /// Running Adler-32 of every decompressed byte returned by `read`.
        hasher: std.hash.Adler32,
        /// Result of the trailer check. `null` until the four trailer bytes
        /// have been consumed; afterwards it records whether they matched, so
        /// later `read` calls never touch `in_reader` again.
        checksum_ok: ?bool = null,

        /// Reads and validates the zlib header from `source`, then sets up the
        /// deflate decompressor.
        ///
        /// Fails with `error.BadHeader` when the header is not a multiple of
        /// 31, `error.InvalidCompression` when the method is not DEFLATE,
        /// `error.InvalidWindowSize` when the window exponent exceeds the
        /// 32 KiB maximum, and `error.Unsupported` when a preset dictionary is
        /// requested. Reader and decompressor-construction errors propagate.
        fn init(allocator: mem.Allocator, source: ReaderType) !Self {
            // Zlib header format is specified in RFC1950
            const header_u16 = try source.readInt(u16, .big);

            // verify the header checksum
            if (header_u16 % 31 != 0)
                return error.BadHeader;
            const header: ZLibHeader = @bitCast(header_u16);

            // The CM field must be 8 to indicate the use of DEFLATE
            if (header.compression_method != ZLibHeader.DEFLATE)
                return error.InvalidCompression;
            // CINFO is the base-2 logarithm of the LZ77 window size, minus 8.
            // Values above 7 are unspecified and therefore rejected.
            if (header.compression_info > ZLibHeader.WINDOW_32K)
                return error.InvalidWindowSize;

            // TODO: Support this case
            if (header.preset_dict != 0)
                return error.Unsupported;

            return Self{
                .allocator = allocator,
                // No preset dictionary: streams that ask for one were rejected above.
                .inflater = try deflate.decompressor(allocator, source, null),
                .in_reader = source,
                .hasher = std.hash.Adler32.init(),
            };
        }

        /// Releases the resources held by the deflate decompressor.
        pub fn deinit(self: *Self) void {
            self.inflater.deinit();
        }

        /// Implements the io.Reader interface.
        ///
        /// Decompresses into `buffer` and returns the number of bytes written.
        /// Returns 0 for an empty `buffer`, and 0 at end of stream once the
        /// Adler-32 trailer has been verified. The trailer is consumed from the
        /// source at most once: repeated calls at end of stream keep returning
        /// 0 (or `error.WrongChecksum` if the trailer did not match) without
        /// reading any further bytes from the source.
        pub fn read(self: *Self, buffer: []u8) Error!usize {
            if (buffer.len == 0)
                return 0;

            // Read from the compressed stream and update the computed checksum
            const r = try self.inflater.read(buffer);
            if (r != 0) {
                self.hasher.update(buffer[0..r]);
                return r;
            }

            // We've reached the end of stream. Check the trailer the first
            // time we get here and remember the verdict; if reading the
            // trailer itself fails, nothing is cached and the error propagates.
            const ok = self.checksum_ok orelse verify: {
                const stored = try self.in_reader.readInt(u32, .big);
                const matches = stored == self.hasher.final();
                self.checksum_ok = matches;
                break :verify matches;
            };
            if (!ok)
                return error.WrongChecksum;
            return 0;
        }

        /// Returns an `io.Reader` whose reads are served by `read`.
        pub fn reader(self: *Self) Reader {
            return .{ .context = self };
        }
    };
}
|
decompressStream()
|
/// Creates a zlib decompressor over `reader`, deducing the stream type from
/// the reader's type. The zlib header is read and validated before returning.
/// Release the result with `deinit`.
pub fn decompressStream(allocator: mem.Allocator, reader: anytype) !DecompressStream(@TypeOf(reader)) {
    const Stream = DecompressStream(@TypeOf(reader));
    return Stream.init(allocator, reader);
}
|
CompressionLevel
|
/// Compression effort selectable when creating a `CompressStream`.
///
/// The numeric value of each tag is what `CompressStream` stores in the
/// two-bit compression-level field of the zlib header, so the values are
/// spelled out explicitly.
pub const CompressionLevel = enum(u2) {
    /// Store the data without compressing it.
    no_compression = 0,
    /// Favor speed over output size.
    fastest = 1,
    /// The level used when no option is given.
    default = 2,
    /// Favor output size over speed.
    maximum = 3,
};
|
CompressStreamOptions
|
/// Settings accepted by `compressStream`.
pub const CompressStreamOptions = struct {
    /// Requested compression effort; `.default` when left unspecified.
    level: CompressionLevel = CompressionLevel.default,
};
|
CompressStream()
|
/// Returns a streaming zlib compressor type that emits its output to a writer
/// of type `WriterType`.
///
/// The zlib header is written when the stream is created; `finish` must be
/// called after the last `write` to terminate the deflate stream and append
/// the Adler-32 trailer.
pub fn CompressStream(comptime WriterType: type) type {
    return struct {
        const Self = @This();

        const Error = WriterType.Error ||
            deflate.Compressor(WriterType).Error;
        pub const Writer = io.Writer(*Self, Error, write);

        /// Allocator handed to the deflate compressor at construction time.
        allocator: mem.Allocator,
        deflator: deflate.Compressor(WriterType),
        /// The destination; also used directly to write the trailer.
        in_writer: WriterType,
        /// Running Adler-32 of every uncompressed byte accepted by `write`.
        hasher: std.hash.Adler32,

        /// Writes the zlib header for `options.level` to `dest` and sets up
        /// the deflate compressor at the matching level.
        fn init(allocator: mem.Allocator, dest: WriterType, options: CompressStreamOptions) !Self {
            // Start with zeroed check bits, then pick the value that makes the
            // 16-bit header a multiple of 31.
            var zlib_header = ZLibHeader{
                .checksum = 0,
                .preset_dict = 0,
                .compression_level = @intFromEnum(options.level),
                .compression_method = ZLibHeader.DEFLATE,
                .compression_info = ZLibHeader.WINDOW_32K,
            };
            const unchecked_bits = @as(u16, @bitCast(zlib_header));
            const check_bits = @as(u5, @truncate(31 - unchecked_bits % 31));
            zlib_header.checksum = check_bits;

            const header_bits = @as(u16, @bitCast(zlib_header));
            try dest.writeInt(u16, header_bits, .big);

            // Translate the zlib-level option into the deflate compressor's own scale.
            const deflate_level: deflate.Compression = switch (options.level) {
                .maximum => .best_compression,
                .default => .default_compression,
                .fastest => .best_speed,
                .no_compression => .no_compression,
            };

            return Self{
                .allocator = allocator,
                .deflator = try deflate.compressor(allocator, dest, .{ .level = deflate_level }),
                .in_writer = dest,
                .hasher = std.hash.Adler32.init(),
            };
        }

        /// Feeds `bytes` to the compressor and returns how many of them were
        /// accepted; only the accepted prefix is folded into the checksum.
        /// An empty slice is a no-op that returns 0.
        pub fn write(self: *Self, bytes: []const u8) Error!usize {
            if (bytes.len == 0) return 0;

            const accepted = try self.deflator.write(bytes);
            self.hasher.update(bytes[0..accepted]);
            return accepted;
        }

        /// Returns an `io.Writer` whose writes are served by `write`.
        pub fn writer(self: *Self) Writer {
            return .{ .context = self };
        }

        /// Releases the resources held by the deflate compressor.
        pub fn deinit(self: *Self) void {
            self.deflator.deinit();
        }

        /// Completes the stream: takes the Adler-32 of everything written so
        /// far, closes the deflate compressor, then appends the checksum to
        /// the destination as a big-endian `u32`.
        pub fn finish(self: *Self) !void {
            const adler = self.hasher.final();
            try self.deflator.close();
            try self.in_writer.writeInt(u32, adler, .big);
        }
    };
}
|
compressStream()
|
/// Creates a zlib compressor that writes to `writer`, deducing the stream
/// type from the writer's type. The zlib header is written before returning.
/// Call `finish` after the last write and release the result with `deinit`.
pub fn compressStream(allocator: mem.Allocator, writer: anytype, options: CompressStreamOptions) !CompressStream(@TypeOf(writer)) {
    const Stream = CompressStream(@TypeOf(writer));
    return Stream.init(allocator, writer, options);
}
/// Test helper: decompresses the zlib stream in `data` to completion and
/// checks that the output equals `expected`. Any error raised while opening
/// or reading the stream is returned to the caller.
fn testDecompress(data: []const u8, expected: []const u8) !void {
    var source = io.fixedBufferStream(data);

    var stream = try decompressStream(testing.allocator, source.reader());
    defer stream.deinit();

    // Drain the whole stream into memory, with no cap on the output size.
    const no_limit = std.math.maxInt(usize);
    const actual = try stream.reader().readAllAlloc(testing.allocator, no_limit);
    defer testing.allocator.free(actual);

    // Compare with the reference output.
    try testing.expectEqualSlices(u8, expected, actual);
}
// All the test cases are obtained by compressing the RFC1951 text
//
// https://tools.ietf.org/rfc/rfc1951.txt length=36944 bytes
// SHA256=5ebf4b5b7fe1c3a0c0ab9aa3ac8c0f3853a7dc484905e76e03b0b0f301350009
|
Test:compressed data
|
test "compressed data" {
    const expected = @embedFile("testdata/rfc1951.txt");

    // Every fixture is the same text, compressed in a different way.
    const fixtures = .{
        // Compressed with compression level = 0
        @embedFile("testdata/rfc1951.txt.z.0"),
        // Compressed with compression level = 9
        @embedFile("testdata/rfc1951.txt.z.9"),
        // Compressed with compression level = 9 and fixed Huffman codes
        @embedFile("testdata/rfc1951.txt.fixed.z.9"),
    };

    inline for (fixtures) |compressed| {
        try testDecompress(compressed, expected);
    }
}
|
Test:don't read past deflate stream's end
|
test "don't read past deflate stream's end" {
    // A complete zlib stream: header, deflate payload and four trailer bytes.
    const compressed = [_]u8{
        0x08, 0xd7, 0x63, 0xf8, 0xcf, 0xc0, 0xc0, 0x00, 0xc1, 0xff,
        0xff, 0x43, 0x30, 0x03, 0x03, 0xc3, 0xff, 0xff, 0xff, 0x01,
        0x83, 0x95, 0x0b, 0xf5,
    };
    const decompressed = [_]u8{
        0x00, 0xff, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0xff,
        0x00, 0xff, 0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0x00,
        0x00, 0x00, 0xff, 0xff, 0xff,
    };

    try testDecompress(&compressed, &decompressed);
}
|
Test:sanity checks
|
test "sanity checks" {
    const Case = struct {
        expected: anyerror,
        input: []const u8,
    };

    // Malformed streams paired with the error each one must produce.
    const cases = [_]Case{
        // Truncated header
        .{ .expected = error.EndOfStream, .input = &[_]u8{0x78} },
        // Failed FCHECK check
        .{ .expected = error.BadHeader, .input = &[_]u8{ 0x78, 0x9D } },
        // Wrong CM
        .{ .expected = error.InvalidCompression, .input = &[_]u8{ 0x79, 0x94 } },
        // Wrong CINFO
        .{ .expected = error.InvalidWindowSize, .input = &[_]u8{ 0x88, 0x98 } },
        // Wrong checksum
        .{
            .expected = error.WrongChecksum,
            .input = &[_]u8{ 0x78, 0xda, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00 },
        },
        // Truncated checksum
        .{ .expected = error.EndOfStream, .input = &[_]u8{ 0x78, 0xda, 0x03, 0x00, 0x00 } },
    };

    for (cases) |case| {
        try testing.expectError(case.expected, testDecompress(case.input, ""));
    }
}
|
Test:compress data
|
test "compress data" {
    const gpa = testing.allocator;
    const plaintext = @embedFile("testdata/rfc1951.txt");

    // Round-trip the reference text through every available level.
    for (std.meta.tags(CompressionLevel)) |level| {
        var sink = std.ArrayList(u8).init(gpa);
        defer sink.deinit();

        const options = CompressStreamOptions{ .level = level };
        var stream = try compressStream(gpa, sink.writer(), options);
        defer stream.deinit();

        try stream.writer().writeAll(plaintext);
        try stream.finish();

        // Whatever was produced must inflate back to the input.
        try testDecompress(sink.items, plaintext);
    }
}
|
|