use crate::checksum;
use crate::deflate;
use crate::finish::{Complete, Finish};
use crate::lz77;
use std::{ffi::CString, io, time};
// Two-byte magic number that opens every GZIP member header; `Header::read_from`
// rejects input that does not start with it.
const GZIP_ID: [u8; 2] = [31, 139];
// Compression-method byte written into the header; any other value is rejected on read.
const COMPRESSION_METHOD_DEFLATE: u8 = 8;
// Byte values of the header's OS field. `Os::to_u8` / `Os::from_u8` map between
// these values and the `Os` enum variants of the same name.
const OS_FAT: u8 = 0;
const OS_AMIGA: u8 = 1;
const OS_VMS: u8 = 2;
const OS_UNIX: u8 = 3;
const OS_VM_CMS: u8 = 4;
const OS_ATARI_TOS: u8 = 5;
const OS_HPFS: u8 = 6;
const OS_MACINTOSH: u8 = 7;
const OS_Z_SYSTEM: u8 = 8;
const OS_CPM: u8 = 9;
const OS_TOPS20: u8 = 10;
const OS_NTFS: u8 = 11;
const OS_QDOS: u8 = 12;
const OS_ACORN_RISCOS: u8 = 13;
const OS_UNKNOWN: u8 = 255;
// Bit masks of the header's flags byte (combined by `Header::flags`).
// Set when the header is marked as text.
const F_TEXT: u8 = 0b00_0001;
// Set when a CRC16 of the header follows the optional fields.
const F_HCRC: u8 = 0b00_0010;
// Set when an extra field is present.
const F_EXTRA: u8 = 0b00_0100;
// Set when a NUL-terminated file name is present.
const F_NAME: u8 = 0b00_1000;
// Set when a NUL-terminated comment is present.
const F_COMMENT: u8 = 0b01_0000;
/// Compression level recorded in a GZIP header.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum CompressionLevel {
    /// The compressor used its fastest algorithm.
    Fastest,
    /// The compressor used its slowest algorithm.
    Slowest,
    /// No information about the compression level is available.
    Unknown,
}

impl CompressionLevel {
    /// Returns the byte stored in the header for this level.
    fn to_u8(&self) -> u8 {
        match self {
            Self::Unknown => 0,
            Self::Slowest => 2,
            Self::Fastest => 4,
        }
    }

    /// Decodes a header byte; every value other than 2 or 4 yields `Unknown`.
    fn from_u8(x: u8) -> Self {
        if x == 4 {
            Self::Fastest
        } else if x == 2 {
            Self::Slowest
        } else {
            Self::Unknown
        }
    }
}
/// Translates the LZ77 encoder's compression level into the value recorded in
/// the GZIP header. Levels other than `Fast` and `Best` become `Unknown`.
impl From<lz77::CompressionLevel> for CompressionLevel {
    fn from(level: lz77::CompressionLevel) -> Self {
        match level {
            lz77::CompressionLevel::Best => Self::Slowest,
            lz77::CompressionLevel::Fast => Self::Fastest,
            _ => Self::Unknown,
        }
    }
}
/// Fixed-size trailer that follows the compressed data of a GZIP member.
#[derive(Debug, Clone)]
pub(crate) struct Trailer {
    /// CRC-32 value stored in the trailer.
    crc32: u32,
    /// Input size stored in the trailer.
    input_size: u32,
}

impl Trailer {
    /// Returns the CRC-32 value stored in the trailer.
    pub fn crc32(&self) -> u32 {
        self.crc32
    }

    /// Reads a trailer: two little-endian `u32` values, CRC-32 first, then the
    /// input size.
    ///
    /// # Errors
    ///
    /// Propagates any error from `reader`, including `UnexpectedEof` when
    /// fewer than eight bytes are available.
    pub fn read_from<R>(mut reader: R) -> io::Result<Self>
    where
        R: io::Read,
    {
        let mut next_u32 = || -> io::Result<u32> {
            let mut word = [0u8; 4];
            reader.read_exact(&mut word)?;
            Ok(u32::from_le_bytes(word))
        };
        let crc32 = next_u32()?;
        let input_size = next_u32()?;
        Ok(Trailer { crc32, input_size })
    }

    /// Writes the trailer in the same layout that `read_from` expects.
    fn write_to<W>(&self, mut writer: W) -> io::Result<()>
    where
        W: io::Write,
    {
        for word in &[self.crc32, self.input_size] {
            writer.write_all(&word.to_le_bytes())?;
        }
        Ok(())
    }
}
#[derive(Debug, Clone)]
pub struct HeaderBuilder {
header: Header,
}
impl HeaderBuilder {
pub fn new() -> Self {
#[cfg(not(target_arch = "wasm32"))]
let modification_time = time::UNIX_EPOCH
.elapsed()
.map(|d| d.as_secs() as u32)
.unwrap_or(0);
#[cfg(target_arch = "wasm32")]
let modification_time = 0;
let header = Header {
modification_time,
compression_level: CompressionLevel::Unknown,
os: Os::Unix,
is_text: false,
is_verified: false,
extra_field: None,
filename: None,
comment: None,
};
HeaderBuilder { header }
}
pub fn modification_time(&mut self, modification_time: u32) -> &mut Self {
self.header.modification_time = modification_time;
self
}
pub fn os(&mut self, os: Os) -> &mut Self {
self.header.os = os;
self
}
pub fn text(&mut self) -> &mut Self {
self.header.is_text = true;
self
}
pub fn verify(&mut self) -> &mut Self {
self.header.is_verified = true;
self
}
pub fn extra_field(&mut self, extra: ExtraField) -> &mut Self {
self.header.extra_field = Some(extra);
self
}
pub fn filename(&mut self, filename: CString) -> &mut Self {
self.header.filename = Some(filename);
self
}
pub fn comment(&mut self, comment: CString) -> &mut Self {
self.header.comment = Some(comment);
self
}
pub fn finish(&self) -> Header {
self.header.clone()
}
}
impl Default for HeaderBuilder {
fn default() -> Self {
Self::new()
}
}
/// GZIP member header.
#[derive(Debug, Clone)]
pub struct Header {
    /// Modification time as stored in the header.
    modification_time: u32,
    /// Compression level recorded in the header.
    compression_level: CompressionLevel,
    /// OS recorded in the header.
    os: Os,
    /// Whether the text flag is set.
    is_text: bool,
    /// Whether the header carries a CRC16 (written on encode, checked on decode).
    is_verified: bool,
    /// Optional extra field.
    extra_field: Option<ExtraField>,
    /// Optional NUL-terminated file name.
    filename: Option<CString>,
    /// Optional NUL-terminated comment.
    comment: Option<CString>,
}

impl Header {
    /// Returns the modification time.
    pub fn modification_time(&self) -> u32 {
        self.modification_time
    }

    /// Returns the compression level.
    pub fn compression_level(&self) -> CompressionLevel {
        self.compression_level.clone()
    }

    /// Returns the OS.
    pub fn os(&self) -> Os {
        self.os.clone()
    }

    /// Returns `true` if the text flag is set.
    pub fn is_text(&self) -> bool {
        self.is_text
    }

    /// Returns `true` if the header carries a CRC16.
    ///
    /// For a decoded header this means the CRC16 was present and matched.
    pub fn is_verified(&self) -> bool {
        self.is_verified
    }

    /// Returns the extra field, if any.
    pub fn extra_field(&self) -> Option<&ExtraField> {
        self.extra_field.as_ref()
    }

    /// Returns the file name, if any.
    pub fn filename(&self) -> Option<&CString> {
        self.filename.as_ref()
    }

    /// Returns the comment, if any.
    pub fn comment(&self) -> Option<&CString> {
        self.comment.as_ref()
    }

    /// Builds the flags byte from the header's state.
    fn flags(&self) -> u8 {
        let mut flags = 0;
        if self.is_text {
            flags |= F_TEXT;
        }
        if self.is_verified {
            flags |= F_HCRC;
        }
        if self.extra_field.is_some() {
            flags |= F_EXTRA;
        }
        if self.filename.is_some() {
            flags |= F_NAME;
        }
        if self.comment.is_some() {
            flags |= F_COMMENT;
        }
        flags
    }

    /// Returns the low 16 bits of the CRC-32 of this header serialized with
    /// `is_verified` cleared.
    ///
    /// Clearing `is_verified` keeps `write_to` from recursing back into this
    /// method, but it also clears the header-CRC bit in the checksummed flags
    /// byte.
    ///
    /// NOTE(review): RFC 1952 appears to define the CRC16 over the header
    /// bytes as transmitted, i.e. with the header-CRC flag bit set. Headers
    /// verified by other implementations may therefore not validate here (and
    /// vice versa). Confirm before changing, since it alters the wire format.
    fn crc16(&self) -> u16 {
        let mut crc = checksum::Crc32::new();
        let mut buf = Vec::new();
        Header {
            is_verified: false,
            ..self.clone()
        }
        .write_to(&mut buf)
        .expect("serializing an already validated header into a Vec must not fail");
        crc.update(&buf);
        crc.value() as u16
    }

    /// Serializes the header: magic, method, flags, modification time, level,
    /// OS, then the optional extra field, file name, comment and CRC16.
    ///
    /// # Errors
    ///
    /// Propagates errors from `writer` and from `ExtraField::write_to`.
    fn write_to<W>(&self, mut writer: W) -> io::Result<()>
    where
        W: io::Write,
    {
        writer.write_all(&GZIP_ID)?;
        writer.write_all(&[COMPRESSION_METHOD_DEFLATE, self.flags()])?;
        writer.write_all(&self.modification_time.to_le_bytes())?;
        writer.write_all(&[self.compression_level.to_u8(), self.os.to_u8()])?;
        if let Some(ref x) = self.extra_field {
            x.write_to(&mut writer)?;
        }
        if let Some(ref x) = self.filename {
            writer.write_all(x.as_bytes_with_nul())?;
        }
        if let Some(ref x) = self.comment {
            writer.write_all(x.as_bytes_with_nul())?;
        }
        if self.is_verified {
            writer.write_all(&self.crc16().to_le_bytes())?;
        }
        Ok(())
    }

    /// Parses a header from `reader`.
    ///
    /// # Errors
    ///
    /// Returns an invalid-data error when the magic number is wrong, the
    /// compression method is not DEFLATE, or the header CRC16 does not match.
    /// I/O errors from `reader` (including `UnexpectedEof`) are propagated.
    pub(crate) fn read_from<R>(mut reader: R) -> io::Result<Self>
    where
        R: io::Read,
    {
        // Fixed part: ID (2), method (1), flags (1), mtime (4), level (1), OS (1).
        let mut buf = [0; 2 + 1 + 1 + 4 + 1 + 1];
        reader.read_exact(&mut buf)?;

        let id = &buf[0..2];
        if id != GZIP_ID {
            return Err(invalid_data_error!(
                "Unexpected GZIP ID: value={:?}, expected={:?}",
                id,
                GZIP_ID
            ));
        }

        let compression_method = buf[2];
        if compression_method != COMPRESSION_METHOD_DEFLATE {
            return Err(invalid_data_error!(
                "Compression methods other than DEFLATE(8) are unsupported: method={}",
                compression_method
            ));
        }

        let flags = buf[3];
        let mut this = Header {
            modification_time: u32::from_le_bytes([buf[4], buf[5], buf[6], buf[7]]),
            compression_level: CompressionLevel::from_u8(buf[8]),
            os: Os::from_u8(buf[9]),
            // The text flag must be restored: it is reported by `is_text()` and
            // is part of the flags byte that `crc16` re-serializes below.
            is_text: flags & F_TEXT != 0,
            is_verified: false,
            extra_field: None,
            filename: None,
            comment: None,
        };

        if flags & F_EXTRA != 0 {
            this.extra_field = Some(ExtraField::read_from(&mut reader)?);
        }
        if flags & F_NAME != 0 {
            this.filename = Some(read_cstring(&mut reader)?);
        }
        if flags & F_COMMENT != 0 {
            this.comment = Some(read_cstring(&mut reader)?);
        }
        // The CRC16 check is compiled out under `cfg(fuzzing)`.
        if flags & F_HCRC != 0 && cfg!(not(fuzzing)) {
            let mut buf = [0; 2];
            reader.read_exact(&mut buf)?;
            let crc = u16::from_le_bytes(buf);
            let expected = this.crc16();
            if crc != expected {
                return Err(invalid_data_error!(
                    "CRC16 of GZIP header mismatched: value={}, expected={}",
                    crc,
                    expected
                ));
            }
            this.is_verified = true;
        }
        Ok(this)
    }
}
/// Reads bytes from `reader` up to and including the next NUL byte and returns
/// the bytes before the NUL as a `CString`.
///
/// The reader is consumed one byte at a time, so nothing past the terminating
/// NUL is read.
///
/// # Errors
///
/// Propagates any error from `reader`, including `UnexpectedEof` when the
/// input ends before a NUL byte is found.
fn read_cstring<R>(mut reader: R) -> io::Result<CString>
where
    R: io::Read,
{
    let mut bytes = Vec::new();
    let mut byte = [0u8; 1];
    loop {
        reader.read_exact(&mut byte)?;
        if byte[0] == 0 {
            break;
        }
        bytes.push(byte[0]);
    }
    // The loop stops at the first NUL, so `bytes` holds no interior NUL and
    // this conversion cannot fail; the safe constructor avoids relying on
    // that invariant through `unsafe`.
    CString::new(bytes).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
}
/// Extra field of a GZIP header: a sequence of sub-fields.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ExtraField {
    /// Sub-fields in the order they appear in the header.
    pub subfields: Vec<ExtraSubField>,
}

impl ExtraField {
    /// Reads a little-endian `u16` byte count and then parses sub-fields until
    /// exactly that many bytes have been consumed.
    ///
    /// # Errors
    ///
    /// Propagates I/O errors; a sub-field that runs past the declared byte
    /// count surfaces as the error returned by `ExtraSubField::read_from`.
    fn read_from<R>(mut reader: R) -> io::Result<Self>
    where
        R: io::Read,
    {
        let mut size_bytes = [0u8; 2];
        reader.read_exact(&mut size_bytes)?;
        let total = u64::from(u16::from_le_bytes(size_bytes));

        // Restrict parsing to the declared size of the extra field.
        let mut limited = reader.take(total);
        let mut subfields = Vec::new();
        while limited.limit() != 0 {
            let subfield = ExtraSubField::read_from(&mut limited)?;
            subfields.push(subfield);
        }
        Ok(ExtraField { subfields })
    }

    /// Writes the total byte count followed by every sub-field.
    ///
    /// # Errors
    ///
    /// Returns an invalid-data error when the encoded sub-fields need more
    /// than 0xFFFF bytes; otherwise propagates errors from `writer`.
    fn write_to<W>(&self, mut writer: W) -> io::Result<()>
    where
        W: io::Write,
    {
        let len: usize = self.subfields.iter().map(ExtraSubField::write_len).sum();
        if len > 0xFFFF {
            return Err(invalid_data_error!("extra field too long: {}", len));
        }
        writer.write_all(&(len as u16).to_le_bytes())?;
        self.subfields
            .iter()
            .try_for_each(|subfield| subfield.write_to(&mut writer))
    }
}
/// One sub-field of a GZIP extra field.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ExtraSubField {
    /// Two-byte identifier of the sub-field.
    pub id: [u8; 2],
    /// Payload of the sub-field.
    pub data: Vec<u8>,
}

impl ExtraSubField {
    /// Reads a sub-field: two identifier bytes, a little-endian `u16` payload
    /// length, then the payload itself.
    ///
    /// # Errors
    ///
    /// Propagates I/O errors, including `UnexpectedEof` on truncated input.
    fn read_from<R>(mut reader: R) -> io::Result<Self>
    where
        R: io::Read,
    {
        let mut id = [0u8; 2];
        reader.read_exact(&mut id)?;
        let mut size_bytes = [0u8; 2];
        reader.read_exact(&mut size_bytes)?;
        let mut data = vec![0; usize::from(u16::from_le_bytes(size_bytes))];
        reader.read_exact(&mut data)?;
        Ok(ExtraSubField { id, data })
    }

    /// Writes the sub-field in the layout that `read_from` expects.
    ///
    /// # Errors
    ///
    /// Returns an `InvalidData` error, before writing anything, when the
    /// payload is longer than 0xFFFF bytes: the length prefix is only 16 bits
    /// wide, so a longer payload would be written with a truncated length.
    /// Errors from `writer` are propagated.
    fn write_to<W>(&self, mut writer: W) -> io::Result<()>
    where
        W: io::Write,
    {
        let len = self.data.len();
        if len > 0xFFFF {
            return Err(io::Error::new(
                io::ErrorKind::InvalidData,
                format!("extra sub-field data too long: {}", len),
            ));
        }
        writer.write_all(&self.id)?;
        // Lossless: `len` was checked against 0xFFFF above.
        writer.write_all(&(len as u16).to_le_bytes())?;
        writer.write_all(&self.data)?;
        Ok(())
    }

    /// Returns the number of bytes `write_to` emits: identifier (2), length
    /// prefix (2) and payload.
    fn write_len(&self) -> usize {
        4 + self.data.len()
    }
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Os {
Fat,
Amiga,
Vms,
Unix,
VmCms,
AtariTos,
Hpfs,
Macintosh,
ZSystem,
CpM,
Tops20,
Ntfs,
Qdos,
AcornRiscos,
Unknown,
Undefined(u8),
}
impl Os {
fn to_u8(&self) -> u8 {
match *self {
Os::Fat => OS_FAT,
Os::Amiga => OS_AMIGA,
Os::Vms => OS_VMS,
Os::Unix => OS_UNIX,
Os::VmCms => OS_VM_CMS,
Os::AtariTos => OS_ATARI_TOS,
Os::Hpfs => OS_HPFS,
Os::Macintosh => OS_MACINTOSH,
Os::ZSystem => OS_Z_SYSTEM,
Os::CpM => OS_CPM,
Os::Tops20 => OS_TOPS20,
Os::Ntfs => OS_NTFS,
Os::Qdos => OS_QDOS,
Os::AcornRiscos => OS_ACORN_RISCOS,
Os::Unknown => OS_UNKNOWN,
Os::Undefined(os) => os,
}
}
fn from_u8(x: u8) -> Self {
match x {
OS_FAT => Os::Fat,
OS_AMIGA => Os::Amiga,
OS_VMS => Os::Vms,
OS_UNIX => Os::Unix,
OS_VM_CMS => Os::VmCms,
OS_ATARI_TOS => Os::AtariTos,
OS_HPFS => Os::Hpfs,
OS_MACINTOSH => Os::Macintosh,
OS_Z_SYSTEM => Os::ZSystem,
OS_CPM => Os::CpM,
OS_TOPS20 => Os::Tops20,
OS_NTFS => Os::Ntfs,
OS_QDOS => Os::Qdos,
OS_ACORN_RISCOS => Os::AcornRiscos,
OS_UNKNOWN => Os::Unknown,
os => Os::Undefined(os),
}
}
}
/// Options for a GZIP [`Encoder`]: the header to emit plus the options passed
/// on to the DEFLATE encoder.
#[derive(Debug)]
pub struct EncodeOptions<E>
where
    E: lz77::Lz77Encode,
{
    header: Header,
    options: deflate::EncodeOptions<E>,
}

impl Default for EncodeOptions<lz77::DefaultLz77Encoder> {
    /// Default header (see [`HeaderBuilder::new`]) with default DEFLATE options.
    fn default() -> Self {
        let header = HeaderBuilder::new().finish();
        let options = Default::default();
        EncodeOptions { header, options }
    }
}

impl EncodeOptions<lz77::DefaultLz77Encoder> {
    /// Equivalent to `EncodeOptions::default()`.
    pub fn new() -> Self {
        Default::default()
    }
}

impl<E> EncodeOptions<E>
where
    E: lz77::Lz77Encode,
{
    /// Creates options that use `lz77` as the LZ77 encoder; the header's
    /// compression level is derived from the encoder's level.
    pub fn with_lz77(lz77: E) -> Self {
        let mut header = HeaderBuilder::new().finish();
        header.compression_level = lz77.compression_level().into();
        let options = deflate::EncodeOptions::with_lz77(lz77);
        EncodeOptions { header, options }
    }

    /// Disables compression and resets the header's level to `Unknown`.
    pub fn no_compression(self) -> Self {
        let EncodeOptions {
            mut header,
            options,
        } = self;
        let options = options.no_compression();
        header.compression_level = CompressionLevel::Unknown;
        EncodeOptions { header, options }
    }

    /// Replaces the header to be written.
    pub fn header(self, header: Header) -> Self {
        EncodeOptions { header, ..self }
    }

    /// Forwards the block size to the DEFLATE options.
    pub fn block_size(self, size: usize) -> Self {
        let EncodeOptions { header, options } = self;
        EncodeOptions {
            header,
            options: options.block_size(size),
        }
    }

    /// Forwards the fixed-Huffman-codes setting to the DEFLATE options.
    pub fn fixed_huffman_codes(self) -> Self {
        let EncodeOptions { header, options } = self;
        EncodeOptions {
            header,
            options: options.fixed_huffman_codes(),
        }
    }
}
pub struct Encoder<W, E = lz77::DefaultLz77Encoder> {
header: Header,
crc32: checksum::Crc32,
input_size: u32,
writer: deflate::Encoder<W, E>,
}
impl<W> Encoder<W, lz77::DefaultLz77Encoder>
where
W: io::Write,
{
pub fn new(inner: W) -> io::Result<Self> {
Self::with_options(inner, EncodeOptions::new())
}
}
impl<W, E> Encoder<W, E>
where
W: io::Write,
E: lz77::Lz77Encode,
{
pub fn with_options(mut inner: W, options: EncodeOptions<E>) -> io::Result<Self> {
options.header.write_to(&mut inner)?;
Ok(Encoder {
header: options.header.clone(),
crc32: checksum::Crc32::new(),
input_size: 0,
writer: deflate::Encoder::with_options(inner, options.options),
})
}
pub fn header(&self) -> &Header {
&self.header
}
pub fn finish(self) -> Finish<W, io::Error> {
let trailer = Trailer {
crc32: self.crc32.value(),
input_size: self.input_size,
};
let mut inner = finish_try!(self.writer.finish());
match trailer.write_to(&mut inner).and_then(|_| inner.flush()) {
Ok(_) => Finish::new(inner, None),
Err(e) => Finish::new(inner, Some(e)),
}
}
pub fn as_inner_ref(&self) -> &W {
self.writer.as_inner_ref()
}
pub fn as_inner_mut(&mut self) -> &mut W {
self.writer.as_inner_mut()
}
pub fn into_inner(self) -> W {
self.writer.into_inner()
}
}
impl<W, E> io::Write for Encoder<W, E>
where
    W: io::Write,
    E: lz77::Lz77Encode,
{
    /// Passes `buf` to the DEFLATE encoder, then folds the accepted prefix
    /// into the running CRC-32 and the wrapping byte counter.
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        self.writer.write(buf).map(|accepted| {
            self.crc32.update(&buf[..accepted]);
            self.input_size = self.input_size.wrapping_add(accepted as u32);
            accepted
        })
    }

    /// Flushes the DEFLATE encoder.
    fn flush(&mut self) -> io::Result<()> {
        io::Write::flush(&mut self.writer)
    }
}
impl<W, E> Complete for Encoder<W, E>
where
    W: io::Write,
    E: lz77::Lz77Encode,
{
    /// Finishes the encoder and discards the underlying writer, keeping only
    /// the success or failure of the operation.
    fn complete(self) -> io::Result<()> {
        match self.finish().into_result() {
            Ok(_) => Ok(()),
            Err(e) => Err(e),
        }
    }
}
/// Decoder for a single GZIP member.
#[derive(Debug)]
pub struct Decoder<R> {
    /// Header parsed when the decoder was created (or last reset).
    header: Header,
    /// DEFLATE decoder wrapping the underlying reader.
    reader: deflate::Decoder<R>,
    /// Running CRC-32 of the decoded bytes.
    crc32: checksum::Crc32,
    /// Set once the end of the member has been reached.
    eos: bool,
}

impl<R> Decoder<R>
where
    R: io::Read,
{
    /// Creates a decoder, reading the GZIP header from `inner` immediately.
    ///
    /// # Errors
    ///
    /// Fails if the header cannot be read or is invalid.
    pub fn new(mut inner: R) -> io::Result<Self> {
        Header::read_from(&mut inner).map(|header| Self::with_header(inner, header))
    }

    /// Returns the header of the member being decoded.
    pub fn header(&self) -> &Header {
        &self.header
    }

    /// Returns a shared reference to the underlying reader.
    pub fn as_inner_ref(&self) -> &R {
        let deflate_decoder = &self.reader;
        deflate_decoder.as_inner_ref()
    }

    /// Returns a mutable reference to the underlying reader.
    pub fn as_inner_mut(&mut self) -> &mut R {
        let deflate_decoder = &mut self.reader;
        deflate_decoder.as_inner_mut()
    }

    /// Unwraps the decoder and returns the underlying reader.
    pub fn into_inner(self) -> R {
        let Decoder { reader, .. } = self;
        reader.into_inner()
    }

    /// Builds a decoder around `inner` for a header that was already parsed.
    fn with_header(inner: R, header: Header) -> Self {
        let reader = deflate::Decoder::new(inner);
        let crc32 = checksum::Crc32::new();
        Decoder {
            header,
            reader,
            crc32,
            eos: false,
        }
    }

    /// Prepares the decoder for the next member: installs `header`, resets the
    /// DEFLATE decoder and starts a fresh checksum.
    fn reset(&mut self, header: Header) {
        self.header = header;
        self.reader.reset();
        self.crc32 = checksum::Crc32::new();
        self.eos = false;
    }
}
impl<R> io::Read for Decoder<R>
where
    R: io::Read,
{
    /// Reads decoded bytes into `buf`.
    ///
    /// When the DEFLATE stream is exhausted, the trailer is read from the
    /// underlying reader and its CRC-32 is compared with the checksum of the
    /// decoded data (the comparison is compiled out under `cfg(fuzzing)`).
    /// After that, every call returns `Ok(0)`.
    ///
    /// # Errors
    ///
    /// Propagates errors from the DEFLATE decoder and from reading the
    /// trailer, and returns an invalid-data error on a CRC-32 mismatch.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        // A zero-length buffer always reads 0 bytes; that must not be mistaken
        // for the end of the DEFLATE stream, or the trailer would be consumed
        // from the middle of the compressed data.
        if self.eos || buf.is_empty() {
            return Ok(0);
        }

        let read_size = self.reader.read(buf)?;
        if read_size != 0 {
            self.crc32.update(&buf[..read_size]);
            return Ok(read_size);
        }

        // End of the DEFLATE stream: validate the trailer exactly once.
        self.eos = true;
        let trailer = Trailer::read_from(self.reader.as_inner_mut())?;
        if cfg!(not(fuzzing)) && trailer.crc32 != self.crc32.value() {
            return Err(invalid_data_error!(
                "CRC32 mismatched: value={}, expected={}",
                self.crc32.value(),
                trailer.crc32
            ));
        }
        Ok(0)
    }
}
/// Decoder for a stream of concatenated GZIP members.
#[derive(Debug)]
pub struct MultiDecoder<R> {
    /// Decoder for the member currently being read.
    decoder: Decoder<R>,
    /// Set once no further member header could be read.
    eos: bool,
}

impl<R> MultiDecoder<R>
where
    R: io::Read,
{
    /// Creates a decoder, reading the first member's header from `inner`.
    ///
    /// # Errors
    ///
    /// Fails if the first header cannot be read or is invalid.
    pub fn new(inner: R) -> io::Result<Self> {
        Decoder::new(inner).map(|decoder| MultiDecoder {
            decoder,
            eos: false,
        })
    }

    /// Returns the header of the member currently being decoded.
    pub fn header(&self) -> &Header {
        let current = &self.decoder;
        current.header()
    }

    /// Returns a shared reference to the underlying reader.
    pub fn as_inner_ref(&self) -> &R {
        let current = &self.decoder;
        current.as_inner_ref()
    }

    /// Returns a mutable reference to the underlying reader.
    pub fn as_inner_mut(&mut self) -> &mut R {
        let current = &mut self.decoder;
        current.as_inner_mut()
    }

    /// Unwraps the decoder and returns the underlying reader.
    pub fn into_inner(self) -> R {
        let MultiDecoder { decoder, .. } = self;
        decoder.into_inner()
    }
}
impl<R> io::Read for MultiDecoder<R>
where
    R: io::Read,
{
    /// Reads decoded bytes into `buf`, moving on to the next GZIP member
    /// whenever the current one is exhausted.
    ///
    /// The stream ends (`Ok(0)`) when reading the next member's header fails
    /// with `UnexpectedEof`; after that every call returns `Ok(0)`.
    ///
    /// # Errors
    ///
    /// Propagates errors from the member decoder and any header error other
    /// than `UnexpectedEof`.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        // A zero-length buffer always reads 0 bytes; that must not be mistaken
        // for the end of the current member.
        if self.eos || buf.is_empty() {
            return Ok(0);
        }

        // Iterate rather than recurse: input made of many empty members would
        // otherwise add one stack frame per member.
        loop {
            let read_size = self.decoder.read(buf)?;
            if read_size != 0 {
                return Ok(read_size);
            }

            // The current member is finished; try to start the next one.
            match Header::read_from(self.as_inner_mut()) {
                Ok(header) => self.decoder.reset(header),
                Err(e) => {
                    if e.kind() == io::ErrorKind::UnexpectedEof {
                        self.eos = true;
                        return Ok(0);
                    }
                    return Err(e);
                }
            }
        }
    }
}
// Unit tests for the GZIP encoder, the decoders and the extra-field codec.
#[cfg(test)]
mod tests {
use super::*;
use crate::finish::AutoFinish;
use std::io::{self, Write};
// Decodes `buf` as a single GZIP member. Panics if the header is invalid;
// errors raised while reading the body are returned.
fn decode(buf: &[u8]) -> io::Result<Vec<u8>> {
let mut decoder = Decoder::new(buf).unwrap();
let mut buf = Vec::with_capacity(buf.len());
io::copy(&mut decoder, &mut buf)?;
Ok(buf)
}
// Decodes `buf` as a sequence of concatenated GZIP members. Panics if the
// first header is invalid.
fn decode_multi(buf: &[u8]) -> io::Result<Vec<u8>> {
let mut decoder = MultiDecoder::new(buf).unwrap();
let mut buf = Vec::with_capacity(buf.len());
io::copy(&mut decoder, &mut buf)?;
Ok(buf)
}
// Encodes `text` as one GZIP member using the default options.
fn encode(text: &[u8]) -> io::Result<Vec<u8>> {
let mut encoder = Encoder::new(Vec::new()).unwrap();
io::copy(&mut &text[..], &mut encoder).unwrap();
encoder.finish().into_result()
}
// Data encoded with the default encoder decodes back to the original bytes.
#[test]
fn encode_works() {
let plain = b"Hello World! Hello GZIP!!";
let mut encoder = Encoder::new(Vec::new()).unwrap();
io::copy(&mut &plain[..], &mut encoder).unwrap();
let encoded = encoder.finish().into_result().unwrap();
assert_eq!(decode(&encoded).unwrap(), plain);
}
// Dropping an `AutoFinish` wrapper must leave a complete, decodable stream in `buf`.
#[test]
fn encoder_auto_finish_works() {
let plain = b"Hello World! Hello GZIP!!";
let mut buf = Vec::new();
{
let mut encoder = AutoFinish::new(Encoder::new(&mut buf).unwrap());
io::copy(&mut &plain[..], &mut encoder).unwrap();
}
assert_eq!(decode(&buf).unwrap(), plain);
}
// `Decoder` stops after the first member, while `MultiDecoder` reads both
// concatenated members.
#[test]
fn multi_decode_works() {
use std::iter;
let text = b"Hello World!";
let encoded: Vec<u8> = iter::repeat(encode(text).unwrap())
.take(2)
.flat_map(|b| b)
.collect();
assert_eq!(decode(&encoded).unwrap(), b"Hello World!");
assert_eq!(decode_multi(&encoded).unwrap(), b"Hello World!Hello World!");
}
// Malformed input that must produce an error instead of decoding successfully
// (presumably a regression input for issue #15 — confirm against the tracker).
#[test]
fn issue_15_1() {
let data = b"\x1F\x8B\x08\xC1\x7B\x7B\x7B\x7B\x7B\x7B\x7B\x7B\x7B\x7B\x7B\x7B\x7B\x7B\x7B\x7B\x7B\x7B\x80\x80\x80\x80\x7B\x7B\x7B\x7B\x7B\x7B\x97\x7B\x7B\x7B\x86\x27\xEB\x60\xA7\xA8\x46\x6E\x1F\x33\x51\x5C\x34\xE0\xD2\x2E\xE8\x0C\x19\x1D\x3D\x3C\xFD\x3B\x6A\xFA\x63\xDF\x28\x87\x86\xF2\xA6\xAC\x87\x86\xF2\xA6\xAC\xD5";
assert!(decode(&data[..]).is_err());
}
// Second malformed input of the same kind; decoding must fail.
#[test]
fn issue_15_2() {
let data = b"\x1F\x8B\x08\xC1\x7B\x7B\x7B\x7B\x7B\xFC\x5D\x2D\xDC\x08\xC1\x7B\x7B\x7B\x7B\x7B\xFC\x5D\x2D\xDC\x08\xC1\x7B\x7F\x7B\x7B\x7B\xFC\x5D\x2D\xDC\x69\x32\x48\x22\x5A\x81\x81\x42\x42\x81\x7E\x81\x81\x81\x81\xF2\x17";
assert!(decode(&data[..]).is_err());
}
// Third malformed input of the same kind; decoding must fail.
#[test]
fn issue_15_3() {
let data = b"\x1F\x8B\x08\xC1\x91\x28\x71\xDC\xF2\x2D\x34\x35\x31\x35\x34\x30\x70\x6E\x60\x35\x31\x32\x32\x33\x32\x33\x37\x32\x36\x38\xDD\x1C\xE5\x2A\xDD\xDD\xDD\x22\xDD\xDD\xDD\xDC\x88\x13\xC9\x40\x60\xA7";
assert!(decode(&data[..]).is_err());
}
// An extra field written with `write_to` is read back unchanged by `read_from`.
#[test]
fn extra_field() {
let f = ExtraField {
subfields: vec![ExtraSubField {
id: [0, 0x42],
data: "abc".into(),
}],
};
let mut buf = Vec::new();
f.write_to(&mut buf).unwrap();
assert_eq!(ExtraField::read_from(&buf[..]).unwrap(), f);
}
// An extra field placed in the header survives a full encode/decode cycle.
#[test]
fn encode_with_extra_field() {
let mut buf = Vec::new();
let extra_field = ExtraField {
subfields: vec![ExtraSubField {
id: [0, 0x42],
data: "abc".into(),
}],
};
{
let header = HeaderBuilder::new()
.extra_field(extra_field.clone())
.finish();
let ops = EncodeOptions::new().header(header);
let mut encoder = Encoder::with_options(&mut buf, ops).unwrap();
write!(encoder, "hello world").unwrap();
encoder.finish().as_result().unwrap();
}
{
let mut decoder = Decoder::new(&buf[..]).unwrap();
io::copy(&mut decoder, &mut io::sink()).unwrap();
assert_eq!(decoder.header().extra_field(), Some(&extra_field));
}
}
}