diff --git a/bwt.rs b/bwt.rs index b35545b..91cd521 100644 --- a/bwt.rs +++ b/bwt.rs @@ -48,6 +48,7 @@ This is an original (mostly trivial) implementation. */ use std::{io, iter, num, vec}; +use shared::FiniteWriter; pub static total_symbols: uint = 0x100; @@ -167,7 +168,6 @@ pub fn encode_brute(input: &[u8], suf: &mut [Suffix], fn_out: |u8|) -> Suffix { } } - assert!( origin.is_some() ); origin.unwrap() } @@ -335,7 +335,7 @@ impl Reader for Decoder { self.header = true; } let mut amt = dst.len(); - let len = amt; + let dst_len = amt; while amt > 0 { if self.output.len() == self.start { @@ -344,19 +344,19 @@ impl Reader for Decoder { break } } - let n = num::min( amt, self.output.len() - self.start ); + let n = num::min(amt, self.output.len() - self.start); vec::bytes::copy_memory( - dst.mut_slice_from(len - amt), - self.output.slice_from(self.start) + dst.mut_slice_from(dst_len - amt), + self.output.slice(self.start, self.start + n) ); self.start += n; amt -= n; } - if len == amt { + if dst_len == amt { Err(io::standard_error(io::EndOfFile)) } else { - Ok(len - amt) + Ok(dst_len - amt) } } } @@ -403,12 +403,13 @@ impl Encoder { Ok(()) } - /// This function is used to flag that this session of compression is done - /// with. The stream is finished up (final bytes are written), and then the - /// wrapped writer is returned. 
- pub fn finish(mut self) -> (W, io::IoResult<()>) { - let result = self.flush(); - (self.w, result) + /// End the current block + fn finish_block(&mut self) -> io::IoResult<()> { + if self.buf.len() > 0 { + self.encode_block() + } else { + Ok(()) + } } } @@ -432,12 +433,23 @@ impl Writer for Encoder { } fn flush(&mut self) -> io::IoResult<()> { - let ret = if self.buf.len() > 0 { - self.encode_block() - } else { - Ok(()) - }; - ret.and(self.w.flush()) + self.finish_block().and(self.w.flush()) + } +} + +impl FiniteWriter for Encoder { + fn write_terminator(&mut self) -> io::IoResult<()> { + self.finish_block().and(self.w.write_terminator()) + } +} + +impl Encoder { + /// This function is used to flag that this session of compression is done + /// with. The stream is finished up (final bytes are written), and then the + /// wrapped writer is returned. + pub fn finish(mut self) -> (W, io::IoResult<()>) { + let result = (&mut self as &mut FiniteWriter).write_terminator(); + (self.w, result) } } diff --git a/entropy/ari.rs b/entropy/ari.rs index 74db340..6b44a26 100644 --- a/entropy/ari.rs +++ b/entropy/ari.rs @@ -33,6 +33,7 @@ This is an original implementation. 
*/ use std::{io, vec}; +use shared::FiniteWriter; pub type Symbol = u8; static symbol_bits: uint = 8; @@ -184,16 +185,6 @@ impl Encoder { self.stream.write(bytes) } - /// Finish decoding by writing the code tail word - pub fn finish(mut self) -> (W, io::IoResult<()>) { - assert!(border_bits == 32); - self.bytes_written += 4; - let code = self.range.get_code_tail(); - let result = self.stream.write_be_u32(code); - let result = result.and(self.stream.flush()); - (self.stream, result) - } - /// Flush the output stream pub fn flush(&mut self) -> io::IoResult<()> { self.stream.flush() @@ -205,6 +196,24 @@ impl Encoder { } } +impl Encoder { + /// Finish encoding by writing the code tail word + pub fn finish(mut self) -> (W, io::IoResult<()>) { + let ret = self.write_terminator(); + (self.stream, ret) + } + + /// Write code tail bits + pub fn write_terminator(&mut self) -> io::IoResult<()> { + assert!(border_bits == 32); + self.bytes_written += 4; + let code = self.range.get_code_tail(); + let result = self.stream.write_be_u32(code); + result.and(self.stream.write_terminator()) + } +} + + /// An arithmetic decoder helper pub struct Decoder { priv stream: R, @@ -435,6 +444,7 @@ impl Model for FrequencyTable { /// A basic byte-encoding arithmetic +/// uses a special terminator code to end the stream pub struct ByteEncoder { /// A lower level encoder encoder: Encoder, @@ -448,7 +458,7 @@ impl ByteEncoder { let freq_max = range_default_threshold >> 2; ByteEncoder { encoder: Encoder::new(w), - freq: FrequencyTable::new_flat(symbol_total, freq_max), + freq: FrequencyTable::new_flat(symbol_total+1, freq_max), } } } @@ -468,13 +478,32 @@ impl Writer for ByteEncoder { } } +impl FiniteWriter for ByteEncoder { + fn write_terminator(&mut self) -> io::IoResult<()> { + self.encoder.encode(symbol_total, &self.freq). 
+ and(self.encoder.write_terminator()) + } +} + +impl ByteEncoder { + /// Finish encoding: emit the terminator symbol, let the inner encoder write its code tail exactly once, and return the underlying stream + pub fn finish(mut self) -> (W, io::IoResult<()>) { + let ret = self.encoder.encode(symbol_total, &self.freq); + let (w, ret_encoder) = self.encoder.finish(); + (w, ret.and(ret_encoder)) + } +} + /// A basic byte-decoding arithmetic +/// expects a special terminator code for the end of the stream pub struct ByteDecoder { /// A lower level decoder decoder: Decoder, /// A basic frequency table freq: FrequencyTable, + /// Remember if we found the terminator code + priv is_eof: bool, } impl ByteDecoder { @@ -484,7 +513,8 @@ impl ByteDecoder { let freq_max = range_default_threshold >> 2; ByteDecoder { decoder: Decoder::new(r), - freq: FrequencyTable::new_flat(symbol_total, freq_max), + freq: FrequencyTable::new_flat(symbol_total+1, freq_max), + is_eof: false, } } } @@ -494,20 +524,21 @@ impl Reader for ByteDecoder { if self.decoder.tell() == 0 { if_ok!(self.decoder.start()); } - let mut ret = Ok(dst.len()); + if self.is_eof { + return Err(io::standard_error(io::EndOfFile)) + } + let mut amount = 0u; for out_byte in dst.mut_iter() { - match self.decoder.decode(&self.freq) { - Ok(value) => { - self.freq.update(value, 10, 1); - *out_byte = value as u8; - }, - Err(e) => { - ret = Err(e); - break - } + let value = if_ok!(self.decoder.decode(&self.freq)); + if value == symbol_total { + self.is_eof = true; + break } + self.freq.update(value, 10, 1); + *out_byte = value as u8; + amount += 1; } - ret + Ok(amount) } } @@ -523,12 +554,12 @@ mod test { info!("Roundtrip Ari of size {}", bytes.len()); let mut e = ByteEncoder::new(MemWriter::new()); e.write(bytes).unwrap(); - let (e, r) = e.encoder.finish(); + let (e, r) = e.finish(); r.unwrap(); let encoded = e.unwrap(); debug!("Roundtrip input {:?} encoded {:?}", bytes, encoded); let mut d = ByteDecoder::new(BufReader::new(encoded)); - let decoded = d.read_bytes(bytes.len()).unwrap(); + let decoded = d.read_to_end().unwrap(); 
assert_eq!(bytes.as_slice(), decoded.as_slice()); } diff --git a/lib.rs b/lib.rs index 9c41591..248a27f 100644 --- a/lib.rs +++ b/lib.rs @@ -8,7 +8,10 @@ extern mod extra; +pub use self::shared::FiniteWriter; + mod adler32; +mod shared; pub mod bwt; pub mod dc; diff --git a/lz4.rs b/lz4.rs index a20d67c..adcb68a 100644 --- a/lz4.rs +++ b/lz4.rs @@ -26,6 +26,7 @@ can be found at https://github.com/bkaradzic/go-lz4. use std::io; use std::num; use std::vec; +use shared::FiniteWriter; static MAGIC: u32 = 0x184d2204; @@ -243,6 +244,7 @@ impl Decoder { // raw block to read n if n & 0x80000000 != 0 => { let amt = (n & 0x7fffffff) as uint; + debug!("decoding a raw block of size {}", amt) self.output.truncate(0); self.output.reserve(amt); if_ok!(self.r.push_bytes(&mut self.output, amt)); @@ -252,12 +254,14 @@ impl Decoder { // actual block to decompress n => { + debug!("decoding a compressed block of size {}", n); let n = n as uint; self.temp.truncate(0); self.temp.reserve(n); if_ok!(self.r.push_bytes(&mut self.temp, n)); let target = num::min(self.max_block_size, 4 * n / 3); + debug!("target size: {}", target); self.output.truncate(0); self.output.reserve(target); let mut decoder = BlockDecoder { @@ -269,6 +273,7 @@ impl Decoder { }; self.start = 0; self.end = decoder.decode(); + debug!("end of block: {}", self.end); } } @@ -276,6 +281,8 @@ impl Decoder { let cksum = if_ok!(self.r.read_le_u32()); debug!("ignoring block checksum {:?}", cksum); } + + debug!("block is done"); return Ok(true); } @@ -357,15 +364,13 @@ impl Encoder { false } - /// This function is used to flag that this session of compression is done - /// with. The stream is finished up (final bytes are written), and then the - /// wrapped writer is returned. 
- pub fn finish(mut self) -> (W, io::IoResult<()>) { - let result = self.flush(); - let result = result.and(self.w.write_le_u32(0)); - // XXX: this checksum is wrong - let result = result.and(self.w.write_le_u32(0)); - (self.w, result) + /// End the current block + fn finish_block(&mut self) -> io::IoResult<()> { + if self.buf.len() > 0 { + self.encode_block() + } else { + Ok(()) + } } } @@ -396,13 +401,31 @@ impl Writer for Encoder { } fn flush(&mut self) -> io::IoResult<()> { - if self.buf.len() > 0 { - if_ok!(self.encode_block()); - } - self.w.flush() + self.finish_block().and(self.w.flush()) } } +impl FiniteWriter for Encoder { + fn write_terminator(&mut self) -> io::IoResult<()> { + let result = self.finish_block(); + let result = result.and(self.w.write_le_u32(0)); + // XXX: this checksum is wrong + let result = result.and(self.w.write_le_u32(0)); + result.and(self.w.write_terminator()) + } +} + +impl Encoder { + /// This function is used to flag that this session of compression is done + /// with. The stream is finished up (final bytes are written), and then the + /// wrapped writer is returned. 
+ pub fn finish(mut self) -> (W, io::IoResult<()>) { + let result = (&mut self as &mut FiniteWriter).write_terminator(); + (self.w, result) + } +} + + #[cfg(test)] mod test { use extra::test; diff --git a/main.rs b/main.rs index 3211a53..2a5bcbb 100644 --- a/main.rs +++ b/main.rs @@ -15,7 +15,8 @@ extern mod compress; use std::hashmap::HashMap; use std::{io, os, str, vec}; use compress::{bwt, lz4}; -//use compress::entropy::ari; +use compress::entropy::ari; +use compress::FiniteWriter; static MAGIC : u32 = 0x73632172; //=r!cs @@ -54,8 +55,8 @@ impl Config { } struct Pass { - encode: 'static |~Writer,&Config| -> ~io::Writer, - decode: 'static |~Reader,&Config| -> ~io::Reader, + encode: 'static |~FiniteWriter, &Config| -> ~FiniteWriter, + decode: 'static |~Reader, &Config| -> ~Reader, info: ~str, } @@ -68,19 +69,18 @@ pub fn main() { decode: |r,_| r, info: ~"pass-through", }); - /* // unclear what to do with Ari since it requires the size to be known passes.insert(~"ari", Pass { encode: |w,_c| { - ~ari::ByteEncoder::new(w) as ~Writer + ~ari::ByteEncoder::new(w) as ~FiniteWriter }, decode: |r,_c| { ~ari::ByteDecoder::new(r) as ~Reader }, info: ~"Adaptive arithmetic byte coder", - });*/ + }); passes.insert(~"bwt", Pass { encode: |w,c| { - ~bwt::Encoder::new(w, c.block_size) as ~Writer + ~bwt::Encoder::new(w, c.block_size) as ~FiniteWriter }, decode: |r,_c| { ~bwt::Decoder::new(r, true) as ~Reader @@ -90,7 +90,7 @@ pub fn main() { /* // looks like we are missing the encoder implementation passes.insert(~"flate", Pass { encode: |w,_c| { - ~flate::Encoder::new(w, true) as ~Writer + ~flate::Encoder::new(w, true) as ~FiniteWriter }, decode: |r,_c| { ~flate::Decoder::new(r, true) as ~Reader @@ -99,7 +99,7 @@ pub fn main() { });*/ passes.insert(~"lz4", Pass { encode: |w,_c| { - ~lz4::Encoder::new(w) as ~Writer + ~lz4::Encoder::new(w) as ~FiniteWriter }, decode: |r,_c| { // LZ4 decoder seem to work ~lz4::Decoder::new(r) as ~Reader @@ -155,7 +155,7 @@ pub fn main() { 
output.write_u8(met.len() as u8).unwrap(); output.write_str(*met).unwrap(); } - let mut wsum: ~Writer = ~output; + let mut wsum: ~FiniteWriter = ~output; for met in config.methods.iter() { match passes.find(met) { Some(pa) => wsum = (pa.encode)(wsum, &config), @@ -163,6 +163,6 @@ pub fn main() { } } io::util::copy(&mut input, &mut wsum).unwrap(); - wsum.flush().unwrap(); + wsum.write_terminator().unwrap(); } } diff --git a/shared.rs b/shared.rs new file mode 100644 index 0000000..61ffa45 --- /dev/null +++ b/shared.rs @@ -0,0 +1,34 @@ +/*! + +Common types and functions shared between algorithms. + +*/ + +use std::io; + +/// A writer that knows when to stop. +/// It's designed to work around mozilla/rust#4252 +/// which prevents method calls from the destructor +pub trait FiniteWriter: Writer { + /// mark the end of the stream + fn write_terminator(&mut self) -> io::IoResult<()> { + self.flush() + } +} + +impl Writer for ~FiniteWriter { + fn write(&mut self, buf: &[u8]) -> io::IoResult<()> { + self.write(buf) + } +} + +impl FiniteWriter for ~FiniteWriter { + fn write_terminator(&mut self) -> io::IoResult<()> { + self.write_terminator() + } +} + +impl FiniteWriter for io::MemWriter {} +impl FiniteWriter for io::stdio::StdWriter {} +impl FiniteWriter for io::fs::File {} +impl FiniteWriter for io::BufferedWriter {}