Skip to content

FiniteWriter trait + many important fixes #11

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 31 additions & 19 deletions bwt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ This is an original (mostly trivial) implementation.
*/

use std::{io, iter, num, vec};
use shared::FiniteWriter;

pub static total_symbols: uint = 0x100;

Expand Down Expand Up @@ -167,7 +168,6 @@ pub fn encode_brute(input: &[u8], suf: &mut [Suffix], fn_out: |u8|) -> Suffix {
}
}

assert!( origin.is_some() );
origin.unwrap()
}

Expand Down Expand Up @@ -335,7 +335,7 @@ impl<R: Reader> Reader for Decoder<R> {
self.header = true;
}
let mut amt = dst.len();
let len = amt;
let dst_len = amt;

while amt > 0 {
if self.output.len() == self.start {
Expand All @@ -344,19 +344,19 @@ impl<R: Reader> Reader for Decoder<R> {
break
}
}
let n = num::min( amt, self.output.len() - self.start );
let n = num::min(amt, self.output.len() - self.start);
vec::bytes::copy_memory(
dst.mut_slice_from(len - amt),
self.output.slice_from(self.start)
dst.mut_slice_from(dst_len - amt),
self.output.slice(self.start, self.start + n)
);
self.start += n;
amt -= n;
}

if len == amt {
if dst_len == amt {
Err(io::standard_error(io::EndOfFile))
} else {
Ok(len - amt)
Ok(dst_len - amt)
}
}
}
Expand Down Expand Up @@ -403,12 +403,13 @@ impl<W: Writer> Encoder<W> {
Ok(())
}

/// This function is used to flag that this session of compression is done
/// with. The stream is finished up (final bytes are written), and then the
/// wrapped writer is returned.
pub fn finish(mut self) -> (W, io::IoResult<()>) {
let result = self.flush();
(self.w, result)
/// End the current block
fn finish_block(&mut self) -> io::IoResult<()> {
if self.buf.len() > 0 {
self.encode_block()
} else {
Ok(())
}
}
}

Expand All @@ -432,12 +433,23 @@ impl<W: Writer> Writer for Encoder<W> {
}

fn flush(&mut self) -> io::IoResult<()> {
let ret = if self.buf.len() > 0 {
self.encode_block()
} else {
Ok(())
};
ret.and(self.w.flush())
self.finish_block().and(self.w.flush())
}
}

impl<W: FiniteWriter> FiniteWriter for Encoder<W> {
fn write_terminator(&mut self) -> io::IoResult<()> {
self.finish_block().and(self.w.write_terminator())
}
}

impl<W: FiniteWriter> Encoder<W> {
/// This function is used to flag that this session of compression is done
/// with. The stream is finished up (final bytes are written), and then the
/// wrapped writer is returned.
pub fn finish(mut self) -> (W, io::IoResult<()>) {
let result = (&mut self as &mut FiniteWriter).write_terminator();
(self.w, result)
}
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As with below, it would be nice to have tests for these changes.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will do.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, I don't think more tests needed for this one, because tests call finish(), and that calls the new write_terminator anyway.


Expand Down
81 changes: 56 additions & 25 deletions entropy/ari.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ This is an original implementation.
*/

use std::{io, vec};
use shared::FiniteWriter;

pub type Symbol = u8;
static symbol_bits: uint = 8;
Expand Down Expand Up @@ -184,16 +185,6 @@ impl<W: Writer> Encoder<W> {
self.stream.write(bytes)
}

/// Finish decoding by writing the code tail word
pub fn finish(mut self) -> (W, io::IoResult<()>) {
assert!(border_bits == 32);
self.bytes_written += 4;
let code = self.range.get_code_tail();
let result = self.stream.write_be_u32(code);
let result = result.and(self.stream.flush());
(self.stream, result)
}

/// Flush the output stream
pub fn flush(&mut self) -> io::IoResult<()> {
self.stream.flush()
Expand All @@ -205,6 +196,24 @@ impl<W: Writer> Encoder<W> {
}
}

impl<W: FiniteWriter> Encoder<W> {
/// Finish decoding by writing the code tail word
pub fn finish(mut self) -> (W, io::IoResult<()>) {
let ret = self.write_terminator();
(self.stream, ret)
}

/// Write code tail bits
pub fn write_terminator(&mut self) -> io::IoResult<()> {
assert!(border_bits == 32);
self.bytes_written += 4;
let code = self.range.get_code_tail();
let result = self.stream.write_be_u32(code);
result.and(self.stream.write_terminator())
}
}


/// An arithmetic decoder helper
pub struct Decoder<R> {
priv stream: R,
Expand Down Expand Up @@ -435,6 +444,7 @@ impl Model for FrequencyTable {


/// A basic byte-encoding arithmetic
/// uses a special terminator code to end the stream
pub struct ByteEncoder<W> {
/// A lower level encoder
encoder: Encoder<W>,
Expand All @@ -448,7 +458,7 @@ impl<W: Writer> ByteEncoder<W> {
let freq_max = range_default_threshold >> 2;
ByteEncoder {
encoder: Encoder::new(w),
freq: FrequencyTable::new_flat(symbol_total, freq_max),
freq: FrequencyTable::new_flat(symbol_total+1, freq_max),
}
}
}
Expand All @@ -468,13 +478,32 @@ impl<W: Writer> Writer for ByteEncoder<W> {
}
}

impl<W: FiniteWriter> FiniteWriter for ByteEncoder<W> {
fn write_terminator(&mut self) -> io::IoResult<()> {
self.encoder.encode(symbol_total, &self.freq).
and(self.encoder.write_terminator())
}
}

impl<W: FiniteWriter> ByteEncoder<W> {
/// Finish encoding and return the underlying stream
pub fn finish(mut self) -> (W, io::IoResult<()>) {
let ret = self.write_terminator();
let (w, ret_encoder) = self.encoder.finish();
(w, ret.and(ret_encoder))
}
}


/// A basic byte-decoding arithmetic
/// expects a special terminator code for the end of the stream
pub struct ByteDecoder<R> {
/// A lower level decoder
decoder: Decoder<R>,
/// A basic frequency table
freq: FrequencyTable,
/// Remember if we found the terminator code
priv is_eof: bool,
}

impl<R: Reader> ByteDecoder<R> {
Expand All @@ -484,7 +513,8 @@ impl<R: Reader> ByteDecoder<R> {
let freq_max = range_default_threshold >> 2;
ByteDecoder {
decoder: Decoder::new(r),
freq: FrequencyTable::new_flat(symbol_total, freq_max),
freq: FrequencyTable::new_flat(symbol_total+1, freq_max),
is_eof: false,
}
}
}
Expand All @@ -494,20 +524,21 @@ impl<R: Reader> Reader for ByteDecoder<R> {
if self.decoder.tell() == 0 {
if_ok!(self.decoder.start());
}
let mut ret = Ok(dst.len());
if self.is_eof {
return Err(io::standard_error(io::EndOfFile))
}
let mut amount = 0u;
for out_byte in dst.mut_iter() {
match self.decoder.decode(&self.freq) {
Ok(value) => {
self.freq.update(value, 10, 1);
*out_byte = value as u8;
},
Err(e) => {
ret = Err(e);
break
}
let value = if_ok!(self.decoder.decode(&self.freq));
if value == symbol_total {
self.is_eof = true;
break
}
self.freq.update(value, 10, 1);
*out_byte = value as u8;
amount += 1;
}
ret
Ok(amount)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add tests for your changes in this module?

}
}

Expand All @@ -523,12 +554,12 @@ mod test {
info!("Roundtrip Ari of size {}", bytes.len());
let mut e = ByteEncoder::new(MemWriter::new());
e.write(bytes).unwrap();
let (e, r) = e.encoder.finish();
let (e, r) = e.finish();
r.unwrap();
let encoded = e.unwrap();
debug!("Roundtrip input {:?} encoded {:?}", bytes, encoded);
let mut d = ByteDecoder::new(BufReader::new(encoded));
let decoded = d.read_bytes(bytes.len()).unwrap();
let decoded = d.read_to_end().unwrap();
assert_eq!(bytes.as_slice(), decoded.as_slice());
}

Expand Down
3 changes: 3 additions & 0 deletions lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,10 @@

extern mod extra;

pub use self::shared::FiniteWriter;

mod adler32;
mod shared;

pub mod bwt;
pub mod dc;
Expand Down
49 changes: 36 additions & 13 deletions lz4.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ can be found at https://github.com/bkaradzic/go-lz4.
use std::io;
use std::num;
use std::vec;
use shared::FiniteWriter;

static MAGIC: u32 = 0x184d2204;

Expand Down Expand Up @@ -243,6 +244,7 @@ impl<R: Reader> Decoder<R> {
// raw block to read
n if n & 0x80000000 != 0 => {
let amt = (n & 0x7fffffff) as uint;
debug!("decoding a raw block of size {}", amt)
self.output.truncate(0);
self.output.reserve(amt);
if_ok!(self.r.push_bytes(&mut self.output, amt));
Expand All @@ -252,12 +254,14 @@ impl<R: Reader> Decoder<R> {

// actual block to decompress
n => {
debug!("decoding a compressed block of size {}", n);
let n = n as uint;
self.temp.truncate(0);
self.temp.reserve(n);
if_ok!(self.r.push_bytes(&mut self.temp, n));

let target = num::min(self.max_block_size, 4 * n / 3);
debug!("target size: {}", target);
self.output.truncate(0);
self.output.reserve(target);
let mut decoder = BlockDecoder {
Expand All @@ -269,13 +273,16 @@ impl<R: Reader> Decoder<R> {
};
self.start = 0;
self.end = decoder.decode();
debug!("end of block: {}", self.end);
}
}

if self.blk_checksum {
let cksum = if_ok!(self.r.read_le_u32());
debug!("ignoring block checksum {:?}", cksum);
}

debug!("block is done");
return Ok(true);
}

Expand Down Expand Up @@ -357,15 +364,13 @@ impl<W: Writer> Encoder<W> {
false
}

/// This function is used to flag that this session of compression is done
/// with. The stream is finished up (final bytes are written), and then the
/// wrapped writer is returned.
pub fn finish(mut self) -> (W, io::IoResult<()>) {
let result = self.flush();
let result = result.and(self.w.write_le_u32(0));
// XXX: this checksum is wrong
let result = result.and(self.w.write_le_u32(0));
(self.w, result)
/// End the current block
fn finish_block(&mut self) -> io::IoResult<()> {
if self.buf.len() > 0 {
self.encode_block()
} else {
Ok(())
}
}
}

Expand Down Expand Up @@ -396,13 +401,31 @@ impl<W: Writer> Writer for Encoder<W> {
}

fn flush(&mut self) -> io::IoResult<()> {
if self.buf.len() > 0 {
if_ok!(self.encode_block());
}
self.w.flush()
self.finish_block().and(self.w.flush())
}
}

impl<W: FiniteWriter> FiniteWriter for Encoder<W> {
fn write_terminator(&mut self) -> io::IoResult<()> {
let result = self.finish_block();
let result = result.and(self.w.write_le_u32(0));
// XXX: this checksum is wrong
let result = result.and(self.w.write_le_u32(0));
result.and(self.w.write_terminator())
}
}

impl<W: FiniteWriter> Encoder<W> {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This means that you can't unwrap any stream other than a FiniteWriter stream, that seems wrong?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I understand your concern, and open to ideas on how to make it better. If we implement it for just W: Writer, then we will not be able to call self.w.write_terminator(), which is essential...

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What exactly is the bug that prevents you from calling this in Drop?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

rust-lang/rust#4252
I should probably have mentioned it in FiniteWriter comments.

/// This function is used to flag that this session of compression is done
/// with. The stream is finished up (final bytes are written), and then the
/// wrapped writer is returned.
pub fn finish(mut self) -> (W, io::IoResult<()>) {
let result = (&mut self as &mut FiniteWriter).write_terminator();
(self.w, result)
}
}


#[cfg(test)]
mod test {
use extra::test;
Expand Down
Loading