1
+ use std:: collections:: VecDeque ;
1
2
use std:: hash:: Hasher ;
3
+ use std:: mem:: { self , MaybeUninit } ;
4
+ use std:: { io, ptr, slice} ;
5
+
2
6
use twox_hash:: XxHash64 ;
3
7
4
8
pub struct Decodebuffer {
5
- pub buffer : Vec < u8 > ,
9
+ buffer : VecDeque < u8 > ,
6
10
pub dict_content : Vec < u8 > ,
7
11
8
12
pub window_size : usize ,
9
13
total_output_counter : u64 ,
10
14
pub hash : XxHash64 ,
11
15
}
12
16
13
- impl std :: io:: Read for Decodebuffer {
14
- fn read ( & mut self , target : & mut [ u8 ] ) -> std :: result :: Result < usize , std :: io :: Error > {
17
+ impl io:: Read for Decodebuffer {
18
+ fn read ( & mut self , target : & mut [ u8 ] ) -> io :: Result < usize > {
15
19
let max_amount = self . can_drain_to_window_size ( ) . unwrap_or ( 0 ) ;
20
+ let amount = max_amount. min ( target. len ( ) ) ;
16
21
17
- let amount = if max_amount > target. len ( ) {
18
- target. len ( )
19
- } else {
20
- max_amount
21
- } ;
22
-
23
- if amount == 0 {
24
- return Ok ( 0 ) ;
25
- }
26
-
27
- self . hash . write ( & self . buffer [ 0 ..amount] ) ;
28
- target[ ..amount] . copy_from_slice ( & self . buffer [ ..amount] ) ;
29
- self . buffer . drain ( 0 ..amount) ;
30
-
22
+ let mut written = 0 ;
23
+ self . drain_to ( amount, |buf| {
24
+ target[ written..] [ ..buf. len ( ) ] . copy_from_slice ( buf) ;
25
+ written += buf. len ( ) ;
26
+ Ok ( ( ) )
27
+ } ) ?;
31
28
Ok ( amount)
32
29
}
33
30
}
34
31
35
32
impl Decodebuffer {
36
33
pub fn new ( window_size : usize ) -> Decodebuffer {
37
34
Decodebuffer {
38
- buffer : Vec :: new ( ) ,
35
+ buffer : VecDeque :: new ( ) ,
39
36
dict_content : Vec :: new ( ) ,
40
37
window_size,
41
38
total_output_counter : 0 ,
@@ -61,7 +58,7 @@ impl Decodebuffer {
61
58
}
62
59
63
60
pub fn push ( & mut self , data : & [ u8 ] ) {
64
- self . buffer . extend_from_slice ( data) ;
61
+ self . buffer . extend ( data) ;
65
62
self . total_output_counter += data. len ( ) as u64 ;
66
63
}
67
64
@@ -102,19 +99,53 @@ impl Decodebuffer {
102
99
} else {
103
100
let start_idx = self . buffer . len ( ) - offset;
104
101
102
+ self . buffer . reserve ( match_length) ;
103
+
105
104
if start_idx + match_length > self . buffer . len ( ) {
106
- self . buffer . reserve ( match_length) ;
107
105
//need to copy byte by byte. can be optimized more but for now lets leave it like this
108
106
//TODO batch whats possible
109
107
for x in 0 ..match_length {
110
- self . buffer . push ( self . buffer [ start_idx + x] ) ;
108
+ self . buffer . push_back ( self . buffer [ start_idx + x] ) ;
111
109
}
112
110
} else {
113
- // can just copy parts of the existing buffer,
114
- // which is exactly what Vec::extend_from_within was create for
115
- let end_idx = start_idx + match_length;
116
- self . buffer . extend_from_within ( start_idx..end_idx) ;
111
+ let mut buf = [ MaybeUninit :: < u8 > :: uninit ( ) ; 4096 ] ;
112
+
113
+ // can just copy parts of the existing buffer
114
+
115
+ let mut start_idx = start_idx;
116
+ let mut match_length = match_length;
117
+ while match_length > 0 {
118
+ let filled = {
119
+ let ( slice1, slice2) = self . buffer . as_slices ( ) ;
120
+
121
+ let slice = if slice1. len ( ) > start_idx {
122
+ & slice1[ start_idx..]
123
+ } else {
124
+ & slice2[ start_idx - slice1. len ( ) ..]
125
+ } ;
126
+ let slice = & slice[ ..match_length. min ( slice. len ( ) ) . min ( buf. len ( ) ) ] ;
127
+
128
+ // TODO: replace with MaybeUninit::write_slice once it's stable.
129
+ // SAFETY: we initialize a portion of `buf` and then we return a slice
130
+ // of the initialized portion of `buf`.
131
+ unsafe {
132
+ debug_assert ! ( slice. len( ) <= buf. len( ) ) ;
133
+
134
+ ptr:: copy_nonoverlapping (
135
+ slice. as_ptr ( ) . cast :: < MaybeUninit < u8 > > ( ) ,
136
+ buf. as_mut_ptr ( ) ,
137
+ slice. len ( ) ,
138
+ ) ;
139
+ slice:: from_raw_parts ( buf. as_ptr ( ) . cast :: < u8 > ( ) , slice. len ( ) )
140
+ }
141
+ } ;
142
+
143
+ self . buffer . extend ( filled) ;
144
+ start_idx += filled. len ( ) ;
145
+ match_length -= filled. len ( ) ;
146
+ }
117
147
}
148
+
118
149
self . total_output_counter += match_length as u64 ;
119
150
}
120
151
@@ -142,65 +173,107 @@ impl Decodebuffer {
142
173
match self . can_drain_to_window_size ( ) {
143
174
None => None ,
144
175
Some ( can_drain) => {
145
- self . hash . write ( & self . buffer [ 0 ..can_drain] ) ;
146
- Some ( self . buffer . drain ( 0 ..can_drain) . collect ( ) )
176
+ let mut vec = Vec :: with_capacity ( can_drain) ;
177
+ self . drain_to ( can_drain, |buf| {
178
+ vec. extend_from_slice ( buf) ;
179
+ Ok ( ( ) )
180
+ } )
181
+ . ok ( ) ?;
182
+ Some ( vec)
147
183
}
148
184
}
149
185
}
150
186
151
- pub fn drain_to_window_size_writer (
152
- & mut self ,
153
- sink : & mut dyn std:: io:: Write ,
154
- ) -> Result < usize , std:: io:: Error > {
187
+ pub fn drain_to_window_size_writer ( & mut self , sink : & mut dyn io:: Write ) -> io:: Result < usize > {
155
188
match self . can_drain_to_window_size ( ) {
156
189
None => Ok ( 0 ) ,
157
190
Some ( can_drain) => {
158
- self . hash . write ( & self . buffer [ 0 ..can_drain] ) ;
159
- let mut buf = [ 0u8 ; 1 ] ; //TODO batch to reasonable size
160
- for x in self . buffer . drain ( 0 ..can_drain) {
161
- buf[ 0 ] = x;
162
- sink. write_all ( & buf[ ..] ) ?;
163
- }
191
+ self . drain_to ( can_drain, |buf| {
192
+ sink. write_all ( buf) ?;
193
+ Ok ( ( ) )
194
+ } ) ?;
164
195
Ok ( can_drain)
165
196
}
166
197
}
167
198
}
168
199
169
200
//drain the buffer completely
170
201
pub fn drain ( & mut self ) -> Vec < u8 > {
171
- self . hash . write ( & self . buffer ) ;
202
+ let new_buffer = VecDeque :: with_capacity ( self . buffer . capacity ( ) ) ;
172
203
173
- let new_buffer = Vec :: with_capacity ( self . buffer . capacity ( ) ) ;
174
- std :: mem :: replace ( & mut self . buffer , new_buffer )
175
- }
204
+ let ( slice1 , slice2 ) = self . buffer . as_slices ( ) ;
205
+ self . hash . write ( slice1 ) ;
206
+ self . hash . write ( slice2 ) ;
176
207
177
- pub fn drain_to_writer (
178
- & mut self ,
179
- sink : & mut dyn std:: io:: Write ,
180
- ) -> Result < usize , std:: io:: Error > {
181
- self . hash . write ( & self . buffer ) ;
182
- sink. write_all ( & self . buffer ) ?;
208
+ mem:: replace ( & mut self . buffer , new_buffer) . into ( )
209
+ }
183
210
211
+ pub fn drain_to_writer ( & mut self , sink : & mut dyn io:: Write ) -> io:: Result < usize > {
184
212
let len = self . buffer . len ( ) ;
185
- self . buffer . clear ( ) ;
213
+ self . drain_to ( len, |buf| {
214
+ sink. write_all ( buf) ?;
215
+ Ok ( ( ) )
216
+ } ) ?;
217
+
186
218
Ok ( len)
187
219
}
188
220
189
- pub fn read_all ( & mut self , target : & mut [ u8 ] ) -> Result < usize , std:: io:: Error > {
190
- let amount = if self . buffer . len ( ) > target. len ( ) {
191
- target. len ( )
192
- } else {
193
- self . buffer . len ( )
194
- } ;
221
+ pub fn read_all ( & mut self , target : & mut [ u8 ] ) -> io:: Result < usize > {
222
+ let amount = self . buffer . len ( ) . min ( target. len ( ) ) ;
195
223
224
+ let mut written = 0 ;
225
+ self . drain_to ( amount, |buf| {
226
+ target[ written..] [ ..buf. len ( ) ] . copy_from_slice ( buf) ;
227
+ written += buf. len ( ) ;
228
+ Ok ( ( ) )
229
+ } ) ?;
230
+ Ok ( amount)
231
+ }
232
+
233
+ fn drain_to (
234
+ & mut self ,
235
+ amount : usize ,
236
+ mut f : impl FnMut ( & [ u8 ] ) -> io:: Result < ( ) > ,
237
+ ) -> io:: Result < ( ) > {
196
238
if amount == 0 {
197
- return Ok ( 0 ) ;
239
+ return Ok ( ( ) ) ;
198
240
}
199
241
200
- self . hash . write ( & self . buffer [ 0 ..amount] ) ;
201
- target[ ..amount] . copy_from_slice ( & self . buffer [ ..amount] ) ;
202
- self . buffer . drain ( 0 ..amount) ;
242
+ struct DrainGuard < ' a > {
243
+ buffer : & ' a mut VecDeque < u8 > ,
244
+ amount : usize ,
245
+ }
203
246
204
- Ok ( amount)
247
+ impl < ' a > Drop for DrainGuard < ' a > {
248
+ fn drop ( & mut self ) {
249
+ if self . amount != 0 {
250
+ self . buffer . drain ( ..self . amount ) ;
251
+ }
252
+ }
253
+ }
254
+
255
+ let mut drain_guard = DrainGuard {
256
+ buffer : & mut self . buffer ,
257
+ amount : 0 ,
258
+ } ;
259
+
260
+ let ( slice1, slice2) = drain_guard. buffer . as_slices ( ) ;
261
+ let n1 = slice1. len ( ) . min ( amount) ;
262
+ let n2 = slice2. len ( ) . min ( amount - n1) ;
263
+
264
+ f ( & slice1[ ..n1] ) ?;
265
+ self . hash . write ( & slice1[ ..n1] ) ;
266
+ drain_guard. amount += n1;
267
+
268
+ if n2 != 0 {
269
+ f ( & slice2[ ..n2] ) ?;
270
+ self . hash . write ( & slice2[ ..n2] ) ;
271
+ drain_guard. amount += n2;
272
+ }
273
+
274
+ // Make sure we don't accidentally drop `DrainGuard` earlier.
275
+ drop ( drain_guard) ;
276
+
277
+ Ok ( ( ) )
205
278
}
206
279
}
0 commit comments