mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-05 15:21:43 +00:00
161 lines
5.9 KiB
C
161 lines
5.9 KiB
C
|
/* deflate_slow.c -- compress data using the slow strategy of the deflation algorithm
 *
 * Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler
 * For conditions of distribution and use, see copyright notice in zlib.h
 */
|
||
|
|
||
|
#include "deflate.h"
|
||
|
#include "deflate_p.h"
|
||
|
#include "match.h"
|
||
|
|
||
|
/* ===========================================================================
 * Local data
 */

/* Matches of length 3 are discarded if their distance exceeds TOO_FAR.
 * A value predefined by the build (e.g. -DTOO_FAR=...) takes precedence
 * over the default below.
 */
#if !defined(TOO_FAR)
#  define TOO_FAR 4096
#endif
|
||
|
|
||
|
/* ===========================================================================
|
||
|
* Same as deflate_medium, but achieves better compression. We use a lazy
|
||
|
* evaluation for matches: a match is finally adopted only if there is
|
||
|
* no better match at the next window position.
|
||
|
*/
|
||
|
block_state deflate_slow(deflate_state *s, int flush) {
|
||
|
IPos hash_head; /* head of hash chain */
|
||
|
int bflush; /* set if current block must be flushed */
|
||
|
|
||
|
/* Process the input block. */
|
||
|
for (;;) {
|
||
|
/* Make sure that we always have enough lookahead, except
|
||
|
* at the end of the input file. We need MAX_MATCH bytes
|
||
|
* for the next match, plus MIN_MATCH bytes to insert the
|
||
|
* string following the next match.
|
||
|
*/
|
||
|
if (s->lookahead < MIN_LOOKAHEAD) {
|
||
|
fill_window(s);
|
||
|
if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) {
|
||
|
return need_more;
|
||
|
}
|
||
|
if (s->lookahead == 0)
|
||
|
break; /* flush the current block */
|
||
|
}
|
||
|
|
||
|
/* Insert the string window[strstart .. strstart+2] in the
|
||
|
* dictionary, and set hash_head to the head of the hash chain:
|
||
|
*/
|
||
|
hash_head = NIL;
|
||
|
if (s->lookahead >= MIN_MATCH) {
|
||
|
hash_head = insert_string(s, s->strstart);
|
||
|
}
|
||
|
|
||
|
/* Find the longest match, discarding those <= prev_length.
|
||
|
*/
|
||
|
s->prev_length = s->match_length, s->prev_match = s->match_start;
|
||
|
s->match_length = MIN_MATCH-1;
|
||
|
|
||
|
if (hash_head != NIL && s->prev_length < s->max_lazy_match && s->strstart - hash_head <= MAX_DIST(s)) {
|
||
|
/* To simplify the code, we prevent matches with the string
|
||
|
* of window index 0 (in particular we have to avoid a match
|
||
|
* of the string with itself at the start of the input file).
|
||
|
*/
|
||
|
s->match_length = longest_match(s, hash_head);
|
||
|
/* longest_match() sets match_start */
|
||
|
|
||
|
if (s->match_length <= 5 && (s->strategy == Z_FILTERED
|
||
|
#if TOO_FAR <= 32767
|
||
|
|| (s->match_length == MIN_MATCH && s->strstart - s->match_start > TOO_FAR)
|
||
|
#endif
|
||
|
)) {
|
||
|
|
||
|
/* If prev_match is also MIN_MATCH, match_start is garbage
|
||
|
* but we will ignore the current match anyway.
|
||
|
*/
|
||
|
s->match_length = MIN_MATCH-1;
|
||
|
}
|
||
|
}
|
||
|
/* If there was a match at the previous step and the current
|
||
|
* match is not better, output the previous match:
|
||
|
*/
|
||
|
if (s->prev_length >= MIN_MATCH && s->match_length <= s->prev_length) {
|
||
|
uInt max_insert = s->strstart + s->lookahead - MIN_MATCH;
|
||
|
/* Do not insert strings in hash table beyond this. */
|
||
|
|
||
|
check_match(s, s->strstart-1, s->prev_match, s->prev_length);
|
||
|
|
||
|
_tr_tally_dist(s, s->strstart -1 - s->prev_match, s->prev_length - MIN_MATCH, bflush);
|
||
|
|
||
|
/* Insert in hash table all strings up to the end of the match.
|
||
|
* strstart-1 and strstart are already inserted. If there is not
|
||
|
* enough lookahead, the last two strings are not inserted in
|
||
|
* the hash table.
|
||
|
*/
|
||
|
s->lookahead -= s->prev_length-1;
|
||
|
|
||
|
#ifdef NOT_TWEAK_COMPILER
|
||
|
s->prev_length -= 2;
|
||
|
do {
|
||
|
if (++s->strstart <= max_insert) {
|
||
|
insert_string(s, s->strstart);
|
||
|
}
|
||
|
} while (--s->prev_length != 0);
|
||
|
s->match_available = 0;
|
||
|
s->match_length = MIN_MATCH-1;
|
||
|
s->strstart++;
|
||
|
#else
|
||
|
{
|
||
|
uInt mov_fwd = s->prev_length - 2;
|
||
|
uInt insert_cnt = mov_fwd;
|
||
|
if (unlikely(insert_cnt > max_insert - s->strstart))
|
||
|
insert_cnt = max_insert - s->strstart;
|
||
|
|
||
|
bulk_insert_str(s, s->strstart + 1, insert_cnt);
|
||
|
s->prev_length = 0;
|
||
|
s->match_available = 0;
|
||
|
s->match_length = MIN_MATCH-1;
|
||
|
s->strstart += mov_fwd + 1;
|
||
|
}
|
||
|
#endif /*NOT_TWEAK_COMPILER*/
|
||
|
|
||
|
if (bflush) FLUSH_BLOCK(s, 0);
|
||
|
|
||
|
} else if (s->match_available) {
|
||
|
/* If there was no match at the previous position, output a
|
||
|
* single literal. If there was a match but the current match
|
||
|
* is longer, truncate the previous match to a single literal.
|
||
|
*/
|
||
|
Tracevv((stderr, "%c", s->window[s->strstart-1]));
|
||
|
_tr_tally_lit(s, s->window[s->strstart-1], bflush);
|
||
|
if (bflush) {
|
||
|
FLUSH_BLOCK_ONLY(s, 0);
|
||
|
}
|
||
|
s->strstart++;
|
||
|
s->lookahead--;
|
||
|
if (s->strm->avail_out == 0)
|
||
|
return need_more;
|
||
|
} else {
|
||
|
/* There is no previous match to compare with, wait for
|
||
|
* the next step to decide.
|
||
|
*/
|
||
|
s->match_available = 1;
|
||
|
s->strstart++;
|
||
|
s->lookahead--;
|
||
|
}
|
||
|
}
|
||
|
Assert(flush != Z_NO_FLUSH, "no flush?");
|
||
|
if (s->match_available) {
|
||
|
Tracevv((stderr, "%c", s->window[s->strstart-1]));
|
||
|
_tr_tally_lit(s, s->window[s->strstart-1], bflush);
|
||
|
s->match_available = 0;
|
||
|
}
|
||
|
s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1;
|
||
|
if (flush == Z_FINISH) {
|
||
|
FLUSH_BLOCK(s, 1);
|
||
|
return finish_done;
|
||
|
}
|
||
|
if (s->last_lit)
|
||
|
FLUSH_BLOCK(s, 0);
|
||
|
return block_done;
|
||
|
}
|