ClickHouse/contrib/libzlib-ng/deflate_medium.c

323 lines
10 KiB
C
Raw Normal View History

/* deflate_medium.c -- The deflate_medium deflate strategy
*
* Copyright (C) 2013 Intel Corporation. All rights reserved.
* Authors:
* Arjan van de Ven <arjan@linux.intel.com>
*
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#ifdef MEDIUM_STRATEGY
#include "deflate.h"
#include "deflate_p.h"
#include "match.h"
struct match {
unsigned int match_start;
unsigned int match_length;
unsigned int strstart;
unsigned int orgstart;
};
#define MAX_DIST2 ((1 << MAX_WBITS) - MIN_LOOKAHEAD)
static int tr_tally_dist(deflate_state *s, int distance, int length) {
return _tr_tally(s, distance, length);
}
static int tr_tally_lit(deflate_state *s, int c) {
return _tr_tally(s, 0, c);
}
static int emit_match(deflate_state *s, struct match match) {
int flush = 0;
/* matches that are not long enough we need to emit as litterals */
if (match.match_length < MIN_MATCH) {
while (match.match_length) {
flush += tr_tally_lit(s, s->window[match.strstart]);
s->lookahead--;
match.strstart++;
match.match_length--;
}
return flush;
}
check_match(s, match.strstart, match.match_start, match.match_length);
flush += tr_tally_dist(s, match.strstart - match.match_start, match.match_length - MIN_MATCH);
s->lookahead -= match.match_length;
return flush;
}
static void insert_match(deflate_state *s, struct match match) {
if (unlikely(s->lookahead <= match.match_length + MIN_MATCH))
return;
/* matches that are not long enough we need to emit as litterals */
if (match.match_length < MIN_MATCH) {
#ifdef NOT_TWEAK_COMPILER
while (match.match_length) {
match.strstart++;
match.match_length--;
if (match.match_length) {
if (match.strstart >= match.orgstart) {
insert_string(s, match.strstart);
}
}
}
#else
if (likely(match.match_length == 1)) {
match.strstart++;
match.match_length = 0;
}else{
match.strstart++;
match.match_length--;
if (match.strstart >= match.orgstart) {
bulk_insert_str(s, match.strstart, match.match_length);
}
match.strstart += match.match_length;
match.match_length = 0;
}
#endif
return;
}
/* Insert new strings in the hash table only if the match length
* is not too large. This saves time but degrades compression.
*/
if (match.match_length <= 16* s->max_insert_length && s->lookahead >= MIN_MATCH) {
match.match_length--; /* string at strstart already in table */
match.strstart++;
#ifdef NOT_TWEAK_COMPILER
do {
if (likely(match.strstart >= match.orgstart)) {
insert_string(s, match.strstart);
}
match.strstart++;
/* strstart never exceeds WSIZE-MAX_MATCH, so there are
* always MIN_MATCH bytes ahead.
*/
} while (--match.match_length != 0);
#else
if (likely(match.strstart >= match.orgstart)) {
bulk_insert_str(s, match.strstart, match.match_length);
}
match.strstart += match.match_length;
match.match_length = 0;
#endif
} else {
match.strstart += match.match_length;
match.match_length = 0;
s->ins_h = s->window[match.strstart];
if (match.strstart >= 1)
UPDATE_HASH(s, s->ins_h, match.strstart+2-MIN_MATCH);
#if MIN_MATCH != 3
#warning Call UPDATE_HASH() MIN_MATCH-3 more times
#endif
/* If lookahead < MIN_MATCH, ins_h is garbage, but it does not
* matter since it will be recomputed at next deflate call.
*/
}
}
static void fizzle_matches(deflate_state *s, struct match *current, struct match *next) {
IPos limit;
unsigned char *match, *orig;
int changed = 0;
struct match c, n;
/* step zero: sanity checks */
if (current->match_length <= 1)
return;
if (unlikely(current->match_length > 1 + next->match_start))
return;
if (unlikely(current->match_length > 1 + next->strstart))
return;
match = s->window - current->match_length + 1 + next->match_start;
orig = s->window - current->match_length + 1 + next->strstart;
/* quick exit check.. if this fails then don't bother with anything else */
if (likely(*match != *orig))
return;
/* check the overlap case and just give up. We can do better in theory,
* but unlikely to be worth it
*/
if (next->match_start + next->match_length >= current->strstart)
return;
c = *current;
n = *next;
/* step one: try to move the "next" match to the left as much as possible */
limit = next->strstart > MAX_DIST2 ? next->strstart - MAX_DIST2 : 0;
match = s->window + n.match_start - 1;
orig = s->window + n.strstart - 1;
while (*match == *orig) {
if (c.match_length < 1)
break;
if (n.strstart <= limit)
break;
if (n.match_length >= 256)
break;
if (n.match_start <= 0)
break;
n.strstart--;
n.match_start--;
n.match_length++;
c.match_length--;
match--;
orig--;
changed++;
}
if (!changed)
return;
if (c.match_length <= 1 && n.match_length != 2) {
n.orgstart++;
*current = c;
*next = n;
} else {
return;
}
}
block_state deflate_medium(deflate_state *s, int flush) {
struct match current_match, next_match;
memset(&current_match, 0, sizeof(struct match));
memset(&next_match, 0, sizeof(struct match));
for (;;) {
IPos hash_head = 0; /* head of the hash chain */
int bflush; /* set if current block must be flushed */
/* Make sure that we always have enough lookahead, except
* at the end of the input file. We need MAX_MATCH bytes
* for the next match, plus MIN_MATCH bytes to insert the
* string following the next current_match.
*/
if (s->lookahead < MIN_LOOKAHEAD) {
fill_window(s);
if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) {
return need_more;
}
if (s->lookahead == 0)
break; /* flush the current block */
next_match.match_length = 0;
}
s->prev_length = 2;
/* Insert the string window[strstart .. strstart+2] in the
* dictionary, and set hash_head to the head of the hash chain:
*/
/* If we already have a future match from a previous round, just use that */
if (next_match.match_length > 0) {
current_match = next_match;
next_match.match_length = 0;
} else {
hash_head = 0;
if (s->lookahead >= MIN_MATCH) {
hash_head = insert_string(s, s->strstart);
}
/* set up the initial match to be a 1 byte literal */
current_match.match_start = 0;
current_match.match_length = 1;
current_match.strstart = s->strstart;
current_match.orgstart = current_match.strstart;
/* Find the longest match, discarding those <= prev_length.
* At this point we have always match_length < MIN_MATCH
*/
if (hash_head != 0 && s->strstart - hash_head <= MAX_DIST2) {
/* To simplify the code, we prevent matches with the string
* of window index 0 (in particular we have to avoid a match
* of the string with itself at the start of the input file).
*/
current_match.match_length = longest_match(s, hash_head);
current_match.match_start = s->match_start;
if (current_match.match_length < MIN_MATCH)
current_match.match_length = 1;
if (current_match.match_start >= current_match.strstart) {
/* this can happen due to some restarts */
current_match.match_length = 1;
}
}
}
insert_match(s, current_match);
/* now, look ahead one */
if (s->lookahead > MIN_LOOKAHEAD) {
s->strstart = current_match.strstart + current_match.match_length;
hash_head = insert_string(s, s->strstart);
/* set up the initial match to be a 1 byte literal */
next_match.match_start = 0;
next_match.match_length = 1;
next_match.strstart = s->strstart;
next_match.orgstart = next_match.strstart;
/* Find the longest match, discarding those <= prev_length.
* At this point we have always match_length < MIN_MATCH
*/
if (hash_head != 0 && s->strstart - hash_head <= MAX_DIST2) {
/* To simplify the code, we prevent matches with the string
* of window index 0 (in particular we have to avoid a match
* of the string with itself at the start of the input file).
*/
next_match.match_length = longest_match(s, hash_head);
next_match.match_start = s->match_start;
if (next_match.match_start >= next_match.strstart) {
/* this can happen due to some restarts */
next_match.match_length = 1;
}
if (next_match.match_length < MIN_MATCH)
next_match.match_length = 1;
else
fizzle_matches(s, &current_match, &next_match);
}
/* short matches with a very long distance are rarely a good idea encoding wise */
if (next_match.match_length == 3 && (next_match.strstart - next_match.match_start) > 12000)
next_match.match_length = 1;
s->strstart = current_match.strstart;
} else {
next_match.match_length = 0;
}
/* now emit the current match */
bflush = emit_match(s, current_match);
/* move the "cursor" forward */
s->strstart += current_match.match_length;
if (bflush)
FLUSH_BLOCK(s, 0);
}
s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1;
if (flush == Z_FINISH) {
FLUSH_BLOCK(s, 1);
return finish_done;
}
if (s->last_lit)
FLUSH_BLOCK(s, 0);
return block_done;
}
#endif