1 /* LzFind.c -- Match finder for LZ algorithms
2 2008-10-04 : Igor Pavlov : Public domain */
/* Value stored in hash slots and tree/chain links that do not refer to any position
   (written by MatchFinder_Init and by the search routines when a walk terminates). */
9 #define kEmptyHashValue 0
/* MatchFinder_CheckLimits normalizes the position counters when p->pos reaches this value. */
10 #define kMaxValForNormalize ((UInt32)0xFFFFFFFF)
11 #define kNormalizeStepMin (1 << 10) /* it must be power of 2 */
/* Mask that clears the bits below kNormalizeStepMin; MatchFinder_GetSubValue applies it
   so the subtracted amount is a multiple of kNormalizeStepMin. */
12 #define kNormalizeMask (~(kNormalizeStepMin - 1))
/* Largest historySize MatchFinder_Create accepts: 3 << 30 (3 GiB). */
13 #define kMaxHistorySize ((UInt32)3 << 30)
/* NOTE(review): no use of kStartMaxLen is visible in this listing. */
15 #define kStartMaxLen 3
/* Hands the window buffer p->bufferBase back to the allocator `alloc`.
   NOTE(review): lines of this body are elided in the listing (numbers jump 17 -> 21);
   check the full file for any guard around the Free call or pointer reset after it. */
17 static void LzInWindow_Free(CMatchFinder *p, ISzAlloc *alloc)
21 alloc->Free(alloc, p->bufferBase);
26 /* keepSizeBefore + keepSizeAfter + keepSizeReserv must be < 4G */
/* Sizes the input window and (re)allocates its buffer when needed.
   p              - match finder; keepSizeBefore/keepSizeAfter must already be set
   keepSizeReserv - extra bytes added on top of keepSizeBefore + keepSizeAfter
   alloc          - allocator used for Free/Alloc
   Returns non-zero when p->bufferBase is non-NULL on exit.
   NOTE(review): the condition guarding the first `p->blockSize = blockSize` (line 33)
   is elided in this listing. */
28 static int LzInWindow_Create(CMatchFinder *p, UInt32 keepSizeReserv, ISzAlloc *alloc)
/* 32-bit sum: the caller has to keep the three terms below 4G in total */
30 UInt32 blockSize = p->keepSizeBefore + p->keepSizeAfter + keepSizeReserv;
33 p->blockSize = blockSize;
/* keep the existing buffer when one exists and its recorded size already matches */
36 if (p->bufferBase == 0 || p->blockSize != blockSize)
38 LzInWindow_Free(p, alloc);
39 p->blockSize = blockSize;
40 p->bufferBase = (Byte *)alloc->Alloc(alloc, (size_t)blockSize);
42 return (p->bufferBase != 0);
/* Returns p->buffer (the current-position pointer, going by the function name). */
45 Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; }
/* Returns the byte at signed offset `index` from p->buffer. index is an Int32, so it
   may be negative; no range check is performed here. */
46 Byte MatchFinder_GetIndexByte(CMatchFinder *p, Int32 index) { return p->buffer[index]; }
/* Returns p->streamPos - p->pos, i.e. how far the stream position is ahead of the
   current position. */
48 UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return p->streamPos - p->pos; }
/* Lowers the position counters of p by subValue (called from MatchFinder_Normalize).
   posLimit and streamPos are adjusted in the visible lines.
   NOTE(review): line 53 is elided here; it may adjust a further counter. */
50 void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue)
52 p->posLimit -= subValue;
54 p->streamPos -= subValue;
/* Reads more input from p->stream into the unused tail of the window buffer and
   advances p->streamPos by the amount obtained.
   NOTE(review): the loop construct, the early exits and the conditions around
   several statements are elided in this listing; comments below cover only the
   visible lines. */
57 static void MatchFinder_ReadBlock(CMatchFinder *p)
/* tests end-of-stream and a previously stored error (consequent elided) */
59 if (p->streamEndWasReached || p->result != SZ_OK)
/* dest: first byte after the data already buffered beyond the current position */
63 Byte *dest = p->buffer + (p->streamPos - p->pos);
/* size: room left between dest and the end of the allocation */
64 size_t size = (p->bufferBase + p->blockSize - dest);
/* size is passed by address; its value afterwards is what gets added to streamPos */
67 p->result = p->stream->Read(p->stream, dest, &size);
68 if (p->result != SZ_OK)
72 p->streamEndWasReached = 1;
75 p->streamPos += (UInt32)size;
/* more than keepSizeAfter bytes are now buffered ahead of pos (consequent elided) */
76 if (p->streamPos - p->pos > p->keepSizeAfter)
/* Slides the live part of the window to the start of the allocation.
   Copies the keepSizeBefore bytes that precede p->buffer together with the
   (streamPos - pos) bytes that follow it to p->bufferBase, then re-points p->buffer
   at bufferBase + keepSizeBefore. memmove is used, so overlapping source and
   destination ranges are handled. */
81 void MatchFinder_MoveBlock(CMatchFinder *p)
83 memmove(p->bufferBase,
84 p->buffer - p->keepSizeBefore,
85 (size_t)(p->streamPos - p->pos + p->keepSizeBefore));
86 p->buffer = p->bufferBase + p->keepSizeBefore;
/* Returns non-zero when the distance from p->buffer to the end of the allocation
   (bufferBase + blockSize) is at most keepSizeAfter. */
89 int MatchFinder_NeedMove(CMatchFinder *p)
91 /* if (p->streamEndWasReached) return 0; */
92 return ((size_t)(p->bufferBase + p->blockSize - p->buffer) <= p->keepSizeAfter);
/* Calls MatchFinder_ReadBlock when the bytes buffered ahead of the current position
   (streamPos - pos) do not exceed keepSizeAfter.
   NOTE(review): the consequent of the streamEndWasReached test (line 98) is elided;
   presumably an early return - confirm. */
95 void MatchFinder_ReadIfRequired(CMatchFinder *p)
97 if (p->streamEndWasReached)
99 if (p->keepSizeAfter >= p->streamPos - p->pos)
100 MatchFinder_ReadBlock(p);
/* Moves the window contents down when MatchFinder_NeedMove reports it is necessary,
   then reads another block from the stream. */
103 static void MatchFinder_CheckAndMoveAndRead(CMatchFinder *p)
105 if (MatchFinder_NeedMove(p))
106 MatchFinder_MoveBlock(p);
107 MatchFinder_ReadBlock(p);
/* Called by MatchFinder_Construct to set default settings on p.
   NOTE(review): the actual assignments are elided in this listing; only a
   commented-out skipModeBits line is visible. */
110 static void MatchFinder_SetDefaultSettings(CMatchFinder *p)
115 /* p->skipModeBits = 0; */
/* Polynomial used by the table-building loop in MatchFinder_Construct
   (0xEDB88320 is the bit-reversed form of the standard CRC-32 polynomial). */
120 #define kCrcPoly 0xEDB88320
/* Puts p into its initial state: applies the default settings and runs a
   256 x 8 shift/xor loop with kCrcPoly (one table entry per byte value).
   NOTE(review): the statements that seed r and store the result are elided, as are
   any field resets before the SetDefaultSettings call. */
122 void MatchFinder_Construct(CMatchFinder *p)
128 MatchFinder_SetDefaultSettings(p);
130 for (i = 0; i < 256; i++)
134 for (j = 0; j < 8; j++)
/* branch-free step: the mask is all ones when the low bit of r is set, else zero */
135 r = (r >> 1) ^ (kCrcPoly & ~((r & 1) - 1));
/* Returns the p->hash array to the allocator `alloc`.
   NOTE(review): line 143 is elided; it may reset the pointer - confirm. */
140 static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAlloc *alloc)
142 alloc->Free(alloc, p->hash);
/* Releases the memory owned by p through `alloc`: first the hash array
   (MatchFinder_FreeThisClassMemory), then the input window (LzInWindow_Free). */
146 void MatchFinder_Free(CMatchFinder *p, ISzAlloc *alloc)
148 MatchFinder_FreeThisClassMemory(p, alloc);
149 LzInWindow_Free(p, alloc);
/* Allocates an array of `num` CLzRef entries through `alloc` and returns it
   (whatever alloc->Alloc returns, including on failure). */
152 static CLzRef* AllocRefs(UInt32 num, ISzAlloc *alloc)
154 size_t sizeInBytes = (size_t)num * sizeof(CLzRef);
/* divide back to detect size_t overflow of the multiplication
   (the statement taken on overflow, line 156, is elided in this listing) */
155 if (sizeInBytes / sizeof(CLzRef) != num)
157 return (CLzRef *)alloc->Alloc(alloc, sizeInBytes);
/* Configures p for the requested dictionary and match parameters and allocates the
   input window and the hash/son arrays.
   historySize         - dictionary size; values above kMaxHistorySize are rejected
   keepAddBufferBefore - extra bytes to keep available before the current position
   matchMaxLen         - longest match length to search for
   keepAddBufferAfter  - extra bytes to keep available after the current position
   NOTE(review): the end of the parameter list, the return statements and several
   hash-size computations are elided in this listing; the comments below describe
   only the visible lines. */
160 int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
161 UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
165 if (historySize > kMaxHistorySize)
167 MatchFinder_Free(p, alloc);
/* reserve: half of the history (a quarter when it exceeds 2 GiB), plus half of
   the keep/match sizes, plus 512 KiB */
170 sizeReserv = historySize >> 1;
171 if (historySize > ((UInt32)2 << 30))
172 sizeReserv = historySize >> 2;
173 sizeReserv += (keepAddBufferBefore + matchMaxLen + keepAddBufferAfter) / 2 + (1 << 19);
175 p->keepSizeBefore = historySize + keepAddBufferBefore + 1;
176 p->keepSizeAfter = matchMaxLen + keepAddBufferAfter;
177 /* we need one additional byte (the +1 in keepSizeBefore), since MoveBlock is used after pos++ and before the dictionary is used */
178 if (LzInWindow_Create(p, sizeReserv, alloc))
/* one cyclic-buffer slot per history byte, plus one */
180 UInt32 newCyclicBufferSize = (historySize /* >> p->skipModeBits */) + 1;
182 p->matchMaxLen = matchMaxLen;
184 p->fixedHashSize = 0;
185 if (p->numHashBytes == 2)
189 hs = historySize - 1;
195 /* hs >>= p->skipModeBits; */
196 hs |= 0xFFFF; /* don't change it! It's required for Deflate */
199 if (p->numHashBytes == 3)
/* the fixed-size tables for the shorter hashes precede the main hash table */
207 if (p->numHashBytes > 2) p->fixedHashSize += kHash2Size;
208 if (p->numHashBytes > 3) p->fixedHashSize += kHash3Size;
209 if (p->numHashBytes > 4) p->fixedHashSize += kHash4Size;
210 hs += p->fixedHashSize;
/* total entry count of the previous configuration, for the reuse test below */
214 UInt32 prevSize = p->hashSizeSum + p->numSons;
216 p->historySize = historySize;
218 p->cyclicBufferSize = newCyclicBufferSize;
/* bt mode keeps two links per position, otherwise one */
219 p->numSons = (p->btMode ? newCyclicBufferSize * 2 : newCyclicBufferSize);
220 newSize = p->hashSizeSum + p->numSons;
/* an existing array with an unchanged total size is tested for here
   (consequent elided; presumably the array is reused - confirm) */
221 if (p->hash != 0 && prevSize == newSize)
223 MatchFinder_FreeThisClassMemory(p, alloc);
224 p->hash = AllocRefs(newSize, alloc);
/* the son array shares the allocation and starts right after the hash tables */
227 p->son = p->hash + p->hashSizeSum;
232 MatchFinder_Free(p, alloc);
/* Recomputes p->lenLimit and p->posLimit for the current position.
   lenLimit is the smaller of the buffered byte count (streamPos - pos) and
   matchMaxLen. posLimit is pos + limit, where limit is derived from the candidates
   below.
   NOTE(review): the statements that fold limit2 into limit are elided here. */
236 static void MatchFinder_SetLimits(CMatchFinder *p)
/* candidate 1: steps left before pos reaches kMaxValForNormalize */
238 UInt32 limit = kMaxValForNormalize - p->pos;
/* candidate 2: steps left before cyclicBufferPos reaches cyclicBufferSize */
239 UInt32 limit2 = p->cyclicBufferSize - p->cyclicBufferPos;
/* candidate 3: buffered bytes, reduced by keepSizeAfter when there are more than that */
242 limit2 = p->streamPos - p->pos;
243 if (limit2 <= p->keepSizeAfter)
249 limit2 -= p->keepSizeAfter;
253 UInt32 lenLimit = p->streamPos - p->pos;
254 if (lenLimit > p->matchMaxLen)
255 lenLimit = p->matchMaxLen;
256 p->lenLimit = lenLimit;
258 p->posLimit = p->pos + limit;
/* Resets p to its starting state before processing a stream: clears the hash
   tables, rewinds the cyclic and window positions, clears the end-of-stream flag,
   then performs a first read and computes the limits.
   NOTE(review): line 269 is elided in this listing. */
261 void MatchFinder_Init(CMatchFinder *p)
/* only the hashSizeSum hash entries are cleared here, not the son array */
264 for (i = 0; i < p->hashSizeSum; i++)
265 p->hash[i] = kEmptyHashValue;
266 p->cyclicBufferPos = 0;
267 p->buffer = p->bufferBase;
/* positions start at cyclicBufferSize rather than 0; presumably so that an empty
   slot (value 0) always yields delta >= cyclicBufferSize in the searches - confirm */
268 p->pos = p->streamPos = p->cyclicBufferSize;
270 p->streamEndWasReached = 0;
271 MatchFinder_ReadBlock(p);
272 MatchFinder_SetLimits(p);
/* Returns the amount to subtract from all positions during normalization:
   (pos - historySize - 1) with the bits below kNormalizeStepMin cleared. */
275 static UInt32 MatchFinder_GetSubValue(CMatchFinder *p)
277 return (p->pos - p->historySize - 1) & kNormalizeMask;
/* Processes `numItems` entries of `items` for normalization by `subValue`.
   NOTE(review): the else-branch and the store back into items[] are elided in this
   listing; only the "becomes empty" case is visible. */
280 void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, UInt32 numItems)
283 for (i = 0; i < numItems; i++)
285 UInt32 value = items[i];
/* entries not greater than subValue are turned into the empty marker */
286 if (value <= subValue)
287 value = kEmptyHashValue;
/* Normalizes p: computes the sub-value, applies it to all hashSizeSum + numSons
   entries starting at p->hash, then lowers the position counters by the same amount. */
294 static void MatchFinder_Normalize(CMatchFinder *p)
296 UInt32 subValue = MatchFinder_GetSubValue(p);
297 MatchFinder_Normalize3(subValue, p->hash, p->hashSizeSum + p->numSons);
298 MatchFinder_ReduceOffsets(p, subValue);
/* Runs when pos has reached posLimit (invoked from the position-advance code on
   line 451). Handles each possible cause in turn, then recomputes the limits. */
301 static void MatchFinder_CheckLimits(CMatchFinder *p)
/* position counter is about to overflow: rebase everything */
303 if (p->pos == kMaxValForNormalize)
304 MatchFinder_Normalize(p);
/* exactly keepSizeAfter bytes remain buffered and the stream has not ended: refill */
305 if (!p->streamEndWasReached && p->keepSizeAfter == p->streamPos - p->pos)
306 MatchFinder_CheckAndMoveAndRead(p);
/* cyclic buffer index ran off the end: wrap to 0 */
307 if (p->cyclicBufferPos == p->cyclicBufferSize)
308 p->cyclicBufferPos = 0;
309 MatchFinder_SetLimits(p);
/* Match search over a chain of earlier positions linked through son[] (one link
   per position).
   lenLimit          - maximum match length to test
   curMatch          - first candidate position
   pos, cur          - current position and pointer to its first byte
   son               - link array indexed by cyclic buffer position
   _cyclicBufferPos/_cyclicBufferSize - current slot and size of the cyclic buffer
   cutValue          - maximum number of candidates to visit
   distances         - output cursor; each reported match writes (len, delta - 1)
   maxLen            - only matches longer than this are compared in full
   NOTE(review): the loop construct, break/return statements and the declaration of
   len are elided in this listing. */
312 static UInt32 * Hc_GetMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
313 UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
314 UInt32 *distances, UInt32 maxLen)
/* link the current position in front of the chain */
316 son[_cyclicBufferPos] = curMatch;
319 UInt32 delta = pos - curMatch;
/* walk budget exhausted, or candidate lies outside the cyclic buffer */
320 if (cutValue-- == 0 || delta >= _cyclicBufferSize)
323 const Byte *pb = cur - delta;
/* fetch the next link now; the slot index wraps around the cyclic buffer */
324 curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)];
/* cheap pre-check: byte at maxLen and first byte must both agree */
325 if (pb[maxLen] == cur[maxLen] && *pb == *cur)
328 while (++len != lenLimit)
329 if (pb[len] != cur[len])
333 *distances++ = maxLen = len;
334 *distances++ = delta - 1;
/* Match search for the bt (two links per position) layout of son[].
   Parameters have the same meaning as in Hc_GetMatchesSpec: candidates start at
   curMatch, at most cutValue of them are visited, and each reported match writes
   (len, delta - 1) through `distances`.
   NOTE(review): the loop construct, returns and the statements that relink
   ptr0/ptr1 after each comparison are elided in this listing. */
343 UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
344 UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
345 UInt32 *distances, UInt32 maxLen)
/* the two link slots belonging to the current position */
347 CLzRef *ptr0 = son + (_cyclicBufferPos << 1) + 1;
348 CLzRef *ptr1 = son + (_cyclicBufferPos << 1);
349 UInt32 len0 = 0, len1 = 0;
352 UInt32 delta = pos - curMatch;
353 if (cutValue-- == 0 || delta >= _cyclicBufferSize)
/* walk ends: both pending link slots are marked empty */
355 *ptr0 = *ptr1 = kEmptyHashValue;
/* link pair of the candidate; its slot index wraps around the cyclic buffer */
359 CLzRef *pair = son + ((_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
360 const Byte *pb = cur - delta;
/* comparison starts at the smaller of len0 and len1 */
361 UInt32 len = (len0 < len1 ? len0 : len1);
362 if (pb[len] == cur[len])
364 if (++len != lenLimit && pb[len] == cur[len])
365 while (++len != lenLimit)
366 if (pb[len] != cur[len])
370 *distances++ = maxLen = len;
371 *distances++ = delta - 1;
/* the first differing byte decides which side the walk continues on (bodies elided) */
380 if (pb[len] < cur[len])
/* Same walk over the two-links-per-position son[] layout as GetMatchesSpec1, but
   without a distances output: no match is recorded in the visible lines.
   NOTE(review): the loop construct, returns and the link updates are elided in
   this listing. */
398 static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
399 UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue)
/* the two link slots belonging to the current position */
401 CLzRef *ptr0 = son + (_cyclicBufferPos << 1) + 1;
402 CLzRef *ptr1 = son + (_cyclicBufferPos << 1);
403 UInt32 len0 = 0, len1 = 0;
406 UInt32 delta = pos - curMatch;
407 if (cutValue-- == 0 || delta >= _cyclicBufferSize)
/* walk ends: both pending link slots are marked empty */
409 *ptr0 = *ptr1 = kEmptyHashValue;
/* link pair of the candidate; its slot index wraps around the cyclic buffer */
413 CLzRef *pair = son + ((_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
414 const Byte *pb = cur - delta;
415 UInt32 len = (len0 < len1 ? len0 : len1);
416 if (pb[len] == cur[len])
418 while (++len != lenLimit)
419 if (pb[len] != cur[len])
/* the first differing byte decides which side the walk continues on (bodies elided) */
430 if (pb[len] < cur[len])
449 ++p->cyclicBufferPos; \
451 if (++p->pos == p->posLimit) MatchFinder_CheckLimits(p); /* tail of the position-advance macro (its #define line is elided in this listing): bump the cyclic slot and pos, and run the limit handling when pos hits posLimit */
/* MOVE_POS followed by `return offset;` - ends the GetMatches functions. */
453 #define MOVE_POS_RET MOVE_POS return offset;
/* Function form of the MOVE_POS macro: advances p by one position. */
455 static void MatchFinder_MovePos(CMatchFinder *p) { MOVE_POS; }
/* Shared prologue of the GetMatches/Skip functions: declares lenLimit, hashValue,
   cur and curMatch, loads p->lenLimit, and when fewer than minLen bytes may be
   matched just advances one position and executes ret_op.
   NOTE(review): the remaining continuation lines of this macro are elided. */
457 #define GET_MATCHES_HEADER2(minLen, ret_op) \
458 UInt32 lenLimit; UInt32 hashValue; const Byte *cur; UInt32 curMatch; \
459 lenLimit = p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \
462 #define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return 0) /* GetMatches flavour: too little data -> return 0 */
463 #define SKIP_HEADER(minLen) GET_MATCHES_HEADER2(minLen, continue) /* Skip flavour: too little data -> continue */
/* Expands to the six arguments every *Spec search routine takes after curMatch:
   pos, cur pointer, son array, cyclic buffer position and size, and cut value. */
465 #define MF_PARAMS(p) p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue
/* Shared epilogue of the Bt* GetMatches functions: runs GetMatchesSpec1 writing at
   distances + offset, converts the returned end pointer into an element count
   stored in offset, then advances one position and returns offset. */
467 #define GET_MATCHES_FOOTER(offset, maxLen) \
468 offset = (UInt32)(GetMatchesSpec1(lenLimit, curMatch, MF_PARAMS(p), \
469 distances + offset, maxLen) - distances); MOVE_POS_RET;
/* Shared epilogue of the Bt* Skip functions: runs SkipMatchesSpec for the current
   position (no matches reported), then advances one position. */
471 #define SKIP_FOOTER \
472 SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); MOVE_POS;
/* GetMatches for the Bt2 configuration (selected by MatchFinder_CreateVTable when
   numHashBytes == 2). Needs at least 2 bytes; looks up the previous position stored
   for the hash, stores the current one, then runs the shared search with a starting
   maxLen of 1. Returns the number of UInt32 values written to `distances`.
   NOTE(review): the hash computation and the initialisation of offset are elided. */
474 static UInt32 Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
477 GET_MATCHES_HEADER(2)
479 curMatch = p->hash[hashValue];
480 p->hash[hashValue] = p->pos;
482 GET_MATCHES_FOOTER(offset, 1)
/* GetMatches for the Bt3Zip configuration. Needs at least 3 bytes; looks up the
   previous position stored for the hash, stores the current one, then runs the
   shared search with a starting maxLen of 2. Returns the number of UInt32 values
   written to `distances`.
   NOTE(review): the hash computation and the initialisation of offset are elided. */
485 UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
488 GET_MATCHES_HEADER(3)
490 curMatch = p->hash[hashValue];
491 p->hash[hashValue] = p->pos;
493 GET_MATCHES_FOOTER(offset, 2)
/* GetMatches for the Bt3 configuration: a 2-byte hash table gives a quick
   candidate, the main table (at offset kFix3HashSize) feeds the full search.
   Returns the number of UInt32 values written to `distances`.
   NOTE(review): hash computation, the initial maxLen/offset values and the
   statements after SkipMatchesSpec are elided in this listing. */
496 static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
498 UInt32 hash2Value, delta2, maxLen, offset;
499 GET_MATCHES_HEADER(3)
/* distance to the last position stored for the 2-byte hash */
503 delta2 = p->pos - p->hash[hash2Value];
504 curMatch = p->hash[kFix3HashSize + hashValue];
/* both tables now point at the current position */
506 p->hash[hash2Value] =
507 p->hash[kFix3HashSize + hashValue] = p->pos;
/* quick candidate is usable if inside the cyclic buffer and its first byte agrees */
512 if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur)
/* extend the quick match byte by byte up to lenLimit */
514 for (; maxLen != lenLimit; maxLen++)
515 if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen])
517 distances[0] = maxLen;
518 distances[1] = delta2 - 1;
/* already at maximum length: the position is still passed to SkipMatchesSpec,
   but no further matches are searched for */
520 if (maxLen == lenLimit)
522 SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p));
526 GET_MATCHES_FOOTER(offset, maxLen)
/* GetMatches for the Bt4 configuration: 2-byte and 3-byte hash tables give quick
   candidates, the main table (at offset kFix4HashSize) feeds the full search.
   Returns the number of UInt32 values written to `distances`.
   NOTE(review): hash computation, initial maxLen/offset values and several
   statements inside the candidate handling are elided in this listing. */
529 static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
531 UInt32 hash2Value, hash3Value, delta2, delta3, maxLen, offset;
532 GET_MATCHES_HEADER(4)
/* distances to the last positions stored for the 2-byte and 3-byte hashes */
536 delta2 = p->pos - p->hash[ hash2Value];
537 delta3 = p->pos - p->hash[kFix3HashSize + hash3Value];
538 curMatch = p->hash[kFix4HashSize + hashValue];
/* all three tables now point at the current position */
540 p->hash[ hash2Value] =
541 p->hash[kFix3HashSize + hash3Value] =
542 p->hash[kFix4HashSize + hashValue] = p->pos;
/* 2-byte candidate: report length 2 at distance delta2 */
546 if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur)
548 distances[0] = maxLen = 2;
549 distances[1] = delta2 - 1;
/* 3-byte candidate, only if it is a different position from the 2-byte one */
552 if (delta2 != delta3 && delta3 < p->cyclicBufferSize && *(cur - delta3) == *cur)
555 distances[offset + 1] = delta3 - 1;
/* extend the best quick match up to lenLimit
   (NOTE(review): delta2 may have been reassigned in elided lines - confirm) */
561 for (; maxLen != lenLimit; maxLen++)
562 if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen])
/* patch the length of the most recently written pair */
564 distances[offset - 2] = maxLen;
565 if (maxLen == lenLimit)
567 SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p));
573 GET_MATCHES_FOOTER(offset, maxLen)
/* GetMatches for the Hc4 configuration: same quick 2-byte/3-byte candidates as
   Bt4_MatchFinder_GetMatches, but the full search goes through Hc_GetMatchesSpec.
   Returns the number of UInt32 values written to `distances` (presumably; the
   return statement is elided).
   NOTE(review): hash computation, initial maxLen/offset values and several
   statements inside the candidate handling are elided in this listing. */
576 static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
578 UInt32 hash2Value, hash3Value, delta2, delta3, maxLen, offset;
579 GET_MATCHES_HEADER(4)
/* distances to the last positions stored for the 2-byte and 3-byte hashes */
583 delta2 = p->pos - p->hash[ hash2Value];
584 delta3 = p->pos - p->hash[kFix3HashSize + hash3Value];
585 curMatch = p->hash[kFix4HashSize + hashValue];
/* all three tables now point at the current position */
587 p->hash[ hash2Value] =
588 p->hash[kFix3HashSize + hash3Value] =
589 p->hash[kFix4HashSize + hashValue] = p->pos;
/* 2-byte candidate: report length 2 at distance delta2 */
593 if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur)
595 distances[0] = maxLen = 2;
596 distances[1] = delta2 - 1;
/* 3-byte candidate, only if it is a different position from the 2-byte one */
599 if (delta2 != delta3 && delta3 < p->cyclicBufferSize && *(cur - delta3) == *cur)
602 distances[offset + 1] = delta3 - 1;
/* extend the best quick match up to lenLimit
   (NOTE(review): delta2 may have been reassigned in elided lines - confirm) */
608 for (; maxLen != lenLimit; maxLen++)
609 if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen])
/* patch the length of the most recently written pair */
611 distances[offset - 2] = maxLen;
/* already at maximum length: just link the position into the chain, no search */
612 if (maxLen == lenLimit)
614 p->son[p->cyclicBufferPos] = curMatch;
620 offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),
621 distances + offset, maxLen) - (distances));
/* GetMatches for the Hc3Zip configuration. Needs at least 3 bytes; looks up the
   previous position stored for the hash, stores the current one, then runs
   Hc_GetMatchesSpec writing from the start of `distances` with a starting maxLen
   of 2. offset receives the number of UInt32 values written.
   NOTE(review): hash computation and the final advance/return are elided. */
625 UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
628 GET_MATCHES_HEADER(3)
630 curMatch = p->hash[hashValue];
631 p->hash[hashValue] = p->pos;
632 offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),
633 distances, 2) - (distances));
/* Skip for the Bt2 configuration. Visible work: read the previous position stored
   for the hash and replace it with the current one.
   NOTE(review): the loop over `num`, the header/footer macros and the hash
   computation are elided in this listing. */
637 static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
643 curMatch = p->hash[hashValue];
644 p->hash[hashValue] = p->pos;
/* Skip for the Bt3Zip configuration. Visible work: read the previous position
   stored for the hash and replace it with the current one.
   NOTE(review): the loop over `num`, the header/footer macros and the hash
   computation are elided in this listing. */
650 void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
656 curMatch = p->hash[hashValue];
657 p->hash[hashValue] = p->pos;
/* Skip for the Bt3 configuration. Visible work: read the previous position from
   the main table (offset kFix3HashSize), then point both the 2-byte table entry
   and the main table entry at the current position.
   NOTE(review): the loop over `num`, the header/footer macros and the hash
   computation are elided in this listing. */
663 static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
670 curMatch = p->hash[kFix3HashSize + hashValue];
671 p->hash[hash2Value] =
672 p->hash[kFix3HashSize + hashValue] = p->pos;
/* Skip for the Bt4 configuration. Visible work: read the previous position from
   the main table (offset kFix4HashSize), then point the 2-byte, 3-byte and main
   table entries at the current position.
   NOTE(review): the loop over `num`, the header/footer macros and the hash
   computation are elided in this listing. */
678 static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
682 UInt32 hash2Value, hash3Value;
685 curMatch = p->hash[kFix4HashSize + hashValue];
686 p->hash[ hash2Value] =
687 p->hash[kFix3HashSize + hash3Value] = p->pos;
688 p->hash[kFix4HashSize + hashValue] = p->pos;
/* Skip for the Hc4 configuration. Visible work: read the previous position from
   the main table, point all three hash entries at the current position, and link
   the current cyclic slot to the previous position (no search is performed).
   NOTE(review): the loop over `num`, the header macro, the hash computation and
   the position advance are elided in this listing. */
694 static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
698 UInt32 hash2Value, hash3Value;
701 curMatch = p->hash[kFix4HashSize + hashValue];
702 p->hash[ hash2Value] =
703 p->hash[kFix3HashSize + hash3Value] =
704 p->hash[kFix4HashSize + hashValue] = p->pos;
705 p->son[p->cyclicBufferPos] = curMatch;
/* Skip for the Hc3Zip configuration. Visible work: read the previous position
   stored for the hash, replace it with the current one, and link the current
   cyclic slot to the previous position (no search is performed).
   NOTE(review): the loop over `num`, the header macro, the hash computation and
   the position advance are elided in this listing. */
711 void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
717 curMatch = p->hash[hashValue];
718 p->hash[hashValue] = p->pos;
719 p->son[p->cyclicBufferPos] = curMatch;
/* Fills `vTable` with the match-finder entry points appropriate for p.
   Init, GetIndexByte, GetNumAvailableBytes and GetPointerToCurrentPos are always
   the generic MatchFinder_* functions; GetMatches/Skip are chosen per
   configuration. The functions are stored through casts to the Mf_*_Func pointer
   types. */
725 void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable)
727 vTable->Init = (Mf_Init_Func)MatchFinder_Init;
728 vTable->GetIndexByte = (Mf_GetIndexByte_Func)MatchFinder_GetIndexByte;
729 vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinder_GetNumAvailableBytes;
730 vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinder_GetPointerToCurrentPos;
/* Hc4 pair; NOTE(review): the selecting condition (line 731) is elided -
   presumably the non-bt mode - confirm */
733 vTable->GetMatches = (Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches;
734 vTable->Skip = (Mf_Skip_Func)Hc4_MatchFinder_Skip;
736 else if (p->numHashBytes == 2)
738 vTable->GetMatches = (Mf_GetMatches_Func)Bt2_MatchFinder_GetMatches;
739 vTable->Skip = (Mf_Skip_Func)Bt2_MatchFinder_Skip;
741 else if (p->numHashBytes == 3)
743 vTable->GetMatches = (Mf_GetMatches_Func)Bt3_MatchFinder_GetMatches;
744 vTable->Skip = (Mf_Skip_Func)Bt3_MatchFinder_Skip;
/* remaining case (the else line is elided): Bt4 pair */
748 vTable->GetMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches;
749 vTable->Skip = (Mf_Skip_Func)Bt4_MatchFinder_Skip;