mirror of https://github.com/octoleo/plantuml.git
175 lines
6.0 KiB
Java
175 lines
6.0 KiB
Java
package net.sourceforge.plantuml.code.deflate;
|
|
|
|
/*
|
|
* Simple DEFLATE decompressor
|
|
* Copyright (c) Project Nayuki
|
|
*
|
|
* https://www.nayuki.io/page/simple-deflate-decompressor
|
|
* https://github.com/nayuki/Simple-DEFLATE-decompressor
|
|
*/
|
|
|
|
import java.io.IOException;
|
|
import java.util.Arrays;
|
|
import java.util.Objects;
|
|
|
|
|
|
/**
|
|
* A canonical Huffman code, where the code values for each symbol is derived
|
|
* from a given sequence of code lengths. This data structure is immutable.
|
|
* This could be transformed into an explicit Huffman code tree.
|
|
* <p>Example:</p>
|
|
* <pre> Code lengths (canonical code):
|
|
* Symbol A: 1
|
|
* Symbol B: 0 (no code)
|
|
* Symbol C: 3
|
|
* Symbol D: 2
|
|
* Symbol E: 3
|
|
*
|
|
* Generated Huffman codes:
|
|
* Symbol A: 0
|
|
* Symbol B: (Absent)
|
|
* Symbol C: 110
|
|
* Symbol D: 10
|
|
* Symbol E: 111
|
|
*
|
|
* Huffman code tree:
|
|
* .
|
|
* / \
|
|
* A .
|
|
* / \
|
|
* D .
|
|
* / \
|
|
* C E</pre>
|
|
*/
|
|
final class CanonicalCode {
|
|
|
|
/*
|
|
* These arrays store the Huffman codes and values necessary for decoding.
|
|
* symbolCodeBits contains Huffman codes, each padded with a 1 bit at the
|
|
* beginning to disambiguate codes of different lengths (e.g. otherwise we
|
|
* can't distinguish 0b01 from 0b0001). Each symbolCodeBits[i] decodes to its
|
|
* corresponding symbolValues[i]. Values in symbolCodeBits are strictly increasing.
|
|
*
|
|
* For the example of codeLengths=[1,0,3,2,3], we would have:
|
|
* i | symbolCodeBits[i] | symbolValues[i]
|
|
* --+-------------------+----------------
|
|
* 0 | 0b1_0 | 0
|
|
* 1 | 0b1_10 | 3
|
|
* 2 | 0b1_110 | 2
|
|
* 3 | 0b1_111 | 4
|
|
*/
|
|
private int[] symbolCodeBits;
|
|
private int[] symbolValues;
|
|
|
|
|
|
|
|
/**
|
|
* Constructs a canonical Huffman code from the specified array of symbol code lengths.
|
|
* Each code length must be non-negative. Code length 0 means no code for the symbol.
|
|
* The collection of code lengths must represent a proper full Huffman code tree.
|
|
* <p>Examples of code lengths that result in correct full Huffman code trees:</p>
|
|
* <ul>
|
|
* <li>[1, 1] (result: A=0, B=1)</li>
|
|
* <li>[2, 2, 1, 0, 0, 0] (result: A=10, B=11, C=0)</li>
|
|
* <li>[3, 3, 3, 3, 3, 3, 3, 3] (result: A=000, B=001, C=010, ..., H=111)</li>
|
|
* </ul>
|
|
* <p>Examples of code lengths that result in under-full Huffman code trees:</p>
|
|
* <ul>
|
|
* <li>[0, 2, 0] (result: B=00, unused=01, unused=1)</li>
|
|
* <li>[0, 1, 0, 2] (result: B=0, D=10, unused=11)</li>
|
|
* </ul>
|
|
* <p>Examples of code lengths that result in over-full Huffman code trees:</p>
|
|
* <ul>
|
|
* <li>[1, 1, 1] (result: A=0, B=1, C=overflow)</li>
|
|
* <li>[1, 1, 2, 2, 3, 3, 3, 3] (result: A=0, B=1, C=overflow, ...)</li>
|
|
* </ul>
|
|
* @param canonicalCodeLengths array of symbol code lengths (not {@code null})
|
|
* @throws NullPointerException if the array is {@code null}
|
|
* @throws IllegalArgumentException if any element is negative, any value exceeds MAX_CODE_LENGTH,
|
|
* or the collection of code lengths would yield an under-full or over-full Huffman code tree
|
|
*/
|
|
public CanonicalCode(int[] codeLengths) {
|
|
// Check argument values
|
|
Objects.requireNonNull(codeLengths);
|
|
for (int x : codeLengths) {
|
|
if (x < 0)
|
|
throw new IllegalArgumentException("Negative code length");
|
|
if (x > MAX_CODE_LENGTH)
|
|
throw new IllegalArgumentException("Maximum code length exceeded");
|
|
}
|
|
|
|
// Allocate code values to symbols. Symbols are processed in the order
|
|
// of shortest code length first, breaking ties by lowest symbol value.
|
|
symbolCodeBits = new int[codeLengths.length];
|
|
symbolValues = new int[codeLengths.length];
|
|
int numSymbolsAllocated = 0;
|
|
int nextCode = 0;
|
|
for (int codeLength = 1; codeLength <= MAX_CODE_LENGTH; codeLength++) {
|
|
nextCode <<= 1;
|
|
int startBit = 1 << codeLength;
|
|
for (int symbol = 0; symbol < codeLengths.length; symbol++) {
|
|
if (codeLengths[symbol] != codeLength)
|
|
continue;
|
|
if (nextCode >= startBit)
|
|
throw new IllegalArgumentException("This canonical code produces an over-full Huffman code tree");
|
|
|
|
symbolCodeBits[numSymbolsAllocated] = startBit | nextCode;
|
|
symbolValues [numSymbolsAllocated] = symbol;
|
|
numSymbolsAllocated++;
|
|
nextCode++;
|
|
}
|
|
}
|
|
if (nextCode != 1 << MAX_CODE_LENGTH)
|
|
throw new IllegalArgumentException("This canonical code produces an under-full Huffman code tree");
|
|
|
|
// Trim unused trailing elements
|
|
symbolCodeBits = Arrays.copyOf(symbolCodeBits, numSymbolsAllocated);
|
|
symbolValues = Arrays.copyOf(symbolValues , numSymbolsAllocated);
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
* Decodes the next symbol from the specified bit input stream based on this
|
|
* canonical code. The returned symbol value is in the range [0, codeLengths.length).
|
|
* @param in the bit input stream to read from
|
|
* @return the next decoded symbol
|
|
* @throws IOException if an I/O exception occurred
|
|
*/
|
|
public int decodeNextSymbol(BitInputStream in) throws IOException {
|
|
Objects.requireNonNull(in);
|
|
int codeBits = 1; // The start bit
|
|
while (true) {
|
|
// Accumulate one bit at a time on the right side until a match is
|
|
// found in the symbolCodeBits array. Because the Huffman code tree is
|
|
// full, this loop must terminate after at most MAX_CODE_LENGTH iterations.
|
|
codeBits = codeBits << 1 | in.readNoEof();
|
|
int index = Arrays.binarySearch(symbolCodeBits, codeBits);
|
|
if (index >= 0)
|
|
return symbolValues[index];
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
* Returns a string representation of this canonical code,
|
|
* useful for debugging only, and the format is subject to change.
|
|
* @return a string representation of this canonical code
|
|
*/
|
|
public String toString() {
|
|
StringBuilder sb = new StringBuilder();
|
|
for (int i = 0; i < symbolCodeBits.length; i++) {
|
|
sb.append(String.format("Code %s: Symbol %d%n",
|
|
Integer.toBinaryString(symbolCodeBits[i]).substring(1),
|
|
symbolValues[i]));
|
|
}
|
|
return sb.toString();
|
|
}
|
|
|
|
|
|
|
|
// The maximum Huffman code length allowed in the DEFLATE standard.
|
|
private static final int MAX_CODE_LENGTH = 15;
|
|
|
|
}
|