Skip to content

Commit

Permalink
add new huffman tree lib which will be used for flate decoder
Browse files Browse the repository at this point in the history
  • Loading branch information
robertmuth committed Nov 13, 2023
1 parent 42838c2 commit 831d9c1
Show file tree
Hide file tree
Showing 3 changed files with 222 additions and 0 deletions.
123 changes: 123 additions & 0 deletions FrontEnd/Lib/huffman.cw
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
@doc """canonical huffman trees

https://datatracker.ietf.org/doc/html/rfc1951 Section 3.2
https://en.wikipedia.org/wiki/Canonical_Huffman_code

"""
(module huffman [] :

(import bitstream)

@doc "Returned by NextSymbol when the bits read do not match any code"
(global BAD_SYMBOL u16 0xffff)

@doc """Read one Huffman-coded symbol from a bitstream

Bits are consumed one at a time. After each bit the value read so far
is compared against the number of codes of the current length.

counts[n] is the number of codes of length n (counts[0] is not used).
symbols lists the symbols ordered by increasing code length.

This function has two failure modes:
* the bitstream may run out of bits
  This is not detected here and must be checked by the caller
* no code shorter than (len counts) bits matches the bits read
  In this case BAD_SYMBOL is returned
"""
(fun NextSymbol [(param bs (ptr @mut bitstream::Stream32))
                 (param counts (slice u16))
                 (param symbols (slice u16))] u16 :
    @doc "bits read so far, relative to the first code of the current length"
    (let @mut rel_code u32 0)
    @doc "index into symbols of the first symbol of the current length"
    (let @mut first_index u32 0)
    (for nbits 1 (len counts) 1 :
        (= rel_code (+ (<< rel_code 1) (bitstream::Stream32GetBits [bs 1])))
        (let codes_at_len u32 (as (at counts nbits) u32))
        (if (< rel_code codes_at_len) :
            (return (at symbols (+ first_index rel_code)))
         :)
        @doc "no match: skip all symbols of this length"
        (+= first_index codes_at_len)
        (-= rel_code codes_at_len))
    (return BAD_SYMBOL))


@doc "Returned by ComputeCountsAndSymbolsFromLengths when the tables cannot be built"
(global BAD_TREE_ENCODING u16 0xffff)
@doc "Upper bound on (len lengths) accepted by ComputeCountsAndSymbolsFromLengths"
(global MAX_SYMBOLS uint 0xff00)


@doc """Check that the per-length symbol counts describe a complete prefix code

counts[n] is the number of codes of length n, counts[0] is not used.

The function walks down the code tree level by level and tracks how
many code slots are still available at the current level:
* if a level needs more codes than there are slots, the counts are
  over-subscribed and false is returned
* every slot not used at a level provides two slots at the next level
* true is only returned if no slot is left over after the last level

The slot count is kept in a u32 because it doubles at every level and
reaches 2^16 with 15 empty levels, which would wrap to zero in a u16
and make a table without any codes look complete.
"""
(fun CountsAreFeasible [(param counts (slice u16))] bool :
    (let @mut available u32 2)
    (for level 1 (len counts) 1 :
        (let used u32 (as (at counts level) u32))
        (if (> used available) :
            (return false)
         :)
        (= available (* (- available used) 2))
        @doc """A single level can use at most 0xffff slots. So once
        0x20000 or more slots are available the number can no longer
        shrink and will never get back to zero.
        Returning here also keeps the doubling from overflowing."""
        (if (>= available 0x20000) :
            (return false)
         :))
    (return (== available 0)))


@doc """Build the canonical huffman decoding tables from per-symbol code lengths

lengths[s] is the code length in bits of symbol s, 0 means s is not used.

On success:
* counts[n] holds the number of symbols with code length n
  Note counts[0] is always 0
* symbols holds the used symbols ordered by code length and, for equal
  lengths, by increasing symbol value
* the highest symbol encountered is returned. Usually that value is
  (len lengths) - 1

BAD_TREE_ENCODING is returned if
* lengths has fewer than 2 or more than MAX_SYMBOLS entries
* a code length is too large to be used as an index into counts
* the counts are rejected by CountsAreFeasible
* symbols is too small to hold all used symbols
In these cases the contents of counts and symbols are unspecified.
"""
(fun @pub ComputeCountsAndSymbolsFromLengths [
        (param lengths (slice u16))
        (param counts (slice @mut u16))
        (param symbols (slice @mut u16))] u16 :
    (if (> (len lengths) MAX_SYMBOLS) : (return BAD_TREE_ENCODING) :)
    (if (< (len lengths) 2) : (return BAD_TREE_ENCODING) :)

    (for level 0 (len counts) 1 :
        (= (at counts level) 0))
    (let @mut last u16 0)
    (for i 0 (len lengths) 1 :
        (let bits auto (at lengths i))
        (if (!= bits 0) :
            @doc "reject lengths that would index past the end of counts"
            (if (>= (as bits uint) (len counts)) :
                (return BAD_TREE_ENCODING)
             :)
            (= last (as i u16))
            (+= (at counts bits) 1)
         :))
    (if (! (CountsAreFeasible [counts])) :
        (return BAD_TREE_ENCODING)
     :)

    @doc """accumulate counts to get offsets
    counts[i] becomes the number of symbols with length <= i"""
    (let @mut n u16 0)
    (for i 1 (len counts) 1 :
        (+= n (at counts i))
        (= (at counts i) n))

    @doc "n is now the number of used symbols, all of them must fit into symbols"
    (if (> (as n uint) (len symbols)) :
        (return BAD_TREE_ENCODING)
     :)

    @doc """fill in symbols
    counts[bits - 1] is the next free slot for a symbol of length bits
    and is advanced after each insertion"""
    (for i 0 (len lengths) 1 :
        (let bits auto (at lengths i))
        (if (!= bits 0) :
            (let offset auto (at counts (- bits 1)))
            (= (at symbols offset) (as i u16))
            (+= (at counts (- bits 1)) 1)
         :))

    @doc """de-accumulate to get back original count
    After the fill loop counts[i] holds the number of symbols with
    length <= i + 1. This includes counts[0] which now holds the number
    of 1-bit codes, so it seeds n1 and is then reset to 0.
    n0 is the value before de-accumulation of the element at index i-2
    n1 is the value before de-accumulation of the element at index i-1"""
    (let @mut n0 u16 0)
    (let @mut n1 u16 (at counts 0))
    (= (at counts 0) 0)
    (for i 1 (len counts) 1 :
        (let d auto (- n1 n0))
        (= n0 n1)
        (= n1 (at counts i))
        (= (at counts i) d))
    (return last))

)
97 changes: 97 additions & 0 deletions FrontEnd/Lib/huffman_test.cw
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
(module main [] :
(import test)
(import fmt)

(import huffman)


@doc r"""
Code lengths in bits for the eleven symbols A..K (entry 0 belongs to A).
In the drawing below the number at the end of a row is the code length
of the leaves named on that row.

*
/ \
/ \
/ \
/ \
/ \
/ \
/ \
/ \
/ \
/ \
/ \
* *
/ \ / \
/ \ / \
/ \ / \
/ \ / \
/ \ / \
* * * *
/ \ / \ / \ F 2
/ \ / \ / \
/ \ / \ / \
* * * * * *
/ \ / \ E G H I 3
* * * *
/ \ A B C 4
* *
/ \ D 5
* *
J K 6

"""
(global Tree1Length auto (array_val 11 u16 [
(index_val 4)
(index_val 4)
(index_val 4)
(index_val 5)
(index_val 3)
(index_val 2)
(index_val 3)
(index_val 3)
(index_val 3)
(index_val 6)
(index_val 6)
]))

@doc """Expected symbols table for Tree1Length: the symbols ordered by
increasing code length and, for equal lengths, by increasing symbol value"""
(global Tree1ExpectedSymbols auto (array_val 11 u16 [
(index_val (- 'F' 'A'))
(index_val (- 'E' 'A'))
(index_val (- 'G' 'A'))
(index_val (- 'H' 'A'))
(index_val (- 'I' 'A'))
(index_val (- 'A' 'A'))
(index_val (- 'B' 'A'))
(index_val (- 'C' 'A'))
(index_val (- 'D' 'A'))
(index_val (- 'J' 'A'))
(index_val (- 'K' 'A'))
]))

@doc """Expected counts table for Tree1Length: entry n is the number of
symbols with a code length of n bits"""
(global Tree1ExpectedCounts auto (array_val 7 u16 [
(index_val 0)
(index_val 0)
(index_val 1)
(index_val 4)
(index_val 3)
(index_val 1)
(index_val 2)
]))

@doc """Build the tables for Tree1Length and compare the returned highest
symbol, the symbols table and the counts table against the expected values"""
(fun test_tree_decoding [] void :
    (let @mut got_counts (array 7 u16))
    (let @mut got_symbols (array 11 u16))
    (let highest auto (huffman::ComputeCountsAndSymbolsFromLengths [
            Tree1Length
            got_counts
            got_symbols]))
    (test::AssertEq! highest 10_u16)
    (test::AssertSliceEq! got_symbols Tree1ExpectedSymbols)
    (test::AssertSliceEq! got_counts Tree1ExpectedCounts))

@doc "Test driver: runs all test functions, reports success and returns 0"
(fun @cdecl main [
        (param argc s32)
        (param argv (ptr (ptr u8)))] s32 :
    (stmt (test_tree_decoding []))
    @doc "all tests have run"
    (test::Success!)
    (return 0))
)
2 changes: 2 additions & 0 deletions FrontEnd/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ tests_lang_a32: \
$(DIR)/expr_sint_test.lang.a64.exe

tests_lib_x64: \
$(DIR)/huffman_test.lib.x64.exe \
$(DIR)/flate_test.lib.x64.exe \
$(DIR)/fmt_test.lib.x64.exe \
$(DIR)/string_test.lib.x64.exe \
Expand All @@ -83,6 +84,7 @@ tests_lib_x64: \
$(DIR)/bitstream_test.lib.x64.exe

tests_lib_a64: \
$(DIR)/huffman_test.lib.a64.exe \
$(DIR)/flate_test.lib.a64.exe \
$(DIR)/fmt_test.lib.a64.exe \
$(DIR)/string_test.lib.a64.exe \
Expand Down

0 comments on commit 831d9c1

Please sign in to comment.